In [None]:
# Install necessary packages
!pip install tensorflow matplotlib pandas scikit-learn

# Step 1: Import Required Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Step 2: Load the Dataset
# Example Dataset: https://archive.ics.uci.edu/ml/datasets/Beijing+PM2.5+Data

# The path must be a raw string: in an ordinary literal the "\U" of "C:\Users"
# begins a \UXXXXXXXX Unicode escape and the file fails to parse (SyntaxError).
DATA_PATH = r'C:\Users\Francis Musoke\Downloads\Air Quality.csv'
data = pd.read_csv(DATA_PATH)

# Step 3: Data Preprocessing
# Keep only the modelling columns, then discard every row that has a missing
# value in any of them.
# NOTE(review): these labels must match the CSV header exactly — confirm
# against the file that is actually loaded.
feature_columns = ['pm2.5', 'TEMP', 'PRES', 'DEWP', 'WSPM']
data = data.loc[:, feature_columns].dropna()

# Quick visual check of the target series, plotted against the frame's index.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(data['pm2.5'], label='PM2.5 Concentration')
ax.set_title("PM2.5 Concentration Over Time")
ax.set_xlabel("Time")
ax.set_ylabel("PM2.5")
ax.legend()
plt.show()

# Step 4: Normalize the Data
# One scaler is fitted on all selected columns at once, so column 0 of
# `scaled_data` is the scaled 'pm2.5' series.
# NOTE(review): the scaler is fitted on the full frame, before the train/test
# split made later in the script — confirm this is intended.
scaler = MinMaxScaler()
scaler.fit(data)
scaled_data = scaler.transform(data)

# Step 5: Prepare the Data for the LSTM Model
def create_dataset(data, time_steps=1):
    """Slice a 2-D array into sliding windows and next-step targets.

    For every start row ``i`` the input window is rows ``i .. i+time_steps-1``
    with the last column removed, and the target is column 0 of row
    ``i + time_steps``.

    Args:
        data: 2-D NumPy array (rows are consecutive observations). Column 0
            is used as the prediction target.
        time_steps: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. When at least one window fits,
        ``X`` has shape ``(n, time_steps, data.shape[1] - 1)`` and ``y`` has
        shape ``(n,)`` with ``n = len(data) - time_steps``. If ``data`` has
        ``time_steps`` rows or fewer, both arrays are empty.
    """
    # Every row from index `time_steps` onward is a valid target, so there are
    # exactly len(data) - time_steps windows (the earlier "- 1" dropped the
    # last one).
    n_windows = len(data) - time_steps

    X, y = [], []
    for start in range(n_windows):
        stop = start + time_steps
        # NOTE(review): ":-1" excludes the LAST column from the features while
        # the target column 0 stays in the window — confirm this is intended.
        X.append(data[start:stop, :-1])
        y.append(data[stop, 0])
    return np.array(X), np.array(y)

# Each sample is a window of 24 consecutive rows; the target is the row that
# follows it (24 hours -> next hour, if the rows are hourly).
time_steps = 24
X, y = create_dataset(scaled_data, time_steps)

# Split the data into training and testing sets: the first 80% of the windows
# train the model and the remainder is held out. Nothing is shuffled, so the
# original row order is preserved.
train_size = int(0.8 * len(X))
X_train, X_test = np.split(X, [train_size])
y_train, y_test = np.split(y, [train_size])

print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")

# Step 6: Build the LSTM Model
# Two stacked 50-unit LSTM layers, each followed by 20% dropout, feeding a
# single linear output unit.
window_shape = (X_train.shape[1], X_train.shape[2])  # (rows per window, features per row)
model = Sequential([
    # The first LSTM returns the full sequence so the second LSTM can consume it.
    LSTM(50, return_sequences=True, input_shape=window_shape),
    Dropout(0.2),
    # The second LSTM returns only its final output.
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(1),  # Output: PM2.5 prediction
])

model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()

# Step 7: Train the Model
# The held-out split is passed as validation data, so Keras reports its loss
# after every epoch.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_test, y_test),
    verbose=1,
)

# Step 8: Evaluate the Model
train_pred = model.predict(X_train)
test_pred = model.predict(X_test)


def _pm25_to_original_scale(scaled_values):
    """Map scaled PM2.5 values back to the units of the 'pm2.5' column.

    MinMaxScaler scales each column as ``X * scale_ + min_``; this inverts that
    relation for column 0 only (the 'pm2.5' column the scaler was fitted on).
    Working on the single column avoids rebuilding a full-width matrix for
    ``scaler.inverse_transform``: the previous hstack produced 4 columns
    (1 prediction + 3 window features) for a scaler fitted on 5, which fails
    with a shape error.

    Args:
        scaled_values: Array-like of scaled PM2.5 values, any shape.

    Returns:
        1-D NumPy array of values in the original PM2.5 units.
    """
    flat = np.asarray(scaled_values).reshape(-1)
    return (flat - scaler.min_[0]) / scaler.scale_[0]


# Inverse transform predictions and actual values to original scale
train_pred = _pm25_to_original_scale(train_pred)
test_pred = _pm25_to_original_scale(test_pred)
y_train_actual = _pm25_to_original_scale(y_train)
y_test_actual = _pm25_to_original_scale(y_test)

# Step 9: Calculate Error Metrics
# RMSE is computed on the un-scaled values, so it is in PM2.5 units.
train_rmse = np.sqrt(mean_squared_error(y_train_actual, train_pred))
test_rmse = np.sqrt(mean_squared_error(y_test_actual, test_pred))
print(f"Train RMSE: {train_rmse:.2f}")
print(f"Test RMSE: {test_rmse:.2f}")

# Step 10: Plot Predictions vs Actual Values
# Both series cover the held-out split and are drawn on the same axes, in
# original PM2.5 units, for a direct visual comparison.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test_actual, label='Actual PM2.5')
ax.plot(test_pred, label='Predicted PM2.5')
ax.set_title("PM2.5 Prediction vs Actual")
ax.set_xlabel("Time")
ax.set_ylabel("PM2.5")
ax.legend()
plt.show()
