<a href="https://colab.research.google.com/github/shyakx/Air-Quality-Forecasting/blob/main/Deeper_Stacked_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Architecture 1: Single-layer LSTM with a dense regression head (not bidirectional)

from math import sqrt

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout, Input
from tensorflow.keras.models import Sequential

# Column roles used throughout the notebook.
features = ['TEMP', 'PRES', 'Iws', 'hour', 'dayofweek', 'month']
target = 'pm2.5'

# Read the three competition CSVs from the Colab working directory.
train_df = pd.read_csv('/content/train.csv')
test_df = pd.read_csv('/content/test.csv')
sample_submission = pd.read_csv('/content/sample_submission.csv')

# Rows without a pm2.5 label cannot serve as supervised training examples.
train_df.dropna(subset=[target], inplace=True)

# Parse the timestamp column and derive calendar features on both splits.
for df in (train_df, test_df):
    df['datetime'] = pd.to_datetime(df['datetime'])
    for calendar_field in ('hour', 'dayofweek', 'month'):
        df[calendar_field] = getattr(df['datetime'].dt, calendar_field)

# Separate scalers so predictions can later be mapped back to pm2.5 units
# with target_scaler.inverse_transform.
feature_scaler = MinMaxScaler()
target_scaler = MinMaxScaler()

# Both scalers are fitted on the training data only.
scaled_features = feature_scaler.fit_transform(train_df[features])
scaled_target = target_scaler.fit_transform(train_df[[target]])

# Re-assemble the scaled arrays into one frame (fresh 0..n-1 index).
scaled_df = pd.DataFrame(scaled_features, columns=features)
scaled_df[target] = scaled_target
# Sequence creation
def create_sequences(data, target_column, window_size, feature_columns=None):
    """Build sliding-window samples for sequence-to-one regression.

    Sample ``i`` consists of the feature rows ``i .. i + window_size - 1`` and
    is paired with the target value found at row ``i + window_size``.

    Args:
        data: DataFrame holding the feature columns and ``target_column``.
        target_column: Name of the column to predict.
        window_size: Number of consecutive rows in each input window.
        feature_columns: Columns used as model inputs. When omitted, the
            module-level ``features`` list is used (the original behaviour).

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape
        ``(n_windows, window_size, n_features)`` and ``y`` has shape
        ``(n_windows,)``, with ``n_windows = max(len(data) - window_size, 0)``.

    Raises:
        ValueError: If ``window_size`` is negative.
    """
    if window_size < 0:
        raise ValueError(f"window_size must be non-negative, got {window_size}")
    if feature_columns is None:
        feature_columns = features
    feature_columns = list(feature_columns)

    # Convert once to NumPy: slicing arrays avoids two pandas .iloc lookups
    # per window, which dominate the runtime on long series.
    feature_values = data[feature_columns].to_numpy()
    target_values = data[target_column].to_numpy()

    n_windows = len(data) - window_size
    if n_windows <= 0:
        # Not enough rows for a single window: return correctly shaped empties
        # so downstream shape-based code (e.g. X.shape[1]) still works.
        empty_X = np.empty((0, window_size, len(feature_columns)),
                           dtype=feature_values.dtype)
        empty_y = np.empty((0,), dtype=target_values.dtype)
        return empty_X, empty_y

    X = np.stack([feature_values[start:start + window_size]
                  for start in range(n_windows)])
    # Copy so the returned labels never alias the DataFrame's own buffer.
    y = target_values[window_size:].copy()
    return X, y

# Each sample is a window of `window_size` consecutive rows of features.
window_size = 48
X, y = create_sequences(scaled_df, 'pm2.5', window_size=window_size)

# shuffle=False keeps row order, so the final 10% of windows form the
# validation set instead of a random sample.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, shuffle=False)

# Model: one LSTM layer followed by a small dense regression head.
# An explicit Input layer replaces the `input_shape=` argument on the first
# layer, which newer Keras versions warn about for Sequential models.
model = Sequential([
    Input(shape=(X_train.shape[1], X_train.shape[2])),
    LSTM(128),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')

# Stop once val_loss has not improved for 10 epochs and roll back to the
# best weights seen.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Training
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=64,
    validation_data=(X_val, y_val),
    callbacks=[early_stop],
)

# Evaluation: score the hold-out windows after undoing the target scaling,
# so the RMSE is reported on the original pm2.5 scale.
y_val_original = target_scaler.inverse_transform(y_val.reshape(-1, 1))
val_preds = model.predict(X_val)
val_preds_original = target_scaler.inverse_transform(val_preds)

validation_mse = mean_squared_error(y_val_original, val_preds_original)
rmse = sqrt(validation_mse)
print(f'Validation RMSE: {rmse:.2f}')

# Test predictions
test_scaled = feature_scaler.transform(test_df[features])
test_scaled_df = pd.DataFrame(test_scaled, columns=features)

# The model predicts the row that FOLLOWS a window of `window_size` rows, so
# every test row needs the `window_size` rows that precede it. Previously the
# windows were built from the test set alone, which left the predictions
# shifted by `window_size` rows and the tail padded with a constant. Prepending
# the most recent training rows gives each test row its own aligned window.
# NOTE(review): assumes test.csv continues chronologically right after the
# last row of train.csv — confirm against the dataset description.
history_scaled = feature_scaler.transform(train_df[features].tail(window_size))
if len(history_scaled) < window_size:
    raise ValueError(
        f"Need at least {window_size} training rows as history, "
        f"got {len(history_scaled)}"
    )
combined_scaled = np.vstack([history_scaled, test_scaled])

# Window i covers combined rows i .. i + window_size - 1, i.e. exactly the
# window_size rows preceding test row i.
X_test = np.stack([
    combined_scaled[i:i + window_size] for i in range(len(test_scaled))
])

test_preds_scaled = model.predict(X_test)
test_preds = target_scaler.inverse_transform(test_preds_scaled)

# Submission
if len(test_preds) != len(sample_submission):
    raise ValueError(
        f"Prediction count ({len(test_preds)}) does not match the number of "
        f"submission rows ({len(sample_submission)})"
    )
submission = sample_submission.copy()
submission['pm2.5'] = test_preds.flatten()

submission_path = '/content/submissionB.csv'
submission.to_csv(submission_path, index=False)
# Report the path that was actually written (the old message named a different file).
print(f"✅ Submission file saved as {submission_path}")

  super().__init__(**kwargs)


Epoch 1/50
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 77ms/step - loss: 0.0088 - val_loss: 0.0058
Epoch 2/50
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 77ms/step - loss: 0.0067 - val_loss: 0.0048
Epoch 3/50
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 82ms/step - loss: 0.0064 - val_loss: 0.0046
Epoch 4/50
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 78ms/step - loss: 0.0061 - val_loss: 0.0045
Epoch 5/50
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 77ms/step - loss: 0.0058 - val_loss: 0.0046
Epoch 6/50
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 76ms/step - loss: 0.0057 - val_loss: 0.0046
Epoch 7/50
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 82ms/step - loss: 0.0057 - val_loss: 0.0043
Epoch 8/50
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 80ms/step - loss: 0.0056 - val_loss: 0.0043
Epoch 9/50
[1m404/404[