In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, GRU, SimpleRNN
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.metrics import RootMeanSquaredError,R2Score, MeanAbsoluteError
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf

In [2]:
df = pd.read_csv('clean.csv',parse_dates=['datetime'])
df.drop(columns = ['Unnamed: 0'], inplace =True)

In [3]:
# Normalizing the data

feature_cols = df.drop(columns=['datetime'])

scaler = MinMaxScaler()

normalized_values = scaler.fit_transform(feature_cols)

df_normalized = pd.DataFrame(normalized_values, columns=feature_cols.columns)
df_normalized.head()


Unnamed: 0,CO(GT),PT08.S1(CO),C6H6(GT),PT08.S2(NMHC),NOx(GT),PT08.S3(NOx),NO2(GT),PT08.S4(NO2),PT08.S5(O3),T,RH,AH
0,0.211864,0.511845,0.185535,0.362097,0.111036,0.310885,0.328402,0.51304,0.454822,0.333333,0.499371,0.280066
1,0.161017,0.463029,0.146226,0.312398,0.068382,0.360864,0.266272,0.453237,0.326238,0.326882,0.484277,0.264282
2,0.177966,0.541996,0.139937,0.303659,0.087339,0.346463,0.331361,0.451439,0.370547,0.296774,0.563522,0.276352
3,0.177966,0.523331,0.143082,0.308575,0.115098,0.326133,0.35503,0.464478,0.426586,0.277419,0.638994,0.29419
4,0.127119,0.448672,0.100629,0.247406,0.087339,0.373994,0.337278,0.422212,0.386186,0.28172,0.633962,0.295216


In [4]:
full_features = df_normalized.values

In [5]:
def create_sequences_multivariate(data, lookback):
    X, y = [], []
    for i in range(len(data) - lookback):
        X.append(data[i:i + lookback, :])
        y.append(data[i + lookback, :])  
    return np.array(X), np.array(y)

In [6]:
full_data,target_data = create_sequences_multivariate(full_features,24)
full_data.shape,target_data.shape

((9333, 24, 12), (9333, 12))

In [7]:
model_final = Sequential([
    LSTM(128, activation='relu', input_shape=(24, 12)),
    Dense(64, activation='relu'),
    Dense(12)  
])

# Compile the model
model_final.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=0.001), loss='mse', metrics=[RootMeanSquaredError(),R2Score(),MeanAbsoluteError()])

# Train the model
history_lstm_final = model_final.fit(full_data, target_data, epochs=20, batch_size=32, verbose=1)

Epoch 1/20


  super().__init__(**kwargs)


[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - loss: 0.0237 - mean_absolute_error: 0.1080 - r2_score: -0.1054 - root_mean_squared_error: 0.1475
Epoch 2/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - loss: 0.0044 - mean_absolute_error: 0.0480 - r2_score: 0.7775 - root_mean_squared_error: 0.0660
Epoch 3/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - loss: 0.0037 - mean_absolute_error: 0.0436 - r2_score: 0.8140 - root_mean_squared_error: 0.0604
Epoch 4/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - loss: 0.0034 - mean_absolute_error: 0.0418 - r2_score: 0.8254 - root_mean_squared_error: 0.0586
Epoch 5/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - loss: 0.0032 - mean_absolute_error: 0.0403 - r2_score: 0.8340 - root_mean_squared_error: 0.0565
Epoch 6/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/ste

- I tried different values of epochs,batch_size,learning rate,number of layers, nodes in each layers and the above configuration gave the best results.


In [37]:
model_final.save('final_lstm_model.keras')

Unnamed: 0,CO(GT),PT08.S1(CO),C6H6(GT),PT08.S2(NMHC),NOx(GT),PT08.S3(NOx),NO2(GT),PT08.S4(NO2),PT08.S5(O3),T,RH,AH
0,1.701468,999.685364,8.095629,896.913818,190.05249,764.319519,132.35881,1068.100586,721.455017,28.699749,11.720532,0.531706
1,1.675672,1022.055969,8.593225,902.765625,190.703644,768.839539,132.638641,1095.693359,746.137817,28.877226,12.428581,0.574789
2,1.877954,1065.333496,10.023025,944.852722,216.009705,728.923035,142.69487,1163.437988,849.260437,28.703487,14.173519,0.63199
3,2.437061,1152.130981,12.863725,1051.777222,274.976807,652.527222,162.898529,1279.423218,1056.112305,27.316498,17.923574,0.695719
4,3.05928,1241.721069,15.441494,1151.030396,328.665466,597.817017,182.912827,1402.939331,1299.695435,24.56698,23.966228,0.772307
5,3.333701,1282.110962,15.511293,1171.074341,358.502899,575.200867,194.596451,1437.176025,1456.183716,21.559021,30.812418,0.827816
6,2.975811,1244.270874,12.566154,1076.862427,329.185089,603.05719,185.223877,1360.612915,1429.459473,19.18627,37.265171,0.875887
7,2.361332,1179.297241,9.250385,949.955261,267.627716,646.035889,165.121979,1271.550781,1302.135132,17.715708,42.774139,0.920317
8,1.888164,1124.384277,6.724687,846.342651,218.759537,702.922363,147.276978,1210.843994,1166.346436,16.990179,47.786041,0.959125
9,1.595312,1079.502197,4.969759,772.177979,185.875061,768.914673,131.730774,1178.225464,1048.602783,16.552807,52.313118,0.995493


In [8]:

# Prediction function.

def predict_future_vals2(future_steps):
   
    last_sequence = df_normalized.tail(24).to_numpy()
    future_predictions = []
    
    
    for a in range(future_steps):
        # Reshape to 3D for model input: (1, timesteps, num_features)
        last_sequence_reshaped = last_sequence[np.newaxis, :, :]
    
        # Predict the next time step
        next_prediction = model_final.predict(last_sequence_reshaped, verbose=0)
    
       # Add the predicted value to the results
        future_predictions.append(next_prediction.flatten())
    
        # Shift the input sequence by adding the new prediction and removing the oldest observation
        last_sequence = np.vstack([last_sequence[1:], next_prediction])
    
    
    future_predictions = np.array(future_predictions)

    col_name = df_normalized.columns
    df_pred = pd.DataFrame(scaler.inverse_transform(future_predictions),columns=col_name)
    return df_pred

    

In [9]:
predict_future_vals2(10)

Unnamed: 0,CO(GT),PT08.S1(CO),C6H6(GT),PT08.S2(NMHC),NOx(GT),PT08.S3(NOx),NO2(GT),PT08.S4(NO2),PT08.S5(O3),T,RH,AH
0,2.587135,1050.086792,12.131021,1053.112305,278.666321,772.925171,158.680603,1198.363037,914.440125,30.029268,12.781702,0.513243
1,2.792143,1069.411865,12.681636,1071.356567,311.432098,775.786133,165.119003,1215.897827,980.665955,30.057627,12.373153,0.512185
2,3.32602,1122.42041,15.873676,1160.710815,373.005524,735.148193,181.004028,1322.829712,1133.657715,28.751713,13.96735,0.534945
3,4.065794,1155.695312,18.791346,1239.394409,446.230194,679.927979,194.521423,1420.196777,1313.03186,25.644115,18.669231,0.574302
4,4.194112,1149.552124,18.228504,1210.708008,475.467499,652.088684,199.105423,1411.634155,1404.261108,21.726015,23.721498,0.612243
5,3.184861,1045.875,11.482675,1023.097961,416.410797,723.080322,182.245071,1221.267456,1278.805908,18.805088,29.831713,0.64694
6,2.152197,940.309204,6.097339,855.534973,319.88562,809.764648,154.974182,1080.463745,1103.323975,16.629881,36.068573,0.658936
7,1.686832,891.365234,4.200397,768.779297,251.309036,875.115479,132.274704,1017.539307,1006.123718,14.762623,40.863594,0.665064
8,1.409122,855.220581,3.430373,699.497314,210.954361,960.473816,113.960197,983.919739,930.018005,13.226983,44.52972,0.669255
9,1.223116,821.022217,2.711929,638.306152,176.392609,1046.113647,95.772758,956.074524,853.164978,11.876616,47.570538,0.673641
