In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Load the data
data = pd.read_csv('updateddata.csv')


# Assuming NO2 and SO2 are columns in the dataset, we'll select them
no2_data = data['NO2'].values
so2_data = data['SO2'].values

In [2]:
# Scaling the data for LSTM
scaler = MinMaxScaler(feature_range=(0, 1))
no2_data_scaled = scaler.fit_transform(no2_data.reshape(-1, 1))
so2_data_scaled = scaler.fit_transform(so2_data.reshape(-1, 1))

In [3]:
def create_dataset(dataset, time_step=1):
    X, Y = [], []
    for i in range(len(dataset) - time_step - 1):
        a = dataset[i:(i + time_step), 0]
        X.append(a)
        Y.append(dataset[i + time_step, 0])
    return np.array(X), np.array(Y)

time_step = 59  # Using 59 months to predict the 60th month

# NO2 Data
X_no2, y_no2 = create_dataset(no2_data_scaled, time_step)
X_no2 = X_no2.reshape(X_no2.shape[0], X_no2.shape[1], 1)

In [4]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

def build_model():
    model = Sequential()
    model.add(LSTM(50, return_sequences=True, input_shape=(time_step, 1)))
    model.add(LSTM(50, return_sequences=False))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

model_no2 = build_model()

  super().__init__(**kwargs)


In [5]:
# Train NO2 model
model_no2.fit(X_no2, y_no2, epochs=50, batch_size=1, verbose=1)

Epoch 1/50
[1m1766/1766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 27ms/step - loss: 0.0072
Epoch 2/50
[1m1766/1766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 27ms/step - loss: 0.0068
Epoch 3/50
[1m1766/1766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 14ms/step - loss: 0.0061
Epoch 4/50
[1m1766/1766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 13ms/step - loss: 0.0060
Epoch 5/50
[1m1766/1766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 12ms/step - loss: 0.0059
Epoch 6/50
[1m1766/1766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 13ms/step - loss: 0.0056
Epoch 7/50
[1m1766/1766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 12ms/step - loss: 0.0062
Epoch 8/50
[1m1766/1766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0058
Epoch 9/50
[1m1766/1766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 10ms/step - loss: 0.0074
Epoch 10/50
[1m1766/1766[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x19397a458d0>

In [6]:
# Predict NO2
no2_pred = model_no2.predict(X_no2)

[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step


In [7]:
# Inverse transform the predictions
no2_pred = scaler.inverse_transform(no2_pred)
y_no2 = scaler.inverse_transform(y_no2.reshape(-1, 1))

In [8]:
# Calculate RMSE
from sklearn.metrics import mean_squared_error

rmse_no2 = np.sqrt(mean_squared_error(y_no2, no2_pred))

print(f'NO2 RMSE: {rmse_no2}')

NO2 RMSE: 0.0003277267447792922


In [9]:
results = []

# Training on different subsets and predicting
for months in range(1, 6):
    time_step = 59 - months + 1  # Adjust time step
    
    # Prepare the data
    X_no2, y_no2 = create_dataset(no2_data_scaled, time_step)
    X_no2 = X_no2.reshape(X_no2.shape[0], X_no2.shape[1], 1)
    
    
    # Build and train models
    model_no2 = build_model()
    model_no2.fit(X_no2, y_no2, epochs=50, batch_size=1, verbose=1)
    
    # Predictions
    no2_pred = model_no2.predict(X_no2)
    no2_pred = scaler.inverse_transform(no2_pred)
    y_no2 = scaler.inverse_transform(y_no2.reshape(-1, 1))
    
    # RMSE
    rmse_no2 = np.sqrt(mean_squared_error(y_no2, no2_pred))

    # Store results
    results.append({'Months Trained': 60 - months, 'Months Predicted': months, 'NO2 RMSE': rmse_no2})

results_df = pd.DataFrame(results)
print(results_df)

Epoch 1/50


  super().__init__(**kwargs)


[1m1766/1766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 10ms/step - loss: 0.0068
Epoch 2/50
[1m1766/1766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0067
Epoch 3/50
[1m1766/1766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0058
Epoch 4/50
[1m1766/1766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0084
Epoch 5/50
[1m1766/1766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0064
Epoch 6/50
[1m1766/1766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0068
Epoch 7/50
[1m1766/1766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0059
Epoch 8/50
[1m1766/1766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0067
Epoch 9/50
[1m1766/1766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 10ms/step - loss: 0.0077
Epoch 10/50
[1m1766/1766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

  super().__init__(**kwargs)


[1m1767/1767[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 10ms/step - loss: 0.0088
Epoch 2/50
[1m1767/1767[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 10ms/step - loss: 0.0082
Epoch 3/50
[1m1767/1767[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 11ms/step - loss: 0.0073
Epoch 4/50
[1m1767/1767[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0066
Epoch 5/50
[1m1767/1767[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0050
Epoch 6/50
[1m1767/1767[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0065
Epoch 7/50
[1m1767/1767[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0061
Epoch 8/50
[1m1767/1767[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0070
Epoch 9/50
[1m1767/1767[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0058
Epoch 10/50
[1m1767/1767[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

  super().__init__(**kwargs)


[1m1768/1768[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 10ms/step - loss: 0.0074
Epoch 2/50
[1m1768/1768[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 10ms/step - loss: 0.0069
Epoch 3/50
[1m1768/1768[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0056
Epoch 4/50
[1m1768/1768[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 10ms/step - loss: 0.0067
Epoch 5/50
[1m1768/1768[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 10ms/step - loss: 0.0059
Epoch 6/50
[1m1768/1768[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 10ms/step - loss: 0.0063
Epoch 7/50
[1m1768/1768[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0069
Epoch 8/50
[1m1768/1768[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0067
Epoch 9/50
[1m1768/1768[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0064
Epoch 10/50
[1m1768/1768[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

  super().__init__(**kwargs)


[1m1769/1769[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 10ms/step - loss: 0.0074
Epoch 2/50
[1m1769/1769[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 10ms/step - loss: 0.0073
Epoch 3/50
[1m1769/1769[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 10ms/step - loss: 0.0058
Epoch 4/50
[1m1769/1769[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 10ms/step - loss: 0.0055
Epoch 5/50
[1m1769/1769[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 10ms/step - loss: 0.0058
Epoch 6/50
[1m1769/1769[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 10ms/step - loss: 0.0067
Epoch 7/50
[1m1769/1769[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 10ms/step - loss: 0.0061
Epoch 8/50
[1m1769/1769[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 10ms/step - loss: 0.0068
Epoch 9/50
[1m1769/1769[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 10ms/step - loss: 0.0052
Epoch 10/50
[1m1769/1769[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

  super().__init__(**kwargs)


Epoch 1/50
[1m1770/1770[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 10ms/step - loss: 0.0076
Epoch 2/50
[1m1770/1770[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 10ms/step - loss: 0.0065
Epoch 3/50
[1m1770/1770[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 10ms/step - loss: 0.0066
Epoch 4/50
[1m1770/1770[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 10ms/step - loss: 0.0067
Epoch 5/50
[1m1770/1770[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0086
Epoch 6/50
[1m1770/1770[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0078
Epoch 7/50
[1m1770/1770[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0068
Epoch 8/50
[1m1770/1770[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0057
Epoch 9/50
[1m1770/1770[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - loss: 0.0074
Epoch 10/50
[1m1770/1770[0m [32m━━━━━━━━━━━━━━━━━━━━

In [10]:
from sklearn.metrics import mean_absolute_error, r2_score, explained_variance_score
# Mean Absolute error
mae = mean_absolute_error(y_no2, no2_pred)
r2=r2_score(y_no2, no2_pred)
varience_score=explained_variance_score(y_no2, no2_pred)
print(mae)
print(r2)
print(varience_score)

0.00020891239269139888
0.2492509366332728
0.24975055056131001


In [13]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score, explained_variance_score, mean_squared_error
from keras.models import Sequential
from keras.layers import LSTM, Dense


# Select columns related to NO2 and SO2
no2_data = data['NO2'].values.reshape(-1, 1)
so2_data = data['SO2'].values.reshape(-1, 1)

# Normalize the data
scaler_no2 = MinMaxScaler(feature_range=(0, 1))
scaler_so2 = MinMaxScaler(feature_range=(0, 1))

no2_data_scaled = scaler_no2.fit_transform(no2_data)
so2_data_scaled = scaler_so2.fit_transform(so2_data)

# Function to create dataset
def create_dataset(dataset, time_step):
    X, Y = [], []
    for i in range(len(dataset) - time_step - 1):
        a = dataset[i:(i + time_step), 0]
        X.append(a)
        Y.append(dataset[i + time_step, 0])
    return np.array(X), np.array(Y)

# Build LSTM model
def build_model(time_step):
    model = Sequential()
    model.add(LSTM(50, return_sequences=True, input_shape=(time_step, 1)))
    model.add(LSTM(50, return_sequences=False))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Set training and prediction periods
train_months = 50
predict_months = 10
time_step = train_months

# Prepare the data
X_no2, y_no2 = create_dataset(no2_data_scaled, time_step)
X_no2 = X_no2.reshape(X_no2.shape[0], X_no2.shape[1], 1)

X_so2, y_so2 = create_dataset(so2_data_scaled, time_step)
X_so2 = X_so2.reshape(X_so2.shape[0], X_so2.shape[1], 1)

# Build and train models
model_no2 = build_model(time_step)
model_no2.fit(X_no2, y_no2, epochs=50, batch_size=1, verbose=1)

model_so2 = build_model(time_step)
model_so2.fit(X_so2, y_so2, epochs=50, batch_size=1, verbose=1)

# Predictions
no2_pred = model_no2.predict(X_no2)
no2_pred = scaler_no2.inverse_transform(no2_pred)
y_no2 = scaler_no2.inverse_transform(y_no2.reshape(-1, 1))

so2_pred = model_so2.predict(X_so2)
so2_pred = scaler_so2.inverse_transform(so2_pred)
y_so2 = scaler_so2.inverse_transform(y_so2.reshape(-1, 1))

# Calculate metrics for NO2
rmse_no2 = np.sqrt(mean_squared_error(y_no2, no2_pred))
mae_no2 = mean_absolute_error(y_no2, no2_pred)
r2_no2 = r2_score(y_no2, no2_pred)

# Calculate metrics for SO2
rmse_so2 = np.sqrt(mean_squared_error(y_so2, so2_pred))
mae_so2 = mean_absolute_error(y_so2, so2_pred)
r2_so2 = r2_score(y_so2, so2_pred)

# Print results
results = {
    'Train Months': train_months,
    'Predict Months': predict_months,
    'NO2 RMSE': rmse_no2,
    'NO2 MAE': mae_no2,
    'NO2 R²': r2_no2,
    'SO2 RMSE': rmse_so2,
    'SO2 MAE': mae_so2,
    'SO2 R²': r2_so2
}

results_df = pd.DataFrame([results])
print(results_df)


Epoch 1/50


  super().__init__(**kwargs)


[1m1775/1775[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 20ms/step - loss: 0.0074
Epoch 2/50
[1m1775/1775[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 19ms/step - loss: 0.0068
Epoch 3/50
[1m1775/1775[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 19ms/step - loss: 0.0062
Epoch 4/50
[1m1775/1775[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 20ms/step - loss: 0.0061
Epoch 5/50
[1m1775/1775[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 20ms/step - loss: 0.0072
Epoch 6/50
[1m1775/1775[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 19ms/step - loss: 0.0069
Epoch 7/50
[1m1775/1775[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 18ms/step - loss: 0.0059
Epoch 8/50
[1m1775/1775[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 19ms/step - loss: 0.0062
Epoch 9/50
[1m1775/1775[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 19ms/step - loss: 0.0056
Epoch 10/50
[1m1775/1775[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[