In [1]:
import glob as gb
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from ipywidgets import widgets, interactive
from IPython.display import display
import pandas as pd
from sklearn.impute import KNNImputer
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima.model import ARIMA
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense,LSTM,Conv1D,MaxPooling1D,Flatten
from prophet import Prophet
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor

In [2]:
df=pd.read_csv("Ankleshwar.csv")
df1=df.iloc[:,1:]
df1

Unnamed: 0,Date,pm25
0,04-02-2019 19:00,70.52
1,04-02-2019 20:00,61.39
2,04-02-2019 21:00,59.94
3,04-02-2019 22:00,76.44
4,04-02-2019 23:00,98.54
...,...,...
33531,02-12-2022 22:00,115.37
33532,02-12-2022 23:00,104.06
33533,03-12-2022 00:00,110.59
33534,03-12-2022 01:00,122.87


In [3]:
# --------------------------------------------------
# STEP 2: Scale and Train ARIMA and SARIMA
# --------------------------------------------------
scaler = MinMaxScaler()
df1['pm25_scaled'] = scaler.fit_transform(df1[['pm25']])
series = df1['pm25_scaled'].dropna().reset_index(drop=True)

arima_model = ARIMA(series, order=(3,1,2)).fit()
sarima_model = SARIMAX(series, order=(2,1,2), seasonal_order=(1,1,1,24)).fit()

# --------------------------------------------------
# STEP 3: Train Transformer
# --------------------------------------------------
def create_sequences(data, window=24):
    X, y = [], []
    for i in range(len(data) - window):
        if not np.isnan(data[i + window]):
            seq = data[i:i+window]
            if not np.any(np.isnan(seq)):
                X.append(seq)
                y.append(data[i + window])
    return np.array(X), np.array(y)

pm25_array = df1['pm25_scaled'].values
window = 24
X, y = create_sequences(pm25_array, window)
X = X.reshape((X.shape[0], window, 1))

def transformer_block(inputs, head_size, num_heads, ff_dim, dropout=0.1):
    x = layers.LayerNormalization()(inputs)
    x = layers.MultiHeadAttention(num_heads=num_heads, key_dim=head_size, dropout=dropout)(x, x)
    x = layers.Dropout(dropout)(x)
    x = layers.Add()([x, inputs])
    x = layers.LayerNormalization()(x)
    x = layers.Dense(ff_dim, activation="relu")(x)
    x = layers.Dropout(dropout)(x)
    return layers.Add()([x, inputs])

def build_transformer(input_shape):
    inputs = layers.Input(shape=input_shape)
    x = transformer_block(inputs, 64, 2, 64)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(32, activation='relu')(x)
    outputs = layers.Dense(1)(x)
    return models.Model(inputs, outputs)

transformer = build_transformer((window, 1))
transformer.compile(optimizer='adam', loss='mse')
transformer.fit(X, y, epochs=20, batch_size=32, verbose=1)

# --------------------------------------------------
# STEP 4: Impute Missing Values
# --------------------------------------------------
filled = df1['pm25_scaled'].copy()

for i in range(window, len(filled)):
    if np.isnan(filled[i]):
        context = filled[i - window:i]
        if not np.any(np.isnan(context)):
            input_seq = np.array(context).reshape((1, window, 1))

            pred_arima = arima_model.predict(start=i, end=i).iloc[0]
            pred_sarima = sarima_model.predict(start=i, end=i).iloc[0]
            pred_transformer = transformer.predict(input_seq, verbose=0)[0, 0]

            # Weighted ensemble
            combined_pred = (0.3 * pred_arima + 0.3 * pred_sarima + 0.4 * pred_transformer)
            filled[i] = combined_pred

# --------------------------------------------------
# STEP 5: Inverse Transform and Save
# --------------------------------------------------
df1['pm25_hybrid_arima_sarima_transformer'] = scaler.inverse_transform(filled.values.reshape(-1, 1))



Epoch 1/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 6.3166e-04
Epoch 2/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 6.1505e-04
Epoch 3/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 6.4907e-04
Epoch 4/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 6.0780e-04
Epoch 5/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 5.0971e-04
Epoch 6/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 6.1341e-04
Epoch 7/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 6.5219e-04
Epoch 8/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 5.4400e-04
Epoch 9/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 6.4398e-04
Epoch 10/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[