In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from math import sqrt
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

In [2]:
# Load Data
data_1 = pd.read_csv('Data_bengaluru/combined_cleaned.csv')
data_1 = data_1[data_1['PM25'] < 200]


In [3]:
# Define train and test splits
X = data_1['PM25'].values
train, test = X[:140000], X[140000:]


In [4]:
# ARIMA Model in PyTorch
class ARIMA(nn.Module):
    def __init__(self, p, d, q):
        super(ARIMA, self).__init__()
        self.p = p
        self.d = d
        self.q = q
        self.ar_weights = nn.Parameter(torch.randn(p, dtype=torch.float32))
        self.ma_weights = nn.Parameter(torch.randn(q, dtype=torch.float32))
        self.bias = nn.Parameter(torch.randn(1, dtype=torch.float32))

    def forward(self, history, lag):
        # Compute AR component
        ar_component = torch.sum(self.ar_weights * history[-self.p:])
        # Compute MA component
        ma_component = torch.sum(self.ma_weights * lag[-self.q:])
        # Combine with bias
        return self.bias + ar_component + ma_component


In [5]:
# Initialize the ARIMA model
p, d, q = 3, 1, 5
model = ARIMA(p, d, q)
optimizer = optim.Adam(model.parameters(), lr=0.01)
criterion = nn.MSELoss()


In [6]:
# Prepare training data (differencing for integration order `d`)
def difference(data, order):
    diff = []
    for i in range(order, len(data)):
        diff.append(data[i] - data[i - order])
    return torch.tensor(diff, dtype=torch.float32)

train_diff = difference(train, d)
test_diff = difference(test, d)

In [None]:
# Training Loop
history = train_diff[:p].tolist()
lag = torch.zeros(q, dtype=torch.float32)
predictions = []

for epoch in range(100):  # Train for 10 epochs
    total_loss = 0.0
    for t in range(len(train_diff) - p):
        optimizer.zero_grad()

        # Prepare input
        input_history = torch.tensor(history[-p:], dtype=torch.float32)
        input_lag = torch.tensor(lag, dtype=torch.float32)

        # Forward pass
        yhat = model(input_history, input_lag)
        loss = criterion(yhat, train_diff[t])

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Update history and lag
        obs = train_diff[t].item()
        history.append(obs)
        lag = torch.cat((lag[1:], torch.tensor([obs])))

        total_loss += loss.item()
    print(f'Epoch {epoch + 1}, Loss: {total_loss:.4f}')


  input_lag = torch.tensor(lag, dtype=torch.float32)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1, Loss: 24982269.4223


In [None]:
# Prediction
history = train_diff[-p:].tolist()
lag = torch.zeros(q, dtype=torch.float32)
for t in range(len(test_diff)):
    with torch.no_grad():
        input_history = torch.tensor(history[-p:], dtype=torch.float32)
        input_lag = torch.tensor(lag, dtype=torch.float32)

        yhat = model(input_history, input_lag).item()
        predictions.append(yhat)

        obs = test_diff[t].item()
        history.append(obs)
        lag = torch.cat((lag[1:], torch.tensor([obs])))


In [None]:
# Reverse differencing to restore actual values
def reverse_difference(original, diff, order):
    restored = list(original[:order])
    for i in range(len(diff)):
        restored.append(restored[-1] + diff[i])
    return restored

predictions = reverse_difference(test, predictions, d)


In [None]:
# Evaluation
error = sqrt(mean_squared_error(test, predictions))
print(f'Test RMSE: {error:.3f}')


In [None]:
# Visualization
plt.figure(figsize=(17, 8))
plt.plot(test[100:400], label='True Data', color='green')
plt.plot(predictions[100:400], label='Prediction', color='red')
plt.xlabel("Index", fontsize=20)
plt.ylabel("PM2.5 Concentration $(mu gm^{-3})$", fontsize=20)
plt.legend()
plt.savefig("arima_pm25_torch.pdf")
plt.show()