In [None]:
# Connect to Google Drive for necessary files like dataset
from google.colab import drive
drive.mount('/content/drive')

In [2]:
# Important variables. This may change as environment changes
dataset_path = "/content/drive/MyDrive/airquality_aam_aca/data/"

In [14]:
import pandas as pd

# Load the dataset into the program
df = pd.read_csv(dataset_path + "delhi.csv", parse_dates=True)

# As the records are collected on daily basis, the index is the date of collection of record
df.set_index('date')

print(df.head())

         date   PM25
0  2014-12-10  231.0
1  2014-12-11  201.0
2  2014-12-12  237.0
3  2014-12-13  295.0
4  2014-12-14  185.0


In [15]:
# Extract the series, calculate and subtract mean from it
import numpy as np

# Calculate the mean of whole series to detrend the data
mean = np.mean(df.PM25.values)

# Following line of code is two step process
# i) Subtract mean from each record of dataframe ii) round off each record to 2 precisions
data = np.array(list(map(lambda x: round(x, 2), df.PM25.values - mean)))

print('===== Original series =====')
print(df.PM25.values[:10])

print('===== Modified series =====')
print(data[:10])

===== Original series =====
[231. 201. 237. 295. 185. 160. 184. 216. 239. 336.]
===== Modified series =====
[ 60.05  30.05  66.05 124.05  14.05 -10.95  13.05  45.05  68.05 165.05]


In [16]:
# Prepare the supervised dataset and reshape it for LSTM model
# The utility.py must be uploaded 
from utility import sequence_to_table

# sequence_to_table method converts a list or array into tabular format
data = sequence_to_table(data, look_back=30)

# divide data into feature and target
X = data.drop(columns=['next']).values  # column 'next' is expected outcome of forecast so not a part of features to be trained
y = data.next.values

# first reshape the data to make it compatible for LSTM
X_reshaped = X.reshape((X.shape[0], 1, X.shape[1]))

LSTM Model
------

In [17]:
# Proposed LSTM model

import tensorflow as tf
from keras.models import Sequential
tf.random.set_seed(42)

model = Sequential(name="Proposed_LSTM")
model.add(tf.keras.layers.LSTM(units=512, 
                              activation='relu', 
                              input_shape=(1, X_reshaped.shape[2]), return_sequences=True, name="input"))
model.add(tf.keras.layers.LSTM(units=512, 
                              activation='relu', name="lstm"))
model.add(tf.keras.layers.Dense(1, name="output"))

model.compile(loss='mse', optimizer='adam')

In [18]:
model.summary()

Model: "Proposed_LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (LSTM)                (None, 1, 512)            1112064   
                                                                 
 lstm (LSTM)                 (None, 512)               2099200   
                                                                 
 output (Dense)              (None, 1)                 513       
                                                                 
Total params: 3,211,777
Trainable params: 3,211,777
Non-trainable params: 0
_________________________________________________________________


In [19]:
history = model.fit(X_reshaped, y, epochs=100, batch_size=32)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [20]:
from sklearn.metrics import mean_squared_error as mse, r2_score

from math import sqrt

fitted_values = model.predict(X_reshaped)

# Calculate the RMSE with original next values
rmse = round(sqrt(mse(y + mean, fitted_values + mean)), 2)
r2score = round(r2_score(y + mean, fitted_values + mean), 2)

print('RMSE:', rmse)
print('R2 Score:', r2score)

RMSE: 8.19
R2 Score: 0.99


In [21]:
import numpy as np

predictions = []
x = X[-1]

for i in range(180):
  x = x.reshape((1, 1, X.shape[1]))
  prediction = model.predict(x)
  x = x.ravel()
  x = np.delete(x, 0)
  x = np.append(x, prediction[0][0])
  predictions.append(prediction[0][0])

In [22]:
predictions = predictions + mean

for i in predictions:
  print(i)

171.49327
139.84805
151.12933
145.38626
151.35632
152.348
152.9398
142.10262
146.60393
144.04382
148.71161
147.59125
150.88051
150.78088
157.5277
167.01025
151.90721
152.21494
144.1612
144.26941
140.07954
140.83136
141.46596
142.12991
140.89185
146.15797
150.19072
147.5716
145.49559
143.66708
142.69891
142.54205
139.69301
141.09996
140.99553
137.54901
136.71971
133.83824
131.24266
127.0429
132.22134
135.23395
138.32713
139.94078
142.19496
149.11841
147.8068
141.47847
137.29453
138.01022
138.61044
136.00717
135.10811
134.29016
132.67415
130.96075
131.19226
131.68028
129.82681
130.17264
129.56395
130.27817
130.96423
132.9161
132.33328
129.53967
130.47585
132.70732
133.07707
132.27701
131.48206
130.0651
127.13713
126.2466
124.08887
123.12416
122.96546
123.28265
123.69252
123.65419
123.07475
121.42453
120.37748
119.09337
118.60002
118.885605
119.18823
119.587234
120.262794
119.735695
117.487885
115.3518
114.64433
114.463455
115.002884
115.685326
116.52745
116.40903
115.48195
113.625916
111

In [24]:
for i in df.PM25.values[-30:]:
  print(i)

155.0
186.0
154.0
159.0
151.0
151.0
158.0
148.0
150.0
155.0
163.0
189.0
140.0
163.0
153.0
169.0
163.0
167.0
181.0
152.0
154.0
165.0
163.0
151.0
164.0
152.0
148.0
161.0
169.0
173.0
