In [None]:
# Mount Google Drive at /content/drive so that files stored there
# (such as the dataset) can be read by the notebook.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Notebook configuration: folder on the mounted Drive that holds the dataset.
# Adjust this when the environment changes. Keep the trailing slash, because the
# file name is appended to this path by plain string concatenation.
dataset_path = "/content/drive/MyDrive/airquality_aam_aca/data/"

In [None]:
import pandas as pd

# Load the dataset into the program.
# The records are collected on a daily basis, so the 'date' column is parsed as
# datetimes and used as the index. Both are requested explicitly here:
# parse_dates=True on its own only tries to parse the index, and
# DataFrame.set_index returns a new frame instead of modifying df in place.
df = pd.read_csv(
    dataset_path + "kolkata.csv",
    parse_dates=['date'],
    index_col='date',
)

print(df.head())

In [None]:
# Extract the PM2.5 series and centre it on zero by subtracting its mean
import numpy as np

# Mean of the whole series; later cells add it back to recover original values
mean = np.mean(df.PM25.values)

# Subtract the mean from every record and round each record to 2 decimal places.
# `data` must stay mean-centred from here on: the model is trained on it and the
# evaluation/forecast cells add `mean` back to the model's outputs.
data = np.round(df.PM25.values - mean, 2)

print('===== Original series =====')
print(df.PM25.values[:10])

print('===== Modified series =====')
print(data[:10])

In [None]:
# Build the supervised-learning table from the series and shape it for the LSTM.
# utility.py must be uploaded to the runtime, otherwise this import fails.
from utility import sequence_to_table

# sequence_to_table turns the list/array into a table built with look_back=30.
# NOTE(review): the exact column layout is defined in utility.py - confirm there.
data = sequence_to_table(data, look_back=30)

# Target: the 'next' column (the expected outcome of the forecast).
# Features: every remaining column.
y = data['next'].values
X = data.drop(columns=['next']).values

# Insert a middle axis of length 1 so X becomes 3-D, the rank the LSTM input expects
n_samples, n_features = X.shape
X_reshaped = X.reshape((n_samples, 1, n_features))

LSTM Model
------

In [None]:
# Proposed LSTM model: two stacked 512-unit LSTM layers followed by a
# single-unit Dense layer that outputs the forecast value.

import tensorflow as tf
# Use the tf.keras namespace throughout instead of mixing the standalone
# `keras` package with tf.keras layers.
from tensorflow.keras import Sequential

# Seed TensorFlow's global generator before any layer is created
tf.random.set_seed(42)

model = Sequential(name="Proposed_LSTM")
# Declare the input shape explicitly: 1 timestep x number of look-back features
model.add(tf.keras.Input(shape=(1, X_reshaped.shape[2])))
# return_sequences=True keeps the 3-D output that the second LSTM layer needs
model.add(tf.keras.layers.LSTM(units=512,
                              activation='relu',
                              return_sequences=True, name="input"))
model.add(tf.keras.layers.LSTM(units=512,
                              activation='relu', name="lstm"))
model.add(tf.keras.layers.Dense(1, name="output"))

# Mean squared error loss, Adam optimizer with default settings
model.compile(loss='mse', optimizer='adam')

In [None]:
# Print the summary of the compiled model
model.summary()

In [None]:
# Fit the model on all prepared samples: 100 epochs, batches of 32.
# The returned History object is kept so the loss curve can be plotted afterwards.
history = model.fit(
    x=X_reshaped,
    y=y,
    batch_size=32,
    epochs=100,
)

In [None]:
# Plot the training-loss curve, one point per epoch. The model is compiled with
# loss='mse', so the plotted values are mean squared errors.
import matplotlib.pyplot as plt

loss_fig = plt.figure(figsize=(5, 3))
loss_fig.set_dpi(128)
loss_ax = loss_fig.add_subplot(1, 1, 1)
loss_ax.plot(history.history['loss'], label='Loss')
loss_ax.set_xlabel('Number of Epochs')
loss_ax.set_ylabel('Mean Squared Error (MSE)')
loss_ax.legend()
plt.show()

In [None]:
from math import sqrt

from sklearn.metrics import mean_squared_error as mse, r2_score

# In-sample predictions for every training window
fitted_values = model.predict(X_reshaped)

# Score the fit with the series mean added back to both targets and predictions;
# both metrics are rounded to 2 decimal places.
mse_value = mse(y + mean, fitted_values + mean)
rmse = round(sqrt(mse_value), 2)
r2score = round(r2_score(y + mean, fitted_values + mean), 2)

print('RMSE:', rmse)
print('R2 Score:', r2score)

In [None]:
# Overlay the fitted series on the actual series (series mean added back to
# both), showing only the last 1000 points.
import matplotlib.pyplot as plt

original = y + mean
forecasted = fitted_values + mean

fit_fig = plt.figure(figsize=(18, 6))
fit_fig.set_dpi(128)
fit_ax = fit_fig.add_subplot(1, 1, 1)
fit_ax.plot(original[-1000:], label='Actual series')
fit_ax.plot(forecasted[-1000:], label='Fitted series')
fit_ax.legend()
fit_ax.set_xlabel('Days')
fit_ax.set_ylabel('PM2.5 Concentration Level')
plt.show()

In [None]:
# Predict the next N values beyond the end of the series (recursive extrapolation)
import numpy as np

n_future = 60  # number of future steps to forecast

# X[-1] is the window whose target is y[-1], i.e. a value that is already
# observed. Predicting from it would only re-estimate a known point, so the
# window is advanced by one step to end at y[-1]; the first prediction is then
# the first genuinely unseen value.
# NOTE(review): assumes each row of X is ordered oldest -> newest, which the
# drop-first / append-last update inside the loop relies on as well.
x = np.append(X[-1][1:], y[-1])

predictions = []
for _ in range(n_future):
  # Reshape the 1-D window to the (1, 1, features) layout the model was trained on;
  # verbose=0 avoids printing a progress bar for every single-sample call
  prediction = model.predict(x.reshape((1, 1, X.shape[1])), verbose=0)[0][0]
  predictions.append(prediction)  # collect the predicted value
  # Slide the window: drop the oldest value, append the newest prediction
  x = np.append(x[1:], prediction)

In [None]:
# Add the mean of the time series back to every prediction to reveal the
# predictions' original values. The result is stored under a new name so that
# re-running this cell does not add the mean a second time.
forecast = [value + mean for value in predictions]

# Print predictions
for value in forecast:
  print(value)