In [8]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn import metrics
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from numpy import array
from keras import optimizers
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [9]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.io as pio
pio.renderers.default = "vscode"


plt.style.use('default')
%matplotlib inline

In [10]:
dataframe = pd.read_csv('clean_data.csv', usecols=[5], engine='python')
dataset = dataframe.values
dataset = dataset.astype('float32')

In [11]:
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset)

MinMaxScaler()

In [12]:
def create_dataset(dataset, look_back=1):
	dataX, dataY = [], []
	for i in range(len(dataset)-look_back-1):
		a = dataset[i:(i+look_back), 0]
		dataX.append(a)
		dataY.append(dataset[i + look_back, 0])
	return np.array(dataX), np.array(dataY)

	#dataX -> 100 200 300 Y -> 400 
	#dataY -> 200 300 400 Y -> 500 

# SVR

In [13]:
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]

In [14]:
#for plot
look_back = 3
data = pd.read_csv('clean_data.csv', index_col=0)
data = data.iloc[train_size+look_back+1:,:]
data.sort_index(ascending=True, inplace=True)

In [15]:
train = scaler.transform(train)
test = scaler.transform(test)

In [16]:
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

In [17]:
trainX1 = np.reshape(trainX, (-1, 3))
trainY1 = np.reshape(trainY, (917, 1))

regressor=SVR(kernel='rbf', C=10)
regressor.fit(trainX1, trainY1)

print(trainX1.shape)
print(trainY1.shape)
 

import joblib
joblib.dump(regressor, "svr.gz")

(917, 3)
(917, 1)



A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().



['svr.gz']

In [18]:
pred = regressor.predict(testX)
series_results = pd.DataFrame(scaler.inverse_transform([pred]).flatten(), columns=['SVR_10'])


print(f"Mean Abs Error: {metrics.mean_absolute_error(testY, pred)}")
print(f"Mean Sq Error: {metrics.mean_squared_error(testY, pred)}")
print(f"Root Mean Error: {np.sqrt(metrics.mean_squared_error(testY, pred))}")

Mean Abs Error: 0.061743450538459484
Mean Sq Error: 0.0067924560592082725
Root Mean Error: 0.08241635795889231


In [19]:
fig = go.Figure()
fig.update_layout(
    title="Comparison of predicted AQI and Measured AQI",
    xaxis_title="Date",
    yaxis_title="AQI Level",
    legend_title="Legend",
)

fig.add_trace(go.Scatter(x=series_results.index, y=scaler.inverse_transform([testY]).flatten(), name="Actual", line_shape='spline'))
fig.add_trace(go.Scatter(x=series_results.index, y=scaler.inverse_transform([pred]).flatten()   , name="SVR", line_shape='spline'))
fig.show()

# Polynomial Regression with degree=10

In [20]:
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]

In [21]:
#for plot
look_back = 3
data = pd.read_csv('clean_data.csv', index_col=0)
data = data.iloc[train_size+look_back+1:,:]
data.sort_index(ascending=True, inplace=True)

In [22]:
train = scaler.transform(train)
test = scaler.transform(test)

In [23]:
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

In [24]:
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

In [25]:
print(trainX.shape)
print(trainY.shape)

(917, 1, 3)
(917,)


In [26]:
from sklearn.preprocessing import PolynomialFeatures  
poly_regs = PolynomialFeatures(degree= 10)  

trainX1 = np.reshape(trainX, (-1, 3))
trainY1 = np.reshape(trainY, (917, 1))
print(trainX1.shape)
print(trainY1.shape)

x_train_poly = poly_regs.fit_transform(trainX1)  
poly_lin_reg = LinearRegression()  
poly_lin_reg.fit(x_train_poly, trainY1)  

import joblib
joblib.dump(poly_lin_reg, "poly_reg.gz")

(917, 3)
(917, 1)


['poly_reg.gz']

In [27]:
testX1 = np.reshape(testX, (-1, 3))
testY1 = np.reshape(testY, (450, 1))

print(testX1.shape)
print(testY1.shape)

x_test_poly = poly_regs.fit_transform(testX1)  
pred = poly_lin_reg.predict(x_test_poly)

series_results["Poly_Reg"] = scaler.inverse_transform(pred)

series_results.index = data.index
series_results.sort_index(ascending=True, inplace=True)


print(f"Mean Abs Error: {metrics.mean_absolute_error(testY1, pred)}")
print(f"Mean Sq Error: {metrics.mean_squared_error(testY1, pred)}")
print(f"Root Mean Error: {np.sqrt(metrics.mean_squared_error(testY1, pred))}")

(450, 3)
(450, 1)
Mean Abs Error: 0.06037198752164841
Mean Sq Error: 0.009249642491340637
Root Mean Error: 0.09617505967617035


In [28]:
fig = go.Figure()
fig.update_layout(
    title="Comparison of predicted AQI and Measured AQI",
    xaxis_title="Date",
    yaxis_title="AQI Level",
    legend_title="Legend",
)

fig.add_trace(go.Scatter(x=series_results.index, y=scaler.inverse_transform(testY1).flatten(), name="Actual", line_shape='spline'))
fig.add_trace(go.Scatter(x=series_results.index, y=scaler.inverse_transform(pred).flatten()   , name="Polynomial reg", line_shape='spline'))
fig.show()

# ARIMA with parameters p=1, d=1, q=1

In [29]:
from statsmodels.tsa.arima.model import ARIMA

In [30]:
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]

In [31]:
train = scaler.transform(train)
test = scaler.transform(test)

In [32]:
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

In [33]:
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

In [34]:
history = [x for x in train]

In [35]:
predictions = list()

In [37]:
predictions.clear()
for t in range(len(test)):
	model = ARIMA(history, order=(1,1,1))
	model_fit = model.fit()
	output = model_fit.forecast()
	yhat = output[0]
	predictions.append(yhat)
	obs = test[t]
	history.append(obs)
	print('predicted=%f, expected=%f' % (yhat, obs))

predicted=0.092640, expected=0.184211
predicted=0.123192, expected=0.164474
predicted=0.130236, expected=0.177632
predicted=0.141781, expected=0.184211
predicted=0.151631, expected=0.180921
predicted=0.157685, expected=0.164474
predicted=0.157581, expected=0.171053
predicted=0.161145, expected=0.213816
predicted=0.176715, expected=0.200658
predicted=0.180673, expected=0.184211
predicted=0.179871, expected=0.164474
predicted=0.174621, expected=0.177632
predicted=0.176539, expected=0.184211
predicted=0.178839, expected=0.180921
predicted=0.178976, expected=0.243421
predicted=0.198984, expected=0.263158
predicted=0.214870, expected=0.351974
predicted=0.253425, expected=0.276316
predicted=0.251179, expected=0.361842
predicted=0.283844, expected=0.338816
predicted=0.293885, expected=0.351974
predicted=0.308119, expected=0.335526
predicted=0.312701, expected=0.342105
predicted=0.319913, expected=0.338816
predicted=0.323834, expected=0.266447
predicted=0.304200, expected=0.319079
predicted=0.


Maximum Likelihood optimization failed to converge. Check mle_retvals



predicted=0.046324, expected=0.190789
predicted=0.094420, expected=0.078947
predicted=0.080369, expected=0.151316
predicted=0.103798, expected=0.078947
predicted=0.091240, expected=0.115132
predicted=0.100220, expected=0.223684
predicted=0.139382, expected=0.049342
predicted=0.102105, expected=0.223684
predicted=0.146675, expected=0.187500
predicted=0.153133, expected=0.154605
predicted=0.150526, expected=0.164474
predicted=0.154729, expected=0.125000
predicted=0.144122, expected=0.164474
predicted=0.152498, expected=0.151316
predicted=0.151036, expected=0.210526
predicted=0.170455, expected=0.217105
predicted=0.182127, expected=0.174342
predicted=0.176441, expected=0.121711
predicted=0.158716, expected=0.049342
predicted=0.126223, expected=0.069079
predicted=0.114477, expected=0.085526
predicted=0.109124, expected=0.144737
predicted=0.122992, expected=0.157895
predicted=0.132462, expected=0.144737
predicted=0.134149, expected=0.157895
predicted=0.140966, expected=0.141447
predicted=0.

In [38]:
normal = [[val] for val in predictions]
testPredict = scaler.inverse_transform(normal[:-4])
testY2 = scaler.inverse_transform([testY])

In [39]:
print(len(testY2[0]))
print(len(testPredict))

450
450


In [40]:
print(f"Mean Abs Error: {metrics.mean_absolute_error(testY2[0], testPredict[:,0])}")
print(f"Mean Sq Error: {metrics.mean_squared_error(testY2[0], testPredict[:,0])}")
print(f"Root Mean Error: {np.sqrt(metrics.mean_squared_error(testY2[0], testPredict[:,0]))}")

Mean Abs Error: 17.913986148164334
Mean Sq Error: 623.4038398387481
Root Mean Error: 24.968056388889146


In [41]:
series_results["ARIMA"] = testPredict
fig = go.Figure()
fig.update_layout(
    title="Comparison of predicted AQI and Measured AQI",
    xaxis_title="Date",
    yaxis_title="AQI Level",
    legend_title="Legend",
)

fig.add_trace(go.Scatter(x=series_results.index, y=scaler.inverse_transform(testY1).flatten(), name="Actual", line_shape='spline'))
fig.add_trace(go.Scatter(x=series_results.index, y=testPredict.flatten() , name="ARIMA", line_shape='spline'))
fig.show()

# LSTM (Look_back=3)

In [42]:
import tensorflow as tf
tf.random.set_seed(7)

In [43]:
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]

In [44]:
train = scaler.transform(train)
test = scaler.transform(test)

In [45]:
look_back = 3
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

In [46]:
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

In [47]:
from tensorflow import keras
model = keras.models.load_model('AQI_LSTM.h5')



In [48]:
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)



In [49]:
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY1 = scaler.inverse_transform([testY])

In [50]:
series_results["LSTM_3"] = testPredict
print(f"Mean Abs Error: {metrics.mean_absolute_error(testY1[0], testPredict[:,0])}")
print(f"Mean Sq Error: {metrics.mean_squared_error(testY1[0], testPredict[:,0])}")
print(f"Root Mean Error: {np.sqrt(metrics.mean_squared_error(testY1[0], testPredict[:,0]))}")

Mean Abs Error: 14.568286552699073
Mean Sq Error: 420.34122893641717
Root Mean Error: 20.502224975265907


In [51]:
fig = go.Figure()
fig.update_layout(
    title="Comparison of predicted AQI and Measured AQI",
    xaxis_title="Date",
    yaxis_title="AQI Level",
    legend_title="Legend",
)

fig.add_trace(go.Scatter(x=series_results.index, y=testY1.flatten(), name="Actual", line_shape='spline'))
fig.add_trace(go.Scatter(x=series_results.index, y=testPredict.flatten()  , name="LSTM", line_shape='spline'))
fig.show()

# LSTM (Look_back=10)

In [62]:
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]

In [63]:
train = scaler.transform(train)
test = scaler.transform(test)

In [64]:
look_back = 10
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

In [65]:
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

In [66]:
from tensorflow import keras
model = keras.models.load_model('AQI_LSTM_10.h5')



In [67]:
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)



In [68]:
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY1 = scaler.inverse_transform([testY])

In [70]:
# series_results["LSTM_10"] = testPredict
print(f"Mean Abs Error: {metrics.mean_absolute_error(testY1[0], testPredict[:,0])}")
print(f"Mean Sq Error: {metrics.mean_squared_error(testY1[0], testPredict[:,0])}")
print(f"Root Mean Error: {np.sqrt(metrics.mean_squared_error(testY1[0], testPredict[:,0]))}")

Mean Abs Error: 14.766150166254075
Mean Sq Error: 432.0732779099467
Root Mean Error: 20.786372408622594


In [71]:
fig = go.Figure()
fig.update_layout(
    title="Comparison of predicted AQI and Measured AQI",
    xaxis_title="Date",
    yaxis_title="AQI Level",
    legend_title="Legend",
)

fig.add_trace(go.Scatter(x=series_results.index, y=testY1.flatten(), name="Actual", line_shape='spline'))
fig.add_trace(go.Scatter(x=series_results.index, y=testPredict.flatten()  , name="LSTM_10", line_shape='spline'))
fig.show()

# Comparision of various Models

In [53]:
fig = go.Figure()
fig.update_layout(
    title="Comparison of predicted AQI and Measured AQI",
    xaxis_title="Date",
    yaxis_title="AQI Level",
    legend_title="Legend",
)
fig.update_xaxes(range=['2016-10-01', '2022-03-01'])

fig.add_trace(go.Scatter(x=series_results.index, y=testY1.flatten(), name="Actual", line_shape='spline'))
fig.add_trace(go.Scatter(x=series_results.index, y=series_results.SVR_10  , name="SVR(c=10)", line_shape='spline'))
fig.add_trace(go.Scatter(x=series_results.index, y=series_results.LSTM_3  , name="LSTM(3)", line_shape='spline'))
fig.add_trace(go.Scatter(x=series_results.index, y=series_results.LSTM_10  , name="LSTM(10)", line_shape='spline'))
fig.add_trace(go.Scatter(x=series_results.index, y=series_results.Poly_Reg  , name="Polynomial(10)", line_shape='spline'))
fig.add_trace(go.Scatter(x=series_results.index, y=series_results.ARIMA  , name="ARIMA", line_shape='spline'))


fig.show()