<a href="https://colab.research.google.com/github/rohit-rcrohit7/AQLoRaBurk/blob/master/Pollution_forecasting_using_Time_series_and_LSTM_w.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Shell command (IPython `!` escape): asks apt to install the `cuda` package.
# NOTE(review): there is no `-y` flag, so apt may stop at its confirmation
# prompt in a non-interactive session — confirm whether this step is needed
# at all on a runtime that already ships a CUDA toolkit.
!apt install cuda

In [0]:
!pip install mxnet-cu101 numpy pandas matplotlib

In [0]:
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00381/PRSA_data_2010.1.1-2014.12.31.csv

In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8*" and
# later releases dropped the old names; use the new name when it is
# available and fall back to the legacy one on older installations.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

import mxnet
import mxnet.gluon as G

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder

In [0]:
# Load the PM2.5 CSV from the current working directory into a DataFrame.
data = pd.read_csv("PRSA_data_2010.1.1-2014.12.31.csv")

In [0]:
# Preview the first rows of the freshly loaded frame.
data.head()

In [0]:
# Data preprocessing, step I.

# Discard every row that has at least one missing value (modifies `data`
# in place).
data.dropna(inplace=True)

# Replace the string labels in `cbwd` with integer codes so the column can
# be scaled and fed to the network alongside the numeric ones.
le = LabelEncoder()
le.fit(data["cbwd"])
data["cbwd"] = le.transform(data["cbwd"])

data.head()

In [0]:
# Plot the PM2.5 series, drawing the first 40000 rows (training portion)
# and the remaining rows (test portion) as two separately coloured lines.
split_at = 40000
pm25 = data["pm2.5"]

fig, ax = plt.subplots(dpi=105, figsize=(14, 4))
pm25.iloc[:split_at].plot(ax=ax, legend=True)
pm25.iloc[split_at:].plot(ax=ax, legend=True)
ax.legend(['Training set (40000 Hours)', 'Test set'])
ax.set_title('PM2.5 Levels')
ax.set_ylabel("PM2.5")
plt.show()

In [0]:
def make_dataset_many_to_one(array,time_steps):
    """Slice a sequence of rows into sliding windows with one label each.

    For every position ``i`` from ``time_steps`` up to ``len(array) - 1``
    the sample is the ``time_steps`` rows immediately before ``i`` and the
    label is the last element of row ``i``.

    Parameters
    ----------
    array : indexable sequence of rows (e.g. a 2-D NumPy array)
        Must support ``len``, slicing, and ``array[i][-1]``.
    time_steps : int
        Number of consecutive rows in each window.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The stacked windows and the matching labels. Both are empty arrays
        when ``array`` has no more than ``time_steps`` rows.
    """
    targets = range(time_steps, len(array))

    windows = [array[pos - time_steps:pos] for pos in targets]
    labels = [array[pos][-1] for pos in targets]

    return np.array(windows), np.array(labels)

In [0]:
# Columns handed to the model. The target stays in the last position
# because make_dataset_many_to_one reads each label from the final column.
data_cols = [
    "DEWP",   # Dew point
    "TEMP",   # Temperature
    "PRES",   # Pressure
    "cbwd",   # Combined wind direction
    "Iws",    # Cumulated wind speed
    "Is",     # Cumulated hours of snow
    "Ir",     # Cumulated hours of rain
    "pm2.5",  # Target variable
]

# Whole series (used for the final evaluation further down) and its first
# 40000 rows, which form the training split.
total = data[data_cols]
train = data[data_cols].iloc[:40000]
train.head()

In [0]:
# Scale every column to the [0, 1] range.
# NOTE(review): the scaler is fitted on the whole series, test rows
# included, so test-set statistics leak into training. Left unchanged here
# because the later inverse transform assumes full-range pm2.5 scaling.
TIME_STEPS = 24  # hours of history in each input window

scaler = MinMaxScaler(feature_range=(0,1))
scaler.fit(total)
train = scaler.transform(train)
total = scaler.transform(total)
n_features = total.shape[1]

# Build (window, next-hour target) pairs for the training split and for the
# full series.
trn_x, trn_y = make_dataset_many_to_one(train, TIME_STEPS)
total_x, total_y = make_dataset_many_to_one(total, TIME_STEPS)

# Make the (samples, time steps, features) layout explicit.
trn_x = trn_x.reshape(-1, TIME_STEPS, n_features)
total_x = total_x.reshape(-1, TIME_STEPS, n_features)

In [0]:
# Wrap the NumPy windows and labels as MXNet NDArrays.
trn_x, trn_y = mxnet.nd.array(trn_x), mxnet.nd.array(trn_y)
total_x, total_y = mxnet.nd.array(total_x), mxnet.nd.array(total_y)

In [0]:
class Net(G.Block):
    """Stacked-LSTM regressor that maps a window of readings to one value.

    The network is a 3-layer LSTM with 120 hidden units per layer and a
    dropout of 0.2 between layers, followed by a single-unit dense layer.
    Input is expected in batch-major order: (batch, time steps, features).
    """

    def __init__(self, **kwargs):
        super(Net, self).__init__(**kwargs)
        with self.name_scope():
            # Gluon's LSTM defaults to layout='TNC' (time-major). The
            # notebook feeds batch-major tensors, so the layout must be
            # 'NTC'; otherwise the recurrence runs across the samples of a
            # batch instead of across the hours of each window.
            self.lstm = G.rnn.LSTM(120, 3, layout='NTC', dropout=0.2)
            # Dense flattens its input by default, so it sees the LSTM
            # outputs of every time step and emits one value per sample.
            self.out = G.nn.Dense(1)

    def forward(self,x):
        """Return one prediction per sample, shaped (batch, 1)."""
        x = self.lstm(x)
        x = self.out(x)

        return x

Model = Net()
print(Model)

In [0]:
# Use the first GPU when MXNet can see one; otherwise fall back to the CPU.
if mxnet.context.num_gpus() > 0:
    device = mxnet.gpu(0)
else:
    device = mxnet.cpu(0)

# Xavier-initialise every parameter on the chosen device.
Model.initialize(mxnet.init.Xavier(), ctx=device)

In [0]:
# Adam optimiser over all of the model's parameters.
adam_settings = {'learning_rate': 0.001}
trainer = G.Trainer(Model.collect_params(), 'adam', adam_settings)

In [0]:
# Metric reported after every epoch: mean squared error.
mse = mxnet.metric.MSE()
# Objective minimised during training: Gluon's L2 loss.
loss_function = G.loss.L2Loss()

In [0]:
from mxnet import autograd

EPOCHS = 15
BATCH_SIZE = 100

# Training-set MSE recorded after each epoch.
trn_loss = []

train_iter = mxnet.io.NDArrayIter(trn_x, trn_y, BATCH_SIZE, shuffle=True)

for epoch in range(EPOCHS):

    for trn_batch in train_iter:

        x = trn_batch.data[0].as_in_context(device)
        y = trn_batch.label[0].as_in_context(device)

        # Record the forward pass so gradients can be computed.
        with autograd.record():
            y_pred = Model(x)
            loss = loss_function(y_pred, y)

        # Backprop
        loss.backward()

        # Optimize! Trainer.step rescales the gradients by 1/batch_size, so
        # it must be given the number of samples in this mini-batch, not
        # the size of the whole training set.
        trainer.step(batch_size=x.shape[0])

    # Rewind the iterator so the next epoch can walk the data again.
    train_iter.reset()

    # Calculate train metrics on the full training set (outside
    # autograd.record, i.e. in inference mode).
    predictions = Model(trn_x.as_in_context(device))
    mse.update(trn_y, predictions)
    trn_loss.append(mse.get()[1])
    mse.reset()

    print("epoch: {} | trn_loss: {:.8f}".format(epoch+1,
                                                trn_loss[-1]))

In [0]:
# Run the model over every window of the full series and keep the single
# output value produced for each window.
pred = Model(total_x.as_in_context(device))
predictions = list(pred.asnumpy()[:, 0])

In [0]:
# Map the scaled predictions back to PM2.5 units. A dedicated single-column
# scaler is fitted on the pm2.5 values instead of refitting `scaler`, so
# the multi-column scaler used to prepare the model inputs is left intact.
pm_scaler = MinMaxScaler(feature_range=(0, 1)).fit(data["pm2.5"].values.reshape(-1, 1))
predicted = pm_scaler.inverse_transform( np.array(predictions).reshape(-1, 1) )

# The first 24 rows have no prediction: they only serve as history for the
# first window. Align the observed values with the predictions accordingly.
infered = pd.DataFrame(data['pm2.5'].iloc[24:])
# Flatten the (n, 1) array so it is assigned as a plain 1-D column.
infered['Predicted'] = predicted.ravel()

In [0]:
# Compare observed and predicted PM2.5 on the rows of `infered` from
# position 40000 onwards.
tail_start = 40000

fig, ax = plt.subplots(dpi=100, figsize=(11, 7))
infered["pm2.5"].iloc[tail_start:].plot(ax=ax, legend=True)
infered["Predicted"].iloc[tail_start:].plot(ax=ax, legend=True, color='r')
ax.legend(['Real', 'Predicted'])
ax.set_title('Pollution Level')
ax.set_ylabel("pm2.5")
plt.show()