In [2]:
import io
import os
import platform
import pdb

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from matplotlib.pyplot import cm

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn import preprocessing

In [3]:
def read_data(file_path):
    data=pd.read_csv(file_path)
    return data

In [4]:
data=pd.read_csv("./vancouver.csv")

In [5]:
data.fillna(np.mean(data),inplace=True)

In [6]:
data.tail()

Unnamed: 0,Date,Time,TEMP_MEAN,SO2,O3,CO,HUMIDITY,NO2,NO,WDIR_SCLR,RAD_TOTAL,WSPD_VECT,WSPD_SCLR,ATM_PRESS_1HR,WDIR_VECT,PM25,PRECIP_TOTAL,PM10
17524,6/4/2018,4:00 AM,10.9,0.5,24.0,0.15,80.6,7.2,0.4,83.6,0.0,4.5,4.59,101.1,82.2,3.6,0.0,10.318944
17525,6/4/2018,5:00 AM,10.8,0.4,23.8,0.13,82.0,7.3,0.5,90.4,11.6,6.83,7.17,101.1,91.1,3.2,0.0,10.318944
17526,6/4/2018,6:00 AM,10.8,0.5,21.6,0.15,83.5,9.7,1.2,87.0,55.3,6.61,6.9,101.1,86.9,2.9,0.0,10.318944
17527,6/4/2018,7:00 AM,11.4,0.6,17.3,0.18,83.0,14.8,6.2,81.8,113.8,4.77,5.05,101.2,81.7,3.5,0.0,10.318944
17528,6/4/2018,8:00 AM,12.3,0.7,18.7,0.19,79.2,13.9,4.7,94.8,182.7,9.581267,2.98,101.2,160.37165,3.2,0.0,10.318944


In [9]:
dataset=data[['TEMP_MEAN','SO2','O3','CO','HUMIDITY','NO2','NO','WSPD_SCLR','ATM_PRESS_1HR','PM25','PRECIP_TOTAL','PM10']].copy()
# dataset=pd.to_numeric(dataset, errors='coerce')
# train_data=dataset.head(14023)
# valid_data=dataset.drop(train_data.index)
scaler = preprocessing.MinMaxScaler() 
scaled_values = scaler.fit_transform(dataset) 
dataset.loc[:,:] = scaled_values

In [19]:
dataset.head()

Unnamed: 0,TEMP_MEAN,SO2,O3,CO,HUMIDITY,NO2,NO,WSPD_SCLR,ATM_PRESS_1HR,PM25,PRECIP_TOTAL,PM10
0,0.70339,0.04,0.27609,0.119048,0.711165,0.217021,0.024607,0.246461,0.694915,0.015744,0.0,0.077052
1,0.70339,0.04,0.17041,0.10119,0.673544,0.15461,0.022215,0.284346,0.694915,0.011927,0.0,0.054439
2,0.683616,0.053333,0.155878,0.10119,0.728155,0.188652,0.035885,0.267485,0.694915,0.011927,0.0,0.064489
3,0.672316,0.026667,0.340819,0.077381,0.802184,0.097872,0.021873,0.349917,0.677966,0.017176,0.0,0.08794
4,0.672316,0.046584,0.233778,0.136323,0.764563,0.209918,0.037687,0.334305,0.677966,0.015267,0.0,0.057789


In [10]:
data_set=np.asarray(dataset,dtype=np.float32)

In [11]:
seq_len = 30 + 1
x=len(data_set)-seq_len
sequences = [data_set[t:t+seq_len] for t in range(x)]

In [12]:
len(sequences)

17498

In [13]:
seq=torch.FloatTensor(sequences)

In [14]:
seq.shape

torch.Size([17498, 31, 12])

In [15]:
split_row=round(0.90*seq.size(0))
x_train_set=seq[:split_row, :-1]
y_train_set=seq[:split_row, -1]
x_valid_set=seq[split_row:, :-1]
y_valid_set=seq[split_row:, -1]

In [16]:
print(x_train_set.shape)
print(y_train_set.shape)
print(x_valid_set.shape)
print(y_valid_set.shape)

torch.Size([15748, 30, 12])
torch.Size([15748, 12])
torch.Size([1750, 30, 12])
torch.Size([1750, 12])


In [17]:
class LSTM(nn.Module):
    def __init__(self,input_size,hidden_size,num_layers=1,dropout=0,bidirectional=False):
        super(LSTM,self).__init__()
        self.input_size=input_size
        self.hidden_size=hidden_size
        self.num_layers=num_layers
        self.dropout=dropout
        self.bidirectional=bidirectional
        self.lstm = nn.LSTM(input_size,
                            hidden_size,
                            num_layers,
                            dropout=dropout,
                            bidirectional=bidirectional)
        self.linear = nn.Linear(hidden_size, 12)
        
    def forward(self,inputs,hidden):
        outputs,hidden=self.lstm(inputs,hidden)
        predictions=self.linear(outputs[-1])
        return predictions,outputs,hidden
    
    def init_hidden(self,batch_size):
        num_directions=2 if self.bidirectional else 1
        hidden = (torch.zeros(self.num_layers*num_directions, batch_size, self.hidden_size),
                  torch.zeros(self.num_layers*num_directions, batch_size, self.hidden_size))
        return hidden

In [18]:
def get_batch(x,y,i,batch_size):
    if x.dim() == 2:
        x = x.unsqueeze(2)
    batch_x = x[(i*batch_size):(i*batch_size)+batch_size, :, :]
    batch_y = y[(i*batch_size):(i*batch_size)+batch_size]

    # Reshape Tensors into (seq_len, batch_size, input_size) format for the LSTM.
    batch_x = batch_x.transpose(0, 1)
    
    return batch_x, batch_y

In [24]:
input_size=12
hidden_size=24
num_layers=2
lstm=LSTM(input_size,hidden_size,num_layers)

learning_rate=0.003
max_grad_norm=5
loss_fn = nn.MSELoss()
optimizer = optim.Adam(lstm.parameters(), lr=learning_rate)

batch_size = 12
num_epochs = 30 #3
num_sequences = x_train_set.size(0)
num_batches = num_sequences //batch_size

print("Training model for {} epoch of {} batches".format(num_epochs, num_batches))
for epoch in range(num_epochs):
    total_loss = 0

    # Shuffle input and target sequences.
    idx = torch.randperm(x_train_set.size(0))
    x = x_train_set[idx]
    y = y_train_set[idx]

    for i in range(num_batches):
        # Get input and target batches and reshape for LSTM.
        batch_x, batch_y = get_batch(x_train_set, y_train_set, i, batch_size)

        # Reset the gradient.
        lstm.zero_grad()
        
        # Initialize the hidden states (see the function lstm.init_hidden(batch_size)).
        hidden = lstm.init_hidden(batch_size)
        
        # Complete a forward pass.
        y_pred, outputs, hidden = lstm(batch_x,hidden)
        
        # Calculate the loss with the 'loss_fn'.
        loss = torch.sqrt(loss_fn(y_pred,batch_y))
        
        # Compute the gradient.
        loss.backward()
        
        # Clip to the gradient to avoid exploding gradient.
        nn.utils.clip_grad_norm_(lstm.parameters(), max_grad_norm)

        # Make one step with optimizer.
        optimizer.step()
        
        # Accumulate the total loss.
        total_loss += loss.data

    print("Epoch {}: Loss = {:.8f}".format(epoch+1, total_loss/num_batches))

Training model for 30 epoch of 1312 batches
Epoch 1: Loss = 0.07638816
Epoch 2: Loss = 0.05538071
Epoch 3: Loss = 0.04872962
Epoch 4: Loss = 0.04548705
Epoch 5: Loss = 0.04289592
Epoch 6: Loss = 0.04056099
Epoch 7: Loss = 0.03901644
Epoch 8: Loss = 0.03823249
Epoch 9: Loss = 0.03777927
Epoch 10: Loss = 0.03749513
Epoch 11: Loss = 0.03727673
Epoch 12: Loss = 0.03709121
Epoch 13: Loss = 0.03693084
Epoch 14: Loss = 0.03678806
Epoch 15: Loss = 0.03665888
Epoch 16: Loss = 0.03653976
Epoch 17: Loss = 0.03642836
Epoch 18: Loss = 0.03632499
Epoch 19: Loss = 0.03622905
Epoch 20: Loss = 0.03613593
Epoch 21: Loss = 0.03604420
Epoch 22: Loss = 0.03595629
Epoch 23: Loss = 0.03587238
Epoch 24: Loss = 0.03579192
Epoch 25: Loss = 0.03571394
Epoch 26: Loss = 0.03563807
Epoch 27: Loss = 0.03556441
Epoch 28: Loss = 0.03549302
Epoch 29: Loss = 0.03542414
Epoch 30: Loss = 0.03535778
