# LSTM 

### Importing the module that scrapes the web for the latest infected and death counts

In [3]:
conda install -c anaconda beautifulsoup4

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

Note: you may need to restart the kernel to use updated packages.


In [4]:
import csv_updater

ModuleNotFoundError: No module named 'bs4'

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import sklearn

#### Getting the starter data from the Johns Hopkins CSV file

In [None]:
df = pd.read_csv("johnhopkins-3March.csv")

In [None]:
df = df.T

In [None]:
# Positions 0 and 5 are kept; every other position among the first 114 columns is dropped.
cols_drop = [pos for pos in range(114) if pos not in (0, 5)]
df.drop(columns=df.columns[cols_drop], inplace=True)

In [None]:
# Drop these three rows by their index labels (row-wise drop).
df.drop(index=['Country/Region', 'WHO region', 'Province/States'], inplace=True)

#### Cleaning the Data

In [None]:
df.head()

In [None]:
df.shape

In [None]:
df.rename(columns = {0:'Infected', 5:'Deaths'}, inplace = True)

In [None]:
df.head()

In [None]:
df.shape

#### Using the dates as indices

In [None]:
# One Unix timestamp (in seconds) per day for 47 consecutive days; the first entry is
# one day (86400 s) after the base timestamp.
start = 1579478400
dates_unix = [start + 86400 * day for day in range(1, 48)]
# Leave `start` on the last generated timestamp, since a later cell displays it.
start = dates_unix[-1]

print(dates_unix)

In [None]:
df['Date'] = dates_unix

In [None]:
df.head()

In [None]:
df['Date'] = pd.to_datetime(df['Date'], unit='s')

In [None]:
df.head()

In [None]:
# Preview the frame indexed by 'Date' without modifying `df`.
# Assigning to `df.set_index` would replace the DataFrame's `set_index` method with a
# Series instead of changing the index; the index itself is set in a later cell via
# `df.set_index("Date", inplace=True)`.
df.set_index('Date').head()

In [None]:
start

In [None]:
df.head()

In [None]:
df = df.reset_index(drop=True)

In [None]:
df.set_index("Date", inplace = True) 

In [None]:
df.head()

In [None]:
df.fillna(0, inplace=True)

In [None]:
df.head()

### Plotting the data

In [None]:
# Draw both series on one figure: infected as a dashed blue line, deaths as a dashed red line.
plt.figure(figsize=(20, 10))
for column, line_style in (('Infected', 'b--'), ('Deaths', 'r--')):
    plt.plot(df[column], line_style)
plt.xlabel("Date")
plt.ylabel("Count")
plt.show()

In [None]:
infected, deaths = csv_updater.get_nums()

In [None]:
import datetime
now = datetime.datetime.now()

In [None]:
currdate = now.strftime("%Y-%m-%d")

In [None]:
infected, deaths

In [None]:
currdate = '2020-03-06'
currdate = pd.to_datetime(currdate)

In [None]:
df.loc[currdate]= [infected, deaths]

In [None]:
df.tail(1)

In [None]:
# Write the figures for `currdate` only when that date has no non-zero entry yet.
# The membership test comes first because `df.loc[currdate]` raises a KeyError when
# the date is not in the index, which would prevent the `else` branch from ever
# inserting a missing row.
if currdate in df.index and df.loc[currdate].any():
    print("Data already exists")
else:
    df.loc[currdate] = [infected, deaths]

In [None]:
df.tail()

In [None]:
df.loc['2020-03-04'] = [95310,3285]

In [None]:
df.loc['2020-03-05'] = [98414,3387]

In [None]:
# df.drop(['2020-03-04','2020-03-05'], inplace=True)

### Exporting the well-structured data

In [None]:
df.to_csv("StructuredDataset.csv")

In [2]:
df = pd.read_csv("StructuredDataset.csv")

NameError: name 'pd' is not defined

In [None]:
df.set_index('Date', inplace = True)

In [None]:
# Plot the data read back from the CSV: infected in dashed blue, deaths in dashed red.
plt.figure(figsize=(20, 10))
for column, line_style in (('Infected', 'b--'), ('Deaths', 'r--')):
    plt.plot(df[column], line_style)
plt.xlabel('Dates')
plt.show()

## LSTM

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler

In [None]:
tr_inf, tr_dea = df.Infected, df.Deaths

### Forecasting Infected Numbers

In [None]:
def sliding_windows(data, seq_length):
    """Split a sequence into overlapping input windows and next-step targets.

    For every valid start index ``i`` the window ``data[i:i + seq_length]`` is
    paired with the element that follows it, ``data[i + seq_length]``.

    Args:
        data: Indexable, sliceable sequence supporting ``len()`` (e.g. a NumPy array).
        seq_length: Number of consecutive elements in each input window.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays where ``x[k]`` is the k-th window and
        ``y[k]`` is the element right after it. Both are empty when ``data``
        has no more than ``seq_length`` elements.
    """
    # The last usable start index is len(data) - seq_length - 1, so the range has
    # to stop at len(data) - seq_length; stopping one earlier silently discards
    # the final window/target pair.
    n_windows = max(len(data) - seq_length, 0)

    x = [data[i:i + seq_length] for i in range(n_windows)]
    y = [data[i + seq_length] for i in range(n_windows)]

    return np.array(x), np.array(y)

# Window length: each sample holds `seq_length` consecutive values and the target is
# the value that follows them.
seq_length = 4

# Scale the infected series (as a single-feature column) with a MinMaxScaler.
sc = MinMaxScaler()
training_data_inf = sc.fit_transform(tr_inf.values.reshape(-1, 1))

x_inf, y_inf = sliding_windows(training_data_inf, seq_length)

# The "data" and "train" tensors are built from the same windows (no hold-out split).
dataX_inf, dataY_inf = (Variable(torch.Tensor(np.array(arr))) for arr in (x_inf, y_inf))
trainX_inf, trainY_inf = (Variable(torch.Tensor(np.array(arr))) for arr in (x_inf, y_inf))

In [None]:
class LSTM(nn.Module):
    """LSTM regressor: an ``nn.LSTM`` followed by a fully-connected layer.

    The final hidden state of the last LSTM layer is fed through a linear layer
    to produce ``num_classes`` values per input sequence.

    Args:
        num_classes: Number of output values per sequence.
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        seq_length: Optional window length. Stored for reference only; it is
            not used by ``forward``.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers,
                 seq_length=None):
        super(LSTM, self).__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        # Taken from the constructor argument so the class does not depend on a
        # notebook-level global being defined before instantiation.
        self.seq_length = seq_length

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return predictions of shape ``(batch, num_classes)``.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)`` (``batch_first=True``).
        """
        # Zero initial hidden/cell states on the same device and dtype as the input.
        h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                          device=x.device, dtype=x.dtype)
        c_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                          device=x.device, dtype=x.dtype)

        _, (h_out, _) = self.lstm(x, (h_0, c_0))

        # h_out is (num_layers, batch, hidden_size). Only the last layer's state is
        # used: flattening all layers with view(-1, hidden_size) would produce
        # num_layers * batch rows and break the batch dimension for num_layers > 1.
        out = self.fc(h_out[-1])

        return out

### Training the model

In [None]:
# Hyper-parameters for the infected-count model.
num_epochs, learning_rate = 2000, 0.01
input_size, hidden_size, num_layers, num_classes = 1, 2, 1, 1

lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)

# Full-batch training: each epoch is one forward/backward pass over all windows.
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = lstm(trainX_inf)

    # Mean squared error between predictions and targets.
    loss = criterion(outputs, trainY_inf)
    loss.backward()
    optimizer.step()

    # Report progress every 100 epochs.
    if not epoch % 100:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

### Plotting the results

In [None]:
# Switch the model to evaluation mode and predict for every window.
lstm.eval()
train_predict_inf = lstm(dataX_inf)

# Convert to NumPy and undo the scaling.
# NOTE: `sc` must still be the scaler fitted on the infected series here; a later
# cell rebinds `sc` to a new scaler.
data_predict_inf = sc.inverse_transform(train_predict_inf.data.numpy())
dataY_plot_inf = sc.inverse_transform(dataY_inf.data.numpy())

# Actual values with the default line style, predictions as a dashed red line.
plt.figure(figsize=(20, 10))
plt.plot(dataY_plot_inf)
plt.plot(data_predict_inf, 'r--')
plt.suptitle('Infected People')
plt.legend(['Actual Number', 'Predicted Number'], loc='upper left')
plt.show()

### Forecasting Death Numbers

In [None]:
def sliding_windows(data, seq_length):
    """Split a sequence into overlapping input windows and next-step targets.

    For every valid start index ``i`` the window ``data[i:i + seq_length]`` is
    paired with the element that follows it, ``data[i + seq_length]``.

    Args:
        data: Indexable, sliceable sequence supporting ``len()`` (e.g. a NumPy array).
        seq_length: Number of consecutive elements in each input window.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays where ``x[k]`` is the k-th window and
        ``y[k]`` is the element right after it. Both are empty when ``data``
        has no more than ``seq_length`` elements.
    """
    # The last usable start index is len(data) - seq_length - 1, so the range has
    # to stop at len(data) - seq_length; stopping one earlier silently discards
    # the final window/target pair.
    n_windows = max(len(data) - seq_length, 0)

    x = [data[i:i + seq_length] for i in range(n_windows)]
    y = [data[i + seq_length] for i in range(n_windows)]

    return np.array(x), np.array(y)

# Window length: each sample holds `seq_length` consecutive values and the target is
# the value that follows them.
seq_length = 4

# Scale the deaths series (as a single-feature column) with a fresh MinMaxScaler;
# this rebinds `sc`.
sc = MinMaxScaler()
training_data_dea = sc.fit_transform(tr_dea.values.reshape(-1, 1))

x_dea, y_dea = sliding_windows(training_data_dea, seq_length)

# The "data" and "train" tensors are built from the same windows (no hold-out split).
dataX_dea, dataY_dea = (Variable(torch.Tensor(np.array(arr))) for arr in (x_dea, y_dea))
trainX_dea, trainY_dea = (Variable(torch.Tensor(np.array(arr))) for arr in (x_dea, y_dea))

In [None]:
class LSTM(nn.Module):
    """LSTM regressor: an ``nn.LSTM`` followed by a fully-connected layer.

    The final hidden state of the last LSTM layer is fed through a linear layer
    to produce ``num_classes`` values per input sequence.

    Args:
        num_classes: Number of output values per sequence.
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        seq_length: Optional window length. Stored for reference only; it is
            not used by ``forward``.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers,
                 seq_length=None):
        super(LSTM, self).__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        # Taken from the constructor argument so the class does not depend on a
        # notebook-level global being defined before instantiation.
        self.seq_length = seq_length

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return predictions of shape ``(batch, num_classes)``.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)`` (``batch_first=True``).
        """
        # Zero initial hidden/cell states on the same device and dtype as the input.
        h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                          device=x.device, dtype=x.dtype)
        c_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                          device=x.device, dtype=x.dtype)

        _, (h_out, _) = self.lstm(x, (h_0, c_0))

        # h_out is (num_layers, batch, hidden_size). Only the last layer's state is
        # used: flattening all layers with view(-1, hidden_size) would produce
        # num_layers * batch rows and break the batch dimension for num_layers > 1.
        out = self.fc(h_out[-1])

        return out

### Training the model

In [None]:
# Hyper-parameters for the death-count model.
num_epochs, learning_rate = 2000, 0.01
input_size, hidden_size, num_layers, num_classes = 1, 2, 1, 1

# A fresh model; this rebinds `lstm`.
lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)

# Full-batch training: each epoch is one forward/backward pass over all windows.
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = lstm(trainX_dea)

    # Mean squared error between predictions and targets.
    loss = criterion(outputs, trainY_dea)
    loss.backward()
    optimizer.step()

    # Report progress every 100 epochs.
    if not epoch % 100:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

### Predicting the results

In [None]:
dataX_dea.shape

In [None]:
# Random integer sample in the inclusive range [1, 101], shaped (42, 4, 1).
# np.random.random_integers is deprecated; np.random.randint has an exclusive upper
# bound, hence 102 to keep the same range.
dea_pred_sample = torch.tensor(np.random.randint(1, 102, size=(42, 4, 1)))

In [None]:
dea_pred_sample.shape

In [None]:
dea_pred_sample

In [None]:
# Switch the model to evaluation mode and predict for every window.
lstm.eval()
train_predict_dea = lstm(dataX_dea)

# Convert to NumPy and undo the scaling with `sc`.
data_predict_dea = sc.inverse_transform(train_predict_dea.data.numpy())
dataY_plot_dea = sc.inverse_transform(dataY_dea.data.numpy())

# Actual values with the default line style, predictions as a dashed red line.
plt.figure(figsize=(20, 10))
plt.plot(dataY_plot_dea)
plt.plot(data_predict_dea, 'r--')
plt.suptitle('Death Numbers')
plt.legend(['Actual Number', 'Predicted Number'], loc='upper left')
plt.show()

## Plotting Everything

In [None]:
# Overlay actual vs. predicted curves for both series on one figure:
# infected in green, deaths in red; solid = actual, dashed = predicted.
plt.figure(figsize=(20,10))
plt.plot(dataY_plot_inf, 'g', label='Infected (actual)')
plt.plot(data_predict_inf, 'g--', label='Infected (predicted)')
plt.plot(dataY_plot_dea, 'r', label='Deaths (actual)')
plt.plot(data_predict_dea, 'r--', label='Deaths (predicted)')
# A figure has a single suptitle, so only one is set.
plt.suptitle('LSTM Predictions')
# One legend entry per curve; a two-item label list would only label the first two lines.
plt.legend()
plt.show()

In [None]:
df