In [None]:
!pip install PyGithub
import pandas as pd
import requests
import io
import numpy as np  
from datetime import date, timedelta
import re
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
from github import Github
import github
import torch
import torch.nn as nn
# Import tensor dataset & data loader
from torch.utils.data import TensorDataset, DataLoader
# Import nn.functional
import torch.nn.functional as F
import torch.optim as optim
from typing import Union, Tuple
import os
import sys
import time
from collections import OrderedDict
from sklearn.preprocessing import MinMaxScaler
from statistics import mean
from sklearn.metrics import mean_absolute_error,mean_squared_error, r2_score
import math
import random
import imageio
import pickle as pkl
#from sklearn.metrics import mean_absolute_percentage_error
matplotlib.style.use('seaborn')
%matplotlib inline
random.seed(42)
torch.manual_seed(42)
np.random.seed(42)

Collecting PyGithub
  Downloading PyGithub-1.55-py3-none-any.whl (291 kB)
[?25l[K     |█▏                              | 10 kB 29.2 MB/s eta 0:00:01[K     |██▎                             | 20 kB 9.0 MB/s eta 0:00:01[K     |███▍                            | 30 kB 7.8 MB/s eta 0:00:01[K     |████▌                           | 40 kB 7.2 MB/s eta 0:00:01[K     |█████▋                          | 51 kB 4.2 MB/s eta 0:00:01[K     |██████▊                         | 61 kB 4.4 MB/s eta 0:00:01[K     |███████▉                        | 71 kB 4.5 MB/s eta 0:00:01[K     |█████████                       | 81 kB 5.0 MB/s eta 0:00:01[K     |██████████                      | 92 kB 3.9 MB/s eta 0:00:01[K     |███████████▎                    | 102 kB 4.1 MB/s eta 0:00:01[K     |████████████▍                   | 112 kB 4.1 MB/s eta 0:00:01[K     |█████████████▌                  | 122 kB 4.1 MB/s eta 0:00:01[K     |██████████████▋                 | 133 kB 4.1 MB/s eta 0:00:01[K 

In [None]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1, response_variable_index=0, number_feature = 6):
  dataX, dataY = [], []
  for i in range(len(dataset)-look_back-1):
    a = dataset[i:(i+look_back),:number_feature]
    dataX.append(a)
    dataY.append(dataset[i + look_back, response_variable_index])
  return np.array(dataX), np.array(dataY)

In [None]:
def data_preparation(df, scaling_range=(0,1),time_step=5,number_feature=6, response_variable_index=3,data_split_ratio=0.8,Suffle=True,Eval=False):
    df = df.astype('float32')
    # normalize the dataset
    scaler = MinMaxScaler(feature_range=scaling_range)
    dataset = scaler.fit_transform(df.copy())
    X, Y = create_dataset(dataset, time_step,response_variable_index=response_variable_index, number_feature=number_feature)
    # split into train and test sets
    train_size = int(len(dataset) * data_split_ratio)
    test_size = len(dataset) - train_size
    trainX, testX = X[0:train_size,:], X[train_size:len(dataset),:]
    trainY, testY = Y[0:train_size], Y[train_size:len(dataset)]
    
    print(trainX.shape)
    # reshape input to be [samples, time steps, features]
    if not multi_feature:
      trainX = np.reshape(trainX, (trainX.shape[0],trainX.shape[1],1))
      testX = np.reshape(testX, (testX.shape[0], testX.shape[1],1))
    #print(trainX.shape)
    X_train=trainX
    X_test=testX
    y_train=trainY.reshape(-1,1)

    print(X_train.shape, y_train.shape)
    # summarize the data
    inputs = torch.from_numpy(X_train)
    targets = torch.from_numpy(y_train)
    # Define dataset
    train_ds = TensorDataset(inputs, targets)

    batch_size = 16
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=Suffle)

    y_test=testY.reshape(-1,1)
    
    inputs = torch.from_numpy(X_test)
    targets = torch.from_numpy(y_test)
    # Define dataset
    #test_ds = TensorDataset(inputs, targets)
    test_ds=(inputs, targets)
    if Eval:
      return (torch.from_numpy(X_train),trainY),test_ds,scaler
    #test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)
    return train_loader, test_ds,scaler

In [None]:
def fit(
    model: nn.Module, 
    optimizer: optim.Optimizer, criterion: nn,
    data: Union[DataLoader, Tuple[DataLoader]], 
    max_epochs: int, 
    cuda=True):
  use_test = False
  if isinstance(data, DataLoader):
    train_loader = data
  elif isinstance(data, tuple):
    if len(data) == 2:
      train_loader, test_loader = data
      if not isinstance(train_loader, DataLoader):
        raise TypeError(f'Expected 1st entry of type DataLoader, but got {type(train_loader)}!')
      #if not isinstance(test_loader, DataLoader):
       # raise TypeError(f'Expected 2nd entry of type DataLoader, but got {type(test_loader)}!')
      use_test = True
    else:
      raise ValueError(f'Expected tuple of length 2, but got {len(data)}!')
  
  
  #criterion = nn.L1Loss()
  model.train()
  losses = []
  test_losses=[]
  batch_total = len(train_loader)
  best_model=None
  min_loss=np.iinfo(0).max
  for epoch in range(max_epochs):
    #random.seed(42)
    #torch.manual_seed(42)
    #np.random.seed(42)
    running_loss=[]
    test_loss=[]
    for batch_idx, batch in enumerate(train_loader):
      x, y = batch
      if cuda:
        x, y = x.cuda(), y.cuda()
      output = model(x)
      loss = criterion(output, y)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      
      running_loss.append(loss.item())
      #rmse += torch.sqrt(criterion(yhat, y))
      #losses.append(loss.item())
      
    if use_test:
      model.eval()
      test_x, test_y =test_loader
      if cuda:
        test_x, test_y = test_x.cuda(), test_y.cuda()
      test_output = model(test_x)
      loss = criterion(test_output, test_y)
      test_loss.append(loss.item())
      #test_mae = criterion(test_output, test_y)
      test_x
      #predictions = scaler.inverse_transform(test_output.cpu().detach().numpy())
      #test_y = scaler.inverse_transform(test_y.cpu().detach().numpy())
      epoch_loss = mean_squared_error(test_y.cpu().detach().numpy(),test_output.cpu().detach().numpy())
      if epoch_loss<min_loss:
        min_loss = epoch_loss
        best_model= model.state_dict()
      test_losses.append(loss.item())
      model.train()
      if epoch%50==0:
        sys.stdout.write(f'\rEpoch: {epoch}/{max_epochs}  Loss: {mean(running_loss):.6f} Test loss: {epoch_loss:.6f}')
    else:
      sys.stdout.write(f'\rEpoch: {epoch}/{max_epochs}  Loss: {running_loss:.6f}' )
    epoch_loss =mean(running_loss)
    losses.append(epoch_loss)
  return (losses, test_losses, best_model)

In [None]:
def predict(model: nn.Module, data: DataLoader, cuda=True):
  predictions=None
  model.eval()
  for id,(x, y) in enumerate(data):
      if id==0:
        predictions=model(x)
      else:
        output = model(x)
        predictions=torch.vstack((predictions,output))
  return predictions
def plot_predictions(model, data_loader):
  
  predictions=predict(model, data_loader)

  train_y = y_train

  test_y=y_test
  predictions=predictions.cpu()
  plt.plot(range(len(train_y)),train_y, label='train data')
  plt.plot(np.arange(len(train_y),len(train_y)+len(test_y),1),test_y,label='Acutal')
  plt.plot(np.arange(len(train_y),len(train_y)+len(test_y),1),predictions.detach().numpy(),label='predictions')
  plt.legend()
  
def plot_loss(epochs,train_losses,test_losses,model_name):
  plt.rcParams['figure.figsize'] = [10, 5]
  plt.rcParams['figure.dpi'] = 100
  plt.plot(range(epochs),train_losses, label='train loss')
  plt.plot(range(epochs),test_losses,label='test loss')
  plt.title(model_name)
  plt.legend()
  plt.show()

In [None]:
class Flatten(nn.Module):
    def forward(self, x):
        x = x.view(x.size()[0], -1)
        return x

class CNNModel(nn.Module):
  def __init__(self, time_step,n_layers,vector_length,kernel_size):
    super(CNNModel, self).__init__()
    self.time_step=time_step
    self.n_layers=n_layers
    in_channels=1
    out_channels=16
    layers=[]
    dimension=vector_length
    for l in range(self.n_layers):
        cnn_1d_layer=nn.Conv1d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=1, padding="same")
        #dimension=dimension-kernel_size+2*1+1
        #if dimension>1:
        layers.append(cnn_1d_layer)
        if l%2==0:
          layers.append(nn.Tanh())
        else:
          layers.append(nn.ELU(inplace=True))
        in_channels=out_channels
        out_channels=out_channels*2
    layers.append(Flatten())
    layers.append(nn.Dropout(p=0.2))
    self.body = nn.Sequential(*layers)
    #print('dm',dimension,out_channels)
    out=int(vector_length*(out_channels/2))
    self.head=nn.Linear(out, 1)
  def forward(self, x):
    b, features, look_back = x.shape
    #print(b,n_steps, features)
    x = x.reshape([b,1,features*look_back])
    y = self.body(x)
    #print(y.shape)
    #print('re',y.view(len(y),-1).shape)
    return self.head(y.view(len(y),-1))

In [None]:
Shortlisted_States=['Karnataka','Maharashtra','Uttar-Pradesh','Kerala','Tamil-Nadu']#'Delhi'
results_cnn=[]
cnn_models=[]
for state in Shortlisted_States:
  best_models=[]
  df=pd.read_csv("https://raw.githubusercontent.com/sureshkuc/Data-Science-in-Life-Science-Project/main/Indian-States-Covid19-Datasets/"+state+".csv", parse_dates=["Date"]).drop(columns =["Unnamed: 0"])
  df_temp1 = df[df["Date"] <= "2020-06-18"]
  df_temp2=  df[df["Date"] > "2020-03-09"]
  df = pd.merge(df_temp1, df_temp2, how='inner')
  df = df.set_index("Date")
  df = df[['Confirmed', 'Recovered', 'Deceased', 'New_Confirmerd', 'New_Deaths', 'New_Recovered']]
  #print(df.describe())

  time_step=[5,7]
  Number_of_feature=[1,2,3,4,5,6]
  multi_feature=True
  min_error=np.iinfo(0).max
  cnn_best_model={}
  temp_result=[]
  for n_f in Number_of_feature:
    for t_s in time_step:
      train_loader, test_loader,scaler = data_preparation(df, scaling_range=(0,1),time_step=t_s,number_feature=n_f, response_variable_index=0,data_split_ratio=0.8)
      for n_layers in range(2,5,1):
        for kernel_size in range(1,5,1):
          
          max_epochs=10
          #random.seed(42)
          #torch.manual_seed(42)
          #np.random.seed(42)
          #CNN model with L1 loss
          #best_model=Call_CNN_model(state,dataset=(train_loader, test_loader), lr=1e-2,criterion=nn.L1Loss(),max_epochs=max_epochs)
          CNN_model =  CNNModel(t_s,n_layers,t_s*n_f,kernel_size)
          cuda=torch.cuda.is_available()
          if cuda:
            CNN_model = CNN_model.cuda()
          optimizer = optim.SGD(CNN_model.parameters(), lr=1e-2, momentum=0.9)
          train_losses,test_losses, best_model = fit(CNN_model, optimizer, nn.L1Loss(),(train_loader, test_loader), max_epochs=max_epochs,cuda=cuda)
          end = time.time()
          #print(f'\nTraining took {end-start}s!')
          #plot_loss(max_epochs,train_losses,test_losses,model_name='CNN for '+state)
          CNN_model =  CNNModel(t_s,n_layers,t_s*n_f,kernel_size)
          CNN_model.load_state_dict(best_model)
          CNN_model.eval()
          test_x,test_y=test_loader
          predictions=CNN_model(test_x)
          test_y=test_y.cpu().detach().numpy()
          predictions=predictions.cpu().detach().numpy()
          #predictions = scaler.inverse_transform(predictions)
          #target = scaler.inverse_transform(target)
          mae=mean_absolute_error(test_y,predictions)
          rmse=math.sqrt(mean_squared_error(test_y,predictions))
          #rmse=math.sqrt(mean_squared_error(test_y,predictions))
          r2s=r2_score(test_y,predictions)
          if rmse<min_error:
            min_error=rmse
            cnn_best_model=best_model
            temp_result=[state,n_f,t_s,n_layers,kernel_size,mae,rmse,r2s]
            
          #mape=mean_absolute_percentage_error(test_y,predictions)
          
          
          print(state,'n_f',n_f,'t_s',t_s,'n_layers',n_layers,'kernel_size',kernel_size,mae,rmse,r2s)
          results_cnn.append([state,n_f,t_s,n_layers,kernel_size,mae,rmse,r2s])
      #CNN_model =  CNNModel(t_s,n_layers,t_s*n_f,kernel_size)
      #CNN_model.load_state_dict(best_model)
  
  cnn_models.append(cnn_best_model) 

(80, 5, 1)
(80, 5, 1) (80, 1)
Epoch: 0/10  Loss: 0.127348 Test loss: 0.863917Karnataka n_f 1 t_s 5 n_layers 2 kernel_size 1 0.17734042 0.17996508469966746 -0.7292414583522189
Epoch: 0/10  Loss: 0.117509 Test loss: 0.389513Karnataka n_f 1 t_s 5 n_layers 2 kernel_size 2 0.14837871 0.1497894694669421 -0.19795807393314813
Epoch: 0/10  Loss: 0.123983 Test loss: 0.443245Karnataka n_f 1 t_s 5 n_layers 2 kernel_size 3 0.070157796 0.0725351764324143 0.7190840375287999
Epoch: 0/10  Loss: 0.095834 Test loss: 0.311133Karnataka n_f 1 t_s 5 n_layers 2 kernel_size 4 0.1280842 0.13198537480998615 0.06989773102133201
Epoch: 0/10  Loss: 0.193368 Test loss: 0.313259Karnataka n_f 1 t_s 5 n_layers 3 kernel_size 1 0.23457934 0.23736128112608187 -2.008145431361298
Epoch: 0/10  Loss: 0.101198 Test loss: 0.524054Karnataka n_f 1 t_s 5 n_layers 3 kernel_size 2 0.029290915 0.03435863754325443 0.9369694688784052
Epoch: 0/10  Loss: 0.070367 Test loss: 0.392736Karnataka n_f 1 t_s 5 n_layers 3 kernel_size 3 0.1625651

In [None]:
df_cnn = pd.DataFrame (results_cnn,columns=['State','Number_feature','Time_Step','number_layers','kernel_size','MAE','RMSE','R2_Score'])
df_cnn.head()

Unnamed: 0,State,Number_feature,Time_Step,number_layers,kernel_size,MAE,RMSE,R2_Score
0,Karnataka,1,5,2,1,0.17734,0.179965,-0.729241
1,Karnataka,1,5,2,2,0.148379,0.149789,-0.197958
2,Karnataka,1,5,2,3,0.070158,0.072535,0.719084
3,Karnataka,1,5,2,4,0.128084,0.131985,0.069898
4,Karnataka,1,5,3,1,0.234579,0.237361,-2.008145


In [None]:
#github_upload(folder_name='Indian-States-Model-Results',file_name='CNN_on_short_data.csv', file_data=df_cnn.to_csv())

Indian-States-Model-Results/CNN_on_short_data.csv CREATED


In [None]:
class MLP(nn.Module):
  def __init__(self, input_dim, layers,output_dim):
    super(MLP, self).__init__()
    self.input_dim=input_dim
    self.n_layers=layers
    self.output_dim=output_dim
    in_features=input_dim
    out_features=16
    layers=[]
    for l in range(self.n_layers):
        if l==(self.n_layers-1):
          layers.append(nn.Linear(in_features=in_features, out_features=self.output_dim))
        else:
          layers.append(nn.Linear(in_features=in_features, out_features=out_features))
        if l%2==0:
          layers.append(nn.Tanh())
        else:
          layers.append(nn.ELU(inplace=True))
        in_features=out_features
        out_features=int(out_features/2)
    self.body = nn.Sequential(*layers)
  def forward(self, x):
    b, n_steps, features = x.shape
    #print(b,n_steps, features)
    x = x.reshape([b,n_steps*features])
    return self.body(x)

In [None]:
Shortlisted_States=['Karnataka','Maharashtra','Uttar-Pradesh','Kerala','Tamil-Nadu']
results_mlp=[]
for state in Shortlisted_States:
  best_models=[]
  df=pd.read_csv("https://raw.githubusercontent.com/sureshkuc/Data-Science-in-Life-Science-Project/main/Indian-States-Covid19-Datasets/"+state+".csv", parse_dates=["Date"]).drop(columns =["Unnamed: 0"])
  df_temp1 = df[df["Date"] <= "2020-06-18"]
  df_temp2=  df[df["Date"] > "2020-03-09"]
  df = pd.merge(df_temp1, df_temp2, how='inner')
  df = df.set_index("Date")
  df = df[['Confirmed', 'Recovered', 'Deceased', 'New_Confirmerd', 'New_Deaths', 'New_Recovered']]
  #print(df.describe())

  time_step=[5,7]
  Number_of_feature=[1,2,3,4,5,6]
  multi_feature=True
  for n_f in Number_of_feature:
    for t_s in time_step:
      train_loader, test_loader,scalar = data_preparation(df, scaling_range=(0,1),time_step=t_s,number_feature=n_f, response_variable_index=0,data_split_ratio=0.8)
      for n_layers in range(1,3,1):
          print(state,'n_f',n_f,'t_s',t_s,'n_layers',n_layers,'Error',mae,rmse,r2s)
          max_epochs=10
          random.seed(42)
          torch.manual_seed(42)
          np.random.seed(42)
          #CNN model with L1 loss
          #best_model=Call_CNN_model(state,dataset=(train_loader, test_loader), lr=1e-2,criterion=nn.L1Loss(),max_epochs=max_epochs)
          fc_model = MLP(input_dim=n_f*t_s, layers=n_layers,output_dim=1)
          cuda=torch.cuda.is_available()
          if cuda:
            fc_model = fc_model.cuda()
          fc_optim = optim.SGD(fc_model.parameters(), lr=0.02, momentum=0.9)
          #fc_optim = optim.Adam(fc_model.parameters(), lr=1e-3)
          train_losses,test_losses,best_model = fit(fc_model, fc_optim,nn.L1Loss(),(train_loader, test_loader), max_epochs=max_epochs,cuda=cuda)
          #print(f'\nTraining took {end-start}s!')
          #plot_loss(max_epochs,train_losses,test_losses,model_name='CNN for '+state)
          fc_model = MLP(input_dim=n_f*t_s, layers=n_layers,output_dim=1)
          fc_model.load_state_dict(best_model)
          fc_model.eval()
          test_x,test_y=test_loader
          predictions=fc_model(test_x)
          test_y=test_y.cpu().detach().numpy()
          predictions=predictions.cpu().detach().numpy()
          mae=mean_absolute_error(test_y,predictions)
          rmse=math.sqrt(mean_squared_error(test_y,predictions))
          #mape=mean_absolute_percentage_error(test_y,predictions)
          r2s=r2_score(test_y,predictions)
          results_mlp.append([state,n_f,t_s,n_layers,mae,rmse,r2s])
          #print(state,n_f,t_s,n_layers,mae,rmse,r2s)

(80, 5, 1)
(80, 5, 1) (80, 1)
Karnataka n_f 1 t_s 5 n_layers 1 Error 0.26164466 0.2695217407318226 -3.2786214440857675
Epoch: 0/10  Loss: 0.047180 Test loss: 0.069669Karnataka n_f 1 t_s 5 n_layers 2 Error 0.07939138 0.08696174117479391 0.5962286175592951
Epoch: 0/10  Loss: 0.145948 Test loss: 0.253267(80, 7, 1)
(80, 7, 1) (80, 1)
Karnataka n_f 1 t_s 7 n_layers 1 Error 0.029453028 0.037209590375804284 0.9260754248222786
Epoch: 0/10  Loss: 0.122863 Test loss: 0.185007Karnataka n_f 1 t_s 7 n_layers 2 Error 0.11850298 0.12068727938643234 -0.22371952737497947
Epoch: 0/10  Loss: 0.094363 Test loss: 0.329318(80, 5, 2)
(80, 5, 2) (80, 1)
Karnataka n_f 2 t_s 5 n_layers 1 Error 0.038968544 0.051950321623808915 0.7732562502834583
Epoch: 0/10  Loss: 0.167944 Test loss: 0.160467Karnataka n_f 2 t_s 5 n_layers 2 Error 0.083640896 0.08538240545435664 0.6107614148283276
Epoch: 0/10  Loss: 0.107721 Test loss: 0.942938(80, 7, 2)
(80, 7, 2) (80, 1)
Karnataka n_f 2 t_s 7 n_layers 1 Error 0.033020604 0.03

In [None]:
df_mlp = pd.DataFrame (results_mlp,columns=['State','Number_feature','Time_Step','number_layers','MAE','RMSE','R2_Score'])
df_mlp

Unnamed: 0,State,Number_feature,Time_Step,number_layers,MAE,RMSE,R2_Score
0,Karnataka,1,5,1,0.079391,0.086962,0.596229
1,Karnataka,1,5,2,0.029453,0.037210,0.926075
2,Karnataka,1,7,1,0.118503,0.120687,-0.223720
3,Karnataka,1,7,2,0.038969,0.051950,0.773256
4,Karnataka,2,5,1,0.083641,0.085382,0.610761
...,...,...,...,...,...,...,...
115,Tamil-Nadu,5,7,2,0.095963,0.100983,0.399364
116,Tamil-Nadu,6,5,1,0.076855,0.087409,0.640635
117,Tamil-Nadu,6,5,2,0.065932,0.077363,0.718493
118,Tamil-Nadu,6,7,1,0.051419,0.064032,0.758505


In [None]:
#github_upload(folder_name='Indian-States-Model-Results',file_name='MLP_on_short_data.csv', file_data=df_mlp.to_csv())

Indian-States-Model-Results/MLP_on_short_data.csv UPDATED


In [None]:
class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim,  output_dim,num_layers, seq_length):
        super(LSTM, self).__init__()
        # Hidden dimensions
        self.hidden_dim = hidden_dim
        self.seq_length=seq_length
        # Number of hidden layers
        self.num_layers = num_layers

        # batch_first=True causes input/output tensors to be of shape
        # (batch_dim, seq_dim, feature_dim)
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.relu = nn.ELU()
        # Readout layer
        print(output_dim)
        self.fc = nn.Linear(hidden_dim*self.seq_length, output_dim)

    def forward(self, x):
        batch_size, seq_len, _ = x.size()
        # Initialize hidden state with zeros
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()

        # Initialize cell state
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()

        # We need to detach as we are doing truncated backpropagation through time (BPTT)
        # If we don't, we'll backprop all the way to the start even after going through another batch
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        x = out.contiguous().view(batch_size,-1)
        # Index hidden state of last time step
        # out.size() --> 100, 32, 100
        # out[:, -1, :] --> 100, 100 --> just want last time step hidden states! 
        out = self.fc(self.relu(x)) 
        # out.size() --> 100, 10
        return out
    


In [None]:
Shortlisted_States=['Karnataka','Maharashtra','Uttar-Pradesh','Kerala','Tamil-Nadu']
results_lstm=[]
lstm_models=[]
for state in Shortlisted_States:
  best_models=[]
  df=pd.read_csv("https://raw.githubusercontent.com/sureshkuc/Data-Science-in-Life-Science-Project/main/Indian-States-Covid19-Datasets/"+state+".csv", parse_dates=["Date"]).drop(columns =["Unnamed: 0"])
  df_temp1 = df[df["Date"] <= "2020-06-18"]
  df_temp2=  df[df["Date"] > "2020-03-09"]
  df = pd.merge(df_temp1, df_temp2, how='inner')
  df = df.set_index("Date")
  df = df[['Confirmed', 'Recovered', 'Deceased', 'New_Confirmerd', 'New_Deaths', 'New_Recovered']]
  #print(df.describe())

  time_step=[5,7]
  Number_of_feature=[1,2,3,4,5,6]
  multi_feature=True
  output_dim=1
  min_error=np.iinfo(0).max
  lstm_best_model={}
  for n_f in Number_of_feature:
    for t_s in time_step:
      train_loader, test_loader,scaler = data_preparation(df, scaling_range=(0,1),time_step=t_s,number_feature=n_f, response_variable_index=0,data_split_ratio=0.8, Suffle=False)
      for n_layers in range(1,2,1):
        for n_hidden_nodes in [1,8,16,32]:
          #random.seed(42)
          #torch.manual_seed(42)
          #np.random.seed(42)
          max_epochs=50
          
          #CNN model with L1 loss
          #best_model=Call_CNN_model(state,dataset=(train_loader, test_loader), lr=1e-2,criterion=nn.L1Loss(),max_epochs=max_epochs)
          lstm_model = LSTM(n_f, n_hidden_nodes, output_dim, n_layers,t_s)
          #if torch.cuda.is_available():
          #stm_model = lstm_model.cuda()
          #print(lstm_model)
          lstm_optim = optim.SGD(lstm_model.parameters(), lr=1e-2, momentum=0.9)
          #fc_optim = optim.Adam(fc_model.parameters(), lr=1e-3)
          train_losses,test_losses,best_model = fit(lstm_model, lstm_optim,nn.L1Loss(),(train_loader, test_loader), max_epochs=max_epochs,cuda=False)
          #print(f'\nTraining took {end-start}s!')
          #plot_loss(max_epochs,train_losses,test_losses,model_name='CNN for '+state)
          lstm_model = LSTM(n_f, n_hidden_nodes, output_dim, n_layers,t_s)
          lstm_model.load_state_dict(best_model)
          lstm_model.eval()
          test_x,test_y=test_loader
          predictions=lstm_model(test_x)
          test_y=test_y.cpu().detach().numpy()
          predictions=predictions.cpu().detach().numpy()
          mae=mean_absolute_error(test_y,predictions)
          rmse=math.sqrt(mean_squared_error(test_y,predictions))
          if rmse<min_error:
            min_error=rmse
            lstm_best_model=best_model
          #mape=mean_absolute_percentage_error(test_y,predictions)
          r2s=r2_score(test_y,predictions)
          results_lstm.append([state,n_f,t_s,n_layers,n_hidden_nodes,mae,rmse,r2s])
          print(state,'n_f',n_f,'t_s',t_s,'n_layers',n_layers,n_hidden_nodes,'Error',mae,rmse,r2s)
  lstm_models.append(lstm_best_model) 

(80, 5, 1)
(80, 5, 1) (80, 1)
1
Epoch: 0/50  Loss: 0.051016 Test loss: 0.3548371
Karnataka n_f 1 t_s 5 n_layers 1 1 Error 0.46585044 0.4809195102056081 -11.348774318628704
1
Epoch: 0/50  Loss: 0.134252 Test loss: 0.4259491
Karnataka n_f 1 t_s 5 n_layers 1 8 Error 0.32935384 0.33524273954618367 -5.000644141781347
1
Epoch: 0/50  Loss: 0.185966 Test loss: 0.5585121
Karnataka n_f 1 t_s 5 n_layers 1 16 Error 0.27925783 0.283907704451955 -3.303615172027354
1
Epoch: 0/50  Loss: 0.073047 Test loss: 0.4120711
Karnataka n_f 1 t_s 5 n_layers 1 32 Error 0.10916517 0.11160362187008635 0.33497855431661916
(80, 7, 1)
(80, 7, 1) (80, 1)
1
Epoch: 0/50  Loss: 0.152302 Test loss: 0.5054311
Karnataka n_f 1 t_s 7 n_layers 1 1 Error 0.7183716 0.7260731515575126 -43.29142351406288
1
Epoch: 0/50  Loss: 0.113735 Test loss: 0.5914151
Karnataka n_f 1 t_s 7 n_layers 1 8 Error 0.41436848 0.41801896194747623 -13.680837354244114
1
Epoch: 0/50  Loss: 0.091827 Test loss: 0.4426941
Karnataka n_f 1 t_s 7 n_layers 1 16 E

In [None]:
df_lstm = pd.DataFrame (results_lstm,columns=['State','Number_feature','Time_Step','number_layers','number_hiddinen_nodes','MAE','RMSE','R2_Score'])
df_lstm.head()

Unnamed: 0,State,Number_feature,Time_Step,number_layers,number_hiddinen_nodes,MAE,RMSE,R2_Score
0,Karnataka,1,5,1,1,0.46585,0.48092,-11.348774
1,Karnataka,1,5,1,8,0.329354,0.335243,-5.000644
2,Karnataka,1,5,1,16,0.279258,0.283908,-3.303615
3,Karnataka,1,5,1,32,0.109165,0.111604,0.334979
4,Karnataka,1,7,1,1,0.718372,0.726073,-43.291424


In [None]:
#github_upload(folder_name='Indian-States-Model-Results',file_name='LSTM_on_short_data.csv', file_data=df_lstm.to_csv())

Indian-States-Model-Results/LSTM_on_short_data.csv CREATED


In [None]:
class GRUNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers):
        super(GRUNet, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        self.gru = nn.GRU(input_dim, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        weight = next(self.parameters()).data
        h = weight.new(self.n_layers, x.size(0), self.hidden_dim).zero_()
        out, h = self.gru(x, h)
        out = self.fc(self.relu(out[:,-1]))
        return out

In [None]:
Shortlisted_States=['Karnataka','Maharashtra','Uttar-Pradesh','Kerala','Tamil-Nadu']
results_gru=[]
for state in Shortlisted_States:
  best_models=[]
  df=pd.read_csv("https://raw.githubusercontent.com/sureshkuc/Data-Science-in-Life-Science-Project/main/Indian-States-Covid19-Datasets/"+state+".csv", parse_dates=["Date"]).drop(columns =["Unnamed: 0"])
  df_temp1 = df[df["Date"] <= "2020-06-18"]
  df_temp2=  df[df["Date"] > "2020-03-09"]
  df = pd.merge(df_temp1, df_temp2, how='inner')
  df = df.set_index("Date")
  df = df[['Confirmed', 'Recovered', 'Deceased', 'New_Confirmerd', 'New_Deaths', 'New_Recovered']]
  #print(df.describe())

  time_step=[5,7]
  Number_of_feature=[1,2,3,4,5,6]
  multi_feature=True
  output_dim=1
  for n_f in Number_of_feature:
    for t_s in time_step:
      train_loader, test_loader,scaler = data_preparation(df, scaling_range=(0,1),time_step=t_s,number_feature=n_f, response_variable_index=0,data_split_ratio=0.8, Suffle=False)
      for n_layers in range(1,3,1):
        for n_hidden_nodes in [1,5,8,16,32]:
          
          max_epochs=10
          random.seed(42)
          torch.manual_seed(42)
          np.random.seed(42)
          #CNN model with L1 loss
          #best_model=Call_CNN_model(state,dataset=(train_loader, test_loader), lr=1e-2,criterion=nn.L1Loss(),max_epochs=max_epochs)
          GRUNet_model = GRUNet(n_f, n_hidden_nodes, output_dim, n_layers)
          #if torch.cuda.is_available():
          #stm_model = lstm_model.cuda()
          #gru_optim = optim.SGD(GRUNet_model.parameters(), lr=1e-3, momentum=0.9)
          gru_optim = optim.Adam(GRUNet_model.parameters(), lr=1e-2)
          train_losses,test_losses,best_model = fit(GRUNet_model, gru_optim,nn.L1Loss(),(train_loader, test_loader), max_epochs=max_epochs,cuda=False)
          #print(f'\nTraining took {end-start}s!')
          #plot_loss(max_epochs,train_losses,test_losses,model_name='CNN for '+state)
          GRUNet_model = GRUNet(n_f, n_hidden_nodes, output_dim, n_layers)
          GRUNet_model.load_state_dict(best_model)
          GRUNet_model.eval()
          test_x,test_y=test_loader
          predictions=GRUNet_model(test_x)
          test_y=test_y.cpu().detach().numpy()
          predictions=predictions.cpu().detach().numpy()
          mae=mean_absolute_error(test_y,predictions)
          rmse=math.sqrt(mean_squared_error(test_y,predictions))
          #mape=mean_absolute_percentage_error(test_y,predictions)
          r2s=r2_score(test_y,predictions)
          results_gru.append([state,n_f,t_s,n_layers,n_hidden_nodes,mae,rmse,r2s])
          print(state,'n_f',n_f,'t_s',t_s,'n_layers',n_layers,n_hidden_nodes,'Error',mae,rmse,r2s)

(80, 5, 1)
(80, 5, 1) (80, 1)
Epoch: 0/10  Loss: 0.350546 Test loss: 0.268804Karnataka n_f 1 t_s 5 n_layers 1 1 Error 0.7706389 0.784844825435557 -31.888698213431567
Epoch: 0/10  Loss: 0.351519 Test loss: 1.018753Karnataka n_f 1 t_s 5 n_layers 1 5 Error 0.6762188 0.6900670071559967 -24.425038855365866
Epoch: 0/10  Loss: 0.179443 Test loss: 0.388706Karnataka n_f 1 t_s 5 n_layers 1 8 Error 0.61530405 0.6286460859608453 -20.100441614368293
Epoch: 0/10  Loss: 0.066907 Test loss: 0.242868Karnataka n_f 1 t_s 5 n_layers 1 16 Error 0.48299846 0.49171791685098637 -11.909551724683348
Epoch: 0/10  Loss: 0.067334 Test loss: 0.396526Karnataka n_f 1 t_s 5 n_layers 1 32 Error 0.037934486 0.04169371901432225 0.9071845266941638
Epoch: 0/10  Loss: 0.543205 Test loss: 1.391785Karnataka n_f 1 t_s 5 n_layers 2 1 Error 0.7217745 0.7346344347522065 -27.815200145001874
Epoch: 0/10  Loss: 0.267883 Test loss: 0.208029Karnataka n_f 1 t_s 5 n_layers 2 5 Error 0.7212464 0.7341154735641432 -27.774507441872075
Epoch

In [None]:
df_gru = pd.DataFrame (results_gru,columns=['State','Number_feature','Time_Step','number_layers','number_hiddinen_nodes','MAE','RMSE','R2_Score'])
df_gru.head()

Unnamed: 0,State,Number_feature,Time_Step,number_layers,number_hiddinen_nodes,MAE,RMSE,R2_Score
0,Karnataka,1,5,1,1,0.770639,0.784845,-31.888698
1,Karnataka,1,5,1,5,0.676219,0.690067,-24.425039
2,Karnataka,1,5,1,8,0.615304,0.628646,-20.100442
3,Karnataka,1,5,1,16,0.482998,0.491718,-11.909552
4,Karnataka,1,5,1,32,0.037934,0.041694,0.907185


In [None]:
#github_upload(folder_name='Indian-States-Model-Results',file_name='GRU_on_short_data.csv', file_data=df_gru.to_csv())

Indian-States-Model-Results/GRU_on_short_data.csv UPDATED
