In [1]:
import os
import time
import torch
import warnings 
import statistics

import numpy   as np 
import pandas  as pd
from pandas_profiling import ProfileReport

import seaborn as sns
from matplotlib import pyplot as plt

from sklearn.metrics       import mean_squared_log_error
from sklearn.preprocessing import StandardScaler

from tqdm.notebook    import tqdm
from IPython.display  import display_html 

from torch import nn
from torch import optim


Bad key "text.kerning_factor" on line 4 in
c:\users\beche\anaconda3\envs\covid-19-playground\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test_patch.mplstyle.
You probably need to get an updated matplotlibrc file from
https://github.com/matplotlib/matplotlib/blob/v3.1.3/matplotlibrc.template
or from the matplotlib source distribution


In [2]:
from core.networks  import BasicRecurrentPredictor
from core.nn        import WeightInitializer

from core.data      import compare_countries as cc
from core.data      import utils             as dataUtils

from core.nn        import loss

In [3]:
warnings.filterwarnings('ignore')
sns.set()

torch.manual_seed(123);
torch.cuda.manual_seed(123)
np.random.seed(123)

In [4]:
allData = pd.read_csv('assets/train.csv', parse_dates=['Date'])

In [5]:
allData = dataUtils.preprocess_data(allData)
allData.head()

Unnamed: 0,Id,Province_State,Country_Region,Date,ConfirmedCases,Fatalities
0,1,Afghanistan,Afghanistan,2020-01-22,0.0,0.0
1,2,Afghanistan,Afghanistan,2020-01-23,0.0,0.0
2,3,Afghanistan,Afghanistan,2020-01-24,0.0,0.0
3,4,Afghanistan,Afghanistan,2020-01-25,0.0,0.0
4,5,Afghanistan,Afghanistan,2020-01-26,0.0,0.0


In [6]:
COUNTRY = 'Romania'
WIN_SIZE = 10
OBS_SIZE = 5
TRAIN_UP_TO  = pd.to_datetime('2020-04-07')
DEVICE = 'cpu'

In [7]:
errorData  = cc.get_nearest_sequence(allData, COUNTRY, 500, 20 , loss.l1_norm_error)

In [8]:
confData = dataUtils.get_target_data(allData, errorData, 0.5, COUNTRY, target = 'confirmed')
deadData = dataUtils.get_target_data(allData, errorData, 0.5, COUNTRY, target = 'fatalities')

In [9]:
confScaler = dataUtils.get_scaler(confData, 'confirmed')
deadScaler = dataUtils.get_scaler(deadData, 'fatalities')

In [10]:
confTrainData = dataUtils.get_train_data(confData, 'confirmed', TRAIN_UP_TO, WIN_SIZE, 2, scaler = confScaler)
deadTrainData = dataUtils.get_train_data(deadData, 'fatalities', TRAIN_UP_TO, WIN_SIZE, 2, scaler = deadScaler)

In [11]:
w = WeightInitializer()

## Confirmed

In [12]:
# build the model
confModel = BasicRecurrentPredictor(
            # basic parameters
            chNo        = 1,                  # number of input features
            hidChNo     = 128,                # number of RNN cell hidden dimension
            future      = 0,
    
            # rnn cell type and depth
            rnnCell     = 'LSTMCell',         # RNN cell type (LSTM/GRU/RNN)
            rnnNoCells  = 2,                  # no of RNN cells

            # multi layer perceptron that is applied to the output of rnn
            mlpLayerCfg   = [64,64],           # layer hidden dims
            mlpActiv      = 'PReLU',           # inner activation of the mlp
            dropRate      = None,              # dropout rate for each layer of mlp
            normType      = None,              # normalization type
            mlpActivLast  = None,              # note that every timestamp in the sequence will be activated too
            returnFullSeq = True).build()

In [13]:
w.init_weights(confModel, 'normal_', {})

In [14]:
confTrainData = dataUtils.get_train_data(confData, 'confirmed', 
                                      step       = 2,
                                      winSize    = WIN_SIZE, 
                                      trainLimit = TRAIN_UP_TO, 
                                      scaler     = confScaler,
                                      shuffle    = True)
confTrainData.shape

torch.Size([213, 10, 1])

In [15]:
confLoss  = nn.MSELoss()
confOptim = optim.LBFGS(confModel.parameters(), 
                        lr             = 0.065, 
                        max_iter       = 50, 
                        tolerance_grad = 1e-7, 
                        history_size   = 75)

In [16]:
confModel.to(DEVICE);
confTrainData = confTrainData.to(DEVICE);

In [17]:
def conf_closure():
    confOptim.zero_grad()
    
    future  = WIN_SIZE - OBS_SIZE
    obsData = confTrainData[:,:OBS_SIZE]
    
    out  = confModel(obsData, future = future).reshape(-1,WIN_SIZE, 1)
    loss = confLoss(out, confTrainData)
    loss.backward()
#     nn.utils.clip_grad_value_(model.rnnModel.parameters(), 10e+1)
    
    return loss

In [18]:
for i in tqdm(range(10)):
    print('STEP: ', i)    
    loss = confOptim.step(conf_closure)
    print('Loss: ', loss.item())
    
    if torch.isnan(loss):
        raise ValueError('Loss is NaN')

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

STEP:  0
Loss:  729.4046630859375
STEP:  1
Loss:  0.7863373160362244
STEP:  2
Loss:  0.03704420104622841
STEP:  3
Loss:  0.009488021023571491
STEP:  4
Loss:  0.00649484246969223
STEP:  5
Loss:  0.005023346748203039
STEP:  6
Loss:  0.004035197664052248
STEP:  7
Loss:  0.0038527073338627815
STEP:  8
Loss:  0.003627027617767453
STEP:  9
Loss:  0.0033547335769981146



In [None]:
convValData = dataUtils.get_val_data()

In [19]:
model.eval()

# get figure
fig, ax = plt.subplots(1, 1, figsize = (9, 4))
fig.suptitle(COUNTRY + ' prediction')

# prepera data for prediction
d = confirmedTrain[confirmedTrain['Province_State'] == COUNTRY] 
dPred = confScaler.transform(d['ConfirmedCases'].values.reshape(-1, 1))
dPred = torch.Tensor(dPred).unsqueeze(0).to(DEVICE)

# make prediction
pred   = model(dPred[:,-OBS_SIZE:], future = 50).cpu().detach().numpy()
pred   = confScaler.inverse_transform(pred[0])

# plot prediction
predDate = pd.date_range(start = d['Date'].values[-OBS_SIZE], periods=pred.shape[0])              
sns.lineplot(y = pred, x = predDate, ax = ax)

# plot train data
dPred = confScaler.inverse_transform(dPred[0].cpu())
sns.lineplot(y = dPred[:, 0], x = d['Date'], ax = ax)

# plot validation
valData = confirmedVal[confirmedVal['Province_State'] == COUNTRY]['ConfirmedCases']
valDate = confirmedVal[confirmedVal['Province_State'] == COUNTRY]['Date']
sns.lineplot(y = valData, x =valDate, ax = ax);

ax.legend(['Train', 'Pred', 'Validation'])
ax.grid(True)

NameError: name 'model' is not defined

### Fatalities