In [1]:
import numpy as np
import pandas as pd
import os, sys
from sklearn.neural_network import MLPRegressor
from sklearn.datasets import make_regression

# read COVID data of the first 3 months
data =  pd.read_csv('Downloads/covid_19_clean_complete.csv', delim_whitespace=False)
data = data[:17892]

In [2]:
# transform the 'time' attribute
data['Date']= pd.to_datetime(data['Date']) 
def create_time_features(df):
    df['date'] = df.index
    df['hour'] = df['date'].dt.hour
    df['dayofweek'] = df['date'].dt.dayofweek
    df['quarter'] = df['date'].dt.quarter
    df['month'] = df['date'].dt.month
    df['year'] = df['date'].dt.year
    df['dayofyear'] = df['date'].dt.dayofyear
    df['dayofmonth'] = df['date'].dt.day
    df['weekofyear'] = df['date'].dt.isocalendar().week
    
    X = df[['hour','dayofweek','quarter','month','year',
           'dayofyear','dayofmonth','weekofyear']]
    return X

In [3]:
data = data.set_index(['Date'])
create_time_features(data).head()


Unnamed: 0_level_0,hour,dayofweek,quarter,month,year,dayofyear,dayofmonth,weekofyear
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-22,0,2,1,1,2020,22,22,4
2020-01-22,0,2,1,1,2020,22,22,4
2020-01-22,0,2,1,1,2020,22,22,4
2020-01-22,0,2,1,1,2020,22,22,4
2020-01-22,0,2,1,1,2020,22,22,4


In [4]:
# encode the label
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
def FunLabelEncoder(df):
    for c in df.columns:
        if df.dtypes[c] == object:
            le.fit(df[c].astype(str))
            df[c] = le.transform(df[c].astype(str))
    return df

In [5]:
data = FunLabelEncoder(data)


In [6]:
# data preview
data

Unnamed: 0_level_0,Province/State,Country/Region,Lat,Long,Confirmed,Deaths,Recovered,Active,WHO Region,date,hour,dayofweek,quarter,month,year,dayofyear,dayofmonth,weekofyear
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2020-01-22,78,0,33.939110,67.709953,0,0,0,0,2,2020-01-22,0,2,1,1,2020,22,22,4
2020-01-22,78,1,41.153300,20.168300,0,0,0,0,3,2020-01-22,0,2,1,1,2020,22,22,4
2020-01-22,78,2,28.033900,1.659600,0,0,0,0,0,2020-01-22,0,2,1,1,2020,22,22,4
2020-01-22,78,3,42.506300,1.521800,0,0,0,0,3,2020-01-22,0,2,1,1,2020,22,22,4
2020-01-22,78,4,-11.202700,17.873900,0,0,0,0,0,2020-01-22,0,2,1,1,2020,22,22,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-03-30,78,89,48.019600,66.923700,302,1,21,280,3,2020-03-30,0,0,1,3,2020,90,30,14
2020-03-30,78,90,-0.023600,37.906200,50,1,1,48,0,2020-03-30,0,0,1,3,2020,90,30,14
2020-03-30,78,155,35.907757,127.766922,9661,158,5228,4275,5,2020-03-30,0,0,1,3,2020,90,30,14
2020-03-30,78,92,29.311660,47.481766,266,0,72,194,2,2020-03-30,0,0,1,3,2020,90,30,14


In [7]:
# get X
x = data[['Country/Region','Lat','Long','month','dayofyear','dayofmonth','weekofyear']].copy()
# x = data[['Country/Region','Lat','Long']]
x

Unnamed: 0_level_0,Country/Region,Lat,Long,month,dayofyear,dayofmonth,weekofyear
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-01-22,0,33.939110,67.709953,1,22,22,4
2020-01-22,1,41.153300,20.168300,1,22,22,4
2020-01-22,2,28.033900,1.659600,1,22,22,4
2020-01-22,3,42.506300,1.521800,1,22,22,4
2020-01-22,4,-11.202700,17.873900,1,22,22,4
...,...,...,...,...,...,...,...
2020-03-30,89,48.019600,66.923700,3,90,30,14
2020-03-30,90,-0.023600,37.906200,3,90,30,14
2020-03-30,155,35.907757,127.766922,3,90,30,14
2020-03-30,92,29.311660,47.481766,3,90,30,14


In [9]:
# get prediction target y1: the Confirmed cases
y1 = data[['Confirmed']].copy()
y1

Unnamed: 0_level_0,Confirmed
Date,Unnamed: 1_level_1
2020-01-22,0
2020-01-22,0
2020-01-22,0
2020-01-22,0
2020-01-22,0
...,...
2020-03-30,302
2020-03-30,50
2020-03-30,9661
2020-03-30,266


In [10]:
# get prediction target y1: the Deaths cases
y2 = data[['Deaths']].copy()
y2

Unnamed: 0_level_0,Deaths
Date,Unnamed: 1_level_1
2020-01-22,0
2020-01-22,0
2020-01-22,0
2020-01-22,0
2020-01-22,0
...,...
2020-03-30,1
2020-03-30,1
2020-03-30,158
2020-03-30,0


In [11]:
# split data into training and testing data
x_train, x_test, y_train, y_test = x[:13419], x[13419:], y1[:13419], y1[13419:]

In [12]:
# normalizing data before training
from sklearn import preprocessing
scaler = preprocessing.StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)


In [17]:
# A simple baseline: the Multi-layer Perceptron (MLP)
# Case 1: 'Confirmed' prediction

from sklearn.neural_network import MLPRegressor
regr = MLPRegressor(hidden_layer_sizes=(100,), learning_rate = 'adaptive', 
                    learning_rate_init=0.01,
                    tol=1e-6, epsilon=1e-08, max_iter=20000).fit(x_train, np.array(y_train))
training_score = regr.score(x_train, y_train)
testing_score = regr.score(x_test, y_test)
print("Confrimed prediction: ", testing_score)


Confrimed prediction:  0.3602082396608287


In [18]:
# MLP
# Case 2: 'Deaths' prediction

x_train, x_test, y_train, y_test = x[:13419], x[13419:], y2[:13419], y2[13419:]
from sklearn.neural_network import MLPRegressor
regr = MLPRegressor(hidden_layer_sizes=(100,), learning_rate = 'adaptive', 
                    learning_rate_init=0.01,
                    tol=1e-6, epsilon=1e-08, max_iter=20000).fit(x_train, np.array(y_train))
regr.score(x_train, y_train)
testing_score = regr.score(x_test, y_test)
print("Death prediction: ", testing_score)


Death prediction:  0.421104680880085


In [70]:
# Deeper MLP with increased number depth and width
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import r2_score


class Deeper_MLP(nn.Module):
    def __init__(self, input_dim):
        super().__init__()

        self.input_fc = nn.Linear(input_dim, 250)
        self.hidden_fc = nn.Linear(250, 100)
        self.hidden_fc = nn.Linear(100, 50)
        self.hidden_fc = nn.Linear(50, 10)
        self.output_fc = nn.Linear(10, 1) # the number of output is 1 as we are doing regression, not classification here

    def forward(self, x):
        batch_size = x.shape[0]
        x = x.view(batch_size, -1)
        h_1 = F.relu(self.input_fc(x))
        h_2 = F.relu(self.hidden_fc(h_1))
        y_pred = self.output_fc(h_2)

        return y_pred

x_train, x_test, y_train, y_test = x[:13419], x[13419:], y1[:13419], y1[13419:] # confirmed cases
x_train = torch.tensor(x_train.values.astype(np.float32))
model = Deeper_MLP(x_train.shape[0])

# training
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.005)

for epoch in range(0, 200):
    model.train()
    optimizer.zero_grad()
    outputs = model(x_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

# testing
model.eval()
pred = model(x_test)
print("mse loss of predicting the Confirmed case: ", r2_score(pred, y_test))

###############Death Case######################

x_train, x_test, y_train, y_test = x[:13419], x[13419:], y2[:13419], y2[13419:] # death cases
x_train = torch.tensor(x_train.values.astype(np.float32))
model = Deeper_MLP(x_train.shape[0])

# training
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.005)

for epoch in range(0, 200):
    model.train()
    optimizer.zero_grad()
    outputs = model(x_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

# testing
model.eval()
pred = model(x_test)
print("mse loss of predicting the Death case: ", r2_score(pred, y_test))




mse loss of predicting the Confirmed case:  0.3691378395614254
mse loss of predicting the Deaths case:  0.4311231537232233


In [54]:
# DNN Model: ResNet
'''ResNet Regression in Tensorflow

Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385
'''
from tensorflow.keras import layers,models
from tensorflow.keras import callbacks
from keras.utils.vis_utils import plot_model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime
from sklearn.metrics import r2_score

def identity_block(input_tensor,units):
    x = layers.Dense(units)(input_tensor)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)

    x = layers.Dense(units)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)

    x = layers.Dense(units)(x)
    x = layers.BatchNormalization()(x)

    x = layers.add([x, input_tensor])
    x = layers.Activation('relu')(x)

    return x

def dens_block(input_tensor,units):
    x = layers.Dense(units)(input_tensor)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)

    x = layers.Dense(units)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)

    x = layers.Dense(units)(x)
    x = layers.BatchNormalization()(x)

    shortcut = layers.Dense(units)(input_tensor)
    shortcut = layers.BatchNormalization()(shortcut)

    x = layers.add([x, shortcut])
    x = layers.Activation('relu')(x)
    return x


def ResNetRegression():
    Res_input = layers.Input(shape=(7,))

    width = 16

    x = dens_block(Res_input,width)
    x = identity_block(x,width)
    x = identity_block(x,width)

    x = dens_block(x,width)
    x = identity_block(x,width)
    x = identity_block(x,width)
    
    x = dens_block(x,width)
    x = identity_block(x,width)
    x = identity_block(x,width)

    x = layers.BatchNormalization()(x)
    x = layers.Dense(1, activation='linear')(x)
    model = models.Model(inputs=Res_input, outputs=x)
    return model

model = ResNetRegression()
optimizer = keras.optimizers.Adam(learning_rate=0.01)
model.compile(loss='mse', optimizer=optimizer, metrics=['mse'])
model.summary()
x_train, x_test, y_train, y_test = x[:13419], x[13419:], y1[:13419], y1[13419:] # confirmed cases
train = model.fit(x_train, y_train, epochs=500, batch_size=64, verbose=2, 
                  callbacks=[callbacks.EarlyStopping(monitor='val_loss', patience=10,verbose=2, mode='auto')], 
                  validation_split=0.1)

pred = model.predict(x_test)

# use r2 score for evaluation consistency
print("mse loss of predicting the Confirmed case: ", r2_score(pred, y_test))


x_train, x_test, y_train, y_test = x[:13419], x[13419:], y2[:13419], y2[13419:] # death cases
train = model.fit(x_train, y_train, epochs=30, batch_size=64,
                  callbacks=[callbacks.EarlyStopping(monitor='val_loss', patience=5, mode='auto')], 
                  validation_split=0.1)

pred = model.predict(x_test)
# use r2 score for evaluation consistency
print("mse loss of predicting the Confirmed case: ", r2_score(pred, y_test))



mse loss of predicting the Confirmed case:  0.3794172395607263
mse loss of predicting the Deaths case:  0.4562003173829201
