# Import Package

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn import metrics
from sklearn.metrics import r2_score

# from sklearn.externals import joblib
import joblib

import pickle
import uuid



# Data

## Download Data

In [2]:
import os.path
if not os.path.isfile('Covid_Data.zip'):
  !gdown --id 1oPQDuqWnH9v72qJiiyqI5LXhdV0yGaPE
  !unzip 'Covid_Data.zip'


Downloading...
From: https://drive.google.com/uc?id=1oPQDuqWnH9v72qJiiyqI5LXhdV0yGaPE
To: /content/Covid_Data.zip
100% 30.3k/30.3k [00:00<00:00, 26.8MB/s]
Archive:  Covid_Data.zip
   creating: Input Data/
  inflating: Input Data/time_series_19-covid-Confirmed.csv  
  inflating: Input Data/time_series_19-covid-Deaths.csv  
  inflating: Input Data/time_series_19-covid-Recovered.csv  


In [3]:
descriptor = ("d", "r", "c")
filenames = [r'time_series_19-covid-Deaths.csv',
             r'time_series_19-covid-Recovered.csv',
             r'time_series_19-covid-Confirmed.csv']

print(zip(descriptor, filenames))



<zip object at 0x7fbd55221040>


## Load Data & Feature Enginering;dgk

In [4]:
dfs = []
DATAs = []
for descriptor_, fname in zip(descriptor, filenames):
    print("Working on:",'Input Data/'+ fname)
    df = pd.read_csv('Input Data/'+ fname)
    df.drop(columns=["Province/State","Country/Region"], inplace=True)
    DATA = np.array((0,0,0,0))

    
    for i, j in df.iterrows():
        # print(i, j)
        latitude = j['Lat']
        longitude = j['Long']
        
        for k,l in j.iteritems():
            if k=='Lat':
                continue
            if k=='Long':
                continue
            date = datetime.strptime(k, '%m/%d/%y')
            day = date - date.strptime("01/22/20", '%m/%d/%y')
            days = day.days
            #print(days)
            temp = np.array([j["Lat"], j['Long'], days, l])
            #print(temp)
            DATA = np.vstack((DATA,temp))  
            
    DATA = np.delete(DATA, 0,0)
    np.random.shuffle(DATA)
    
    dfs.append(df)
    DATAs.append(DATA)


Working on: Input Data/time_series_19-covid-Deaths.csv
Working on: Input Data/time_series_19-covid-Recovered.csv
Working on: Input Data/time_series_19-covid-Confirmed.csv


In [5]:
dfs[0]

Unnamed: 0,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,...,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20
0,15.0000,101.0000,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,1
1,36.0000,138.0000,0,0,0,0,0,0,0,0,...,6,6,6,6,6,6,10,10,15,16
2,1.2833,103.8333,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,28.1667,84.2500,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2.5000,112.5000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
401,42.5922,-83.3362,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
402,42.2791,-83.3362,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
403,39.5393,-75.6674,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
404,22.0000,-80.0000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
DATAs

[array([[ 30.3883, -95.6963,  23.    ,   0.    ],
        [ 38.0606, -84.4803,   0.    ,   0.    ],
        [ 43.6632, -96.8351,   0.    ,   0.    ],
        ...,
        [ 43.3266, -84.5361,   1.    ,   0.    ],
        [ 26.8946, -81.9098,   1.    ,   0.    ],
        [ 39.549 , 116.1306,  23.    ,   3.    ]]),
 array([[  28.1667,   84.25  ,   34.    ,    1.    ],
        [  23.7   ,  121.    ,   48.    ,   17.    ],
        [  41.1489,  -73.983 ,    9.    ,    0.    ],
        ...,
        [  45.547 , -123.1386,   50.    ,    0.    ],
        [  28.0339,    1.6596,   45.    ,    0.    ],
        [ -35.6751,  -71.543 ,   41.    ,    0.    ]]),
 array([[ 40.3888, -82.7649,  29.    ,   0.    ],
        [ 42.6712, -97.8722,  23.    ,   0.    ],
        [ 37.5777, 112.2922,  37.    , 133.    ],
        ...,
        [ 29.028 , -81.0755,   7.    ,   0.    ],
        [ 11.55  , 104.9167,   1.    ,   0.    ],
        [ 40.3888, -82.7649,  18.    ,   0.    ]])]

In [7]:
DATAs[0]

array([[ 30.3883, -95.6963,  23.    ,   0.    ],
       [ 38.0606, -84.4803,   0.    ,   0.    ],
       [ 43.6632, -96.8351,   0.    ,   0.    ],
       ...,
       [ 43.3266, -84.5361,   1.    ,   0.    ],
       [ 26.8946, -81.9098,   1.    ,   0.    ],
       [ 39.549 , 116.1306,  23.    ,   3.    ]])

# Models

## Confirmed

In [56]:
DATA = DATAs[2]
input_data = DATA[:,:-1]
print(input_data.shape)
output_data = DATA[:, -1]
print(output_data.shape)
input_train, input_test, output_train, output_test = train_test_split(input_data, output_data)

(20706, 3)
(20706,)


In [57]:
# Importing necessary libraries
from keras.models import Sequential
from keras.layers import Dense
from keras import callbacks
import tensorflow as tf

In [58]:
X_train, X_test, y_train, y_test = input_train, input_test, output_train, output_test

In [59]:
model = Sequential()

model.add(Dense(4, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='relu'))


In [60]:
#Early stopping
early_stopping = callbacks.EarlyStopping(
    min_delta=0.001,          # minimium amount of change to be considered an improvement
    patience=20,              # number of epochs to wait before deciding there is no significant improvement in training
    restore_best_weights=True,
)

In [61]:
from sklearn.metrics import r2_score

In [62]:
model.compile(
    loss='mean_squared_error', 
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
    )

In [63]:
history = model.fit(X_train, y_train, 
                    batch_size = 1024, 
                    epochs = 100000, 
                    validation_split=0.2,
                    callbacks=[early_stopping]
                    )

Epoch 1/100000
Epoch 2/100000
Epoch 3/100000
Epoch 4/100000
Epoch 5/100000
Epoch 6/100000
Epoch 7/100000
Epoch 8/100000
Epoch 9/100000
Epoch 10/100000
Epoch 11/100000
Epoch 12/100000
Epoch 13/100000
Epoch 14/100000
Epoch 15/100000
Epoch 16/100000
Epoch 17/100000
Epoch 18/100000
Epoch 19/100000
Epoch 20/100000
Epoch 21/100000


In [64]:
predictions = model.predict(X_test)
print('r2-score is', r2_score(y_pred=predictions, y_true=y_test))

r2-score is -0.0036864656157711284


In [65]:
predictions

array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]], dtype=float32)

In [66]:
# clf = MLPRegressor(hidden_layer_sizes = (4,4,4,4),
#                    activation='relu',
#                    solver='lbfgs',
#                    learning_rate='constant',
#                    learning_rate_init=0.1,
#                    alpha=0.0001,
#                    max_iter=10000000)
# clf.fit(input_train, output_train)

In [67]:
# clf.score(input_train, output_train)


In [68]:
# clf.score(input_test, output_test)

## Recovered

In [None]:
DATA = DATAs[1]
input_data = DATA[:,:-1]
print(input_data.shape)
output_data = DATA[:, -1]
print(output_data.shape)
input_train, input_test, output_train, output_test = train_test_split(input_data, output_data)

(20706, 3)
(20706,)


In [None]:
clf = MLPRegressor(hidden_layer_sizes = (4,4,4,4),
                   activation='relu',
                   solver='lbfgs',
                   learning_rate='constant',
                   learning_rate_init=0.5,
                   alpha=0.001,
                   max_iter=1000000000)
clf.fit(input_train, output_train)

MLPRegressor(alpha=0.001, hidden_layer_sizes=(4, 4, 4, 4),
             learning_rate_init=0.5, max_iter=1000000000, solver='lbfgs')

In [None]:
clf.score(input_test, output_test)

0.052095222238248806


## Dead

In [None]:
DATA = DATAs[0]
input_data = DATA[:,:-1]
print(input_data.shape)
output_data = DATA[:, -1]
print(output_data.shape)
input_train, input_test, output_train, output_test = train_test_split(input_data, output_data)

(20706, 3)
(20706,)


In [None]:
clf = MLPRegressor(hidden_layer_sizes = (4,4,4,4),
                   activation='relu',
                   solver='lbfgs',
                   learning_rate='adaptive',
                   learning_rate_init=0.01,
                   alpha=0.01,
                   max_iter=1000000000)
clf.fit(input_train, output_train)

MLPRegressor(alpha=0.01, hidden_layer_sizes=(4, 4, 4, 4),
             learning_rate='adaptive', learning_rate_init=0.01,
             max_iter=1000000000, solver='lbfgs')

In [None]:
clf.score(input_test, output_test)

-0.049701352027102574