In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

## Loading Data (Source: JHU)

In [2]:
confirmed_df  = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')

#recoveries_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')

In [3]:
confirmed_df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,5/4/20,5/5/20,5/6/20,5/7/20,5/8/20,5/9/20,5/10/20,5/11/20,5/12/20,5/13/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,2894,3224,3392,3563,3778,4033,4402,4687,4963,5226
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,803,820,832,842,850,856,868,872,876,880
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,4648,4838,4997,5182,5369,5558,5723,5891,6067,6253
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,750,751,751,752,752,754,755,755,758,760
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,35,36,36,36,43,43,45,45,45,45


In [4]:
cols = confirmed_df.keys()

confirmed  = confirmed_df.loc[:, cols[4]:]
confirmed.head()

NameError: name 'deaths_df' is not defined

In [None]:
def get_data(country):
    cases = [confirmed_df[confirmed_df['Country/Region']==country][i].sum() for i in dates]
    daily_increase = pd.Series(cases).diff().tolist()
    deaths = [deaths_df[deaths_df['Country/Region']==country][i].sum() for i in dates]
    daily_death = pd.Series(deaths).diff().tolist()
    recoveries  = [recoveries_df[recoveries_df['Country/Region']==country][i].sum() for i in dates]
    daily_recovery = pd.Series(recoveries).diff().tolist()
    return (cases, deaths, recoveries, daily_increase, daily_death, daily_recovery)

def country_plot(x, y1, y2, y3, y4, country):
    plt.figure(figsize=(16, 9))
    plt.plot(x, y1, linewidth=4)
    plt.title('{} Confirmed Cases'.format(country), size=30)
    plt.ylabel('# of Cases', size=30)
    plt.xticks(size=10)
    plt.xticks(rotation=90)
    plt.yticks(size=20)
    plt.grid(axis='y')
    plt.show()

    plt.figure(figsize=(16, 9))
    plt.bar(x, y2)
    plt.title('{} Daily Increases in Confirmed Cases'.format(country), size=30)
    plt.ylabel('# of Cases', size=30)
    plt.xticks(size=10)
    plt.xticks(rotation=90)
    plt.yticks(size=20)
    plt.grid(axis='y')
    plt.show()

    plt.figure(figsize=(16, 9))
    plt.bar(x, y3, color='darkgoldenrod')
    plt.title('{} Daily Increases in Deaths'.format(country), size=30)
    plt.ylabel('# of Cases', size=30)
    plt.xticks(size=10)
    plt.xticks(rotation=90)
    plt.yticks(size=20)
    plt.grid(axis='y')
    plt.show()

    plt.figure(figsize=(16, 9))
    plt.bar(x, y4, color='green')
    plt.title('{} Daily Increases in Recoveries'.format(country), size=30)
    plt.ylabel('# of Cases', size=30)
    plt.xticks(size=10)
    plt.xticks(rotation=90)
    plt.yticks(size=20)
    plt.grid(axis='y')
    plt.show()

In [None]:
dates = confirmed.keys()
country = 'Philippines'
philippine_cases, philippine_deaths, philippine_recoveries, daily_increase, daily_death,daily_recovery = get_data(country)
country_plot(dates, philippine_cases, daily_increase, daily_death, daily_recovery, country)

In [None]:
np.c_[np.array(range(1,len(dates)+1)), dates, philippine_cases]

## 1. LINEAR REGRESSION: Training on the Whole Data until May 11, 2020

In [None]:
from sklearn.linear_model import LinearRegression

lin_model = LinearRegression()
X = np.linspace(1, len(dates), len(dates)).reshape(-1, 1)
y = np.array(philippine_cases)

lin_model.fit(X, y)

def plot_linear():
    plt.figure(figsize=(14,8))
    plt.scatter(X, y, color='black')
    plt.plot(X, lin_model.predict(X), color='red', linewidth=4)
    plt.title( 'Linear Approximation' )
    plt.xlabel('Days starting: 1/22/20')
    plt.ylabel('PH COVID-19 Cases')
    plt.show()
    return

plot_linear()

## 2. Polynomial: Training on the Whole Data until May 11, 2020

In [None]:
from sklearn.preprocessing import PolynomialFeatures

poly_reg  = PolynomialFeatures(degree=2)
X_poly    = poly_reg.fit_transform(X)
pol_model = LinearRegression()

pol_model.fit(X_poly, y)

pol_predictions = pol_model.predict(poly_reg.fit_transform(X))
 
def plot_polymonial(predictions):
    plt.figure(figsize=(14,8))
    plt.scatter(X, y, color='black')
    plt.plot(X, predictions, color='red', linewidth=4)
    plt.title('Polynomial Approximation')
    plt.xlabel('Days starting: 1/22/20')
    plt.ylabel('PH Covid-19 Cases')
    plt.show()
    return


plot_polymonial(pol_predictions)

## 3. Training on Partial Data until April 30, 2020

In [None]:
np.c_[dates[:100], philippine_cases[:100]]

In [None]:
lin_partial = LinearRegression()
lin_partial.fit(X[:100], y[:100])
lin_predictions = lin_partial.predict(X)

def plot_linear(predictions):
    plt.figure(figsize=(14,8))
    plt.scatter(X, y, color='black')
    plt.plot(X, predictions, color='red', linewidth=4)
    plt.title( 'Linear Approximation' )
    plt.xlabel('Days starting: 1/22/20')
    plt.ylabel('PH COVID-19 Cases')
    plt.show()
    return

plot_linear(lin_predictions)

## 4. Linear Predictions (Cases Forecast)

In [None]:
print('       Date ', 'Actual_Cases', 'Predictions')
np.c_[dates[100:], philippine_cases[100:], np.ceil(lin_predictions[100:])]

### DATE: '5/9/20', ACTUAL: 10610, LINEAR PREDICTION: 6217

In [None]:
poly_reg    = PolynomialFeatures(degree=2)
X_poly      = poly_reg.fit_transform(X[:100])
pol_partial = LinearRegression()

pol_partial.fit(X_poly, y[:100])

pol_predictions = pol_partial.predict(poly_reg.fit_transform(X))
 
plot_polymonial(pol_predictions)

In [None]:
print('       Date ', 'Actual_Cases', 'Predictions')
np.c_[dates[100:], philippine_cases[100:], np.ceil(pol_predictions[100:])]

### DATE: '5/9/20', ACTUAL: 10610, PREDICTION: 10968  (NOT BAD FOR 2nd DEGREE POLYNOMIAL)

In [None]:
poly_reg    = PolynomialFeatures(degree=4)
X_poly      = poly_reg.fit_transform(X[:100])
pol_partial = LinearRegression()

pol_partial.fit(X_poly, y[:100])

pol_predictions = pol_partial.predict(poly_reg.fit_transform(X))
 
plot_polymonial(pol_predictions)

In [None]:
print('       Date ', 'Actual_Cases', 'Predictions')
np.c_[dates[100:], philippine_cases[100:], np.ceil(pol_predictions[100:])]

### DATE: '5/9/20', ACTUAL: 10610, PREDICTION: 10310  (NOT BAD FOR 4th DEGREE POLYNOMIAL)

## 5. BUT What if I USED THE OLD DATA UNTIL APRIL 8 ONLY ???

In [None]:
np.c_[dates[:78], philippine_cases[:78]]

In [None]:
poly_reg    = PolynomialFeatures(degree=2)
X_poly      = poly_reg.fit_transform(X[:78])

pol_partial = LinearRegression()
pol_partial.fit(X_poly, y[:78])
pol_predictions = pol_partial.predict(poly_reg.fit_transform(X))

plot_polymonial(pol_predictions)

In [None]:
print('       Date ', 'Actual_Cases', 'Predictions')
np.c_[dates[78:], philippine_cases[78:], np.ceil(pol_predictions[78:])]

### DATE: '5/9/20', ACTUAL: 10610, PREDICTION: 8310  (BAD PREDICTION)

## 6. How about selected data that are linear?

In [None]:
lin_partial = LinearRegression()
lin_partial.fit(X[65:100], y[65:100])
lin_predictions = lin_partial.predict(X[65:])

def plot_linear(predictions):
    plt.figure(figsize=(14,8))
    plt.scatter(X[65:], y[65:], color='black')
    plt.plot(X[65:], predictions, color='red', linewidth=4)
    plt.title( 'Linear Approximation' )
    plt.xlabel('Day 65 from 1/22/20 or Starting from 3/27/20')
    plt.ylabel('PH COVID-19 Cases')
    plt.show()
    return

plot_linear(lin_predictions)

In [None]:
print('       Date ', 'Actual_Cases', 'Predictions')
np.c_[dates[65:], philippine_cases[65:], np.ceil(lin_predictions)]

### DATE: '5/9/20', ACTUAL: 10610, PREDICTION: 10486  (GOOD PREDICTION!)

## 7. Support Vector Regression (https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html)

In [None]:
from sklearn.svm import SVR

svm_model = SVR(shrinking=True, kernel='poly',gamma=0.01, epsilon=1,degree=3, C=0.1)
svm_model.fit(X[:100], y[:100])
svm_predictions = svm_model.predict(X)


def plot_linear(predictions):
    plt.figure(figsize=(14,8))
    plt.scatter(X, y, color='black')
    plt.plot(X, predictions, color='red', linewidth=4)
    plt.title( 'SVM Regression (Train data: Until April 30, 2020 ONLY)')
    plt.xlabel('Days from 1/22/20')
    plt.ylabel('PH COVID-19 Cases')
    plt.show()
    return


plot_linear(svm_predictions)

In [None]:
print('       Date ', 'Actual_Cases', 'Predictions')
np.c_[dates[100:], philippine_cases[100:], np.ceil(svm_predictions[100:])]

## 8. Multi-layer Perceptron regressor (https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html)

In [None]:
from sklearn.neural_network import MLPRegressor


mlp_model = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50, max_iter=1000,
                    learning_rate='adaptive', random_state=np.random.seed(23))
mlp_model.fit(X[:100], y[:100])
mlp_predictions = mlp_model.predict(X)


plt.figure(figsize=(14,8))
plt.scatter(X, y, color='black', label= 'Actual Data')
plt.plot(X, mlp_predictions, 'r-', linewidth=3, label = 'Prediction')
plt.title( 'MLP Regression (Train data: Until April 30, 2020 ONLY)' )
plt.xlabel('Days from 1/22/20')
plt.ylabel('PH COVID-19 Cases')
plt.legend()
plt.show()


In [None]:
print('       Date ', 'Actual_Cases', 'Predictions')
np.c_[dates[100:], philippine_cases[100:], np.ceil(mlp_predictions[100:])]