In [77]:
import pandas as pd  
import numpy as np  
import matplotlib.pyplot as plt  
import seaborn as seabornInstance 
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from datetime import datetime
from datetime import timedelta

In [78]:
def get_dataset(url, country, recent_days_count):
    """Load the most recent daily totals for one country.

    The CSV at ``url`` is expected to be in wide form: the identifying
    columns ``Province/State``, ``Country/Region``, ``Lat`` and ``Long``
    followed by one column per date. Rows whose ``Country/Region`` equals
    ``country`` are selected and reshaped to one row per date.

    A country that is split over several ``Province/State`` rows is summed
    per date, so ``Day`` always numbers the date columns 1..N and ``Cases``
    is the country-wide total. For a country with a single row the result is
    the same as a plain melt of that row.

    Parameters
    ----------
    url : str or file-like
        Anything ``pandas.read_csv`` accepts.
    country : str
        Value to match exactly against the ``Country/Region`` column.
    recent_days_count : int
        Number of trailing days to keep (passed to ``DataFrame.tail``).

    Returns
    -------
    pandas.DataFrame
        Columns ``Date`` (the original column label), ``Day`` (1-based
        position of the date column) and ``Cases``.

    Raises
    ------
    ValueError
        If no row matches ``country``.
    """
    raw_dataset = pd.read_csv(url)
    country_rows = raw_dataset[raw_dataset['Country/Region'] == country]
    if country_rows.empty:
        # Summing an empty selection would silently yield an all-zero series.
        raise ValueError("No rows found for country %r" % (country,))

    # Everything that is left after dropping the identifying columns is a date column.
    date_columns = country_rows.drop(
        columns=['Province/State', 'Country/Region', 'Lat', 'Long']
    )
    totals = date_columns.sum(axis=0)

    dataset_all = pd.DataFrame({
        'Date': totals.index.tolist(),
        'Day': range(1, 1 + len(totals)),
        'Cases': totals.values,
    })
    return dataset_all.tail(recent_days_count)

In [79]:
def get_model(dataset):
    """Fit a linear regression of ``Cases`` on ``Day`` and report hold-out error.

    The rows of ``dataset`` are split 80/20 into train and test parts with a
    fixed ``random_state`` of 0. The regressor is fitted on the training part,
    and the mean absolute error, mean squared error and root mean squared
    error on the test part are printed.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must provide ``Day`` and ``Cases`` columns.

    Returns
    -------
    LinearRegression
        The fitted regressor.
    """
    # Both feature and target are kept as single-column 2-D arrays.
    days = dataset['Day'].values.reshape(-1, 1)
    cases = dataset['Cases'].values.reshape(-1, 1)

    days_train, days_test, cases_train, cases_test = train_test_split(
        days, cases, test_size=0.2, random_state=0
    )

    model = LinearRegression()
    model.fit(days_train, cases_train)

    cases_predicted = model.predict(days_test)
    print('Mean Absolute Error:', metrics.mean_absolute_error(cases_test, cases_predicted))
    squared_error = metrics.mean_squared_error(cases_test, cases_predicted)
    print('Mean Squared Error:', squared_error)
    print('Root Mean Squared Error:', np.sqrt(squared_error))
    return model


In [80]:
def plot_training_data(X_train, y_train, regressor):
    """Show the training points (gray) with the regressor's predictions as a red line.

    Parameters
    ----------
    X_train, y_train : array-like
        Training features and targets to scatter.
    regressor : object with ``predict``
        Used to compute the line drawn over ``X_train``.
    """
    fitted_values = regressor.predict(X_train)
    plt.scatter(X_train, y_train, color='gray')
    plt.plot(X_train, fitted_values, color='red', linewidth=2)
    plt.show()

In [81]:
def plot_test_data(X_test, y_test, y_pred):
    """Show the held-out points (gray) with their predictions as a red line.

    Parameters
    ----------
    X_test, y_test : array-like
        Test features and true targets to scatter.
    y_pred : array-like
        Predicted targets, plotted against ``X_test``.
    """
    plt.scatter(X_test, y_test, color='gray')
    plt.plot(X_test, y_pred, color='red', linewidth=2)
    plt.show()

In [82]:
def get_next_days(dataset, prediction_duration):
    """Return the day numbers that follow the last row of ``dataset``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must provide a ``Day`` column; only its last value is read.
    prediction_duration : int
        How many consecutive days to generate.

    Returns
    -------
    list
        ``prediction_duration`` consecutive day numbers starting at
        ``last Day + 1``. Empty when ``prediction_duration`` is zero or
        negative.
    """
    last_day = dataset['Day'].iloc[-1]
    # range() yields nothing for a non-positive duration, so no day is emitted then.
    return [last_day + offset for offset in range(1, prediction_duration + 1)]


In [83]:
def get_next_dates(dataset, prediction_duration):
    """Return the calendar dates that follow the last row of ``dataset``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must provide a ``Date`` column whose last value is a string in
        ``%m/%d/%y`` form (two-digit year), e.g. ``'4/6/20'``.
    prediction_duration : int
        How many consecutive dates to generate.

    Returns
    -------
    list of str
        ``prediction_duration`` consecutive dates formatted as ``%m/%d/%Y``
        (four-digit year), starting the day after the last ``Date``. Empty
        when ``prediction_duration`` is zero or negative.
    """
    # Parse once and do the arithmetic on datetime objects; only the output is formatted.
    last_date = datetime.strptime(dataset['Date'].iloc[-1], '%m/%d/%y')
    return [
        (last_date + timedelta(days=offset)).strftime('%m/%d/%Y')
        for offset in range(1, prediction_duration + 1)
    ]

In [84]:
def get_next_day_predictions(next_days, regressor):
    """Predict one integer value per entry of ``next_days``.

    Each day is passed to ``regressor.predict`` as a single-row,
    single-column input. Element ``[0, 0]`` of the result is converted with
    ``int()``, which drops the fractional part.

    Parameters
    ----------
    next_days : sequence
        Day numbers to predict for.
    regressor : object with ``predict``
        Must return a 2-D indexable result for a ``[[day]]`` input.

    Returns
    -------
    list of int
        Predictions in the same order as ``next_days``.
    """
    return [int(regressor.predict([[day]])[0, 0]) for day in next_days]


In [85]:
def perform_predictions(url, country, recent_days_count, prediction_duration):
    """Run the whole pipeline for one CSV and one country.

    Loads the recent rows via ``get_dataset``, fits a regressor via
    ``get_model`` (which prints its error metrics), and predicts the
    following ``prediction_duration`` days.

    Parameters
    ----------
    url : str
        Location of the time-series CSV, forwarded to ``get_dataset``.
    country : str
        Country to select, forwarded to ``get_dataset``.
    recent_days_count : int
        Number of trailing days used for fitting.
    prediction_duration : int
        Number of future days to predict.

    Returns
    -------
    tuple
        ``(next_dates, next_day_predictions)`` as produced by
        ``get_next_dates`` and ``get_next_day_predictions``.
    """
    recent_rows = get_dataset(url, country, recent_days_count)
    model = get_model(recent_rows)
    upcoming_days = get_next_days(recent_rows, prediction_duration)
    upcoming_dates = get_next_dates(recent_rows, prediction_duration)
    return upcoming_dates, get_next_day_predictions(upcoming_days, model)


In [86]:
def result_dataframe(country, next_dates, confirmed_cases_predictions, death_cases_predictions):
    """Combine the predictions into a single table.

    Parameters
    ----------
    country : str
        Repeated on every row of the ``Country`` column.
    next_dates : list
        One entry per predicted day.
    confirmed_cases_predictions : list
        Same length as ``next_dates``.
    death_cases_predictions : list
        Same length as ``next_dates``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Country``, ``Date``, ``Confirmed_Cases_Predications`` and
        ``Death_Cases_Predications``.
    """
    # The column names (including the "Predications" spelling) are kept as-is,
    # because code reading this frame addresses columns by these exact labels.
    #
    # No styling is applied here: `df.style.set_properties(...)` returns a new
    # Styler rather than modifying the frame, so calling it and discarding the
    # result has no effect on the returned DataFrame.
    return pd.DataFrame({
        'Country': country,
        'Date': next_dates,
        'Confirmed_Cases_Predications': confirmed_cases_predictions,
        'Death_Cases_Predications': death_cases_predictions,
    })

In [87]:
# Global time-series CSVs (confirmed cases and deaths) read from the master
# branch of the CSSEGISandData/COVID-19 GitHub repository.
confirmed_case_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
death_case_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"

In [89]:
# Prediction model for India
country = "India"

# Number of trailing days used to fit each model, and number of days to predict.
confirmed_recent_days = 5
death_recent_days_count = 5
prediction_duration = 5

confirmed_cases_next_dates, confirmed_cases_predictions = perform_predictions(
    confirmed_case_url, country, confirmed_recent_days, prediction_duration
)

death_cases_next_dates, death_cases_predictions = perform_predictions(
    death_case_url, country, death_recent_days_count, prediction_duration
)

# The table uses the dates from the confirmed-case run; death_cases_next_dates
# is not passed on. As the last expression, the frame is the cell's output.
result_dataframe(
    country,
    confirmed_cases_next_dates,
    confirmed_cases_predictions,
    death_cases_predictions,
)


Mean Absolute Error: 287.0
Mean Squared Error: 82369.0
Root Mean Squared Error: 287.0
Mean Absolute Error: 8.75
Mean Squared Error: 76.5625
Root Mean Squared Error: 8.75


Unnamed: 0,Country,Date,Confirmed_Cases_Predications,Death_Cases_Predications
0,India,04/07/2020,5016,141
1,India,04/08/2020,5565,156
2,India,04/09/2020,6114,172
3,India,04/10/2020,6663,187
4,India,04/11/2020,7212,203


In [92]:
# Prediction model for US
country = "US"

# Number of trailing days used to fit each model, and number of days to predict.
confirmed_recent_days = 8
death_recent_days_count = 8
prediction_duration = 5

confirmed_cases_next_dates, confirmed_cases_predictions = perform_predictions(
    confirmed_case_url, country, confirmed_recent_days, prediction_duration
)

death_cases_next_dates, death_cases_predictions = perform_predictions(
    death_case_url, country, death_recent_days_count, prediction_duration
)

# The table uses the dates from the confirmed-case run; death_cases_next_dates
# is not passed on. As the last expression, the frame is the cell's output.
result_dataframe(
    country,
    confirmed_cases_next_dates,
    confirmed_cases_predictions,
    death_cases_predictions,
)


Mean Absolute Error: 2861.019999999902
Mean Squared Error: 11625199.156000426
Root Mean Squared Error: 3409.574629774281
Mean Absolute Error: 190.90000000000146
Mean Squared Error: 41026.10000000114
Root Mean Squared Error: 202.54900641573423


Unnamed: 0,Country,Date,Confirmed_Cases_Predications,Death_Cases_Predications
0,US,04/07/2020,395054,11735
1,US,04/08/2020,424549,12855
2,US,04/09/2020,454043,13976
3,US,04/10/2020,483538,15096
4,US,04/11/2020,513032,16216
