<h1>This notebook tracks the spread of the novel coronavirus, also known as SARS-CoV-2. It is a contagious respiratory virus that first emerged in Wuhan in December 2019. On 2/11/2020, the disease was officially named COVID-19 by the World Health Organization. 
    <br>Data: <a href='https://github.com/CSSEGISandData/COVID-19'>https://github.com/CSSEGISandData/COVID-19</a>. A big thank you to Johns Hopkins for providing the data.
    <br>
    <br>Learn more from the <a href='https://www.who.int/emergencies/diseases/novel-coronavirus-2019'>WHO</a>
    <br>Learn more from the <a href='https://www.cdc.gov/coronavirus/2019-ncov'>CDC</a>
    <br>Map Visualizations from  <a href='https://gisanddata.maps.arcgis.com/apps/opsdashboard/index.html#/bda7594740fd40299423467b48e9ecf6'>Johns Hopkins</a>
    <br>Feel free to provide me with feedback. 
    <br> Last update: 3/19/2020 8:45 PM
    <br> Make sure you run the notebook to see the graphs better. Some diagrams are hard to see in the default view. 
    <br> <a href='https://github.com/therealcyberlord'>Source code is on my Github</a>
    
</h1>

<center><img src='https://newsfortomorrow.com/wp-content/uploads/2020/01/1578562454_Wuhan-pneumonia-New-coronavirus-related-to-SARS-idenitified.jpg'>
* Source: https://newsfortomorrow.com/wp-content/uploads/2020/01/1578562454_Wuhan-pneumonia-New-coronavirus-related-to-SARS-idenitified.jpg </center>

<h1>Keep strong, world! Stay safe everyone.</h1>







In [None]:
import numpy as np 
import matplotlib.pyplot as plt 
import matplotlib.colors as mcolors
import pandas as pd 
import random
import math
import time
from sklearn.linear_model import LinearRegression, BayesianRidge
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import RidgeCV
from sklearn.compose import TransformedTargetRegressor
from sklearn.preprocessing import QuantileTransformer, quantile_transform
import datetime
import operator
# Newer matplotlib releases ship the bundled seaborn look as 'seaborn-v0_8'
# (the bare 'seaborn' name was deprecated and later removed), so use whichever
# of the two names this installation actually provides.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')
%matplotlib inline 

Import the data (make sure you update this on a daily basis)

In [None]:
# Read the confirmed-cases time series CSV directly from the CSSE GitHub repository.
confirmed_df = pd.read_csv(
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_time_series/'
    'time_series_covid19_confirmed_global.csv'
)
# Column labels; later cells slice the per-date columns out of this index.
cols = confirmed_df.columns

In [None]:
# Columns from index 4 through the last one are treated as the per-date counts.
date_cols = slice(cols[4], cols[-1])
country = confirmed_df['Country/Region']
state = confirmed_df['Province/State']

# Per-date counts for every row, then for selected countries.
confirmed = confirmed_df.loc[:, date_cols]
confirmed_china = confirmed_df.loc[country == 'China', date_cols]
confirmed_US = confirmed_df.loc[country == 'US', date_cols]
confirmed_Korea = confirmed_df.loc[country == 'Korea, South', date_cols]
confirmed_Italy = confirmed_df.loc[country == 'Italy', date_cols]

# Day-over-day change per row. The dates run along the columns, so the
# difference has to be taken with axis=1; the default (axis=0) would subtract
# one region's row from the next instead. The first date column becomes NaN.
confirmed_differ = confirmed.diff(axis=1)
confirmed_china_differ = confirmed_china.diff(axis=1)
confirmed_US_differ = confirmed_US.diff(axis=1)
confirmed_Korea_differ = confirmed_Korea.diff(axis=1)
confirmed_Italy_differ = confirmed_Italy.diff(axis=1)

# US states, matched on 'Province/State'.
# NOTE(review): the *_global CSV may not contain US state rows at all; if these
# selections come back empty, every state-level series derived from them is
# zero -- confirm against the dataset.
confirmed_california = confirmed_df.loc[state == 'California', date_cols]
confirmed_newyork = confirmed_df.loc[state == 'New York', date_cols]
confirmed_newjersey = confirmed_df.loc[state == 'New Jersey', date_cols]
confirmed_washington = confirmed_df.loc[state == 'Washington', date_cols]

In [None]:
# Eyeball the US-level and New York selections.
for frame in (confirmed_US, confirmed_newyork):
    print(frame)

In [None]:
dates = confirmed.keys()


def _totals_and_daily_change(frame):
    """Sum `frame` over its rows for every date and derive the daily change.

    Returns a pair of lists aligned with `dates`:
      * the per-date total across all rows of `frame`;
      * each total minus the previous date's total (the first entry is the
        first total itself, i.e. the change from an implicit 0).
    """
    totals = [frame[day].sum() for day in dates]
    daily_change = [today - before for before, today in zip([0] + totals[:-1], totals)]
    return totals, daily_change


# One (totals, daily change) pair per region; the variable names are the ones
# the plotting and modelling cells further down read.
world_cases, world_cases_diff = _totals_and_daily_change(confirmed)
china_case, china_case_diff = _totals_and_daily_change(confirmed_china)
us_cases, us_cases_diff = _totals_and_daily_change(confirmed_US)
korea_cases, korea_cases_diff = _totals_and_daily_change(confirmed_Korea)
itlay_cases, itlay_cases_diff = _totals_and_daily_change(confirmed_Italy)
california_cases, california_cases_diff = _totals_and_daily_change(confirmed_california)
newyork_cases, newyork_cases_diff = _totals_and_daily_change(confirmed_newyork)
newjersey_cases, newjersey_cases_diff = _totals_and_daily_change(confirmed_newjersey)
washington_cases, washington_cases_diff = _totals_and_daily_change(confirmed_washington)

# Placeholders: nothing in this cell fills them, so they stay empty.
total_deaths = []
mortality_rate = []
recovery_rate = []
total_recovered = []
total_active = []

In [None]:
# Day index 0..len(dates)-1 as an (n, 1) column, the 2-D shape estimators take for X.
days_since_1_22 = np.arange(len(dates)).reshape(-1, 1)

# Turn the accumulated lists into (n, 1) column arrays as well.
world_cases = np.asarray(world_cases).reshape(-1, 1)
total_deaths = np.asarray(total_deaths).reshape(-1, 1)
total_recovered = np.asarray(total_recovered).reshape(-1, 1)

Future forecasting

In [None]:
# Number of days to extrapolate past the last observed date.
days_in_future = 20
# Day indices covering the observed range plus the forecast horizon, as an (n, 1) column.
future_forcast = np.arange(len(dates) + days_in_future).reshape(-1, 1)
# Day indices for the observed range only. Sliced by the number of observed
# dates rather than a literal -20, so it stays correct when days_in_future
# changes (including 0, where a negative slice would return nothing).
adjusted_dates = future_forcast[:len(dates)]

Convert the integer day offsets into date strings for better visualization

In [None]:
# Map every day index in the forecast range to an 'MM/DD/YYYY' string,
# counting forward from the first date of the series.
start = '1/22/2020'
start_date = datetime.datetime.strptime(start, '%m/%d/%Y')
future_forcast_dates = [
    (start_date + datetime.timedelta(days=offset)).strftime('%m/%d/%Y')
    for offset in range(len(future_forcast))
]

In [None]:
# Worldwide total confirmed cases against the day index.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, world_cases)
plt.title('No of Coronavirus Cases Worldwide Over Time', fontsize=30)
plt.xlabel('Days since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.xticks(fontsize=15)
plt.show()

In [None]:
# Worldwide day-over-day change in confirmed cases.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, world_cases_diff)
plt.title('No of New Coronavirus Cases Worldwide Over Time', fontsize=30)
plt.xlabel('Days since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.xticks(fontsize=15)
plt.show()

In [None]:
# China: total confirmed cases against the day index.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, china_case)
plt.title('No of Coronavirus Cases in China Over Time', fontsize=30)
plt.xlabel('Days since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.xticks(fontsize=15)
plt.show()

In [None]:
# China: day-over-day change in confirmed cases.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, china_case_diff)
plt.title('No of New Coronavirus Cases in China Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.xticks(fontsize=15)
plt.show()

In [None]:
# South Korea: total confirmed cases against the day index.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, korea_cases)
plt.title('No of Coronavirus Cases in South Korea Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.xticks(fontsize=15)
plt.show()

In [None]:
# South Korea: day-over-day change in confirmed cases.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, korea_cases_diff)
plt.title('No of New Coronavirus Cases in South Korea Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.xticks(fontsize=15)
plt.show()

In [None]:
# US: total confirmed cases against the day index ('#' labelling).
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, us_cases)
plt.title('# of Coronavirus Cases in US Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('# of Cases', fontsize=30)
plt.xticks(fontsize=15)
plt.show()

In [None]:
# US: total confirmed cases against the day index ('No of' labelling).
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, us_cases)
plt.title('No of Coronavirus Cases in US Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.xticks(fontsize=15)
plt.show()

In [None]:
# US: day-over-day change in confirmed cases.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, us_cases_diff)
plt.title('No of New Coronavirus Cases in US Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.xticks(fontsize=15)
plt.show()

In [None]:

# Italy: total confirmed cases against the day index.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, itlay_cases)
plt.title('No of Coronavirus Cases in Italy Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.xticks(fontsize=15)
plt.show()

In [None]:

# Italy: day-over-day change in confirmed cases.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, itlay_cases_diff)
plt.title('No of New Coronavirus Cases in Italy Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.xticks(fontsize=15)
plt.show()

In [None]:
# California: total confirmed cases against the day index.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, california_cases)
plt.title('No of Coronavirus Cases in California Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.xticks(fontsize=15)
plt.show()

In [None]:
# California: day-over-day change in confirmed cases.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, california_cases_diff)
plt.title('No of New Coronavirus Cases in California Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.xticks(fontsize=15)
plt.show()

In [None]:
# New York: total confirmed cases against the day index.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, newyork_cases)
plt.title('No of Coronavirus Cases in Newyork Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.xticks(fontsize=15)
plt.show()

In [None]:
# New York: day-over-day change in confirmed cases.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, newyork_cases_diff)
plt.title('No of New Coronavirus Cases in Newyork Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.xticks(fontsize=15)
plt.show()

In [None]:
# New Jersey: total confirmed cases against the day index.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, newjersey_cases)
plt.title('No of Coronavirus Cases in Newjersey Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.xticks(fontsize=15)
plt.show()

In [None]:
# New Jersey: day-over-day change in confirmed cases.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, newjersey_cases_diff)
plt.title('No of New Coronavirus Cases in Newjersey Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.xticks(fontsize=15)
plt.show()

In [None]:
# Washington: total confirmed cases against the day index.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, washington_cases)
plt.title('No of Coronavirus Cases in Washington Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.xticks(fontsize=15)
plt.show()

In [None]:
# Washington: day-over-day change in confirmed cases.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, washington_cases_diff)
plt.title('No of New Coronavirus Cases in Washington Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.xticks(fontsize=15)
plt.show()

In [None]:
# Polynomial-kernel SVR on the first 40 days of the worldwide series; the last
# 15% of that window is held out (chronological split, no shuffling).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22[:40], world_cases[:40], test_size=0.15, shuffle=False)
svm_confirmed = SVR(kernel='poly')
# world_cases is an (n, 1) column; SVR expects a 1-D target, so flatten it
# rather than relying on scikit-learn's implicit conversion (and its warning).
svm_confirmed.fit(X_train_confirmed, y_train_confirmed.ravel())
svm_pred = svm_confirmed.predict(future_forcast)

# Observed series (all but the final day) against the prediction over the forecast range.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates[:-1], world_cases[:-1])
plt.plot(future_forcast, svm_pred, linestyle='dashed', color='purple')
plt.title('No of Coronavirus Cases Worldwide Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('No of Cases', size=30)
plt.legend(['Confirmed Cases', 'SVM predictions'])
plt.xticks(size=15)
plt.show()

In [None]:
# Random forest on the worldwide series; the last 15% of days is held out
# (chronological split, no shuffling).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22, world_cases, test_size=0.15, shuffle=False)
regr = RandomForestRegressor(max_depth=2, random_state=0)
# world_cases is an (n, 1) column; pass a 1-D target to avoid scikit-learn's
# column-vector conversion warning.
regr.fit(X_train_confirmed, y_train_confirmed.ravel())
regr_pred = regr.predict(future_forcast)

# Observed series against the prediction over the forecast range.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates[:], world_cases[:])
plt.plot(future_forcast, regr_pred, linestyle='dashed', color='purple')
# Title previously read 'CasesWorldwide' (missing space).
plt.title('No of Coronavirus Cases Worldwide Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('No of Cases', size=30)
plt.legend(['Confirmed Cases', 'Random Forest predictions'])
plt.xticks(size=15)
plt.show()

In [None]:
# RidgeCV on a quantile-transformed (normal output) target, worldwide series;
# the last 15% of days is held out (chronological split, no shuffling).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22, world_cases, test_size=0.15, shuffle=False)
# The transformer cannot use more quantiles than there are training samples;
# cap the request so short series do not trigger the library's clamp warning.
regr = TransformedTargetRegressor(
    regressor=RidgeCV(),
    transformer=QuantileTransformer(n_quantiles=min(300, len(y_train_confirmed)), output_distribution='normal'),
)
regr.fit(X_train_confirmed, y_train_confirmed)
regr_pred = regr.predict(future_forcast)

# Observed series against the prediction over the forecast range.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates[:], world_cases[:])
plt.plot(future_forcast, regr_pred, linestyle='dashed', color='purple')
plt.title('No of Coronavirus Cases Worldwide Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('No of Cases', size=30)
plt.legend(['Confirmed Cases', ' Transformed Ridge Regression  predictions'])
plt.xticks(size=15)
plt.show()

In [None]:
# RidgeCV fitted on log1p(cases) and mapped back with expm1, worldwide series.
# Chronological split: the last 15% of days is held out.
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(
    days_since_1_22, world_cases, test_size=0.15, shuffle=False
)
regr = TransformedTargetRegressor(regressor=RidgeCV(), func=np.log1p, inverse_func=np.expm1)
regr.fit(X_train_confirmed, y_train_confirmed)
regr_pred = regr.predict(future_forcast)

# Observed series (solid) against the prediction over the forecast range (dashed).
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, world_cases)
plt.plot(future_forcast, regr_pred, linestyle='--', color='purple')
plt.title('No of Coronavirus Cases Worldwide Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.legend(['Confirmed Cases', ' logarithmic  and an exponential transformed Ridge Regression  predictions'])
plt.xticks(fontsize=15)
plt.show()

In [None]:
# Polynomial-kernel SVR on the China series; the last 15% of days is held out
# (chronological split, no shuffling).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22, china_case, test_size=0.15, shuffle=False)
# Build the estimator here instead of re-fitting whatever `svm_confirmed` was
# last bound to by another cell, so this cell does not depend on run order.
svm_confirmed = SVR(kernel='poly')
svm_confirmed.fit(X_train_confirmed, y_train_confirmed)
svm_pred = svm_confirmed.predict(future_forcast)

# Observed series against the prediction over the forecast range.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates[:], china_case[:])
plt.plot(future_forcast, svm_pred, linestyle='dashed', color='purple')
plt.title('No of Coronavirus Cases in China Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('No of Cases', size=30)
plt.legend(['Confirmed Cases', 'SVM predictions'])
plt.xticks(size=15)
plt.show()

In [None]:
# Random forest on the China series. Chronological split: the last 15% of days is held out.
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(
    days_since_1_22, china_case, test_size=0.15, shuffle=False
)
regr = RandomForestRegressor(max_depth=2, random_state=0)
regr.fit(X_train_confirmed, y_train_confirmed)
regr_pred = regr.predict(future_forcast)

# Observed series (solid) against the prediction over the forecast range (dashed).
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, china_case)
plt.plot(future_forcast, regr_pred, linestyle='--', color='purple')
plt.title('No of Coronavirus Cases in China Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.legend(['Confirmed Cases', 'Random Forest predictions'])
plt.xticks(fontsize=15)
plt.show()

In [None]:
# LogisticRegression on the China series. Chronological split: the last 15% of days is held out.
# NOTE(review): scikit-learn's LogisticRegression is a classifier, so each
# distinct case count is treated as a class label here -- confirm whether a
# logistic growth-curve fit was intended instead.
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(
    days_since_1_22, china_case, test_size=0.15, shuffle=False
)
regr = LogisticRegression(random_state=0)
regr.fit(X_train_confirmed, y_train_confirmed)
regr_pred = regr.predict(future_forcast)

# Observed series (solid) against the prediction over the forecast range (dashed).
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, china_case)
plt.plot(future_forcast, regr_pred, linestyle='--', color='purple')
plt.title('No of Coronavirus Cases in China Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.legend(['Confirmed Cases', 'Logistic Regression predictions'])
plt.xticks(fontsize=15)
plt.show()

In [None]:
# RidgeCV on a quantile-transformed (normal output) target, China series;
# the last 15% of days is held out (chronological split, no shuffling).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22, china_case, test_size=0.15, shuffle=False)
# The transformer cannot use more quantiles than there are training samples;
# cap the request so short series do not trigger the library's clamp warning.
regr = TransformedTargetRegressor(
    regressor=RidgeCV(),
    transformer=QuantileTransformer(n_quantiles=min(300, len(y_train_confirmed)), output_distribution='normal'),
)
regr.fit(X_train_confirmed, y_train_confirmed)
regr_pred = regr.predict(future_forcast)

# Observed series against the prediction over the forecast range.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates[:], china_case[:])
plt.plot(future_forcast, regr_pred, linestyle='dashed', color='purple')
plt.title('No of Coronavirus Cases in China Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('No of Cases', size=30)
plt.legend(['Confirmed Cases', ' Transformed Ridge Regression  predictions'])
plt.xticks(size=15)
plt.show()

In [None]:
# RidgeCV fitted on log1p(cases) and mapped back with expm1, China series.
# Chronological split: the last 15% of days is held out.
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(
    days_since_1_22, china_case, test_size=0.15, shuffle=False
)
regr = TransformedTargetRegressor(regressor=RidgeCV(), func=np.log1p, inverse_func=np.expm1)
regr.fit(X_train_confirmed, y_train_confirmed)
regr_pred = regr.predict(future_forcast)

# Observed series (solid) against the prediction over the forecast range (dashed).
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, china_case)
plt.plot(future_forcast, regr_pred, linestyle='--', color='purple')
plt.title('No of Coronavirus Cases in China Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.legend(['Confirmed Cases', ' logarithmic  and an exponential transformed Ridge Regression  predictions'])
plt.xticks(fontsize=15)
plt.show()

In [None]:
# Polynomial-kernel SVR on the South Korea series; the last 15% of days is
# held out (chronological split, no shuffling).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22, korea_cases, test_size=0.15, shuffle=False)
# Build the estimator here instead of re-fitting whatever `svm_confirmed` was
# last bound to by another cell, so this cell does not depend on run order.
svm_confirmed = SVR(kernel='poly')
svm_confirmed.fit(X_train_confirmed, y_train_confirmed)
svm_pred = svm_confirmed.predict(future_forcast)

# Observed series against the prediction over the forecast range.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates[:], korea_cases[:])
plt.plot(future_forcast, svm_pred, linestyle='dashed', color='purple')
plt.title('No of Coronavirus Cases in South Korea Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('No of Cases', size=30)
plt.legend(['Confirmed Cases', 'SVM predictions'])
plt.xticks(size=15)
plt.show()

In [None]:
# Random forest on the South Korea series. Chronological split: the last 15% of days is held out.
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(
    days_since_1_22, korea_cases, test_size=0.15, shuffle=False
)
regr = RandomForestRegressor(max_depth=2, random_state=0)
regr.fit(X_train_confirmed, y_train_confirmed)
regr_pred = regr.predict(future_forcast)

# Observed series (solid) against the prediction over the forecast range (dashed).
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, korea_cases)
plt.plot(future_forcast, regr_pred, linestyle='--', color='purple')
plt.title('No of Coronavirus Cases in South Korea Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.legend(['Confirmed Cases', 'Random Forest predictions'])
plt.xticks(fontsize=15)
plt.show()

In [None]:
# RidgeCV on a quantile-transformed (normal output) target, South Korea series;
# the last 15% of days is held out (chronological split, no shuffling).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22, korea_cases, test_size=0.15, shuffle=False)
# The transformer cannot use more quantiles than there are training samples;
# cap the request so short series do not trigger the library's clamp warning.
regr = TransformedTargetRegressor(
    regressor=RidgeCV(),
    transformer=QuantileTransformer(n_quantiles=min(300, len(y_train_confirmed)), output_distribution='normal'),
)
regr.fit(X_train_confirmed, y_train_confirmed)
regr_pred = regr.predict(future_forcast)

# Observed series against the prediction over the forecast range.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates[:], korea_cases[:])
plt.plot(future_forcast, regr_pred, linestyle='dashed', color='purple')
plt.title('No of Coronavirus Cases in South Korea Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('No of Cases', size=30)
plt.legend(['Confirmed Cases', ' Transformed Ridge Regression  predictions'])
plt.xticks(size=15)
plt.show()

In [None]:
# RidgeCV fitted on log1p(cases) and mapped back with expm1, South Korea series.
# Chronological split: the last 15% of days is held out.
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(
    days_since_1_22, korea_cases, test_size=0.15, shuffle=False
)
regr = TransformedTargetRegressor(regressor=RidgeCV(), func=np.log1p, inverse_func=np.expm1)
regr.fit(X_train_confirmed, y_train_confirmed)
regr_pred = regr.predict(future_forcast)

# Observed series (solid) against the prediction over the forecast range (dashed).
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, korea_cases)
plt.plot(future_forcast, regr_pred, linestyle='--', color='purple')
plt.title('No of Coronavirus Cases in South Korea Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.legend(['Confirmed Cases', ' logarithmic  and an exponential transformed Ridge Regression  predictions'])
plt.xticks(fontsize=15)
plt.show()

In [None]:
# Polynomial-kernel SVR on the US series from day index 55 onward; the last
# 15% of that window is held out (chronological split, no shuffling).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22[55:], us_cases[55:], test_size=0.15, shuffle=False)
# Build the estimator here instead of re-fitting whatever `svm_confirmed` was
# last bound to by another cell, so this cell does not depend on run order.
svm_confirmed = SVR(kernel='poly')
svm_confirmed.fit(X_train_confirmed, y_train_confirmed)
svm_pred = svm_confirmed.predict(future_forcast)

# Full observed series; the prediction is drawn only from day index 55 onward.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates[:], us_cases[:])
plt.plot(future_forcast[55:], svm_pred[55:], linestyle='dashed', color='purple')
plt.title('No of Coronavirus Cases in US Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('No of Cases', size=30)
plt.legend(['Confirmed Cases', 'SVM predictions'])
plt.xticks(size=15)
plt.show()

In [None]:
# Random forest on the US series from day index 45 onward.
# Chronological split: the last 15% of that window is held out.
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(
    days_since_1_22[45:], us_cases[45:], test_size=0.15, shuffle=False
)
regr = RandomForestRegressor(max_depth=2, random_state=0)
regr.fit(X_train_confirmed, y_train_confirmed)
regr_pred = regr.predict(future_forcast)

# Observed series (solid) against the prediction over the forecast range (dashed).
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, us_cases)
plt.plot(future_forcast, regr_pred, linestyle='--', color='purple')
plt.title('No of Coronavirus Cases in US Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.legend(['Confirmed Cases', 'Random Forest predictions'])
plt.xticks(fontsize=15)
plt.show()

In [None]:
# RidgeCV on a quantile-transformed (normal output) target, US series from day
# index 45 onward; the last 15% of that window is held out (no shuffling).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22[45:], us_cases[45:], test_size=0.15, shuffle=False)
# The transformer cannot use more quantiles than there are training samples;
# cap the request so short series do not trigger the library's clamp warning.
regr = TransformedTargetRegressor(
    regressor=RidgeCV(),
    transformer=QuantileTransformer(n_quantiles=min(300, len(y_train_confirmed)), output_distribution='normal'),
)
regr.fit(X_train_confirmed, y_train_confirmed)
regr_pred = regr.predict(future_forcast)

# Observed series against the prediction over the forecast range.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates[:], us_cases[:])
plt.plot(future_forcast, regr_pred, linestyle='dashed', color='purple')
plt.title('No of Coronavirus Cases in US Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('No of Cases', size=30)
plt.legend(['Confirmed Cases', ' Transformed Ridge Regression  predictions'])
plt.xticks(size=15)
plt.show()

In [None]:
# RidgeCV fitted on log1p(cases) and mapped back with expm1, US series from
# day index 59 onward. Chronological split: the last 15% of that window is held out.
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(
    days_since_1_22[59:], us_cases[59:], test_size=0.15, shuffle=False
)
regr = TransformedTargetRegressor(regressor=RidgeCV(), func=np.log1p, inverse_func=np.expm1)
regr.fit(X_train_confirmed, y_train_confirmed)
regr_pred = regr.predict(future_forcast)

# Observed series from day index 50 onward (solid); prediction over the whole forecast range (dashed).
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates[50:], us_cases[50:])
plt.plot(future_forcast, regr_pred, linestyle='--', color='purple')
plt.title('No of Coronavirus Cases in US Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.legend(['Confirmed Cases', ' logarithmic  and an exponential transformed Ridge Regression  predictions'])
plt.xticks(fontsize=15)
plt.show()

In [None]:
# Polynomial-kernel SVR on the California series from day index 50 onward; the
# last 15% of that window is held out (chronological split, no shuffling).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22[50:], california_cases[50:], test_size=0.15, shuffle=False)
# Build the estimator here instead of re-fitting whatever `svm_confirmed` was
# last bound to by another cell, so this cell does not depend on run order.
svm_confirmed = SVR(kernel='poly')
svm_confirmed.fit(X_train_confirmed, y_train_confirmed)
svm_pred = svm_confirmed.predict(future_forcast)

# Full observed series; the prediction is drawn only from day index 50 onward.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates[:], california_cases[:])
plt.plot(future_forcast[50:], svm_pred[50:], linestyle='dashed', color='purple')
plt.title('No of Coronavirus Cases in California Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('No of Cases', size=30)
plt.legend(['Confirmed Cases', 'SVM predictions'])
plt.xticks(size=15)
plt.show()

In [None]:
# Random forest on the California series from day index 50 onward.
# Chronological split: the last 15% of that window is held out.
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(
    days_since_1_22[50:], california_cases[50:], test_size=0.15, shuffle=False
)
regr = RandomForestRegressor(max_depth=2, random_state=0)
regr.fit(X_train_confirmed, y_train_confirmed)
regr_pred = regr.predict(future_forcast)

# Observed series (solid) against the prediction over the forecast range (dashed).
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, california_cases)
plt.plot(future_forcast, regr_pred, linestyle='--', color='purple')
plt.title('No of Coronavirus Cases in California Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.legend(['Confirmed Cases', 'Random Forest predictions'])
plt.xticks(fontsize=15)
plt.show()

In [None]:
# RidgeCV on a quantile-transformed (normal output) target, California series
# from day index 50 onward; the last 15% of that window is held out (no shuffling).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22[50:], california_cases[50:], test_size=0.15, shuffle=False)
# The transformer cannot use more quantiles than there are training samples;
# cap the request so short series do not trigger the library's clamp warning.
regr = TransformedTargetRegressor(
    regressor=RidgeCV(),
    transformer=QuantileTransformer(n_quantiles=min(300, len(y_train_confirmed)), output_distribution='normal'),
)
regr.fit(X_train_confirmed, y_train_confirmed)
regr_pred = regr.predict(future_forcast)

# Observed series against the prediction over the forecast range.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates[:], california_cases[:])
plt.plot(future_forcast, regr_pred, linestyle='dashed', color='purple')
plt.title('No of Coronavirus Cases in California Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('No of Cases', size=30)
plt.legend(['Confirmed Cases', ' Transformed Ridge Regression  predictions'])
plt.xticks(size=15)
plt.show()

In [None]:
# RidgeCV fitted on log1p(cases) and mapped back with expm1, using only the
# first 45 days of the California series.
# Chronological split: the last 15% of that window is held out.
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(
    days_since_1_22[:45], california_cases[:45], test_size=0.15, shuffle=False
)
regr = TransformedTargetRegressor(regressor=RidgeCV(), func=np.log1p, inverse_func=np.expm1)
regr.fit(X_train_confirmed, y_train_confirmed)
regr_pred = regr.predict(future_forcast)

# Observed series (solid) against the prediction over the forecast range (dashed).
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, california_cases)
plt.plot(future_forcast, regr_pred, linestyle='--', color='purple')
plt.title('No of Coronavirus Cases in California Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.legend(['Confirmed Cases', ' logarithmic  and an exponential transformed Ridge Regression  predictions'])
plt.xticks(fontsize=15)
plt.show()

In [None]:
# Polynomial-kernel SVR on the New York series from day index 55 onward; the
# last 15% of that window is held out (chronological split, no shuffling).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22[55:], newyork_cases[55:], test_size=0.15, shuffle=False)
# Build the estimator here instead of re-fitting whatever `svm_confirmed` was
# last bound to by another cell, so this cell does not depend on run order.
svm_confirmed = SVR(kernel='poly')
svm_confirmed.fit(X_train_confirmed, y_train_confirmed)
svm_pred = svm_confirmed.predict(future_forcast)

# Full observed series; the prediction is drawn only from day index 55 onward.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates[:], newyork_cases[:])
plt.plot(future_forcast[55:], svm_pred[55:], linestyle='dashed', color='purple')
plt.title('No of Coronavirus Cases in Newyork Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('No of Cases', size=30)
plt.legend(['Confirmed Cases', 'SVM predictions'])
plt.xticks(size=15)
plt.show()

In [None]:
# Random forest on the New York series from day index 50 onward.
# Chronological split: the last 15% of that window is held out.
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(
    days_since_1_22[50:], newyork_cases[50:], test_size=0.15, shuffle=False
)
regr = RandomForestRegressor(max_depth=2, random_state=0)
regr.fit(X_train_confirmed, y_train_confirmed)
regr_pred = regr.predict(future_forcast)

# Observed series (solid) against the prediction over the forecast range (dashed).
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, newyork_cases)
plt.plot(future_forcast, regr_pred, linestyle='--', color='purple')
plt.title('No of Coronavirus Cases in Newyork Over Time', fontsize=30)
plt.xlabel('Days Since 1/22/2020', fontsize=30)
plt.ylabel('No of Cases', fontsize=30)
plt.legend(['Confirmed Cases', 'Random Forest predictions'])
plt.xticks(fontsize=15)
plt.show()

In [None]:
# RidgeCV on a quantile-transformed (normal output) target, New York series
# from day index 50 onward; the last 15% of that window is held out (no shuffling).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22[50:], newyork_cases[50:], test_size=0.15, shuffle=False)
# The transformer cannot use more quantiles than there are training samples;
# cap the request so short series do not trigger the library's clamp warning.
regr = TransformedTargetRegressor(
    regressor=RidgeCV(),
    transformer=QuantileTransformer(n_quantiles=min(30, len(y_train_confirmed)), output_distribution='normal'),
)
regr.fit(X_train_confirmed, y_train_confirmed)
regr_pred = regr.predict(future_forcast)

# Observed series against the prediction over the forecast range.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates[:], newyork_cases[:])
plt.plot(future_forcast, regr_pred, linestyle='dashed', color='purple')
plt.title('No of Coronavirus Cases in Newyork Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('No of Cases', size=30)
plt.legend(['Confirmed Cases', ' Transformed Ridge Regression  predictions'])
plt.xticks(size=15)
plt.show()

In [None]:
# RidgeCV fitted on log1p(cases) and mapped back with expm1, New York series
# from day index 55 onward; the last 15% of that window is held out (no shuffling).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22[55:], newyork_cases[55:], test_size=0.15, shuffle=False)
regr = TransformedTargetRegressor(regressor=RidgeCV(),func=np.log1p,inverse_func=np.expm1)
regr.fit(X_train_confirmed, y_train_confirmed)
regr_pred = regr.predict(future_forcast)

plt.figure(figsize=(20, 12))
# Show the observed series from day index 50 onward. The slice used to be
# [:50], i.e. only the days *before* the window the model is trained on, so the
# observed curve never overlapped the period being modelled.
plt.plot(adjusted_dates[50:], newyork_cases[50:])
plt.plot(future_forcast, regr_pred, linestyle='dashed', color='purple')
plt.title('No of Coronavirus Cases in Newyork Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('No of Cases', size=30)
plt.legend(['Confirmed Cases', ' logarithmic  and an exponential transformed Ridge Regression  predictions'])
plt.xticks(size=15)
plt.show()

In [None]:
# Polynomial-kernel SVR on the worldwide series from day index 60 onward; the
# last 15% of that window is held out (chronological split, no shuffling).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22[60:], world_cases[60:], test_size=0.15, shuffle=False)
svm_confirmed = SVR(kernel='poly')
# world_cases is an (n, 1) column; SVR expects a 1-D target, so flatten it
# rather than relying on scikit-learn's implicit conversion (and its warning).
svm_confirmed.fit(X_train_confirmed, y_train_confirmed.ravel())
svm_pred = svm_confirmed.predict(future_forcast)

# Observed series from day index 50 onward; prediction from day index 59 onward.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates[50:], world_cases[50:])
plt.plot(future_forcast[59:], svm_pred[59:], linestyle='dashed', color='purple')
plt.title('No of Coronavirus Cases in World-wide Over Time', size=30)
# The x values are still day indices counted from 1/22/2020 (the plot merely
# starts at index 50), so the axis is labelled accordingly.
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('No of Cases', size=30)
plt.legend(['Confirmed Cases', 'SVM predictions'])
plt.xticks(size=15)
plt.show()

In [None]:
# RidgeCV fitted on log1p(cases) and mapped back with expm1, worldwide series
# from day index 45 onward; the last 15% of that window is held out (no shuffling).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22[45:],world_cases[45:], test_size=0.15, shuffle=False)
regr = TransformedTargetRegressor(regressor=RidgeCV(),func=np.log1p,inverse_func=np.expm1)
regr.fit(X_train_confirmed, y_train_confirmed)
regr_pred = regr.predict(future_forcast)

# Observed series from day index 50 onward; prediction from day index 59 onward.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates[50:], world_cases[50:])
plt.plot(future_forcast[59:], regr_pred[59:], linestyle='dashed', color='purple')
plt.title('No of Coronavirus Cases in World-wide Over Time', size=30)
# The x values are still day indices counted from 1/22/2020 (the plot merely
# starts at index 50), so the axis is labelled accordingly.
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('No of Cases', size=30)
plt.legend(['Confirmed Cases', ' logarithmic  and an exponential transformed Ridge Regression  predictions'])
plt.xticks(size=15)
plt.show()

In [None]:
import statsmodels.api as sm

# Exponential-growth fit for the worldwide series from day index 45 onward:
# ordinary least squares of log(cases) on the day index plus an intercept.
# The last 15% of that window is held out (chronological split, no shuffling).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22[45:], world_cases[45:], test_size=0.15, shuffle=False)
X_train_confirmed = sm.add_constant(X_train_confirmed)
y_train_confirmed = np.log(y_train_confirmed)
# Kept under its own name so the SVR bound to `svm_confirmed` is not replaced
# by an OLS model.
ols_confirmed = sm.OLS(y_train_confirmed, X_train_confirmed)
mod = ols_confirmed.fit()
# print(mod.summary())  # uncomment to inspect the regression summary
X_future = sm.add_constant(future_forcast)
# Back-transform from log space. The result is stored in `svm_pred` (the name
# this cell has always written) even though the model is not an SVM.
svm_pred = np.exp(mod.predict(X_future))

# Observed series from day index 50 onward; prediction from day index 55 onward.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates[50:], world_cases[50:])
plt.plot(future_forcast[55:], svm_pred[55:], linestyle='dashed', color='purple')
plt.title('No of Coronavirus Cases in World-wide Over Time', size=30)
# The x values are still day indices counted from 1/22/2020 (the plot merely
# starts at index 50), so the axis is labelled accordingly.
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('No of Cases', size=30)
plt.legend(['Confirmed Cases', 'Exponential predictions'])
plt.xticks(size=15)
plt.show()

In [None]:
#days_since_1_22 = np.concatenate((days_since_1_22,[[62]]))
#future_forcast = np.concatenate((future_forcast,[[82]]))
#us_cases = us_cases
#us_cases.append(43781)
#us_cases.append(54856)
#print(future_forcast)

In [None]:
# Polynomial-kernel SVR on the US series from day index 60 onward; the last
# 15% of that window is held out (chronological split, no shuffling).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22[60:], us_cases[60:], test_size=0.15, shuffle=False)
# Fresh estimator: an earlier cell binds `svm_confirmed` to a statsmodels OLS model.
svm_confirmed = SVR(kernel='poly')
svm_confirmed.fit(X_train_confirmed, y_train_confirmed)
svm_pred = svm_confirmed.predict(future_forcast)

# Observed series from day index 50 onward; prediction from day index 59 onward.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates[50:], us_cases[50:])
plt.plot(future_forcast[59:], svm_pred[59:], linestyle='dashed', color='purple')
plt.title('No of Coronavirus Cases in US Over Time', size=30)
# The x values are still day indices counted from 1/22/2020 (the plot merely
# starts at index 50), so the axis is labelled accordingly.
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('No of Cases', size=30)
plt.legend(['Confirmed Cases', 'SVM predictions'])
plt.xticks(size=15)
plt.show()

In [None]:
import statsmodels.api as sm

# Exponential-growth fit for the US series from day index 20 onward:
# ordinary least squares of log(cases) on the day index plus an intercept.
# The last 15% of that window is held out (chronological split, no shuffling).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22[20:], us_cases[20:], test_size=0.15, shuffle=False)
X_train_confirmed = sm.add_constant(X_train_confirmed)
# NOTE(review): np.log needs strictly positive counts; presumably the US total
# is already non-zero by day index 20 -- confirm against the data.
y_train_confirmed = np.log(y_train_confirmed)
# Kept under its own name so the SVR bound to `svm_confirmed` is not replaced
# by an OLS model.
ols_confirmed = sm.OLS(y_train_confirmed, X_train_confirmed)
mod = ols_confirmed.fit()
# print(mod.summary())  # uncomment to inspect the regression summary
X_future = sm.add_constant(future_forcast)
# Back-transform from log space. The result is stored in `svm_pred` (the name
# this cell has always written) even though the model is not an SVM.
svm_pred = np.exp(mod.predict(X_future))

# Observed series from day index 50 onward; prediction from day index 55 onward.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates[50:], us_cases[50:])
plt.plot(future_forcast[55:], svm_pred[55:], linestyle='dashed', color='purple')
plt.title('No of Coronavirus Cases in US Over Time', size=30)
# The x values are still day indices counted from 1/22/2020 (the plot merely
# starts at index 50), so the axis is labelled accordingly.
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('No of Cases', size=30)
# This curve comes from the log-linear OLS fit, not an SVM.
plt.legend(['Confirmed Cases', 'Exponential predictions'])
plt.xticks(size=15)
plt.show()

In [None]:
# RidgeCV fitted on log1p(cases) and mapped back with expm1, US series from
# day index 20 onward; the last 15% of that window is held out (no shuffling).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22[20:], us_cases[20:], test_size=0.15, shuffle=False)
regr = TransformedTargetRegressor(regressor=RidgeCV(),func=np.log1p,inverse_func=np.expm1)
regr.fit(X_train_confirmed, y_train_confirmed)
regr_pred = regr.predict(future_forcast)

# Observed series from day index 40 onward; prediction from day index 59 onward.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates[40:], us_cases[40:])
plt.plot(future_forcast[59:], regr_pred[59:], linestyle='dashed', color='purple')
plt.title('No of Coronavirus Cases in US Over Time', size=30)
# The x values are still day indices counted from 1/22/2020 (the plot merely
# starts at index 40), so the axis is labelled accordingly.
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('No of Cases', size=30)
plt.legend(['Confirmed Cases', ' logarithmic  and an exponential transformed Ridge Regression  predictions'])
plt.xticks(size=15)
plt.show()