**Importing Libraries** 


In [88]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

**Loading dataset and data preprocessing**

In [89]:
dataset = pd.read_csv("owid-covid-data.csv")

In [90]:
dataset_2months=dataset.loc[(dataset['location']=='Bangladesh') & (dataset['date']>='2020-04-01') & (dataset['date']<='2020-05-31')]
dataset_BD_allmonths=dataset.loc[(dataset['location']=='Bangladesh')]

In [92]:
dataset_2months=dataset_2months[['date','total_cases','new_cases','total_deaths']]
dataset_BD_allmonths=dataset_BD_allmonths[['date','total_cases','new_cases','total_deaths']]

In [93]:
dataset_2months.isnull().sum() #NO null value found in month of April and May in Bangladesh location 

date            0
total_cases     0
new_cases       0
total_deaths    0
dtype: int64

In [94]:
dataset_BD_allmonths.isnull().sum() #11 null value found 

date             0
total_cases     11
new_cases       11
total_deaths    11
dtype: int64

In [95]:
dataset_BD_allmonths = dataset_BD_allmonths.dropna(inplace=False)
dataset_BD_allmonths.isnull().sum()

date            0
total_cases     0
new_cases       0
total_deaths    0
dtype: int64

In [96]:
dataset_2months['equvalent_date'] = pd.to_datetime(dataset_2months['date'])
dataset_2months['equvalent_date']= dataset_2months['equvalent_date'].map(dt.datetime.toordinal)
dataset_2months

Unnamed: 0,date,total_cases,new_cases,total_deaths,equvalent_date
3374,2020-04-01,51.0,2.0,5.0,737516
3375,2020-04-02,54.0,3.0,6.0,737517
3376,2020-04-03,56.0,2.0,6.0,737518
3377,2020-04-04,61.0,5.0,6.0,737519
3378,2020-04-05,70.0,9.0,8.0,737520
...,...,...,...,...,...
3430,2020-05-27,36751.0,1166.0,522.0,737572
3431,2020-05-28,38292.0,1541.0,544.0,737573
3432,2020-05-29,40321.0,2029.0,559.0,737574
3433,2020-05-30,42844.0,2523.0,582.0,737575


**Creating and executing regression models**

In [108]:
X = dataset_2months.iloc[:, 4].values
Y_total_case = dataset_2months.iloc[:, 1:2].values
Y_new_case = dataset_2months.iloc[:, 2:3].values
Y_total_deaths = dataset_2months.iloc[:, 3:4].values

In [109]:
X=X.reshape(-1,1)

In [114]:
X_train_total_case, X_test_total_case, Y_train_total_case, Y_test_total_case = train_test_split(X, Y_total_case, test_size=0.2)
X_train_new_case, X_test_new_case, Y_train_new_case, Y_test_new_case = train_test_split(X, Y_new_case, test_size=0.2)
X_train_total_deaths, X_test_total_deaths, Y_train_total_deaths, Y_test_total_deaths = train_test_split(X, Y_total_deaths, test_size=0.2)

**Model for Date and Total Case**

In [115]:
model_total_case_deg1 = PolynomialFeatures(degree = 1)
X_fit1 = model_total_case_deg1.fit_transform(X_train_total_case)
model_total_case_deg1.fit(X_fit1, Y_train_total_case)
lin_model_total_case_deg1 = LinearRegression()
lin_model_total_case_deg1.fit(X_fit1, Y_train_total_case)

model_total_case_deg2 = PolynomialFeatures(degree = 2)
X_fit1 = model_total_case_deg2.fit_transform(X_train_total_case)
model_total_case_deg2.fit(X_fit1, Y_train_total_case)
lin_model_total_case_deg2 = LinearRegression()
lin_model_total_case_deg2.fit(X_fit1, Y_train_total_case)

model_total_case_deg3 = PolynomialFeatures(degree = 3)
X_fit1 = model_total_case_deg3.fit_transform(X_train_total_case)
model_total_case_deg3.fit(X_fit1, Y_train_total_case)
lin_model_total_case_deg3 = LinearRegression()
lin_model_total_case_deg3.fit(X_fit1, Y_train_total_case)

model_total_case_deg4 = PolynomialFeatures(degree = 4)
X_fit1 = model_total_case_deg4.fit_transform(X_train_total_case)
model_total_case_deg4.fit(X_fit1, Y_train_total_case)
lin_model_total_case_deg4 = LinearRegression()
lin_model_total_case_deg4.fit(X_fit1, Y_train_total_case)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [116]:
print(f'R^2 value for Total case when degree is 1 : {r2_score(Y_test_total_case,lin_model_total_case_deg1.predict(model_total_case_deg1.fit_transform(X_test_total_case)))}')
print(f'R^2 value for Total case when degree is 2 : {r2_score(Y_test_total_case,lin_model_total_case_deg2.predict(model_total_case_deg2.fit_transform(X_test_total_case)))}')
print(f'R^2 value for Total case when degree is 3 : {r2_score(Y_test_total_case,lin_model_total_case_deg3.predict(model_total_case_deg3.fit_transform(X_test_total_case)))}')
print(f'R^2 value for Total case when degree is 4 : {r2_score(Y_test_total_case,lin_model_total_case_deg4.predict(model_total_case_deg4.fit_transform(X_test_total_case)))}')

R^2 value for Total case when degree is 1 : 0.8533457879914024
R^2 value for Total case when degree is 2 : 0.9962240622137427
R^2 value for Total case when degree is 3 : 0.996224170070835
R^2 value for Total case when degree is 4 : 0.9962247447705459


**Model for Date and new Case**

In [117]:
model_new_case_deg1 = PolynomialFeatures(degree = 1)
X_fit1 = model_new_case_deg1.fit_transform(X_train_new_case)
model_new_case_deg1.fit(X_fit1, Y_train_new_case)
lin_model_new_case_deg1 = LinearRegression()
lin_model_new_case_deg1.fit(X_fit1, Y_train_new_case)


model_new_case_deg2 = PolynomialFeatures(degree = 2)
X_fit1 = model_new_case_deg2.fit_transform(X_train_new_case)
model_new_case_deg2.fit(X_fit1, Y_train_new_case)
lin_model_new_case_deg2 = LinearRegression()
lin_model_new_case_deg2.fit(X_fit1, Y_train_new_case)

model_new_case_deg3 = PolynomialFeatures(degree = 3)
X_fit1 = model_new_case_deg3.fit_transform(X_train_new_case)
model_new_case_deg3.fit(X_fit1, Y_train_new_case)
lin_model_new_case_deg3 = LinearRegression()
lin_model_new_case_deg3.fit(X_fit1, Y_train_new_case)

model_new_case_deg4 = PolynomialFeatures(degree = 4)
X_fit1 = model_new_case_deg4.fit_transform(X_train_new_case)
model_new_case_deg4.fit(X_fit1, Y_new_case)
lin_model_new_case_deg4 = LinearRegression()
lin_model_new_case_deg4.fit(X_fit1, Y_train_new_case)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [118]:
print(f'R^2 value for new cases when degree is 1 : {r2_score(Y_test_new_case,lin_model_new_case_deg1.predict(model_new_case_deg1.fit_transform(X_test_new_case)))}')
print(f'R^2 value for new cases when degree is 2 : {r2_score(Y_test_new_case,lin_model_new_case_deg2.predict(model_new_case_deg2.fit_transform(X_test_new_case)))}')
print(f'R^2 value for new cases when degree is 3 : {r2_score(Y_test_new_case,lin_model_new_case_deg3.predict(model_new_case_deg3.fit_transform(X_test_new_case)))}')
print(f'R^2 value for new cases when degree is 4 : {r2_score(Y_test_new_case,lin_model_new_case_deg4.predict(model_new_case_deg4.fit_transform(X_test_new_case)))}')

R^2 value for new cases when degree is 1 : 0.8232170988413099
R^2 value for new cases when degree is 2 : 0.8899256776506765
R^2 value for new cases when degree is 3 : 0.8899261968004584
R^2 value for new cases when degree is 4 : 0.8899267616884468


**Model for Date and Total Deaths**

In [104]:
model_total_deaths_deg1 = PolynomialFeatures(degree = 1)
X_fit1 = model_total_deaths_deg1.fit_transform(X_train_total_deaths)
model_total_deaths_deg1.fit(X_fit1, Y_train_total_deaths)
lin_model_total_deaths_deg1 = LinearRegression()
lin_model_total_deaths_deg1.fit(X_fit1, Y_train_total_deaths)

model_total_deaths_deg2 = PolynomialFeatures(degree = 2)
X_fit1 = model_total_deaths_deg2.fit_transform(X_train_total_deaths)
model_total_deaths_deg2.fit(X_fit1, Y_train_total_deaths)
lin_model_total_deaths_deg2 = LinearRegression()
lin_model_total_deaths_deg2.fit(X_fit1, Y_train_total_deaths)

model_total_deaths_deg3 = PolynomialFeatures(degree = 3)
X_fit1 = model_total_deaths_deg3.fit_transform(X_train_total_deaths)
model_total_deaths_deg3.fit(X_fit1, Y_train_total_deaths)
lin_model_total_deaths_deg3 = LinearRegression()
lin_model_total_deaths_deg3.fit(X_fit1, Y_train_total_deaths)


model_total_deaths_deg4 = PolynomialFeatures(degree = 4)
X_fit1 = model_total_deaths_deg4.fit_transform(X_train_total_deaths)
model_total_deaths_deg4.fit(X_fit1, Y_train_total_deaths)
lin_model_total_deaths_deg4 = LinearRegression()
lin_model_total_deaths_deg4.fit(X_fit1, Y_train_total_deaths)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [105]:
print(f'R^2 value for total deaths when degree is 1 : {r2_score(Y_test_total_deaths,lin_model_total_deaths_deg1.predict(model_total_deaths_deg1.fit_transform(X_test_total_deaths)))}')
print(f'R^2 value for total deaths when degree is 2 : {r2_score(Y_test_total_deaths,lin_model_total_deaths_deg2.predict(model_total_deaths_deg2.fit_transform(X_test_total_deaths)))}')
print(f'R^2 value for total deaths when degree is 3 : {r2_score(Y_test_total_deaths,lin_model_total_deaths_deg3.predict(model_total_deaths_deg3.fit_transform(X_test_total_deaths)))}')
print(f'R^2 value for total deaths when degree is 4 : {r2_score(Y_test_total_deaths,lin_model_total_deaths_deg4.predict(model_total_deaths_deg4.fit_transform(X_test_total_deaths)))}')

R^2 value for total deaths when degree is 1 : 0.9132187934946833
R^2 value for total deaths when degree is 2 : 0.9842252238531886
R^2 value for total deaths when degree is 3 : 0.984225828963247
R^2 value for total deaths when degree is 4 : 0.9842264585085924


**User Defined Function**

In [106]:
def Corona_ML(date):
    index=dataset_BD_allmonths[dataset_BD_allmonths['date']==date].index.values
    print(f"in date {date}")
    if len(index)>0:
        print("Actual values:")
        print()
        print("total case :",int(dataset_BD_allmonths.loc[index]['total_cases']))
        print("new cases :",int(dataset_BD_allmonths.loc[index]['new_cases']))
        print("total deaths :",int(dataset_BD_allmonths.loc[index]['total_deaths']))
    else:
        print("Actual values are not available in the dataset for this date")

    Date = pd.to_datetime([date])
    Date=Date.map(dt.datetime.toordinal)
    print()
    print("predited values:") #using degree 2
    print()
    print("total case :",int(lin_model_total_case_deg2.predict(model_total_case_deg2.fit_transform([Date]))))
    print("new case :",int(lin_model_new_case_deg2.predict(model_new_case_deg2.fit_transform([Date]))))
    print("total deaths :",int(lin_model_total_deaths_deg2.predict(model_total_deaths_deg2.fit_transform([Date]))))
    
    

In [107]:
Corona_ML('2020-04-30')

in date 2020-04-30
Actual values:

total case : 7103
new cases : 641
total deaths : 163

predited values:

total case : 6663
new case : 566
total deaths : 142


**Answering questions**

Q1: What would be consequence of taking lower value of N in the regression model?

Answer: Taking lower value of N will result in lower value of r-square(r^2), that means the accuracy of the model will be less. Time complexity also will be less. There is no probability of having overfitting and the model won't touch every data point.

Q2: What would be consequence of taking higher value of N in the regression model?

Answer: Taking higher value of N will result in higher value of r-square(r^2), that means the accuracy of the model will be better. Time complexity will also be more. There is a probability of having overfitting because it may fit noisy data.