In [99]:
import pandas as pd
import numpy as np
from datetime import datetime as dt
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt  

In [100]:
# Module-level copies of the three JHU CSSE global time series
# (confirmed cases, deaths, recovered), fetched straight from GitHub.
_CSSE_TIME_SERIES_URL = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_time_series/'
)
confirmed_cases = pd.read_csv(_CSSE_TIME_SERIES_URL + 'time_series_covid19_confirmed_global.csv')
confirmed_deaths = pd.read_csv(_CSSE_TIME_SERIES_URL + 'time_series_covid19_deaths_global.csv')
confirmed_recovered = pd.read_csv(_CSSE_TIME_SERIES_URL + 'time_series_covid19_recovered_global.csv')

In [101]:
class Data():
    """Per-country daily COVID-19 table built from the JHU CSSE global CSVs.

    Parameters
    ----------
    country : str
        Value matched exactly against the 'Country/Region' column.
    confirmed, deaths, recovered : pandas.DataFrame, optional
        Frames in the JHU wide layout (the four metadata columns followed by
        one column per date, named like '1/22/20').  When omitted, the
        corresponding CSV is downloaded from GitHub.  Frames passed in are
        never modified.
    output_path : str or path-like or None, optional
        Where the assembled table is written as CSV.  The parent folder is
        created if needed.  Pass None to skip writing.

    Attributes
    ----------
    confirmed, deaths, recovered : pandas.DataFrame
        After construction: only the rows of `country`, date columns only.
    new_dataset : pandas.DataFrame
        One row per date column, with the columns (in this order)
        Total_Cases, Total_Recovered, Total_Deaths, Days_Gone,
        Daily_Deaths, Daily_Cases.

    Raises
    ------
    ValueError
        If `country` has no rows in the confirmed-cases data, or if the three
        sources do not have the same number of date columns.
    """

    # The three source files share this location and naming pattern.
    BASE_URL = ('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
                'csse_covid_19_data/csse_covid_19_time_series/')
    FILE_TEMPLATE = 'time_series_covid19_{}_global.csv'
    # Non-date columns removed before the per-date sums are taken.
    META_COLUMNS = ['Province/State', 'Country/Region', 'Lat', 'Long']
    # Format of the date column names, and the day that Days_Gone counts from.
    DATE_FORMAT = '%m/%d/%y'
    GROUND_ZERO = '12/31/19'

    def __init__(self, country, confirmed=None, deaths=None, recovered=None,
                 output_path='Dados/new_dataset.csv'):
        self.country = country
        self.output_path = output_path
        self.confirmed = self._source('confirmed', confirmed)
        self.deaths = self._source('deaths', deaths)
        self.recovered = self._source('recovered', recovered)
        self.PreparaData()

    def _source(self, kind, frame):
        """Return `frame` if given, otherwise download the `kind` time series."""
        if frame is None:
            return pd.read_csv(self.BASE_URL + self.FILE_TEMPLATE.format(kind))
        return frame

    def _country_dates(self, frame):
        """Return a new frame holding only this country's rows and the date columns."""
        rows = frame[frame['Country/Region'] == self.country]
        return rows.drop(columns=self.META_COLUMNS)

    def PreparaData(self):
        """Filter the sources to `self.country` and assemble `self.new_dataset`.

        Several rows may match one country (one per province/state), so every
        date column is summed over the matching rows.  The daily series are the
        day-over-day differences of the cumulative totals; the first day keeps
        its cumulative value.
        """
        self.confirmed = self._country_dates(self.confirmed)
        self.deaths = self._country_dates(self.deaths)
        self.recovered = self._country_dates(self.recovered)

        # Without this check a misspelled country yields an all-zero dataset.
        if len(self.confirmed.index) == 0:
            raise ValueError(
                'No rows found for country %r in the confirmed-cases data' % (self.country,))

        total_cases = self.confirmed.sum(axis=0).to_numpy()
        total_deaths = self.deaths.sum(axis=0).to_numpy()
        total_recovered = self.recovered.sum(axis=0).to_numpy()

        if not (len(total_cases) == len(total_deaths) == len(total_recovered)):
            raise ValueError(
                'Date columns differ between sources: confirmed=%d, deaths=%d, recovered=%d'
                % (len(total_cases), len(total_deaths), len(total_recovered)))

        ground_zero = dt.strptime(self.GROUND_ZERO, self.DATE_FORMAT)
        days_gone = [(dt.strptime(col, self.DATE_FORMAT) - ground_zero).days
                     for col in self.confirmed.columns]

        # prepend=0 makes the first difference equal to the first cumulative
        # value, matching "total so far minus everything already counted".
        daily_cases = np.diff(total_cases, prepend=0)
        daily_deaths = np.diff(total_deaths, prepend=0)

        # Column order matters: downstream code indexes this table by position.
        self.new_dataset = pd.DataFrame({
            'Total_Cases': total_cases,
            'Total_Recovered': total_recovered,
            'Total_Deaths': total_deaths,
            'Days_Gone': days_gone,
            'Daily_Deaths': daily_deaths,
            'Daily_Cases': daily_cases,
        })

        if self.output_path is not None:
            from pathlib import Path
            target = Path(self.output_path)
            # to_csv does not create missing folders on its own.
            target.parent.mkdir(parents=True, exist_ok=True)
            self.new_dataset.to_csv(target, index=False)

In [208]:
# Build the Portugal dataset. Constructing Data downloads the three source CSVs
# and, as a side effect, writes the assembled table to 'Dados/new_dataset.csv'.
dados = Data('Portugal')

In [209]:
# Display the assembled table: one row per date column of the source data,
# with cumulative totals, days since 12/31/19 and day-over-day counts.
dados.new_dataset

Unnamed: 0,Total_Cases,Total_Recovered,Total_Deaths,Days_Gone,Daily_Deaths,Daily_Cases
0,0,0,0,22,0,0
1,0,0,0,23,0,0
2,0,0,0,24,0,0
3,0,0,0,25,0,0
4,0,0,0,26,0,0
...,...,...,...,...,...,...
141,35910,22002,1504,163,7,310
142,36180,22200,1505,164,1,270
143,36463,22438,1512,165,7,283
144,36690,22669,1517,166,5,227


In [210]:
class Linear_Regression():
    """Single-feature linear regression over a positional slice of a table.

    Parameters
    ----------
    data : array-like
        Two-dimensional table (e.g. a DataFrame); converted with `np.asarray`,
        so columns are addressed by position, not by name.
    feature_col : int, optional
        Position of the column used as the single input feature (default 3).
    target_col : int, optional
        Position of the column to predict (default 5).
    start_row : int, optional
        Number of leading rows excluded from both training and testing
        (default 30).
    test_size : float or int, optional
        Passed to `train_test_split`; the test rows are taken from the end of
        the table because the split is not shuffled (default 0.15).

    Attributes
    ----------
    dataset : numpy.ndarray
    x_train, x_test, y_train, y_test : numpy.ndarray
    regressor : sklearn.linear_model.LinearRegression
        Fitted during construction.
    """

    def __init__(self, data, feature_col=3, target_col=5, start_row=30, test_size=0.15):
        self.feature_col = feature_col
        self.target_col = target_col
        self.start_row = start_row
        self.test_size = test_size
        self.dataset = np.asarray(data)
        # Normalisation is intentionally not applied here; see NormalizeData.
        self.PrepareData()
        self.Fit()

    def PrepareData(self):
        """Split feature/target columns into train and test parts, in row order."""
        rows = self.dataset[self.start_row:]
        # shuffle=False keeps row order, so the last rows form the test set.
        # No random_state is passed: it only controls shuffling and therefore
        # has no effect on an unshuffled split.
        self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(
            rows[:, self.feature_col],
            rows[:, self.target_col],
            test_size=self.test_size,
            shuffle=False,
        )

    def Fit(self):
        """Fit the regressor on the training part."""
        self.regressor = LinearRegression()
        # The estimator expects a 2-D feature matrix, hence the column reshape.
        self.regressor.fit(np.reshape(self.x_train, (-1, 1)), self.y_train)

    def NormalizeData(self):
        """Rescale every column of `self.dataset` to the range [-1, 1].

        Not called by `__init__`.  NOTE(review): the scaler is fitted on the
        whole table, including rows that PrepareData later assigns to the test
        set, so enabling it before the split leaks test statistics.
        """
        self.scaler = MinMaxScaler(feature_range=(-1, 1))
        self.dataset = self.scaler.fit_transform(self.dataset)

    def ComparePredictions(self):
        """Print each test prediction beside the real value, then the mean absolute error."""
        y_pred = self.regressor.predict(np.reshape(self.x_test, (-1, 1)))
        for predicted, real in zip(y_pred, np.asarray(self.y_test)):
            print('Predicted:' + str(predicted) + '===== Real:' + str(real))
        print('Mean Absolute Error:', metrics.mean_absolute_error(self.y_test, y_pred))

In [211]:
# Constructing the model converts the table to an array, splits it without
# shuffling and fits the regressor, all inside __init__.
Model = Linear_Regression(dados.new_dataset)

In [212]:
# Score of the fitted regressor on its own training data
# (for scikit-learn regressors, score() is the coefficient of determination R^2).
Model.regressor.score(Model.x_train.reshape(-1, 1), Model.y_train.reshape(-1, 1))

0.052290770234261874

In [213]:
# Print every test-set prediction next to the real value, followed by the
# mean absolute error over the test set.
Model.ComparePredictions()

Predicted:441.8135914159478===== Real:350
Predicted:444.225822287678===== Real:257
Predicted:446.6380531594081===== Real:297
Predicted:449.05028403113823===== Real:200
Predicted:451.4625149028684===== Real:195
Predicted:453.8747457745985===== Real:366
Predicted:456.28697664632864===== Real:331
Predicted:458.6992075180588===== Real:377
Predicted:461.11143838978893===== Real:382
Predicted:463.52366926151905===== Real:342
Predicted:465.93590013324916===== Real:192
Predicted:468.34813100497934===== Real:421
Predicted:470.76036187670945===== Real:294
Predicted:473.17259274843957===== Real:310
Predicted:475.58482362016974===== Real:270
Predicted:477.99705449189986===== Real:283
Predicted:480.40928536363===== Real:227
Predicted:482.82151623536015===== Real:346
Mean Absolute Error: 160.0953316034318


In [203]:
# Inspect the held-out targets.
# NOTE(review): this cell's execution count (203) is lower than that of the
# cell that builds Model (211), so the output shown here may come from an
# earlier Model; re-run top to bottom to confirm.
Model.y_test

array([658, 664, 251, 159, 294, 394, 334, 318, 332, 240, 167, 249, 314,
       427, 502, 396, 323, 181], dtype=int64)

In [198]:
# Inspect the held-out feature values (the feature column of the test rows).
# NOTE(review): executed (198) before the cell that builds Model (211); the
# output shown may be stale relative to the current Model.
Model.x_test

array([150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162,
       163, 164, 165, 166, 167], dtype=int64)