In [62]:
import pandas as pd
import numpy as np
from datetime import datetime as dt
from sklearn.model_selection import train_test_split 
from sklearn import linear_model
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt  
from sklearn.model_selection import cross_val_score

In [14]:
confirmed_cases = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
confirmed_deaths = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
confirmed_recovered = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')

In [15]:
class Data():
    def __init__(self,country):
        self.country = country
        self.confirmed = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
        self.deaths = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
        self.recovered = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')
        self.PreparaData()

    def PreparaData(self):
        confirmedPortugal = self.confirmed[ self.confirmed['Country/Region'] != self.country].index
        self.confirmed.drop(confirmedPortugal , inplace=True)
        deathsPortugal = self.deaths[ self.deaths['Country/Region'] != self.country].index
        self.deaths.drop(deathsPortugal , inplace=True)
        recoveredPortugal = self.recovered[ self.recovered['Country/Region'] != self.country].index
        self.recovered.drop(recoveredPortugal , inplace=True)
        self.confirmed = self.confirmed.drop(columns=['Province/State','Country/Region','Lat','Long'])
        self.deaths = self.deaths.drop(columns=['Province/State','Country/Region','Lat','Long'])
        self.recovered = self.recovered.drop(columns=['Province/State','Country/Region','Lat','Long'])
        timesteps = []
        total_infetados = []
        total_Days = []
        total_Deaths = []
        total_Recovered = []
        daily_deaths=[]
        daily_cases=[]
        self.new_dataset = pd.DataFrame()
        Ground_Zero = dt.strptime('12/31/19','%m/%d/%y')
        for cols in self.confirmed.columns:
            timesteps.append(cols)
            total_infetados.append(self.confirmed[cols].sum())
            current_date = dt.strptime(cols,'%m/%d/%y')
            days_Gone = current_date - Ground_Zero
            total_Days.append(int(days_Gone.days))
            daily_cases.append(self.confirmed[cols].sum()-sum(daily_cases))
        for cols in self.deaths.columns:
            total_Deaths.append(self.deaths[cols].sum())
            daily_deaths.append(self.deaths[cols].sum()-sum(daily_deaths))
        for cols in self.recovered.columns:
            total_Recovered.append(self.recovered[cols].sum())
        self.new_dataset['Total_Cases'] = total_infetados
        self.new_dataset['Total_Recovered'] = total_Recovered
        self.new_dataset['Total_Deaths'] = total_Deaths
        self.new_dataset['Days_Gone'] = total_Days
        self.new_dataset['Daily_Deaths'] = daily_deaths
        self.new_dataset['Daily_Cases'] = daily_cases
        pd.DataFrame.to_csv(self.new_dataset,'Dados/new_dataset.csv',index=False)

In [16]:
dados = Data('Portugal')

In [17]:
dados.new_dataset

Unnamed: 0,Total_Cases,Total_Recovered,Total_Deaths,Days_Gone,Daily_Deaths,Daily_Cases
0,0,0,0,22,0,0
1,0,0,0,23,0,0
2,0,0,0,24,0,0
3,0,0,0,25,0,0
4,0,0,0,26,0,0
...,...,...,...,...,...,...
142,36180,22200,1505,164,1,270
143,36463,22438,1512,165,7,283
144,36690,22669,1517,166,5,227
145,37036,22852,1520,167,3,346


In [254]:
class Linear_Regression():
    def __init__(self,data):
        self.dataset = data
        self.dataset = np.asarray(self.dataset)
        #self.NormalizeData()
        self.PrepareData()
        self.Fit()
        
    def PrepareData(self):
        self.x_train, self.x_test,self.y_train, self.y_test = train_test_split(self.dataset[:,3],self.dataset[:,5], test_size=0.10)
    def Fit(self):
        
        #self.ridge = linear_model.Ridge(alpha=.001,max_iter=1000)
        #self.ridge.fit(np.reshape(self.x_train,(-1,1)), self.y_train)
        
        self.lasso = linear_model.Lasso(alpha=0.1,max_iter=20000)
        self.lasso.fit(np.reshape(self.x_train,(-1,1)), self.y_train)
        
        
        #self.regressor = LinearRegression()  
        #self.regressor.fit(np.reshape(self.x_train,(-1,1)),self.y_train) #training the algorithm
    def NormalizeData(self):
        self.scaler = MinMaxScaler(feature_range=(-1, 1))
        self.dataset = self.scaler.fit_transform(self.dataset)
        #print(self.normalized)
    def ComparePredictions(self):
        y_pred = self.lasso.predict(np.reshape(self.x_test,(-1,1)))
        count = 0
        test = np.asarray(self.y_test)
        for i in y_pred:
            print('Predicted:' + str(i)+ '===== Real:' +str(test[count]) )
            count +=1
        print('Mean Absolute Error:', metrics.mean_absolute_error(self.y_test, y_pred))
        #print('Mean Squared Error:', metrics.mean_squared_error(self.y_test, y_pred))



In [255]:
Model = Linear_Regression(dados.new_dataset)

In [256]:

                             
Model.lasso.score(np.reshape(Model.x_train,(-1,1)),np.reshape(Model.y_train,(-1,1)))

0.18907031258058904

In [257]:
Model.ComparePredictions()

Predicted:45.70019213032906===== Real:0
Predicted:468.58910403993957===== Real:300
Predicted:165.27567756683962===== Real:3
Predicted:343.18066809433094===== Real:242
Predicted:115.69559823950598===== Real:0
Predicted:401.5101731853117===== Real:152
Predicted:448.17377725809627===== Real:421
Predicted:238.1875589305656===== Real:792
Predicted:331.51476707613483===== Real:540
Predicted:328.5982918215857===== Real:183
Predicted:421.9254999671549===== Real:297
Predicted:366.5124701307233===== Real:234
Predicted:413.1760742035078===== Real:304
Predicted:258.6028857124088===== Real:754
Predicted:410.2595989489588===== Real:285
Mean Absolute Error: 184.3527942578116


In [237]:
Model.y_test

array([   0,    0,    0,    0, -161,  371,    0,   76,  553,  310,    0,
        257,  516,    0,    0], dtype=int64)

In [166]:
Model.x_test

array([ 94,  75, 102,  89,  85,  51,  24, 161,  52,  64,  29,  78, 167,
        58,  59], dtype=int64)