## Creating a logistic regression to predict Covid RT in Portugal

### Import the relevant libraries

In [1]:
import pandas as pd
import numpy as np

### Load the data

In [2]:
rt_preprocessed = pd.read_csv('rt.csv')

In [3]:
rt_preprocessed

Unnamed: 0,data,rt_nacional,rt_95_inferior_nacional,rt_95_superior_nacional,rt_continente,rt_95_inferior_continente,rt_95_superior_continente,rt_arsnorte,rt_95_inferior_arsnorte,rt_95_superior_arsnorte,...,rt_95_superior_arsalentejo,rt_arsalgarve,rt_95_inferior_arsalgarve,rt_95_superior_arsalgarve,rt_açores,rt_95_inferior_açores,rt_95_superior_açores,rt_madeira,rt_95_inferior_madeira,rt_95_superior_madeira
0,2020-02-21,1.445805,0.885115,2.154474,1.393376,0.834001,2.108648,2.215064,0.794968,4.266393,...,,,,,1.878477,0.216036,5.341265,,,
1,2020-02-22,1.460401,0.909453,2.135591,1.438007,0.894428,2.104977,2.030330,0.819878,3.852039,...,,,,,1.828377,0.211497,5.236868,,,
2,2020-02-23,1.455195,0.923809,2.099881,1.456232,0.924477,2.129269,2.370303,1.140937,4.085290,...,,,,,2.726909,0.565662,6.521018,,,
3,2020-02-24,1.422329,0.918444,2.022243,1.427503,0.925762,2.041711,2.258416,1.190901,3.620633,...,3.569968,,,,2.030977,0.416034,4.983372,,,
4,2020-02-25,1.592746,1.087196,2.198190,1.611543,1.095469,2.214480,1.931235,1.072682,3.068771,...,4.934545,,,,2.366339,0.668512,5.411210,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
621,2021-11-03,1.145108,1.115840,1.174759,1.144398,1.113652,1.174920,1.126552,1.072533,1.183508,...,1.179569,1.163629,1.058053,1.275514,1.054800,0.873004,1.251412,1.278569,1.124044,1.439930
622,2021-11-04,1.154329,1.124110,1.184888,1.158458,1.127431,1.190189,1.154554,1.099829,1.211579,...,1.178338,1.148369,1.046878,1.253765,1.042368,0.864365,1.241214,1.202553,1.063910,1.348709
623,2021-11-05,1.165617,1.134466,1.196583,1.170480,1.138592,1.201710,1.158483,1.103669,1.214822,...,1.250934,1.193300,1.088383,1.299385,1.057981,0.878518,1.259953,1.184128,1.048767,1.325031
624,2021-11-06,1.138544,1.108531,1.169856,1.144740,1.112671,1.177019,1.149113,1.096954,1.203664,...,1.349256,1.168368,1.068022,1.273239,1.026039,0.848013,1.221395,1.125691,0.995863,1.259646


### Create the targets

In [5]:
rt_preprocessed['rt_nacional'].median()

1.012897283936945

In [7]:
targets =np.where(rt_preprocessed['rt_nacional'] >
                  rt_preprocessed['rt_nacional'].median(), 1, 0)

In [8]:
targets

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [9]:
rt_preprocessed['rt_95_superior_nacional'] = targets

In [10]:
rt_preprocessed

Unnamed: 0,data,rt_nacional,rt_95_inferior_nacional,rt_95_superior_nacional,rt_continente,rt_95_inferior_continente,rt_95_superior_continente,rt_arsnorte,rt_95_inferior_arsnorte,rt_95_superior_arsnorte,...,rt_95_superior_arsalentejo,rt_arsalgarve,rt_95_inferior_arsalgarve,rt_95_superior_arsalgarve,rt_açores,rt_95_inferior_açores,rt_95_superior_açores,rt_madeira,rt_95_inferior_madeira,rt_95_superior_madeira
0,2020-02-21,1.445805,0.885115,1,1.393376,0.834001,2.108648,2.215064,0.794968,4.266393,...,,,,,1.878477,0.216036,5.341265,,,
1,2020-02-22,1.460401,0.909453,1,1.438007,0.894428,2.104977,2.030330,0.819878,3.852039,...,,,,,1.828377,0.211497,5.236868,,,
2,2020-02-23,1.455195,0.923809,1,1.456232,0.924477,2.129269,2.370303,1.140937,4.085290,...,,,,,2.726909,0.565662,6.521018,,,
3,2020-02-24,1.422329,0.918444,1,1.427503,0.925762,2.041711,2.258416,1.190901,3.620633,...,3.569968,,,,2.030977,0.416034,4.983372,,,
4,2020-02-25,1.592746,1.087196,1,1.611543,1.095469,2.214480,1.931235,1.072682,3.068771,...,4.934545,,,,2.366339,0.668512,5.411210,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
621,2021-11-03,1.145108,1.115840,1,1.144398,1.113652,1.174920,1.126552,1.072533,1.183508,...,1.179569,1.163629,1.058053,1.275514,1.054800,0.873004,1.251412,1.278569,1.124044,1.439930
622,2021-11-04,1.154329,1.124110,1,1.158458,1.127431,1.190189,1.154554,1.099829,1.211579,...,1.178338,1.148369,1.046878,1.253765,1.042368,0.864365,1.241214,1.202553,1.063910,1.348709
623,2021-11-05,1.165617,1.134466,1,1.170480,1.138592,1.201710,1.158483,1.103669,1.214822,...,1.250934,1.193300,1.088383,1.299385,1.057981,0.878518,1.259953,1.184128,1.048767,1.325031
624,2021-11-06,1.138544,1.108531,1,1.144740,1.112671,1.177019,1.149113,1.096954,1.203664,...,1.349256,1.168368,1.068022,1.273239,1.026039,0.848013,1.221395,1.125691,0.995863,1.259646


### A comments on the targets

In [11]:
targets.sum() / targets.shape[0]

0.5

In [12]:
rt_with_targets = rt_preprocessed.drop(['rt_nacional'], axis=1)

In [13]:
rt_with_targets is rt_preprocessed

False

In [14]:
rt_with_targets.head()

Unnamed: 0,data,rt_95_inferior_nacional,rt_95_superior_nacional,rt_continente,rt_95_inferior_continente,rt_95_superior_continente,rt_arsnorte,rt_95_inferior_arsnorte,rt_95_superior_arsnorte,rt_arscentro,...,rt_95_superior_arsalentejo,rt_arsalgarve,rt_95_inferior_arsalgarve,rt_95_superior_arsalgarve,rt_açores,rt_95_inferior_açores,rt_95_superior_açores,rt_madeira,rt_95_inferior_madeira,rt_95_superior_madeira
0,2020-02-21,0.885115,1,1.393376,0.834001,2.108648,2.215064,0.794968,4.266393,,...,,,,,1.878477,0.216036,5.341265,,,
1,2020-02-22,0.909453,1,1.438007,0.894428,2.104977,2.03033,0.819878,3.852039,,...,,,,,1.828377,0.211497,5.236868,,,
2,2020-02-23,0.923809,1,1.456232,0.924477,2.129269,2.370303,1.140937,4.08529,,...,,,,,2.726909,0.565662,6.521018,,,
3,2020-02-24,0.918444,1,1.427503,0.925762,2.041711,2.258416,1.190901,3.620633,1.927019,...,3.569968,,,,2.030977,0.416034,4.983372,,,
4,2020-02-25,1.087196,1,1.611543,1.095469,2.21448,1.931235,1.072682,3.068771,1.992431,...,4.934545,,,,2.366339,0.668512,5.41121,,,


### Select the inputs for the regression

In [15]:
rt_with_targets.shape

(626, 27)

In [16]:
rt_with_targets.iloc[:,0:14]

Unnamed: 0,data,rt_95_inferior_nacional,rt_95_superior_nacional,rt_continente,rt_95_inferior_continente,rt_95_superior_continente,rt_arsnorte,rt_95_inferior_arsnorte,rt_95_superior_arsnorte,rt_arscentro,rt_95_inferior_arscentro,rt_95_superior_arscentro,rt_arslvt,rt_95_inferior_arslvt
0,2020-02-21,0.885115,1,1.393376,0.834001,2.108648,2.215064,0.794968,4.266393,,,,1.414062,0.732734
1,2020-02-22,0.909453,1,1.438007,0.894428,2.104977,2.030330,0.819878,3.852039,,,,1.439718,0.765302
2,2020-02-23,0.923809,1,1.456232,0.924477,2.129269,2.370303,1.140937,4.085290,,,,1.241255,0.645076
3,2020-02-24,0.918444,1,1.427503,0.925762,2.041711,2.258416,1.190901,3.620633,1.927019,0.607540,3.965284,1.208981,0.616847
4,2020-02-25,1.087196,1,1.611543,1.095469,2.214480,1.931235,1.072682,3.068771,1.992431,0.754982,3.839692,1.383078,0.773308
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
621,2021-11-03,1.115840,1,1.144398,1.113652,1.174920,1.126552,1.072533,1.183508,1.214325,1.152864,1.277882,1.123690,1.080266
622,2021-11-04,1.124110,1,1.158458,1.127431,1.190189,1.154554,1.099829,1.211579,1.233689,1.169683,1.297655,1.129463,1.084677
623,2021-11-05,1.134466,1,1.170480,1.138592,1.201710,1.158483,1.103669,1.214822,1.232988,1.169615,1.295261,1.143982,1.099840
624,2021-11-06,1.108531,1,1.144740,1.112671,1.177019,1.149113,1.096954,1.203664,1.185835,1.126034,1.247972,1.109979,1.066194


In [17]:
rt_with_targets.iloc[:,:-1]

Unnamed: 0,data,rt_95_inferior_nacional,rt_95_superior_nacional,rt_continente,rt_95_inferior_continente,rt_95_superior_continente,rt_arsnorte,rt_95_inferior_arsnorte,rt_95_superior_arsnorte,rt_arscentro,...,rt_95_inferior_arsalentejo,rt_95_superior_arsalentejo,rt_arsalgarve,rt_95_inferior_arsalgarve,rt_95_superior_arsalgarve,rt_açores,rt_95_inferior_açores,rt_95_superior_açores,rt_madeira,rt_95_inferior_madeira
0,2020-02-21,0.885115,1,1.393376,0.834001,2.108648,2.215064,0.794968,4.266393,,...,,,,,,1.878477,0.216036,5.341265,,
1,2020-02-22,0.909453,1,1.438007,0.894428,2.104977,2.030330,0.819878,3.852039,,...,,,,,,1.828377,0.211497,5.236868,,
2,2020-02-23,0.923809,1,1.456232,0.924477,2.129269,2.370303,1.140937,4.085290,,...,,,,,,2.726909,0.565662,6.521018,,
3,2020-02-24,0.918444,1,1.427503,0.925762,2.041711,2.258416,1.190901,3.620633,1.927019,...,0.025070,3.569968,,,,2.030977,0.416034,4.983372,,
4,2020-02-25,1.087196,1,1.611543,1.095469,2.214480,1.931235,1.072682,3.068771,1.992431,...,0.034929,4.934545,,,,2.366339,0.668512,5.411210,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
621,2021-11-03,1.115840,1,1.144398,1.113652,1.174920,1.126552,1.072533,1.183508,1.214325,...,0.886741,1.179569,1.163629,1.058053,1.275514,1.054800,0.873004,1.251412,1.278569,1.124044
622,2021-11-04,1.124110,1,1.158458,1.127431,1.190189,1.154554,1.099829,1.211579,1.233689,...,0.886810,1.178338,1.148369,1.046878,1.253765,1.042368,0.864365,1.241214,1.202553,1.063910
623,2021-11-05,1.134466,1,1.170480,1.138592,1.201710,1.158483,1.103669,1.214822,1.232988,...,0.959526,1.250934,1.193300,1.088383,1.299385,1.057981,0.878518,1.259953,1.184128,1.048767
624,2021-11-06,1.108531,1,1.144740,1.112671,1.177019,1.149113,1.096954,1.203664,1.185835,...,1.038476,1.349256,1.168368,1.068022,1.273239,1.026039,0.848013,1.221395,1.125691,0.995863


In [18]:
unscaled_inputs = rt_with_targets.iloc[:,:-1]

In [19]:
unscaled_inputs

Unnamed: 0,data,rt_95_inferior_nacional,rt_95_superior_nacional,rt_continente,rt_95_inferior_continente,rt_95_superior_continente,rt_arsnorte,rt_95_inferior_arsnorte,rt_95_superior_arsnorte,rt_arscentro,...,rt_95_inferior_arsalentejo,rt_95_superior_arsalentejo,rt_arsalgarve,rt_95_inferior_arsalgarve,rt_95_superior_arsalgarve,rt_açores,rt_95_inferior_açores,rt_95_superior_açores,rt_madeira,rt_95_inferior_madeira
0,2020-02-21,0.885115,1,1.393376,0.834001,2.108648,2.215064,0.794968,4.266393,,...,,,,,,1.878477,0.216036,5.341265,,
1,2020-02-22,0.909453,1,1.438007,0.894428,2.104977,2.030330,0.819878,3.852039,,...,,,,,,1.828377,0.211497,5.236868,,
2,2020-02-23,0.923809,1,1.456232,0.924477,2.129269,2.370303,1.140937,4.085290,,...,,,,,,2.726909,0.565662,6.521018,,
3,2020-02-24,0.918444,1,1.427503,0.925762,2.041711,2.258416,1.190901,3.620633,1.927019,...,0.025070,3.569968,,,,2.030977,0.416034,4.983372,,
4,2020-02-25,1.087196,1,1.611543,1.095469,2.214480,1.931235,1.072682,3.068771,1.992431,...,0.034929,4.934545,,,,2.366339,0.668512,5.411210,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
621,2021-11-03,1.115840,1,1.144398,1.113652,1.174920,1.126552,1.072533,1.183508,1.214325,...,0.886741,1.179569,1.163629,1.058053,1.275514,1.054800,0.873004,1.251412,1.278569,1.124044
622,2021-11-04,1.124110,1,1.158458,1.127431,1.190189,1.154554,1.099829,1.211579,1.233689,...,0.886810,1.178338,1.148369,1.046878,1.253765,1.042368,0.864365,1.241214,1.202553,1.063910
623,2021-11-05,1.134466,1,1.170480,1.138592,1.201710,1.158483,1.103669,1.214822,1.232988,...,0.959526,1.250934,1.193300,1.088383,1.299385,1.057981,0.878518,1.259953,1.184128,1.048767
624,2021-11-06,1.108531,1,1.144740,1.112671,1.177019,1.149113,1.096954,1.203664,1.185835,...,1.038476,1.349256,1.168368,1.068022,1.273239,1.026039,0.848013,1.221395,1.125691,0.995863


### Split the data into train & test and shuffle

#### Import the relevant module

In [27]:
from sklearn.model_selection import train_test_split

### Split

In [28]:
train_test_split(unscaled_inputs, targets)

[           data  rt_95_inferior_nacional  rt_95_superior_nacional  \
 29   2020-03-21                 1.338600                        1   
 526  2021-07-31                 0.899853                        0   
 488  2021-06-23                 1.119538                        1   
 260  2020-11-07                 1.064584                        1   
 564  2021-09-07                 0.806951                        0   
 ..          ...                      ...                      ...   
 21   2020-03-13                 1.711068                        1   
 312  2020-12-29                 1.174079                        1   
 214  2020-09-22                 1.021506                        1   
 397  2021-03-24                 0.909111                        0   
 205  2020-09-13                 1.105351                        1   
 
      rt_continente  rt_95_inferior_continente  rt_95_superior_continente  \
 29        1.419328                   1.346497                   1.497696   
 526