In [1]:
import pandas as pd 
import numpy as np

In [13]:
# Load the COVID case file, total the four count columns per state (UF) and
# 'Ano_Semana' label, then split that label on '/' into its two parts:
# the piece before the slash becomes Week, the piece after it becomes Year.
# Both pieces are kept as strings, exactly as they appear in the label.
cases = (
    pd.read_csv('Data/COVIDCases.csv')
    .groupby(['UF', 'Ano_Semana'])[
        ['CasosAcumulados', 'ObitosAcumulados', 'CasosNovos', 'ObitosNovos']
    ]
    .sum()
    .reset_index()
    .assign(
        Year=lambda frame: frame['Ano_Semana'].str.split('/').str[1],
        Week=lambda frame: frame['Ano_Semana'].str.split('/').str[0],
    )
    # The combined label is redundant once Year and Week exist
    .drop(columns='Ano_Semana')
    .set_index(['UF', 'Year', 'Week'])
)

cases

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,CasosAcumulados,ObitosAcumulados,CasosNovos,ObitosNovos
UF,Year,Week,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AC,2024,1,167530,2070,1485,3
AC,2024,2,167700,2070,170,0
AC,2024,3,168012,2075,312,5
AC,2024,4,168012,2075,0,0
AC,2024,5,168012,2075,0,0
...,...,...,...,...,...,...
TO,2024,3,373187,4259,0,0
TO,2024,4,375220,4262,2033,3
TO,2024,5,376330,4266,1110,4
TO,2024,6,377571,4267,1242,1


In [59]:
# Lookup from the upper-case state names used in the IMLEE workbook to the
# two-letter UF abbreviations.
brazilian_states = {
    'ACRE': 'AC',
    'ALAGOAS': 'AL',
    'AMAPÁ': 'AP',
    'AMAZONAS': 'AM',
    'BAHIA': 'BA',
    'CEARÁ': 'CE',
    'DISTRITO FEDERAL': 'DF',
    'ESPÍRITO SANTO': 'ES',
    'GOIÁS': 'GO',
    'MARANHÃO': 'MA',
    'MATO GROSSO': 'MT',
    'MATO GROSSO DO SUL': 'MS',
    'MINAS GERAIS': 'MG',
    'PARÁ': 'PA',
    'PARAÍBA': 'PB',
    'PARANÁ': 'PR',
    'PERNAMBUCO': 'PE',
    'PIAUÍ': 'PI',
    'RIO DE JANEIRO': 'RJ',
    'RIO GRANDE DO NORTE': 'RN',
    'RIO GRANDE DO SUL': 'RS',
    'RONDÔNIA': 'RO',
    'RORAIMA': 'RR',
    'SANTA CATARINA': 'SC',
    'SÃO PAULO': 'SP',
    'SERGIPE': 'SE',
    'TOCANTINS': 'TO',
}

# Reshape the wide workbook (one column per year) into long form, relabel the
# original 'UF' column (which holds full state names) as 'State', attach the
# two-letter code as a new 'UF' column, and keep only the 2019 rows.
df = (
    pd.read_excel('Data/Serie_historica_2003-2019_subdimensoes_do_IMLEE.xlsx')
    .melt(id_vars=['ID', 'UF'], var_name='Year', value_name='IMLEE')
    .sort_values(['UF', 'Year'])
    .rename(columns={'UF': 'State'})
    # Names missing from the lookup come out as NaN
    .assign(UF=lambda frame: frame['State'].map(brazilian_states))
    .loc[lambda frame: frame['Year'] == 2019]
)

df

Unnamed: 0,ID,State,Year,IMLEE,UF
417,12,ACRE,2019,5.958055,AC
429,27,ALAGOAS,2019,6.555318,AL
421,16,AMAPÁ,2019,6.202433,AP
418,13,AMAZONAS,2019,5.813226,AM
431,29,BAHIA,2019,6.853002,BA
425,23,CEARÁ,2019,6.598357,CE
433,32,ESPÍRITO SANTO,2019,7.285271,ES
441,52,GOIÁS,2019,6.131939,GO
423,21,MARANHÃO,2019,6.723661,MA
440,51,MATO GROSSO,2019,5.515078,MT


In [66]:
# Import Lockdown data
ld = pd.read_excel('Data/Lockdown Stringency.xlsx')

#Keep only the first two columns
ld = ld.iloc[:,0:2]

#Rename columns
ld.rename(columns={'Lockdown Stringency Score':"LSS"}, inplace=True)
ld['State'] = ld['State'].str.upper()

#Remove spaces from the begining or end of values in State column
ld['State'] = ld['State'].str.strip()

# Merge the lockdown and IMLEE dataframes
df = pd.merge(df, ld, on='State', how='left')

df


Unnamed: 0,ID,State,Year,IMLEE,UF,LSS
0,12,ACRE,2019,5.958055,AC,2.461193
1,27,ALAGOAS,2019,6.555318,AL,4.108177
2,16,AMAPÁ,2019,6.202433,AP,3.426266
3,13,AMAZONAS,2019,5.813226,AM,2.937432
4,29,BAHIA,2019,6.853002,BA,3.516136
5,23,CEARÁ,2019,6.598357,CE,3.407203
6,32,ESPÍRITO SANTO,2019,7.285271,ES,3.183211
7,52,GOIÁS,2019,6.131939,GO,4.612949
8,21,MARANHÃO,2019,6.723661,MA,5.717593
9,51,MATO GROSSO,2019,5.515078,MT,2.685866


In [69]:
# Ordinary least squares with LSS as the response and IMLEE as the single
# regressor, specified through the statsmodels formula interface.
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Build the model from the merged frame, then estimate it
model = smf.ols(formula='LSS ~ IMLEE', data=df)
results = model.fit()

# Show the regression table as plain text
print(results.summary())


                            OLS Regression Results                            
Dep. Variable:                    LSS   R-squared:                       0.037
Model:                            OLS   Adj. R-squared:                 -0.003
Method:                 Least Squares   F-statistic:                    0.9235
Date:                Mon, 04 Mar 2024   Prob (F-statistic):              0.346
Time:                        16:46:12   Log-Likelihood:                -29.336
No. Observations:                  26   AIC:                             62.67
Df Residuals:                      24   BIC:                             65.19
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      2.2656      1.400      1.619      0.1