In [2]:
import pandas as pd 
import numpy as np

# State-Level controls

In [55]:
path = "/Users/jpmvbastos/Documents/GitHub/COVIDBR/Data/"
df_list = ['population','stategdp','health_insurance','hospitalbeds','icu','population65','doctors'] 
for i in df_list:
    x = pd.read_excel(path+i+".xls")
    x = pd.melt(x, id_vars=['Sigla','Codigo','Estado'], var_name='Year', value_name=i) # Use pd.melt() to convert to long format
    x['Year'] = x['Year'].astype('int64')
    x.sort_values(['Estado', 'Year'], inplace=True)
    globals()[i] = x    

In [58]:
df = stategdp
for i in [population, health_insurance, hospitalbeds, icu, population65, doctors]:
    df = pd.merge(df, i, on=['Sigla','Codigo','Estado','Year'], how='left')
df.Year = df.Year.astype(int)
df['Estado'] = df['Estado'].apply(lambda x: x.upper())
df['gdp_pc'] = df['stategdp']/df['population'] * 1000
df['health_insurance'] = df['health_insurance']/df['population'] * 100
df.rename(columns={'Estado':'State','Sigla':'UF','Codigo':'Code'}, inplace=True)
df

Unnamed: 0,UF,Code,State,Year,stategdp,population,health_insurance,hospitalbeds,icu,population65,doctors,gdp_pc
0,AC,12,ACRE,2000,4.840506e+06,541873.0,,,,,,8932.915256
1,AC,12,ACRE,2001,5.024465e+06,574355.0,,,,,,8748.013593
2,AC,12,ACRE,2002,5.644717e+06,586942.0,,,,,,9617.162440
3,AC,12,ACRE,2003,5.623480e+06,600595.0,,,,,,9363.181929
4,AC,12,ACRE,2004,5.847496e+06,630328.0,,,,,,9276.909271
...,...,...,...,...,...,...,...,...,...,...,...,...
589,TO,17,TOCANTINS,2017,2.087139e+07,1550194.0,,17.136089,2.221897,122.0,11.813619,13463.726442
590,TO,17,TOCANTINS,2018,2.088626e+07,1555229.0,,18.103979,2.292267,137.0,12.151480,13429.698554
591,TO,17,TOCANTINS,2019,2.211289e+07,1572866.0,8.019564,19.356385,2.342857,132.0,12.166220,14058.977552
592,TO,17,TOCANTINS,2020,2.303456e+07,1590248.0,,18.999657,3.312376,141.0,12.883735,14484.887991


In [68]:
cases = pd.read_csv('Data/COVIDCases.csv')
cases = cases.groupby(['UF','Ano_Semana'])[['CasosAcumulados','ObitosAcumulados','CasosNovos','ObitosNovos']].sum().reset_index()

# Use the Ano_Semana column to extract the year and week number
cases['Year'] = cases['Ano_Semana'].str.split('/').str[1]
cases['Week'] = cases['Ano_Semana'].str.split('/').str[0]
cases.drop('Ano_Semana', axis=1, inplace=True)

cases = cases.set_index(['UF','Year','Week']).reset_index().sort_values(['UF','Year','Week'], ascending=[True,True,True])
cases.head(10)

Unnamed: 0,UF,Year,Week,CasosAcumulados,ObitosAcumulados,CasosNovos,ObitosNovos
0,AC,2024,1,167530,2070,1485,3
1,AC,2024,2,167700,2070,170,0
2,AC,2024,3,168012,2075,312,5
3,AC,2024,4,168012,2075,0,0
4,AC,2024,5,168012,2075,0,0
5,AC,2024,6,168012,2075,0,0
6,AC,2024,7,168368,2075,356,0
7,AL,2024,1,344445,7318,859,1
8,AL,2024,2,345246,7319,801,1
9,AL,2024,3,345750,7323,504,4


Unnamed: 0,UF,Year,Week,CasosAcumulados,ObitosAcumulados,CasosNovos,ObitosNovos


# Economic Freedom Data

In [35]:
ef = pd.read_excel('Data/Serie_historica_2003-2019_subdimensoes_do_IMLEE.xlsx')
ef = pd.melt(ef, id_vars=['ID','UF'], var_name='Year', value_name='IMLEE').sort_values(['UF','Year'])

# Rename UF as State and replace its values 
ef.rename(columns={'UF':'State','ID':'Code'}, inplace=True)

brazilian_states = {
    'ACRE': 'AC',
    'ALAGOAS': 'AL',
    'AMAPÁ': 'AP',
    'AMAZONAS': 'AM',
    'BAHIA': 'BA',
    'CEARÁ': 'CE',
    'DISTRITO FEDERAL': 'DF',
    'ESPÍRITO SANTO': 'ES',
    'GOIÁS': 'GO',
    'MARANHÃO': 'MA',
    'MATO GROSSO': 'MT',
    'MATO GROSSO DO SUL': 'MS',
    'MINAS GERAIS': 'MG',
    'PARÁ': 'PA',
    'PARAÍBA': 'PB',
    'PARANÁ': 'PR',
    'PERNAMBUCO': 'PE',
    'PIAUÍ': 'PI',
    'RIO DE JANEIRO': 'RJ',
    'RIO GRANDE DO NORTE': 'RN',
    'RIO GRANDE DO SUL': 'RS',
    'RONDÔNIA': 'RO',
    'RORAIMA': 'RR',
    'SANTA CATARINA': 'SC',
    'SÃO PAULO': 'SP',
    'SERGIPE': 'SE',
    'TOCANTINS': 'TO'
}

ef['UF'] = ef['State'].map(brazilian_states)

#Keep only if year=2019
ef = ef[ef['Year'] >= 2018]

ef

Unnamed: 0,Code,State,Year,IMLEE,UF
390,12,ACRE,2018,7.15,AC
416,12,ACRE,2019,5.96,AC
442,12,ACRE,2020,3.11,AC
468,12,ACRE,2021,3.94,AC
391,27,ALAGOAS,2018,7.86,AL
...,...,...,...,...,...
491,35,SÃO PAULO,2021,6.03,SP
415,17,TOCANTINS,2018,7.55,TO
441,17,TOCANTINS,2019,4.93,TO
467,17,TOCANTINS,2020,3.16,TO


# Lockdown Stringency

In [49]:
# Import Lockdown data
ld = pd.read_excel('Data/Lockdown Stringency.xlsx')

#Keep only the first three columns
ld = ld.iloc[:,0:3]

#Rename columns
ld.rename(columns={'Lockdown Stringency Score':"LSS"}, inplace=True)
ld['State'] = ld['State'].str.upper()

#Remove spaces from the begining or end of values in State column
ld['State'] = ld['State'].str.strip()

#Remove spaces from the begining or end of values in State column
ld['State'] = ld['State'].str.strip()

# Include UF column
ld['UF'] = ld['State'].map(brazilian_states)

ld.head()

Unnamed: 0,State,Year,LSS,UF
0,ACRE,2020,2.461193,AC
1,ALAGOAS,2020,4.108177,AL
2,AMAPÁ,2020,3.426266,AP
3,AMAZONAS,2020,2.937432,AM
4,BAHIA,2020,3.516136,BA


In [60]:
# Merge datasets

data = pd.merge(df, ef, on=['UF','Year','State','Code'], how='left')
data = pd.merge(data, ld, on=['UF','Year','State'], how='left').sort_values(['UF','Year'])
data[data['Year'] >=2019].head()

Unnamed: 0,UF,Code,State,Year,stategdp,population,health_insurance,hospitalbeds,icu,population65,doctors,gdp_pc,IMLEE,LSS
19,AC,12,ACRE,2019,8782024.0,881935.0,1.758633,16.654667,1.859547,54.0,9.176791,9957.676969,5.96,
20,AC,12,ACRE,2020,8694794.0,894470.0,,17.613782,3.022274,58.0,9.801894,9720.609449,3.11,2.461193
21,AC,12,ACRE,2021,9977504.0,906876.0,,17.926008,4.493448,50.0,10.100609,11002.059299,3.94,4.041847
41,AL,27,ALAGOAS,2019,33129900.0,3337357.0,8.26789,17.17157,2.078769,302.0,11.002441,9926.986624,6.56,
42,AL,27,ALAGOAS,2020,33352690.0,3351543.0,,19.098501,2.765516,302.0,11.821022,9951.44473,3.01,4.108177


In [69]:
# Regress LSS on IMLEE using formula interface
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Create a model    
model = smf.ols('LSS ~ IMLEE', data=df) 

# Fit the model
results = model.fit()

# Print a summary of the model
print(results.summary())


                            OLS Regression Results                            
Dep. Variable:                    LSS   R-squared:                       0.037
Model:                            OLS   Adj. R-squared:                 -0.003
Method:                 Least Squares   F-statistic:                    0.9235
Date:                Mon, 04 Mar 2024   Prob (F-statistic):              0.346
Time:                        16:46:12   Log-Likelihood:                -29.336
No. Observations:                  26   AIC:                             62.67
Df Residuals:                      24   BIC:                             65.19
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      2.2656      1.400      1.619      0.1

In [16]:
population

Unnamed: 0,Sigla,Codigo,Estado,Year,population
0,AC,12,Acre,1992,428006
27,AC,12,Acre,1993,437495
54,AC,12,Acre,1994,446480
81,AC,12,Acre,1995,455242
108,AC,12,Acre,1997,500185
...,...,...,...,...,...
620,TO,17,Tocantins,2017,1550194
647,TO,17,Tocantins,2018,1555229
674,TO,17,Tocantins,2019,1572866
701,TO,17,Tocantins,2020,1590248
