In [2]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as sm

In [3]:
df_pense_raw = pd.read_csv('PENSE2019_MICRODADOS.csv')

In [4]:
df_pense = df_pense_raw[['B01001A', 'B01003', 'B01002', 'B01021A', 'B01006', 'B01007',
                         'B04001', 'B04003', 'B04006B', 'B05002A', 'B06001', 'B12003', 'B12006', 'B12008']]

df_pense = df_pense.rename(columns = {'B01001A': 'sexo',
                                      'B01003': 'idade',
                                      'B01002': 'cor_raca',
                                      'B01021A': 'ano_esc',
                                      'B01006': 'mora_mae',
                                      'B01007': 'mora_pai',
                                      'B04001': 'cigs',
                                      'B04003': 'cigs_dias',
                                      'B04006B': 'cigs_resp',
                                      'B05002A': 'bebs',
                                      'B06001': 'drgs',
                                      'B12003': 'ment_amgs',
                                      'B12006': 'ment_sol',
                                      'B12008': 'ment_suic'})

df_pense = df_pense.dropna(subset=['idade'])

df_pense.head()

Unnamed: 0,sexo,idade,cor_raca,ano_esc,mora_mae,mora_pai,cigs,cigs_dias,cigs_resp,bebs,drgs,ment_amgs,ment_sol,ment_suic
0,2.0,1.0,4.0,2.0,1.0,2.0,-1.0,-1.0,-1.0,-1.0,-1.0,4.0,4.0,3.0
1,2.0,2.0,4.0,3.0,1.0,2.0,2.0,-1.0,1.0,1.0,2.0,4.0,1.0,3.0
2,1.0,1.0,4.0,2.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,4.0,1.0,1.0
3,1.0,1.0,4.0,2.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,4.0,5.0,1.0
4,1.0,2.0,1.0,3.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,4.0,1.0,3.0


In [5]:
#Dummy p/ homem = 1
df_pense['d_homem'] = np.where(df_pense['sexo'] == 1, 1, 0)
df_pense['d_homem'] = df_pense['d_homem'].where(df_pense['sexo'] != 9, np.nan)

#Dummy p/ idade = 1 se >= 16
df_pense['d_idade'] = np.where((df_pense['idade'] == 3) | (df_pense['idade'] == 4), 1, 0)
df_pense['d_idade'] = df_pense['d_idade'].where(df_pense['idade'] != 9, np.nan)

#Dummy p/ branco = 1
df_pense['d_branco'] = np.where(df_pense['cor_raca'] == 1, 1, 0)
df_pense['d_branco'] = df_pense['d_branco'].where((df_pense['cor_raca'] != 9) &
                                                  (df_pense['cor_raca'] != -2), np.nan)

#Dummy p/ ensino médio = 1
df_pense['d_med'] = np.where((df_pense['ano_esc'] >= 5) & (df_pense['ano_esc'] <= 7), 1, 0)
df_pense['d_med'] = df_pense['d_med'].where((df_pense['ano_esc'] != 9) &
                                            (df_pense['ano_esc'] != -2), np.nan)

#Dummy p/ lar tradicional (mora com mãe e pai) = 1
df_pense['d_lar'] = np.where((df_pense['mora_mae'] == 1) & (df_pense['mora_pai'] == 1), 1, 0)
df_pense['d_lar'] = df_pense['d_lar'].where((df_pense['mora_mae'] != 9) &
                                            (df_pense['mora_mae'] != -2), np.nan)
df_pense['d_lar'] = df_pense['d_lar'].where((df_pense['mora_pai'] != 9) &
                                            (df_pense['mora_pai'] != -2), np.nan)

#Dummy p/ uso de cigarro = 1
df_pense['d_cigs'] = np.where(df_pense['cigs'] == 1, 1, 0)
df_pense['d_cigs'] = df_pense['d_cigs'].where((df_pense['cigs'] != -2) &
                                              (df_pense['cigs'] != -1) &
                                              (df_pense['cigs'] != 9), np.nan)

#Dummy p/ uso severo de cigarro (mais de 20 dias por mês) = 1
df_pense['d_tbgs'] = np.where((df_pense['cigs_dias'] == 5) | (df_pense['cigs_dias'] == 7), 1, 0)
df_pense['d_tbgs'] = df_pense['d_tbgs'].where((df_pense['cigs_dias'] != -2) &
                                              (df_pense['cigs_dias'] != -1) &
                                              (df_pense['cigs_dias'] != 9), np.nan)

#Dummy p/ responsável tabagista = 1
df_pense['d_resp_tbgs'] = np.where((df_pense['cigs_resp'] >= 2) & (df_pense['cigs_resp'] <= 4), 1, 0)
df_pense['d_resp_tbgs'] = df_pense['d_resp_tbgs'].where((df_pense['cigs_resp'] != -2) &
                                                        (df_pense['cigs_resp'] != -1) &
                                                        (df_pense['cigs_resp'] != 9), np.nan)

#Dummy p/ consumo de bebida alcóolica = 1
df_pense['d_alcool'] = np.where((df_pense['bebs'] == 1), 1, 0)
df_pense['d_alcool'] = df_pense['d_alcool'].where((df_pense['bebs'] != -2) &
                                                  (df_pense['bebs'] != -1) &
                                                  (df_pense['bebs'] != 9), np.nan)

#Dummy p/ outras drogas = 1
df_pense['d_drgs'] = np.where((df_pense['drgs'] == 1), 1, 0)
df_pense['d_drgs'] = df_pense['d_drgs'].where((df_pense['drgs'] != -2) &
                                              (df_pense['drgs'] != -1) &
                                              (df_pense['drgs'] != 9), np.nan)

#Dummy p/ sem amigos = 1
df_pense['d_amgs'] = np.where((df_pense['ment_amgs'] == 1), 1, 0)
df_pense['d_amgs'] = df_pense['d_amgs'].where((df_pense['ment_amgs'] != -2) &
                                              (df_pense['ment_amgs'] != 9), np.nan)

#Dummy p/ solidão = 1
df_pense['d_sold'] = np.where((df_pense['ment_sol'] == 4) | (df_pense['ment_sol'] == 5), 1, 0)
df_pense['d_sold'] = df_pense['d_sold'].where((df_pense['ment_sol'] != -2) &
                                              (df_pense['ment_sol'] != 9), np.nan)

#Dummy p/ mentalidade suicida = 1
df_pense['d_suic'] = np.where((df_pense['ment_suic'] >= 3) & (df_pense['ment_suic'] <= 5), 1, 0)
df_pense['d_suic'] = df_pense['d_suic'].where((df_pense['ment_suic'] != -2) &
                                              (df_pense['ment_suic'] != 9), np.nan)

df_pense_regs = df_pense[['d_homem', 'd_idade', 'd_branco', 'd_med', 'd_lar', 'd_cigs', 'd_tbgs', 'd_resp_tbgs', 'd_alcool', 'd_drgs', 'd_amgs', 'd_sold', 'd_suic']]
df_pense_regs.head()

Unnamed: 0,d_homem,d_idade,d_branco,d_med,d_lar,d_cigs,d_tbgs,d_resp_tbgs,d_alcool,d_drgs,d_amgs,d_sold,d_suic
0,0.0,0.0,0.0,0.0,0.0,,,,,,0.0,1.0,1.0
1,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,1.0
2,1.0,0.0,0.0,0.0,1.0,,,,,,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,1.0,,,,,,0.0,1.0,0.0
4,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0


In [None]:
#Estimando o modelo logístico para uso de cigarro
X = df_pense[['d_homem', 'd_idade', 'd_branco', 'd_med', 'd_lar', 'd_resp_tbgs', 'd_alcool', 'd_drgs', 'd_amgs', 'd_sold', 'd_suic']]
Y = df_pense[['d_cigs']]

reg = sm.logit('Y ~ X', data=df_pense).fit()

print(reg.summary())
print(np.exp(reg.params))

In [6]:
#Estimando o mdoelo logístico para uso severo de cigarro (fuma mais de 20 dias por mês)
X = df_pense[['d_homem', 'd_idade', 'd_branco', 'd_med', 'd_lar', 'd_resp_tbgs', 'd_alcool', 'd_drgs', 'd_amgs', 'd_sold', 'd_suic']]
Y = df_pense[['d_tbgs']]

reg = sm.logit('Y ~ X', data=df_pense).fit()

print(reg.summary())
print(np.exp(reg.params))

Optimization terminated successfully.
         Current function value: 0.206056
         Iterations 8
                           Logit Regression Results                           
Dep. Variable:                      Y   No. Observations:                27401
Model:                          Logit   Df Residuals:                    27389
Method:                           MLE   Df Model:                           11
Date:                Wed, 25 Sep 2024   Pseudo R-squ.:                 0.08969
Time:                        08:30:50   Log-Likelihood:                -5646.1
converged:                       True   LL-Null:                       -6202.5
Covariance Type:            nonrobust   LLR p-value:                1.076e-231
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -4.3311      0.167    -25.870      0.000      -4.659      -4.003
X[0]           0.6902      0.

In [None]:
#Estimando o modelo p/ outras drogas
X = df_pense[['d_homem', 'd_idade', 'd_branco', 'd_med', 'd_lar', 'd_resp_tbgs', 'd_alcool', 'd_tbgs', 'd_amgs', 'd_sold', 'd_suic']]
Y = df_pense[['d_drgs']]

reg = sm.logit('Y ~ X', data=df_pense).fit()

print(reg.summary())
print(np.exp(reg.params))