### Regressão de dados em painel 

Pacotes

In [3]:
import pandas as pd
import numpy as np
from itertools import product

# Modelo
import  statsmodels.api as sm 
from linearmodels.panel import PanelOLS

In [4]:
panel_data = pd.read_parquet('../Dados/pnad_covid/pnad_covid_tratada.parquet')

In [5]:
panel_data.head()

Unnamed: 0,Ano,V1013,V1012,UPA,Estrato,V1008,V1016,A003,A001B1,A001B2,...,17,19,20,21,22,23,24,25,27,28
0,2020,5,1,110000016,1110011,1,1,0,2,2,...,False,False,False,False,False,False,True,False,False,False
1,2020,5,1,110000016,1110011,1,1,0,15,2,...,False,False,False,False,False,False,True,False,False,False
2,2020,5,1,110000016,1110011,1,1,0,24,1,...,False,False,False,False,False,False,True,False,False,False
3,2020,5,1,110000016,1110011,1,1,1,29,12,...,False,False,False,False,False,False,True,False,False,False
4,2020,5,4,110000016,1110011,10,1,1,18,6,...,False,False,False,False,False,False,True,False,False,False


In [6]:
panel_data.columns

Index(['Ano', 'V1013', 'V1012', 'UPA', 'Estrato', 'V1008', 'V1016', 'A003',
       'A001B1', 'A001B2', 'A001B3', 'A004', 'A002', 'A005', 'UF', 'C007',
       'C007B', 'C007D', 'C001', 'C002', 'C003', 'C013', 'F006',
       'domicilio_id', 'grupo_etario', 'formalidade', 'setor', 'Ocupação',
       'data_formatada', 'semana_dt', 'sigla_uf', '12', '15', '17', '19', '20',
       '21', '22', '23', '24', '25', '27', '28'],
      dtype='object')

variáveis de identificação:
- semana_dt = semana de referencia
- domicilio_id = identificação do domicilio

variáveis exógenas:
- A003, sexo
- A004, cor
- A005, escolaridade
- grupo_etario
- formalidade
- setor
- Ocupação
- 12, 15, 17, 19, 20, 21, 22, 23, 24, 25, 27, 28, datas (semana de flexibilização em formato dummy)

variável endógena:
C013, trabalho remoto

In [7]:
panel_data = panel_data[['semana_dt', 'domicilio_id', #indetificadores
                         'C013', #endógena
                         'A003', 'A004', 'A005', 'grupo_etario', 'formalidade', 'setor', 'Ocupação', #exógenas
                         '12', '15', '17', '19', '20', '21', '22', '23', '24', '25', '27', '28']]

In [8]:
panel_data.shape

(2508686, 22)

In [9]:
panel_data.dropna(subset='C013', inplace=True)

In [10]:
panel_data.shape

(113093, 22)

### Balanceamento do modelo

In [11]:
# correcting unbalanced data (like ids missing dates along the dataset)
 
ids = list(panel_data['domicilio_id'].unique())
dates = list(panel_data['semana_dt'].unique())

In [12]:
indexes = pd.MultiIndex.from_product(iterables = [ids, dates],names = ['id', 'time'])

In [13]:
balanced_panel = product(ids, dates)

In [14]:
df_aux = pd.DataFrame(data = balanced_panel, columns = ['portfolio_id', 'date'],index= indexes)

In [15]:
#merge the balanced panel with the original dataset

panel_data = pd.merge(df_aux, panel_data, how = 'left', left_on = ['portfolio_id', 'date'], right_on = ['domicilio_id', 'semana_dt'])

In [16]:
#set multiindex to the dataset with portfolio_id and date

panel_data.set_index(['portfolio_id', 'date'], inplace = True)

### Regressão

In [22]:
exog_variables

Unnamed: 0_level_0,Unnamed: 1_level_0,A003,A004,A005,grupo_etario,formalidade,setor,Ocupação,12,15,17,19,20,21,22,23,24,25,27,28
portfolio_id,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
11000001613Homem1211980,18,1.0,1.0,Superior completo,,Militar,,Pessoa ocupada não afastada,False,False,False,False,False,False,False,False,True,False,False,False
11000001613Homem1211980,20,,,,,,,,,,,,,,,,,,,
11000001613Homem1211980,19,,,,,,,,,,,,,,,,,,,
11000001613Homem1211980,21,,,,,,,,,,,,,,,,,,,
11000001613Homem1211980,24,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5300298004Homem2271983,39,,,,,,,,,,,,,,,,,,,
5300298004Homem2271983,42,1.0,0.0,Fundamental completa,,Informal,Outros,Pessoa ocupada não afastada,False,False,False,False,True,False,False,False,False,False,False,False
5300298004Homem2271983,44,,,,,,,,,,,,,,,,,,,
5300298004Homem2271983,45,,,,,,,,,,,,,,,,,,,


In [29]:
panel_data['A005']

portfolio_id             date
11000001613Homem1211980  18         Superior completo
                         20                       NaN
                         19                       NaN
                         21                       NaN
                         24                       NaN
                                         ...         
5300298004Homem2271983   39                       NaN
                         42      Fundamental completa
                         44                       NaN
                         45                       NaN
                         43                       NaN
Name: A005, Length: 1023662, dtype: category
Categories (8, object): ['Sem instrução' < 'Fundamental incompleto' < 'Fundamental completa' < 'Médio incompleto' < 'Médio completo' < 'Superior incompleto' < 'Superior completo' < 'Pós-graduação, mestrado ou doutorado']

In [30]:
#Variáveis explicativas do modelo
# Adicionando uma constante
exog_variables = panel_data[['A003', 'A004', 'A005', 'grupo_etario', 'formalidade', 'setor', 'Ocupação', '12', '15', '17', '19', '20', '21', '22', '23', '24', '25', '27', '28']]

In [24]:
exog_variables.shape

(1023662, 18)

In [36]:
# estimating the fixed effects model
f_effects_model = PanelOLS( dependent = panel_data.C013, # variável endógena (Y)
exog = exog_variables, # exogenous variables (X matrix)
entity_effects = True, # True -> Fixed Effects
check_rank = False,  # True -> Balanced Panel
drop_absorbed=True)

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


In [35]:
f_effects_results = f_effects_model.fit()

AbsorbingEffectError: 
The model cannot be estimated. The included effects have fully absorbed
one or more of the variables. This occurs when one or more of the dependent
variable is perfectly explained using the effects included in the model.

The following variables or variable combinations have been fully absorbed
or have become perfectly collinear after effects are removed:

          A003
          grupo_etario.20-2930-3940-4950-5960-6970-7980 ou mais
          12
          15
          17
          19
          20
          21
          22
          23
          24
          25
          27
          28

Set drop_absorbed=True to automatically drop absorbed variables.


In [33]:
f_effects_results.summary

0,1,2,3
Dep. Variable:,C013.Não,R-squared:,0.0007
Estimator:,PanelOLS,R-squared (Between):,-0.0341
No. Observations:,113093,R-squared (Within):,0.0007
Date:,"Wed, Jun 14 2023",R-squared (Overall):,-0.0355
Time:,00:48:45,Log-likelihood,1.181e+05
Cov. Estimator:,Unadjusted,,
,,F-statistic:,6.3584
Entities:,36558,P-value,0.0000
Avg Obs:,3.0935,Distribution:,"F(8,76527)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
A004,0.0258,0.0070,3.7107,0.0002,0.0122,0.0394
A005.Fundamental incompleto,0.0041,0.0261,0.1559,0.8762,-0.0471,0.0552
A005.Fundamental completa,-0.0006,0.0294,-0.0220,0.9824,-0.0583,0.0570
A005.Médio incompleto,-0.0255,0.0291,-0.8765,0.3807,-0.0826,0.0316
A005.Médio completo,-0.0374,0.0289,-1.2919,0.1964,-0.0941,0.0193
A005.Superior incompleto,-0.0739,0.0296,-2.4912,0.0127,-0.1320,-0.0158
A005.Superior completo,-0.0579,0.0293,-1.9787,0.0478,-0.1153,-0.0005
"A005.Pós-graduação, mestrado ou doutorado",-0.0527,0.0295,-1.7851,0.0742,-0.1106,0.0052


---

Efeito do 2º choque - Flexibilização