### Notebook para estimar o modelo de efeitos aleatórios


In [34]:
import numpy as np
import pandas as pd
from linearmodels import PanelOLS
from linearmodels import RandomEffects

### Exemplo

In [21]:
from linearmodels.datasets import jobtraining


data = jobtraining.load()
year = pd.Categorical(data.year)
data = data.set_index(['fcode', 'year'])
data['year'] = year

In [10]:
import statsmodels.api as sm

exog_vars = ['grant', 'employ']
exog = sm.add_constant(data[exog_vars])

mod = RandomEffects(data.clscrap, exog)

re_res = mod.fit()
print(re_res)

                        RandomEffects Estimation Summary                        
Dep. Variable:                clscrap   R-squared:                        0.0165
Estimator:              RandomEffects   R-squared (Between):              0.0314
No. Observations:                 105   R-squared (Within):               0.0015
Date:                Sun, Jun 18 2023   R-squared (Overall):              0.0199
Time:                        19:46:00   Log-likelihood                   -77.721
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      0.8542
Entities:                          53   P-value                           0.4286
Avg Obs:                       1.9811   Distribution:                   F(2,102)
Min Obs:                       1.0000                                           
Max Obs:                       2.0000   F-statistic (robust):             0.8634
                            

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


Exemplo Efeitos Fixos

In [11]:
mod = PanelOLS(data.clscrap, exog)

re_res = mod.fit()

print(re_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:                clscrap   R-squared:                        0.0205
Estimator:                   PanelOLS   R-squared (Between):              0.0362
No. Observations:                 105   R-squared (Within):              -0.0048
Date:                Sun, Jun 18 2023   R-squared (Overall):              0.0205
Time:                        19:47:15   Log-likelihood                   -91.226
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      1.0653
Entities:                          53   P-value                           0.3484
Avg Obs:                       1.9811   Distribution:                   F(2,102)
Min Obs:                       1.0000                                           
Max Obs:                       2.0000   F-statistic (robust):             1.0653
                            

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


In [16]:
import statsmodels.formula.api as smf

data = jobtraining.load()
data['year'] = pd.Categorical(data.year)
FE_ols = smf.ols(formula='clscrap ~ 1 + grant + employ + C(fcode)', data = data).fit()
print(FE_ols.summary())

                            OLS Regression Results                            
Dep. Variable:                clscrap   R-squared:                       0.630
Model:                            OLS   Adj. R-squared:                  0.230
Method:                 Least Squares   F-statistic:                     1.577
Date:                Sun, 18 Jun 2023   Prob (F-statistic):             0.0529
Time:                        19:48:55   Log-Likelihood:                -40.109
No. Observations:                 105   AIC:                             190.2
Df Residuals:                      50   BIC:                             336.2
Df Model:                          54                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
Intercept             -0.0558      0

### Modelo

#### Efeitos fixos

In [89]:
covid = pd.read_parquet('../Dados/pnad_covid/pnad_covid_balanced_panel.parquet')

In [90]:
#for every portfolio, fill mising values with first value of sigla_uf

covid['sigla_uf'] = covid['sigla_uf'].fillna(method='ffill')

covid['C013'] = covid['C013'].fillna(method='ffill')


In [72]:
data_flex = pd.read_excel('../Anexos/dummy_flex_completa.xlsx')

In [73]:
#use pd.melt to transform wide format to long format

data_flex = pd.melt(data_flex, id_vars=['UF'], value_vars=data_flex.columns[1:].to_list())

data_flex = data_flex.rename(columns={'variable':'semana', 'value':'flex'})

data_flex['semana'] = pd.to_numeric(data_flex['semana'])

data_flex.sort_values(by=['UF', 'semana'], inplace=True)

In [92]:
covid.reset_index(inplace=True)

In [94]:
#merge data_flex with covid matching UF and semana

covid = covid.merge(data_flex, left_on=['sigla_uf', 'date'], right_on=['UF', 'semana'], how='left')

In [96]:
covid.set_index(['sigla_uf', 'date'], inplace=True)

In [97]:
covid

Unnamed: 0_level_0,Unnamed: 1_level_0,portfolio_id,C013,UF,semana,flex
sigla_uf,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
RO,18,1100000161Mulher221974,,RO,18,0
RO,19,1100000161Mulher221974,,RO,19,0
RO,20,1100000161Mulher221974,,RO,20,0
RO,21,1100000161Mulher221974,,RO,21,0
RO,22,1100000161Mulher221974,,RO,22,0
...,...,...,...,...,...,...
DF,41,5300269062Homem17122010,Sim,DF,41,1
DF,42,5300269062Homem17122010,Sim,DF,42,1
DF,43,5300269062Homem17122010,Sim,DF,43,1
DF,44,5300269062Homem17122010,Sim,DF,44,1


In [98]:
import statsmodels.formula.api as smf

FE_ols = smf.ols(formula='C013 ~ 1 + C(flex)', data = covid).fit()
print(FE_ols.summary())

ValueError: endog has evaluated to an array with multiple columns that has shape (13079966, 2). This occurs when the variable converted to endog is non-numeric (e.g., bool or str).

In [19]:
import statsmodels.api as sm

exog_vars = ['A003', 'A004', 'A005']
exog = sm.add_constant(covid[exog_vars])

mod = RandomEffects(endog, exog)

re_res = mod.fit()
print(re_res)

                        RandomEffects Estimation Summary                        
Dep. Variable:                    Sim   R-squared:                        0.1738
Estimator:              RandomEffects   R-squared (Between):              0.4889
No. Observations:              113093   R-squared (Within):              -0.0406
Date:                Sun, Jun 18 2023   R-squared (Overall):              0.4856
Time:                        20:03:15   Log-likelihood                 9.424e+04
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      1829.5
Entities:                       36558   P-value                           0.0000
Avg Obs:                       3.0935   Distribution:               F(13,113079)
Min Obs:                       1.0000                                           
Max Obs:                       14.000   F-statistic (robust):             2424.4
                            