In [1]:
from collections import OrderedDict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import wooldridge
from linearmodels.panel import (
    FirstDifferenceOLS,
    PanelOLS,
    PooledOLS,
    RandomEffects,
    compare,
)
from linearmodels.panel.data import PanelData
from scipy.stats import gaussian_kde, norm
from statsmodels.formula.api import ols

# 警告メッセージを非表示
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Address of the CSV file (hosted on raw.githubusercontent.com).
url = 'https://raw.githubusercontent.com/Haruyama-KobeU/Haruyama-KobeU.github.io/master/data/data4.csv'

# Read the CSV, use (country, year) as the row MultiIndex, and sort by it.
df = (
    pd.read_csv(url)
    .set_index(['country', 'year'])
    .sort_index()
)

In [10]:
# Wrap the (country, year)-indexed DataFrame in linearmodels' PanelData container.
df_panel = PanelData(df)

# Boolean Series indexed by (country, year); presumably True marks rows that
# contain a missing value (Australia 2002 and UK 2002 in the output below).
df_panel.isnull

country    year
Australia  2000    False
           2001    False
           2002     True
Japan      2000    False
           2001    False
           2002    False
UK         2000    False
           2001    False
           2002     True
dtype: bool

In [9]:
# First-difference every variable within each country. In the output below only
# four rows remain: the first year of each country has no predecessor, and the
# rows flagged by isnull (Australia 2002, UK 2002) do not appear either.
df_panel.first_difference()

Unnamed: 0_level_0,Unnamed: 1_level_0,gdp,inv,con,pop
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Australia,2001,10.0,10.0,30.0,1.0
Japan,2001,-5.0,5.0,-10.0,1.0
Japan,2002,-2.0,-4.0,2.0,1.0
UK,2001,10.0,9.0,1.0,1.0


In [12]:
# Regress lcrmrte on d82-d87 (presumably year dummies) and five further
# regressors. No intercept term is written into the formula.
formula = 'lcrmrte ~ d82 + d83 + d84 + d85 + d86 + d87 + lprbarr + \
                lprbconv + lprbpris + lavgsen + lpolpc'

# Load the crime4 dataset once and index it by (county, year), so that
# linearmodels can identify the entity and time dimensions.
crime4 = wooldridge.data('crime4').set_index(['county', 'year'])

# FirstDifferenceOLS comes from linearmodels.panel (see the import cell);
# without that import this cell raises NameError on a fresh kernel.
mod_dif = FirstDifferenceOLS.from_formula(formula, data=crime4)

In [13]:
# Fit the first-difference model with default settings; the summary below
# reports the covariance estimator as "Unadjusted".
res_dif = mod_dif.fit()

# Print the plain-text estimation summary.
print(res_dif.summary)

                     FirstDifferenceOLS Estimation Summary                      
Dep. Variable:                lcrmrte   R-squared:                        0.4326
Estimator:         FirstDifferenceOLS   R-squared (Between):              0.7834
No. Observations:                 540   R-squared (Within):               0.4281
Date:                Fri, Aug 04 2023   R-squared (Overall):              0.7826
Time:                        15:00:04   Log-likelihood                    248.48
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      36.661
Entities:                          90   P-value                           0.0000
Avg Obs:                       7.0000   Distribution:                  F(11,529)
Min Obs:                       7.0000                                           
Max Obs:                       7.0000   F-statistic (robust):             36.661
                            

# 固定効果モデルについて考える
- あわせて，ランダム効果モデルと相関ランダム効果モデルも扱う

In [18]:
# Load the wagepan dataset once and index it by (nr, year).
# drop=False keeps nr and year as ordinary columns in addition to the index.
# Tip: wooldridge.data('wagepan', description=True) shows the dataset description.
wagepan_raw = wooldridge.data('wagepan').set_index(['nr', 'year'], drop=False)

# Wrap the indexed frame in PanelData; later cells pass `wagepan` as data=.
wagepan = PanelData(wagepan_raw)

wagepan

Unnamed: 0_level_0,Unnamed: 1_level_0,nr,year,agric,black,bus,construc,ent,exper,fin,hisp,...,union,lwage,d81,d82,d83,d84,d85,d86,d87,expersq
nr,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
13,1980,13.0,1980.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,1.197540,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
13,1981,13.0,1981.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,...,1.0,1.853060,1.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
13,1982,13.0,1982.0,0.0,0.0,1.0,0.0,0.0,3.0,0.0,0.0,...,0.0,1.344462,0.0,1.0,0.0,0.0,0.0,0.0,0.0,9.0
13,1983,13.0,1983.0,0.0,0.0,1.0,0.0,0.0,4.0,0.0,0.0,...,0.0,1.433213,0.0,0.0,1.0,0.0,0.0,0.0,0.0,16.0
13,1984,13.0,1984.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,...,0.0,1.568125,0.0,0.0,0.0,1.0,0.0,0.0,0.0,25.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12548,1983,12548.0,1983.0,0.0,0.0,0.0,1.0,0.0,8.0,0.0,0.0,...,0.0,1.591879,0.0,0.0,1.0,0.0,0.0,0.0,0.0,64.0
12548,1984,12548.0,1984.0,0.0,0.0,0.0,1.0,0.0,9.0,0.0,0.0,...,1.0,1.212543,0.0,0.0,0.0,1.0,0.0,0.0,0.0,81.0
12548,1985,12548.0,1985.0,0.0,0.0,0.0,1.0,0.0,10.0,0.0,0.0,...,0.0,1.765962,0.0,0.0,0.0,0.0,1.0,0.0,0.0,100.0
12548,1986,12548.0,1986.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,0.0,...,1.0,1.745894,0.0,0.0,0.0,0.0,0.0,1.0,0.0,121.0


In [22]:
# Fixed-effects specification: lwage on married, union, expersq and the
# d81-d87 dummies, with the EntityEffects term added to the formula.
formula_fe = 'lwage ~ married + union + expersq \
                      +d81+d82+d83+d84+d85+d86+d87 + EntityEffects'

# Build the model from the formula, then estimate it with default settings.
mod_fe = PanelOLS.from_formula(formula=formula_fe, data=wagepan)
result_fe = mod_fe.fit()

# Print the plain-text estimation summary.
print(result_fe.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:                  lwage   R-squared:                        0.1806
Estimator:                   PanelOLS   R-squared (Between):              0.2386
No. Observations:                4360   R-squared (Within):               0.1806
Date:                Fri, Aug 04 2023   R-squared (Overall):              0.2361
Time:                        15:54:56   Log-likelihood                   -1324.8
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      83.851
Entities:                         545   P-value                           0.0000
Avg Obs:                       8.0000   Distribution:                 F(10,3805)
Min Obs:                       8.0000                                           
Max Obs:                       8.0000   F-statistic (robust):             83.851
                            

推定

exper，educ，black，hispを加えて回帰式を定義する。

（注意）

ランダム効果モデルの推定では，時間ダミー変数（`d81`〜`d87`）の代わりに`TimeEffects`を使わないこと。式に入れること自体はできるが，`RandomEffects`は`TimeEffects`を扱う仕様になっていない。

In [24]:
# Random-effects specification: explicit intercept (1), the regressors from the
# fixed-effects formula, plus exper, educ, black and hisp, and the d81-d87 dummies.
formula_re = 'lwage ~ 1 + married + union + expersq \
                        + exper + educ + black + hisp \
                        +d81+d82+d83+d84+d85+d86+d87'

# Build the model first, then estimate it with default settings.
mod_re = RandomEffects.from_formula(formula=formula_re, data=wagepan)
result_re = mod_re.fit()

# Print only the parameter-estimates table (index 1 of the summary tables).
print(result_re.summary.tables[1])

# Last expression of the cell: displayed as the cell's output value.
result_re.variance_decomposition

                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      0.0234     0.1514     0.1546     0.8771     -0.2735      0.3203
married        0.0638     0.0168     3.8035     0.0001      0.0309      0.0967
union          0.1059     0.0179     5.9289     0.0000      0.0709      0.1409
expersq       -0.0047     0.0007    -6.8623     0.0000     -0.0061     -0.0034
exper          0.1058     0.0154     6.8706     0.0000      0.0756      0.1361
educ           0.0919     0.0107     8.5744     0.0000      0.0709      0.1129
black         -0.1394     0.0480    -2.9054     0.0037     -0.2334     -0.0453
hisp           0.0217     0.0428     0.5078     0.6116     -0.0622      0.1057
d81            0.0404     0.0247     1.6362     0.1019     -0.0080      0.0889
d82            0.0309     0.0324     0.9519     0.34

Effects                   0.106946
Residual                  0.123324
Percent due to Effects    0.464438
Name: Variance Decomposition, dtype: float64