In [10]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

In [2]:
sv1849 = [283, 157, 192, 249, 259, 226, 352, 97, 111, 8, 235, 92]
lsv1849 = [256, 267, 312, 257, 318, 446, 143, 193, 243, 215, 544, 187, 153, 81, 113, 176]
sv1854 = [371, 161, 148, 362, 244, 237, 282, 59, 171, 9, 240, 174]
lsv1854 = [113, 174, 270, 93, 210, 388, 92, 58, 117, 49, 193, 303, 142, 48, 165, 132]

### 前後比較結果を比較する

In [36]:
a = sum(sv1854) - sum(sv1849) # sv対象地域の固有の効果を消す
b = sum(lsv1854) - sum(lsv1849) # lsv対象地域の固有の効果を消す
print(b - a) # 平行トレンド過程でどちらの地域も時間による固有の効果は同じと仮定し、その効果を消す

-1554

### 回帰分析で効果検証

In [38]:
JS_sum = pd.DataFrame({
    "y" : [sum(sv1849), sum(lsv1849), sum(sv1854), sum(lsv1854)],
    "lsv" : [0,1,0,1], # 地域の固有の効果を表している
    "d54" : [0,0,1,1], # 時間による固有の効果を表している
    "lsv*d54":[0,0,0,1] # 1854年に介入があった地域のデータであることを示す(知りたい効果量)
})
"""
sum(lsv1854) - sum(lsv1849) = (1*lsv + 1*d54 + 1*lsv*d54) - (1*lsv + 0*d54 + 0*lsv*d54)
                            = (1*d54 + 1*lsv*d54) # 地域固有の効果が消えた
sum(sv1854) - sum(sv1849) = (0*lsv + 1*d54 + 0*lsv*d54) - (0*lsv + 0*d54 + 0*lsv*d54)
                            = (1*d54) # 地域固有の効果が消えた
(1*d54 + 1*lsv*d54) - (1*d54) = 1*lsv*d54 # なのでlsv*d54の係数が効果量を表している
"""

JS_sum.head()

Unnamed: 0,y,lsv,d54,lsv*d54
0,2261,0,0,0
1,3904,1,0,0
2,2458,0,1,0
3,2547,1,1,1


In [39]:
y = JS_sum[["y"]]
X = JS_sum[["lsv", "d54", "lsv*d54"]]
X = sm.add_constant(X)
model = sm.OLS(y, X).fit()
model.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,1.0
Model:,OLS,Adj. R-squared:,
Method:,Least Squares,F-statistic:,0.0
Date:,"Tue, 16 Aug 2022",Prob (F-statistic):,
Time:,21:58:44,Log-Likelihood:,104.68
No. Observations:,4,AIC:,-201.4
Df Residuals:,0,BIC:,-203.8
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,2261.0000,inf,0,,,
lsv,1643.0000,inf,0,,,
d54,197.0000,inf,0,,,
lsv*d54,-1554.0000,inf,-0,,,

0,1,2,3
Omnibus:,,Durbin-Watson:,0.857
Prob(Omnibus):,,Jarque-Bera (JB):,0.348
Skew:,-0.435,Prob(JB):,0.84
Kurtosis:,1.846,Cond. No.,6.85


In [30]:
JS_df = pd.DataFrame({
    "y" : sv1849+sv1854+lsv1849+lsv1854,
    "area" : [f"Area{i+1}" for i in range(len(sv1849))]*2 + [f"Area{len(sv1849)+i+1}" for i in range(len(lsv1849))]*2,
    "lsv" : [0]*len(sv1849)*2 + [1]*len(lsv1849)*2,
    "d54" : [0]*len(sv1849) + [1]*len(sv1854) + [0]*len(lsv1849) + [1]*len(lsv1854),
    "lsv*d54":[0]*len(sv1849+sv1854+lsv1849) + [1]*len(lsv1854)
})
JS_df = pd.concat([JS_df, pd.get_dummies(JS_df["area"], drop_first=True)], axis=1)
JS_df.head()

Unnamed: 0,y,area,lsv,d54,lsv*d54,Area10,Area11,Area12,Area13,Area14,...,Area26,Area27,Area28,Area3,Area4,Area5,Area6,Area7,Area8,Area9
0,283,Area1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,157,Area2,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,192,Area3,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
3,249,Area4,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
4,259,Area5,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0


In [34]:
y = JS_df[["y"]]
X = JS_df.drop(["y", "area"], axis=1)
X = sm.add_constant(X)
model = sm.OLS(y, X).fit()
model.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.845
Model:,OLS,Adj. R-squared:,0.672
Method:,Least Squares,F-statistic:,4.881
Date:,"Mon, 15 Aug 2022",Prob (F-statistic):,5.45e-05
Time:,19:02:36,Log-Likelihood:,-289.21
No. Observations:,56,AIC:,638.4
Df Residuals:,26,BIC:,699.2
Df Model:,29,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,318.7917,45.727,6.972,0.000,224.799,412.785
lsv,-70.3922,45.452,-1.549,0.134,-163.821,23.036
d54,16.4167,25.365,0.647,0.523,-35.721,68.555
lsv*d54,-101.2292,33.554,-3.017,0.006,-170.201,-32.257
Area10,-318.5000,62.131,-5.126,0.000,-446.211,-190.789
Area11,-89.5000,62.131,-1.441,0.162,-217.211,38.211
Area12,-194.0000,62.131,-3.122,0.004,-321.711,-66.289
Area13,-21.4933,42.633,-0.504,0.618,-109.126,66.140
Area14,14.5067,42.633,0.340,0.736,-73.126,102.140

0,1,2,3
Omnibus:,6.39,Durbin-Watson:,2.028
Prob(Omnibus):,0.041,Jarque-Bera (JB):,10.433
Skew:,-0.0,Prob(JB):,0.00543
Kurtosis:,5.115,Cond. No.,3.46e+16


In [40]:
# 目的変数の対数を取り、係数に何%減少したかの解釈を与える
y = JS_df[["y"]]
y = np.log(y)
X = JS_df.drop(["y", "area"], axis=1)
X = sm.add_constant(X)
model = sm.OLS(y, X).fit()
model.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.919
Model:,OLS,Adj. R-squared:,0.83
Method:,Least Squares,F-statistic:,10.23
Date:,"Tue, 16 Aug 2022",Prob (F-statistic):,3.07e-08
Time:,22:01:46,Log-Likelihood:,4.8197
No. Observations:,56,AIC:,50.36
Df Residuals:,26,BIC:,111.1
Df Model:,29,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,5.7439,0.240,23.952,0.000,5.251,6.237
lsv,-0.3337,0.238,-1.400,0.173,-0.824,0.156
d54,0.0739,0.133,0.556,0.583,-0.200,0.347
lsv*d54,-0.5661,0.176,-3.217,0.003,-0.928,-0.204
Area10,-3.6425,0.326,-11.179,0.000,-4.312,-2.973
Area11,-0.3107,0.326,-0.954,0.349,-0.980,0.359
Area12,-0.9404,0.326,-2.886,0.008,-1.610,-0.271
Area13,-0.0278,0.224,-0.124,0.902,-0.487,0.432
Area14,0.2091,0.224,0.935,0.358,-0.250,0.669

0,1,2,3
Omnibus:,0.064,Durbin-Watson:,1.761
Prob(Omnibus):,0.969,Jarque-Bera (JB):,0.017
Skew:,0.0,Prob(JB):,0.992
Kurtosis:,2.915,Cond. No.,3.46e+16
