In [1]:
import random
import pandas as pd
from statsmodels.formula.api import ols

# 模拟数据

In [2]:
def generate_sample_data(n:int, p_c:float, delta:float, seed:int=20200820) -> pd.DataFrame:
    """Simulate a two-arm experiment with a binary (0/1) outcome.

    Parameters
    ----------
    n : int
        Number of observations drawn for EACH arm; the result has 2 * n rows.
    p_c : float
        Success probability used for the control arm.
    delta : float
        Absolute lift added to ``p_c`` to obtain the treatment arm's
        success probability (``p_t = p_c + delta``).
    seed : int, optional
        Seed of the private random generator used for the draws. The
        default keeps the function deterministic and yields the same
        uniform stream as seeding the ``random`` module with that value.

    Returns
    -------
    pd.DataFrame
        Columns ``d`` (0 = control, 1 = treatment) and ``y`` (0/1 outcome).
        Control rows come first. The two arms are concatenated without
        resetting the index, so each index label 0..n-1 appears twice.
    """
    # A dedicated generator keeps the simulation reproducible without
    # reseeding the module-level RNG that other code may rely on.
    rng = random.Random(seed)
    p_t = p_c + delta

    def draw(p: float) -> list:
        # rng.random() is uniform on [0, 1), so a strict `<` succeeds with
        # probability p and can never succeed when p == 0.
        return [int(rng.random() < p) for _ in range(n)]

    # Control is drawn before treatment so the stream is consumed in a
    # fixed, documented order.
    control_group = pd.DataFrame({'d': [0]*n, 'y': draw(p_c)})
    treatment_group = pd.DataFrame({'d': [1]*n, 'y': draw(p_t)})

    return pd.concat([control_group, treatment_group])

# 回归分析

In [3]:
# Baseline scenario: 1000 observations per arm, control rate 0.1, lift 0.05.
# Regress the outcome y on the treatment dummy d and display the fit summary.
(
    ols(formula='y~d',
        data=generate_sample_data(n=1000, p_c=0.1, delta=0.05))
    .fit()
    .summary()
)

0,1,2,3
Dep. Variable:,y,R-squared:,0.005
Model:,OLS,Adj. R-squared:,0.004
Method:,Least Squares,F-statistic:,9.927
Date:,"Fri, 21 Aug 2020",Prob (F-statistic):,0.00165
Time:,20:26:19,Log-Likelihood:,-509.11
No. Observations:,2000,AIC:,1022.0
Df Residuals:,1998,BIC:,1033.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0880,0.010,8.911,0.000,0.069,0.107
d,0.0440,0.014,3.151,0.002,0.017,0.071

0,1,2,3
Omnibus:,948.288,Durbin-Watson:,2.012
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3491.207
Skew:,2.474,Prob(JB):,0.0
Kurtosis:,7.172,Cond. No.,2.62


In [4]:
# For a given expected lift, the sample is too small, so the effect is not significant.
# Same rates as the baseline (control 0.1, lift 0.05) but only 100 observations per arm.
(
    ols(formula='y~d',
        data=generate_sample_data(n=100, p_c=0.1, delta=0.05))
    .fit()
    .summary()
)

0,1,2,3
Dep. Variable:,y,R-squared:,0.003
Model:,OLS,Adj. R-squared:,-0.002
Method:,Least Squares,F-statistic:,0.5745
Date:,"Fri, 21 Aug 2020",Prob (F-statistic):,0.449
Time:,20:26:19,Log-Likelihood:,-28.104
No. Observations:,200,AIC:,60.21
Df Residuals:,198,BIC:,66.81
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0700,0.028,2.501,0.013,0.015,0.125
d,0.0300,0.040,0.758,0.449,-0.048,0.108

0,1,2,3
Omnibus:,137.951,Durbin-Watson:,1.934
Prob(Omnibus):,0.0,Jarque-Bera (JB):,680.012
Skew:,2.963,Prob(JB):,2.17e-148
Kurtosis:,9.818,Cond. No.,2.62


In [5]:
# For a given sample size, the expected lift to be estimated is smaller, so it is not significant.
# Same size as the baseline (1000 per arm, control rate 0.1) but the lift is only 0.01.
(
    ols(formula='y~d',
        data=generate_sample_data(n=1000, p_c=0.1, delta=0.01))
    .fit()
    .summary()
)

0,1,2,3
Dep. Variable:,y,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,0.5923
Date:,"Fri, 21 Aug 2020",Prob (F-statistic):,0.442
Time:,20:26:19,Log-Likelihood:,-364.81
No. Observations:,2000,AIC:,733.6
Df Residuals:,1998,BIC:,744.8
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0880,0.009,9.578,0.000,0.070,0.106
d,0.0100,0.013,0.770,0.442,-0.015,0.035

0,1,2,3
Omnibus:,1108.329,Durbin-Watson:,2.004
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5469.661
Skew:,2.801,Prob(JB):,0.0
Kurtosis:,8.852,Cond. No.,2.62
