### ライブラリの読込

In [29]:
import pandas as pd
from scipy import stats

### サンプルデータでお試し

In [30]:
# サンプルデータの生成
import numpy as np

x = np.linspace(-5, 5, 20)
np.random.seed(1)

# normal distributed noise
y = -5 + 3*x + 4 * np.random.normal(size=x.shape)

# Create a data frame containing all the relevant variables
data_test = pd.DataFrame({'x': x, 'y': y})

In [31]:
# R-formatの場合
import statsmodels.formula.api as smf

model = smf.ols('y ~ x', data_test)
result = model.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.804
Model:                            OLS   Adj. R-squared:                  0.794
Method:                 Least Squares   F-statistic:                     74.03
Date:                Sun, 15 Apr 2018   Prob (F-statistic):           8.56e-08
Time:                        15:30:47   Log-Likelihood:                -57.988
No. Observations:                  20   AIC:                             120.0
Df Residuals:                      18   BIC:                             122.0
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -5.5335      1.036     -5.342      0.0

In [32]:
# python-format
import statsmodels.api as sm

model = sm.OLS(y, x)
result = model.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.614
Model:                            OLS   Adj. R-squared:                  0.594
Method:                 Least Squares   F-statistic:                     30.23
Date:                Sun, 15 Apr 2018   Prob (F-statistic):           2.65e-05
Time:                        15:30:48   Log-Likelihood:                -67.486
No. Observations:                  20   AIC:                             137.0
Df Residuals:                      19   BIC:                             138.0
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
x1             2.9369      0.534      5.498      0.0

### brain_sizeデータで試す

In [33]:
data = pd.read_csv('examples/brain_size.csv', sep=';', na_values=".")
data.head(10)

Unnamed: 0.1,Unnamed: 0,Gender,FSIQ,VIQ,PIQ,Weight,Height,MRI_Count
0,1,Female,133,132,124,118.0,64.5,816932
1,2,Male,140,150,124,,72.5,1001121
2,3,Male,139,123,150,143.0,73.3,1038437
3,4,Male,133,129,128,172.0,68.8,965353
4,5,Female,137,132,134,147.0,65.0,951545
5,6,Female,99,90,110,146.0,69.0,928799
6,7,Female,138,136,131,138.0,64.5,991305
7,8,Female,92,90,98,175.0,66.0,854258
8,9,Male,89,93,84,134.0,66.3,904858
9,10,Male,133,114,147,172.0,68.8,955466


In [34]:
model = smf.ols('VIQ ~ Gender + 1', data)
result = model.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:                    VIQ   R-squared:                       0.015
Model:                            OLS   Adj. R-squared:                 -0.010
Method:                 Least Squares   F-statistic:                    0.5969
Date:                Sun, 15 Apr 2018   Prob (F-statistic):              0.445
Time:                        15:30:56   Log-Likelihood:                -182.42
No. Observations:                  40   AIC:                             368.8
Df Residuals:                      38   BIC:                             372.2
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept        109.4500      5.308     20.

In [39]:
data_FSIQ = data['FSIQ']
data_PIQ = data['PIQ']

df_fsiq = pd.DataFrame({'iq': data_FSIQ, 'type': 'fsiq'})
df_piq = pd.DataFrame({'iq': data_PIQ, 'type': 'piq'})
df_long = pd.concat((df_fsiq, df_piq))

model = smf.ols('iq ~ type', df_long)
result.model.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:                    VIQ   R-squared:                       0.015
Model:                            OLS   Adj. R-squared:                 -0.010
Method:                 Least Squares   F-statistic:                    0.5969
Date:                Sun, 15 Apr 2018   Prob (F-statistic):              0.445
Time:                        15:41:51   Log-Likelihood:                -182.42
No. Observations:                  40   AIC:                             368.8
Df Residuals:                      38   BIC:                             372.2
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept        109.4500      5.308     20.

In [41]:
# t検定の結果との比較

stats.ttest_rel(data_FSIQ, data_PIQ)

Ttest_relResult(statistic=1.7842019405859857, pvalue=0.082172638183642358)