# 회귀분석

## 1. 다중선형회귀
![image.png](attachment:image.png)

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [2]:
# Fix the legacy global RNG so the synthetic sample is reproducible.
np.random.seed(2)

# Three predictors, 10 uniform draws each (np.random.rand samples [0, 1)).
u = 11 * np.random.rand(10)        # values in [0, 11)
v = 11 + 11 * np.random.rand(10)   # values in [11, 22)
w = 1 + 29 * np.random.rand(10)    # values in [1, 30)

# Small uniform noise in [0, 0.1), drawn after the predictors.
noise = 0.1 * np.random.rand(10)

# Response: a linear combination of the predictors plus the noise term.
y = 3 + 0.1*u + 2*v - 3*w + noise

# Collect everything in one table, response column first.
data = dict(y=y, u=u, v=v, w=w)
df = pd.DataFrame(data)
df

Unnamed: 0,y,u,v,w
0,-7.765092,4.795944,17.832472,15.652137
1,28.009893,0.285189,16.820563,2.893309
2,-11.617214,6.046287,12.480379,13.415548
3,25.427691,4.788546,16.649359,3.799397
4,15.507688,4.624046,13.028839,4.687639
5,-12.237411,3.633683,19.638687,18.305614
6,21.42889,2.251135,20.393728,7.554348
7,24.308134,6.811981,16.436605,4.101425
8,21.803563,3.296201,20.312176,7.38888
9,-6.319101,2.9351,11.8761,11.144962


In [5]:
# Build the design matrix: predictors u, v, w with a constant term added.
predictor_cols = ['u', 'v', 'w']
X = sm.add_constant(df[predictor_cols])

# Set up the OLS regression of y on the design matrix, fit it,
# and show the summary table.
multi_model = sm.OLS(endog=df[['y']], exog=X)
fitted_multi_model = multi_model.fit()
fitted_multi_model.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,1.0
Model:,OLS,Adj. R-squared:,1.0
Method:,Least Squares,F-statistic:,1780000.0
Date:,"Sun, 27 Dec 2020",Prob (F-statistic):,3.1e-18
Time:,15:45:43,Log-Likelihood:,26.482
No. Observations:,10,AIC:,-44.96
Df Residuals:,6,BIC:,-43.75
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,3.0694,0.048,63.601,0.000,2.951,3.187
u,0.1023,0.004,24.656,0.000,0.092,0.112
v,1.9980,0.002,815.885,0.000,1.992,2.004
w,-2.9995,0.001,-2170.504,0.000,-3.003,-2.996

0,1,2,3
Omnibus:,7.802,Durbin-Watson:,2.221
Prob(Omnibus):,0.02,Jarque-Bera (JB):,3.012
Skew:,1.194,Prob(JB):,0.222
Kurtosis:,4.237,Cond. No.,135.0


## 2. 식이요법을 적용한 닭 데이터
![image.png](attachment:image.png)

In [6]:
# Read the chick weight table from the CSV file in the working directory
# and preview its first rows.
chick_csv_path = 'ChickWeight.csv'
ChickWeight = pd.read_csv(chick_csv_path)
ChickWeight.head()

Unnamed: 0,weight,Time,Chick,Diet
0,42,0,1,1
1,51,2,1,1
2,59,4,1,1
3,64,6,1,1
4,76,8,1,1


In [7]:
# Keep only the rows whose 'Chick' value is 1.
is_chick_one = ChickWeight['Chick'] == 1
Chick = ChickWeight[is_chick_one]

In [8]:
import statsmodels.formula.api as smf

# Regress weight on Time for the selected chick via the formula interface,
# then fit the model and show the summary table.
chick_growth_model = smf.ols(formula='weight ~ Time', data=Chick)
result = chick_growth_model.fit()
result.summary()

  "anyway, n=%i" % int(n))


0,1,2,3
Dep. Variable:,weight,R-squared:,0.959
Model:,OLS,Adj. R-squared:,0.955
Method:,Least Squares,F-statistic:,232.7
Date:,"Sun, 27 Dec 2020",Prob (F-statistic):,2.97e-08
Time:,16:01:20,Log-Likelihood:,-46.039
No. Observations:,12,AIC:,96.08
Df Residuals:,10,BIC:,97.05
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,24.4654,6.728,3.636,0.005,9.475,39.456
Time,7.9879,0.524,15.255,0.000,6.821,9.155

0,1,2,3
Omnibus:,3.84,Durbin-Watson:,0.342
Prob(Omnibus):,0.147,Jarque-Bera (JB):,1.217
Skew:,0.211,Prob(JB):,0.544
Kurtosis:,1.498,Cond. No.,24.5
