# Linear Regression - sklearn 패키지

---

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

from matplotlib import rcParams
# Global matplotlib text settings: NanumGothic at 10 pt (presumably chosen so
# the Korean axis labels and title used in the plot below render properly).
rcParams.update({'font.family': 'NanumGothic', 'font.size': 10})

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split


#### 데이터 로드 (보스턴 집값)

In [None]:
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, so a
# plain import raises ImportError on current releases. Keep the library loader
# when it exists; otherwise define a drop-in replacement with the same name.
try:
    from sklearn.datasets import load_boston
except ImportError:
    from sklearn.utils import Bunch

    def load_boston():
        """Return the Boston housing data as a Bunch (fallback loader).

        Downloads the original StatLib file (network access required) and
        rebuilds the members this notebook reads: ``data`` (13 feature
        columns), ``target`` and ``feature_names``. Other members of the
        original Bunch, such as ``DESCR``, are not reproduced.

        Relies on the notebook's top-level ``pd`` and ``np`` imports.
        """
        raw = pd.read_csv(
            "http://lib.stat.cmu.edu/datasets/boston",
            sep=r"\s+",
            skiprows=22,  # skip the free-text header that precedes the numbers
            header=None,
        ).values
        # Each record is wrapped over two physical lines: 11 values on the
        # first line, then 2 more features followed by the target on the second.
        data = np.hstack([raw[::2, :], raw[1::2, :2]])
        target = raw[1::2, 2]
        feature_names = np.array([
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
        ])
        return Bunch(data=data, target=target, feature_names=feature_names)

In [None]:
# Load the dataset object; later cells read its 'data', 'target' and
# feature_names members.
boston = load_boston()

In [None]:
# Display the loaded object to inspect its contents.
boston

#### 데이터 분리: 학습데이터 + 테스트 데이터

In [None]:
# Split features and targets into train/test parts. random_state is fixed so
# the split is reproducible; no test_size is given, so the library default
# proportion applies.
X_train, X_test, y_train, y_test = train_test_split(
    boston.data,
    boston.target,
    random_state=123,
)

In [None]:
# Report the dimensions of the training features and training targets.
print(f'X_train shape: {X_train.shape}')
print(f'y_train shape: {y_train.shape}')

In [None]:
# Report the dimensions of the held-out test features and test targets.
print(f'X_test shape: {X_test.shape}')
print(f'y_test shape: {y_test.shape}')

#### 모델 생성

In [None]:
# Create a linear regression estimator; fit_intercept=True asks it to
# estimate an intercept term in addition to the per-feature coefficients.
model = LinearRegression(fit_intercept=True)

#### 모델 학습

In [None]:
# Fit the model on the training split only; the test split is not used here.
model.fit(X_train, y_train)

#### 회귀식 계수 확인

In [None]:
# Feature names, for reading the coefficient array shown in the next cell.
boston.feature_names

In [None]:
# Fitted regression coefficients, one per feature column of the training data.
model.coef_

In [None]:
# Fitted intercept (constant term) of the regression equation.
model.intercept_

In [None]:
# R-squared (coefficient of determination) computed on the training split.
# NOTE(review): the held-out X_test / y_test split is never scored in this
# notebook; consider also checking model.score(X_test, y_test).
model.score(X_train, y_train)

#### 결과값 예측

In [None]:
# Predict targets for the held-out test features, then display the array.
prediction = model.predict(X_test)
prediction

In [None]:
# Scatter of actual test targets (x axis) against the model's predictions
# (y axis); points near the diagonal indicate accurate predictions.
plt.scatter(x=y_test, y=prediction)

# Title: "predicted vs. actual house price"; x: "actual price";
# y: "predicted price".
plt.title(label='집값 예측치와 실제 집값의 관계')
plt.xlabel(xlabel='실제 집값')
plt.ylabel(ylabel='집값 예측치')
plt.show()

---

# Linear Regression - statsmodels 패키지

In [None]:
#!pip install statsmodels
#!pip install patsy

In [None]:
import statsmodels.api as sm

In [None]:
# Wrap the raw feature matrix in a DataFrame with one named column per
# feature, then preview the first five rows.
dfX0 = pd.DataFrame(data=boston.data, columns=boston.feature_names)
dfX0.head(n=5)

#### 상수항 추가

In [None]:
# Add a constant column to the features so the OLS fit below includes an
# intercept term, then preview the first rows.
dfX = sm.add_constant(dfX0)
dfX.head()

In [None]:
# Put the target values in a single-column DataFrame named "MEDV", then
# preview the first five rows.
dfy = pd.DataFrame(data=boston.target, columns=["MEDV"])
dfy.head(n=5)

#### 데이터 분리: 학습데이터 + 테스트 데이터

In [None]:
# Split the DataFrames using the same random_state as the earlier split.
# NOTE: this rebinds X_train, X_test, y_train and y_test, replacing the NumPy
# arrays that were produced for the scikit-learn model.
X_train, X_test, y_train, y_test = train_test_split(dfX, dfy, random_state=123)

In [None]:
# Report the dimensions of the training features (now including the constant
# column) and training targets.
print(f'X_train shape: {X_train.shape}')
print(f'y_train shape: {y_train.shape}')

In [None]:
# Report the dimensions of the held-out test features and test targets.
print(f'X_test shape: {X_test.shape}')
print(f'y_test shape: {y_test.shape}')

#### 모델 생성

In [None]:
# Build an ordinary least squares model. statsmodels takes the target first
# and the features second (the reverse of scikit-learn's fit(X, y) order).
# NOTE: this rebinds `model`, replacing the fitted LinearRegression instance
# from the scikit-learn section.
model = sm.OLS(y_train, X_train)

#### 모델 학습

In [None]:
# Fit the OLS model; the returned results object holds the estimates that the
# following cells inspect.
result = model.fit()

#### 학습 결과

In [None]:
# Print the regression summary table for the fitted model.
print(result.summary())

#### 독립변수 계수

In [None]:
# Estimated parameters: the added constant term plus one value per feature.
result.params

#### 잔차 (residual)

In [None]:
# Residuals of the fit on the training data (observed minus fitted values).
result.resid

---

In [None]:
# end of file