## 회귀 (Regression)

##### 베이스라인 모델 - 선형 회귀

In [None]:
# Baseline model: ordinary linear regression.
# x_train / y_train are expected to be defined by an earlier cell.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression

# fit() returns the estimator itself, so construction and training can be chained.
lr = LinearRegression().fit(x_train, y_train)

# Report the learned parameters rounded to one decimal place.
coefficients = np.round(lr.coef_, 1)
intercept = np.round(lr.intercept_, 1)
print('회귀계수(기울기):', coefficients)
print('상수항(절편):', intercept)

In [None]:
# Predict on the test split with the model fitted in the previous cell.
y_test_pred = lr.predict(x_test)

# Plot actual vs. predicted targets against the LSTAT column.
# figsize has to be passed as a keyword argument: the previous bare call
# `figsize(10,5)` referenced an undefined name and raised NameError.
plt.figure(figsize=(10, 5))
plt.scatter(x_test['LSTAT'], y_test, label='y_test')
plt.scatter(x_test['LSTAT'], y_test_pred, c='r', label='y_pred')
plt.legend(loc='best')
plt.show()

##### 모델 성능 평가

In [None]:
# Evaluation: mean squared error on the training and test splits.
from sklearn.metrics import mean_squared_error

# Training-set predictions are computed here; y_test_pred is reused from the
# prediction cell above.
y_train_pred = lr.predict(x_train)

train_mse = mean_squared_error(y_true=y_train, y_pred=y_train_pred)
print('Train MSE:%.4f' % (train_mse,))

test_mse = mean_squared_error(y_true=y_test, y_pred=y_test_pred)
print('Test MSE:%.4f' % (test_mse,))

In [None]:
# 5-fold cross-validation of a fresh linear regression on the training split.
from sklearn.model_selection import cross_val_score

lr = LinearRegression()
neg_mse = cross_val_score(estimator=lr, X=x_train, y=y_train,
                          scoring='neg_mean_squared_error', cv=5)

# The 'neg_mean_squared_error' scorer yields negated MSE values; flip the sign
# to get ordinary (positive) MSE per fold.
mse_scores = -neg_mse
print('개별 Fold의 MSE:', np.round(mse_scores, 4))
print('평균 MSE:%.4f' % mse_scores.mean())

##### 과대적합 회피(L2/L1 규제)

In [None]:
# Expand the training features into degree-2 polynomial terms.
from sklearn.preprocessing import PolynomialFeatures

pf = PolynomialFeatures(degree=2)
# Fit the transformer on the training split, then apply it to that same split.
x_train_poly = pf.fit(x_train).transform(x_train)

# Show how the number of columns changes after the expansion.
print('원본 학습 데이터셋:', x_train.shape)
print('2차 다항식 변환 데이터셋:', x_train_poly.shape)

In [None]:
# Train a linear regression on the degree-2 polynomial training features.
lr = LinearRegression()
lr.fit(x_train_poly, y_train)

# Training-set predictions and error.
y_train_pred = lr.predict(x_train_poly)
train_mse = mean_squared_error(y_train, y_train_pred)
print('Train MSE:%.4f' % train_mse)

# Test-set predictions and error.
# Use transform(), not fit_transform(): `pf` was already fitted on the
# training split, and a preprocessing step must never be re-fitted on test data.
x_test_poly = pf.transform(x_test)
y_test_pred = lr.predict(x_test_poly)
test_mse = mean_squared_error(y_test, y_test_pred)
print('Test MSE:%.4f' % test_mse)

In [None]:
# Train a linear regression on degree-15 polynomial features.
pf = PolynomialFeatures(degree=15)
x_train_poly = pf.fit_transform(x_train)

lr = LinearRegression()
lr.fit(x_train_poly, y_train)

# Training-set predictions and error.
y_train_pred = lr.predict(x_train_poly)
train_mse = mean_squared_error(y_train, y_train_pred)
print('Train MSE:%.4f' % train_mse)

# Test-set predictions and error.
# Use transform(), not fit_transform(): `pf` was fitted on the training split
# above, and a preprocessing step must never be re-fitted on test data.
x_test_poly = pf.transform(x_test)
y_test_pred = lr.predict(x_test_poly)
test_mse = mean_squared_error(y_test, y_test_pred)
print('Test MSE:%.4f' % test_mse)

In [None]:
# How the model fit changes with the polynomial degree (LSTAT feature only).
# The single-column selections do not depend on the degree, so take them once.
lstat_train = x_train.loc[:, ['LSTAT']]
lstat_test = x_test.loc[:, ['LSTAT']]

plt.figure(figsize=(15, 5))
for n, deg in enumerate([1, 2, 15]):
    # One panel per degree, laid out side by side.
    plt.subplot(1, 3, n + 1)

    # Fit a polynomial regression of the current degree.
    # The transformer is fitted on the training split only and then merely
    # applied to the test split (transform, not fit_transform).
    pf = PolynomialFeatures(degree=deg)
    x_train_poly = pf.fit_transform(lstat_train)
    x_test_poly = pf.transform(lstat_test)
    lr = LinearRegression()
    lr.fit(x_train_poly, y_train)
    # The closing parenthesis was missing here, which made the cell a SyntaxError.
    y_test_pred = lr.predict(x_test_poly)

    # Actual target values.
    plt.scatter(lstat_test, y_test, label='Targets')

    # Predicted values.
    plt.scatter(lstat_test, y_test_pred, label='Predictions')

    # Panel title.
    plt.title('Degree %d' % deg)

    # Legend.
    plt.legend()
plt.show()

# NOTE: after the loop, pf / x_train_poly / x_test_poly / lr / y_test_pred stay
# bound to the last iteration (degree 15, LSTAT only); any later cell that
# reuses these names sees those values.

##### Ridge (L2 규제)

In [None]:
# Ridge regression (L2 penalty) on the current polynomial features.
# x_train_poly / x_test_poly are whatever the preceding cells left bound.
from sklearn.linear_model import Ridge

# fit() returns the estimator, so build and train in one expression.
rdg = Ridge(alpha=2.5).fit(x_train_poly, y_train)

# Training error.
y_train_pred = rdg.predict(x_train_poly)
train_mse = mean_squared_error(y_true=y_train, y_pred=y_train_pred)
print('Train MSE:%.4f' % (train_mse,))

# Test error.
y_test_pred = rdg.predict(x_test_poly)
test_mse = mean_squared_error(y_true=y_test, y_pred=y_test_pred)
print('Test MSE:%.4f' % (test_mse,))

##### Lasso (L1 규제)

In [None]:
# Lasso regression (L1 penalty) on the current polynomial features.
# x_train_poly / x_test_poly are whatever the preceding cells left bound.
from sklearn.linear_model import Lasso

# fit() returns the estimator, so build and train in one expression.
las = Lasso(alpha=0.05).fit(x_train_poly, y_train)

# Training error.
y_train_pred = las.predict(x_train_poly)
train_mse = mean_squared_error(y_true=y_train, y_pred=y_train_pred)
print('Train MSE:%.4f' % (train_mse,))

# Test error.
y_test_pred = las.predict(x_test_poly)
test_mse = mean_squared_error(y_true=y_test, y_pred=y_test_pred)
print('Test MSE:%.4f' % (test_mse,))

In [None]:
# ElasticNet regression (combined L2/L1 penalty) on the current polynomial features.
# x_train_poly / x_test_poly are whatever the preceding cells left bound.
from sklearn.linear_model import ElasticNet

# fit() returns the estimator, so build and train in one expression.
ela = ElasticNet(alpha=0.01, l1_ratio=0.7).fit(x_train_poly, y_train)

# Training error.
y_train_pred = ela.predict(x_train_poly)
train_mse = mean_squared_error(y_true=y_train, y_pred=y_train_pred)
print('Train MSE:%.4f' % (train_mse,))

# Test error.
y_test_pred = ela.predict(x_test_poly)
test_mse = mean_squared_error(y_true=y_test, y_pred=y_test_pred)
print('Test MSE:%.4f' % (test_mse,))