### 선형 회귀 분석 실습

In [1]:
# Load the boston.csv dataset and separate feature columns from the target.
import pandas as pd

data_url = 'https://raw.githubusercontent.com/blackdew/tensorflow1/master/csv/boston.csv'
raw_boston = pd.read_csv(data_url)

# Every column except the last is a feature; the last column is the target.
X, y = raw_boston.iloc[:, :-1], raw_boston.iloc[:, -1]

In [2]:
# Split into training and test sets. test_size is not given, so
# scikit-learn's default hold-out fraction applies; random_state pins the
# shuffle so the split is reproducible.
from sklearn.model_selection import train_test_split

X_tn, X_te, y_tn, y_te = train_test_split(
    X,
    y,
    random_state=1,
)

In [3]:
# Standardize the features. The scaler is fitted on the training features
# only and then reused, unchanged, to transform the test features.
from sklearn.preprocessing import StandardScaler

std_scale = StandardScaler()
X_tn_std = std_scale.fit_transform(X_tn)
X_te_std = std_scale.transform(X_te)

In [4]:
# Train the linear regression model on the standardized training data.
from sklearn.linear_model import LinearRegression

clf_lr = LinearRegression().fit(X_tn_std, y_tn)
clf_lr  # last expression of the cell, so the notebook echoes the estimator

LinearRegression()

In [5]:
# Show the parameter settings of the linear regression estimator as a dict.
clf_lr.get_params()

{'copy_X': True,
 'fit_intercept': True,
 'n_jobs': None,
 'normalize': 'deprecated',
 'positive': False}

In [6]:
# Inspect the estimated coefficients and intercept of the linear regression
# model (coefficients first, intercept on the following line).
print(clf_lr.coef_, clf_lr.intercept_, sep="\n")

[-1.07145146  1.34036243  0.26298069  0.66554537 -2.49842551  1.97524314
  0.19516605 -3.14274974  2.66736136 -1.80685572 -2.13034748  0.56172933
 -4.03223518]
22.344591029023746


In [7]:
# Ridge regression (L2 penalty) trained on the standardized training data.
from sklearn.linear_model import Ridge

clf_ridge = Ridge(alpha=1).fit(X_tn_std, y_tn)
clf_ridge  # last expression of the cell, so the notebook echoes the estimator

Ridge(alpha=1)

In [8]:
# Show the parameter settings of the ridge estimator as a dict.
clf_ridge.get_params()

{'alpha': 1,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': None,
 'normalize': 'deprecated',
 'positive': False,
 'random_state': None,
 'solver': 'auto',
 'tol': 0.001}

In [9]:
# Inspect the estimated coefficients and intercept of the ridge model
# (coefficients first, intercept on the following line).
print(clf_ridge.coef_, clf_ridge.intercept_, sep="\n")

[-1.05933451  1.31050717  0.23022789  0.66955241 -2.45607567  1.99086611
  0.18119169 -3.09919804  2.56480813 -1.71116799 -2.12002592  0.56264409
 -4.00942448]
22.344591029023746


In [10]:
# Lasso regression (L1 penalty) trained on the standardized training data.
from sklearn.linear_model import Lasso

clf_lasso = Lasso(alpha=0.01).fit(X_tn_std, y_tn)
clf_lasso  # last expression of the cell, so the notebook echoes the estimator

Lasso(alpha=0.01)

In [11]:
# Show the parameter settings of the lasso estimator as a dict.
clf_lasso.get_params()

{'alpha': 0.01,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': 1000,
 'normalize': 'deprecated',
 'positive': False,
 'precompute': False,
 'random_state': None,
 'selection': 'cyclic',
 'tol': 0.0001,
 'warm_start': False}

In [12]:
# Inspect the estimated coefficients and intercept of the lasso model
# (coefficients first, intercept on the following line).
print(clf_lasso.coef_, clf_lasso.intercept_, sep="\n")

[-1.04326518  1.27752711  0.1674367   0.66758228 -2.41559964  1.99244179
  0.14733958 -3.09473711  2.46431135 -1.60552274 -2.11046422  0.55200229
 -4.00809905]
22.344591029023746


In [13]:
# Elastic net regression trained on the standardized training data,
# configured with alpha=0.01 and l1_ratio=0.01.
from sklearn.linear_model import ElasticNet

clf_elastic = ElasticNet(alpha=0.01, l1_ratio=0.01).fit(X_tn_std, y_tn)
clf_elastic  # last expression of the cell, so the notebook echoes the estimator

ElasticNet(alpha=0.01, l1_ratio=0.01)

In [14]:
# Show the parameter settings of the elastic net estimator as a dict.
clf_elastic.get_params()

{'alpha': 0.01,
 'copy_X': True,
 'fit_intercept': True,
 'l1_ratio': 0.01,
 'max_iter': 1000,
 'normalize': 'deprecated',
 'positive': False,
 'precompute': False,
 'random_state': None,
 'selection': 'cyclic',
 'tol': 0.0001,
 'warm_start': False}

In [15]:
# Inspect the estimated coefficients and intercept of the elastic net model
# (coefficients first, intercept on the following line).
print(clf_elastic.coef_, clf_elastic.intercept_, sep="\n")

[-1.02916603  1.23681955  0.15236504  0.67859622 -2.34646781  2.02965524
  0.14575132 -2.98592423  2.32013379 -1.48829485 -2.09271972  0.56506801
 -3.9495281 ]
22.344591029023746


In [16]:
# Predict on the standardized test features with each of the four fitted
# models, in the order: linear, ridge, lasso, elastic net.
pred_lr, pred_ridge, pred_lasso, pred_elastic = (
    model.predict(X_te_std)
    for model in (clf_lr, clf_ridge, clf_lasso, clf_elastic)
)

In [17]:
# Model evaluation: R-squared on the test set, one line per model in the
# order linear, ridge, lasso, elastic net. Higher is better and 1.0 is the
# best possible score; per the scikit-learn r2_score documentation the value
# can be negative, so it is not strictly confined to [0, 1].
from sklearn.metrics import r2_score

print(
    *(
        r2_score(y_te, pred)
        for pred in (pred_lr, pred_ridge, pred_lasso, pred_elastic)
    ),
    sep="\n",
)

0.7789410172622857
0.7789704562726606
0.7787621490259894
0.7787876079239252


In [18]:
# Model evaluation: mean squared error on the test set, one line per model
# in the order linear, ridge, lasso, elastic net. Lower is better.
from sklearn.metrics import mean_squared_error

print(
    *(
        mean_squared_error(y_te, pred)
        for pred in (pred_lr, pred_ridge, pred_lasso, pred_elastic)
    ),
    sep="\n",
)

21.8977653960495
21.894849212618738
21.915483810504835
21.912961890936874
