In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.model_selection import cross_val_score

# Silence library warnings so they do not clutter the cell outputs.
# NOTE(review): a blanket 'ignore' also hides deprecation warnings from the
# libraries used below -- consider narrowing the filter.
warnings.filterwarnings('ignore')

# The 'seaborn' style sheet was renamed to 'seaborn-v0_8' in Matplotlib 3.6 and
# the old name was removed afterwards; use whichever name this install provides.
plot_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(plot_style)

# Font used to render Korean text in figures.
# NOTE(review): 'Malgun Gothic' is presumably only installed on Windows --
# confirm the target environment or make the font family configurable.
plt.rc('font', family = 'Malgun Gothic')
# Keep the minus sign renderable after switching to a non-default font.
plt.rcParams['axes.unicode_minus'] = False

In [4]:
# Load the Boston housing data, wrap it in a DataFrame, and select the
# feature matrix X and the target y used by the regression cells below.
try:
    from sklearn.datasets import load_boston
except ImportError:
    # load_boston was removed in scikit-learn 1.2. Rebuild the same arrays from
    # the original source, following the recipe in scikit-learn's removal
    # notice. This path needs network access.
    from sklearn.utils import Bunch

    boston_raw = pd.read_csv(
        'http://lib.stat.cmu.edu/datasets/boston',
        sep = r'\s+', skiprows = 22, header = None,
    )
    # Each record is spread over two physical rows in the raw file:
    # 11 values on the first row, then 2 features + the target on the second.
    boston = Bunch(
        data = np.hstack([boston_raw.values[::2, :], boston_raw.values[1::2, :2]]),
        target = boston_raw.values[1::2, 2],
        feature_names = np.array([
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
        ]),
    )
else:
    boston = load_boston()

boston_df = pd.DataFrame(boston.data, columns = boston.feature_names)
boston_df['PRICE'] = boston.target

# Subset of predictors used for the regularized regression experiments.
X = boston_df[['RM', 'ZN', 'INDUS', 'NOX', 'AGE', 'PTRATIO', 'LSTAT', 'RAD']]
y = boston_df['PRICE']

##### [과제] 모델 이름과 alpha 리스트를 입력하면 각 alpha별 RMSE를 구해 주는 사용자 함수를 작성하여 아래를 수행하세요
---
- lasso : [0.07, 0.1, 0.5, 1.3]
- elasticnet : [0.07, 0.1, 0.5, 1.3]

In [124]:
def get_linear_reg_eval(model_name, params, X_data_n, y_target_n, verbose = True, return_coeff = True):
    """Cross-validate a regularized linear model for each alpha in ``params``.

    For every alpha the model is scored with 5-fold cross-validation
    (``neg_mean_squared_error``), and the mean of the per-fold RMSE values is
    printed. The per-alpha line is always printed; ``verbose`` only controls
    the header line with the model name.

    Parameters
    ----------
    model_name : str
        One of ``'Ridge'``, ``'Lasso'`` or ``'ElasticNet'``. ElasticNet is
        built with a fixed ``l1_ratio`` of 0.7.
    params : iterable
        Alpha values to evaluate, in order.
    X_data_n : DataFrame-like
        Feature matrix. Must expose ``.columns`` when ``return_coeff`` is true.
    y_target_n : array-like
        Target values.
    verbose : bool, default True
        Print a header line with the model name before the results.
    return_coeff : bool, default True
        When true, refit each model on the full data and collect its
        coefficients.

    Returns
    -------
    pandas.DataFrame
        One column per alpha (named ``'alpha: <value>'``) indexed by feature
        name; empty when ``return_coeff`` is false or ``params`` is empty.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported names.
    """
    # Factories are lambdas so that an estimator class is only looked up when a
    # model is actually built.
    model_factories = {
        'Ridge': lambda alpha: Ridge(alpha = alpha),
        'Lasso': lambda alpha: Lasso(alpha = alpha),
        'ElasticNet': lambda alpha: ElasticNet(alpha = alpha, l1_ratio = 0.7),
    }
    # Fail fast with a clear message instead of hitting an unbound `model`
    # variable inside the loop.
    if model_name not in model_factories:
        raise ValueError(
            f'Unknown model_name {model_name!r}; expected one of {sorted(model_factories)}'
        )
    build_model = model_factories[model_name]

    coeff_df = pd.DataFrame()
    if verbose: print('####', model_name, '#####')
    for param in params:
        model = build_model(param)
        neg_mse_scores = cross_val_score(model, X_data_n, y_target_n, scoring = 'neg_mean_squared_error', cv = 5)
        # Scores are negated MSE values; flip the sign before taking the root.
        avg_rmse = np.mean(np.sqrt(-1 * neg_mse_scores))
        print(f'alpha{param}일 때 5폴드 세트의 평균 RMSE : {round(avg_rmse, 3)}')

        if return_coeff:
            # cross_val_score fits clones, so the model itself must be fitted on
            # the full data before its coefficients can be read.
            model.fit(X_data_n, y_target_n)
            coeff_df[f'alpha: {param}'] = pd.Series(data = model.coef_, index = X_data_n.columns)
    return coeff_df

In [125]:
# Alpha grid for the Lasso runs; the returned coefficient table is the
# cell's displayed value.
lasso_alpha = [0.07, 0.1, 0.5, 1.3]
get_linear_reg_eval(model_name = 'Lasso', params = lasso_alpha, X_data_n = X, y_target_n = y)

#### Lasso #####
alpha0.07일 때 5폴드 세트의 평균 RMSE : 5.998
alpha0.1일 때 5폴드 세트의 평균 RMSE : 5.999
alpha0.5일 때 5폴드 세트의 평균 RMSE : 6.054
alpha1.3일 때 5폴드 세트의 평균 RMSE : 6.062


Unnamed: 0,alpha: 0.07,alpha: 0.1,alpha: 0.5,alpha: 1.3
RM,4.086844,3.991926,2.731116,0.198991
ZN,-0.000613,-0.0,0.006849,0.021448
INDUS,-0.038434,-0.038484,-0.040171,-0.044186
NOX,-0.0,-0.0,-0.0,-0.0
AGE,0.020217,0.020774,0.02773,0.041949
PTRATIO,-0.945481,-0.941245,-0.888191,-0.782938
LSTAT,-0.629861,-0.635686,-0.712994,-0.868743
RAD,0.014574,0.014129,0.008604,0.0


In [127]:
# Alpha grid for the ElasticNet runs; the returned coefficient table is the
# cell's displayed value.
elasticnet_alpha = [0.07, 0.1, 0.5, 1.3]
get_linear_reg_eval(model_name = 'ElasticNet', params = elasticnet_alpha, X_data_n = X, y_target_n = y)

#### ElasticNet #####
alpha0.07일 때 5폴드 세트의 평균 RMSE : 5.955
alpha0.1일 때 5폴드 세트의 평균 RMSE : 5.94
alpha0.5일 때 5폴드 세트의 평균 RMSE : 5.896
alpha1.3일 때 5폴드 세트의 평균 RMSE : 5.971


Unnamed: 0,alpha: 0.07,alpha: 0.1,alpha: 0.5,alpha: 1.3
RM,3.855576,3.682434,2.082871,0.640414
ZN,-2.7e-05,0.000466,0.008822,0.019535
INDUS,-0.043734,-0.046001,-0.060007,-0.058207
NOX,-0.0,-0.0,-0.0,-0.0
AGE,0.021983,0.023046,0.033367,0.041022
PTRATIO,-0.95858,-0.959653,-0.927169,-0.785582
LSTAT,-0.645507,-0.656602,-0.757044,-0.838687
RAD,0.019085,0.020351,0.02436,0.005735
