In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures, StandardScaler, MinMaxScaler

def get_scaled_data(method='None', p_degree=None, input_data=None):
    """Scale or transform a feature matrix, then optionally add polynomial terms.

    Parameters
    ----------
    method : str or None
        Which transformation to apply to ``input_data``:

        * ``'Standard'`` -- ``StandardScaler().fit_transform``
        * ``'MinMax'``   -- ``MinMaxScaler().fit_transform``
        * ``'Log'``      -- ``np.log1p``

        Any other value (including ``None`` and the default string ``'None'``)
        returns the data untransformed.
    p_degree : int or None
        When not ``None``, the (possibly scaled) data is expanded with
        ``PolynomialFeatures(degree=p_degree, include_bias=False)``.
    input_data : array-like
        Feature matrix to transform. It is not modified by this function's
        own code; the pass-through branch returns the very same object.

    Returns
    -------
    The transformed data. When no scaling and no polynomial expansion are
    requested this is ``input_data`` itself (same object, not a copy).
    """
    if method == 'Standard':
        scaled_data = StandardScaler().fit_transform(input_data)
    elif method == 'MinMax':
        scaled_data = MinMaxScaler().fit_transform(input_data)
    elif method == 'Log':
        # log1p (log(1 + x)) keeps zero-valued features finite.
        scaled_data = np.log1p(input_data)
    else:
        # Deliberate pass-through: callers use None / 'None' to mean "no scaling".
        scaled_data = input_data

    # Identity check against the None singleton rather than `!= None`.
    if p_degree is not None:
        scaled_data = PolynomialFeatures(degree=p_degree, include_bias=False).fit_transform(scaled_data)

    return scaled_data


def get_linear_reg_eval(model_name, params=None, X_data_n=None, y_target_n=None, verbose=True):
    """Cross-validate a regularised linear model over several alphas and collect its coefficients.

    For every alpha in ``params`` the model is scored with 5-fold
    cross-validation (``neg_mean_squared_error``), the average RMSE is printed,
    and the model is then refit on the full ``X_data_n`` / ``y_target_n`` to
    read its coefficients.

    Parameters
    ----------
    model_name : str
        One of ``'Ridge'``, ``'Lasso'`` or ``'ElasticNet'`` (the latter uses
        ``l1_ratio=0.7``).
    params : iterable
        Alpha values to evaluate. Must be iterable.
    X_data_n : DataFrame or array-like
        Feature matrix used for both cross-validation and the final fit.
    y_target_n : array-like
        Target values.
    verbose : bool
        Controls only the ``#### model ####`` banner. The per-alpha RMSE line
        is always printed.

    Returns
    -------
    pandas.DataFrame
        One column per alpha, named ``'alpha:<value>'``; one row per feature.
        Rows are labelled with ``X_data_n.columns`` when that attribute
        exists, otherwise with generated names ``x0, x1, ...``.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported names.
    """
    model_factories = {
        'Ridge': lambda a: Ridge(alpha=a),
        'Lasso': lambda a: Lasso(alpha=a),
        'ElasticNet': lambda a: ElasticNet(alpha=a, l1_ratio=0.7),
    }
    if model_name not in model_factories:
        # Previously an unknown name left `model` unbound (or silently reused
        # a stale one); fail loudly instead.
        raise ValueError(
            "Unsupported model_name {0!r}; expected one of {1}".format(
                model_name, sorted(model_factories)))
    make_model = model_factories[model_name]

    coeff_df = pd.DataFrame()
    if verbose:
        print('#### ', model_name, ' ####')

    # Label coefficients with the columns of the data that is actually fitted.
    # Scaled / polynomial inputs are typically plain arrays without `.columns`.
    feature_names = getattr(X_data_n, 'columns', None)

    for param in params:
        model = make_model(param)

        neg_mse_scores = cross_val_score(model, X_data_n, y_target_n, scoring='neg_mean_squared_error', cv=5)
        avg_rmse = np.mean(np.sqrt(-1 * neg_mse_scores))

        print('alpha = {0} , Average RMSE of 5 fold set: {1:.3f}'.format(param, avg_rmse))

        # cross_val_score only returns the evaluation metric, so refit the
        # model to extract the regression coefficients. Fit on the data passed
        # in -- not on notebook globals -- so the coefficients correspond to
        # the same features that were cross-validated.
        model.fit(X_data_n, y_target_n)

        if feature_names is not None:
            coeff_index = feature_names
        else:
            coeff_index = ['x{0}'.format(i) for i in range(len(model.coef_))]

        coeff = pd.Series(data=model.coef_, index=coeff_index)
        col_name = 'alpha:' + str(param)
        coeff_df[col_name] = coeff

    return coeff_df

In [10]:
# Load the Boston housing data: features as a DataFrame, target as an array.
# NOTE(review): load_boston is reported removed in scikit-learn 1.2 -- confirm
# the pinned scikit-learn version before re-running this cell.
boston = load_boston()
boston_df = pd.DataFrame(data=boston.data, columns=boston.feature_names)
X_data = boston_df
y_target = boston.target

# Ridge alphas to evaluate for every preprocessing variant.
alpha = [0.1, 1, 10, 100]

# (scaling method, polynomial degree) combinations to compare.
scale_methods = [
    (None, None),
    ('Standard', None),
    ('Standard', 2),
    ('MinMax', None),
    ('MinMax', 2),
    ('Log', None),
]

for scale_method in scale_methods:
    method_name, poly_degree = scale_method
    X_data_scaled = get_scaled_data(method=method_name, p_degree=poly_degree, input_data=X_data)

    print('\n## Type: {0}, Polynomial Degree: {1}'.format(method_name, poly_degree))
    # Only the printed RMSE lines are of interest here; the returned
    # coefficient frame is discarded.
    get_linear_reg_eval('Ridge', params=alpha, X_data_n=X_data_scaled, y_target_n=y_target, verbose=False)


## Type: None, Polynomial Degree: None
alpha = 0.1 , Average RMSE of 5 fold set: 5.788
alpha = 1 , Average RMSE of 5 fold set: 5.653
alpha = 10 , Average RMSE of 5 fold set: 5.518
alpha = 100 , Average RMSE of 5 fold set: 5.330

## Type: Standard, Polynomial Degree: None
alpha = 0.1 , Average RMSE of 5 fold set: 5.826
alpha = 1 , Average RMSE of 5 fold set: 5.803
alpha = 10 , Average RMSE of 5 fold set: 5.637
alpha = 100 , Average RMSE of 5 fold set: 5.421

## Type: Standard, Polynomial Degree: 2
alpha = 0.1 , Average RMSE of 5 fold set: 8.827
alpha = 1 , Average RMSE of 5 fold set: 6.871
alpha = 10 , Average RMSE of 5 fold set: 5.485
alpha = 100 , Average RMSE of 5 fold set: 4.634

## Type: MinMax, Polynomial Degree: None
alpha = 0.1 , Average RMSE of 5 fold set: 5.764
alpha = 1 , Average RMSE of 5 fold set: 5.465
alpha = 10 , Average RMSE of 5 fold set: 5.754
alpha = 100 , Average RMSE of 5 fold set: 7.635

## Type: MinMax, Polynomial Degree: 2
alpha = 0.1 , Average RMSE of 5 fold s