In [10]:
from sklearn.preprocessing import StandardScaler,MinMaxScaler,PolynomialFeatures
import numpy as np
def get_scaled_data(method='None', p_degree=None, input_data=None):
    """Scale a feature matrix and optionally add polynomial features.

    Parameters
    ----------
    method : str or None
        "Standard" applies ``StandardScaler``, "MinMax" applies
        ``MinMaxScaler`` and "Log" applies ``np.log1p``. Any other value
        (including ``None`` and the string default ``'None'``) leaves the
        data unscaled.
    p_degree : int or None
        When not ``None``, the (possibly scaled) data is expanded with
        ``PolynomialFeatures(degree=p_degree, include_bias=False)``.
    input_data : array-like
        Feature matrix to transform. It is never modified in place.

    Returns
    -------
    The transformed data. With no scaling and no polynomial expansion the
    very same ``input_data`` object is returned.
    """
    if method == "Standard":
        scaled_data = StandardScaler().fit_transform(input_data)
    elif method == "MinMax":
        scaled_data = MinMaxScaler().fit_transform(input_data)
    elif method == "Log":
        # log1p (log(1 + x)) rather than log, so zero-valued entries stay finite.
        scaled_data = np.log1p(input_data)
    else:
        # Unrecognised method names deliberately fall through to "no scaling".
        scaled_data = input_data

    # Identity check against the None singleton (PEP 8) instead of `!= None`.
    if p_degree is not None:
        scaled_data = PolynomialFeatures(
            degree=p_degree, include_bias=False
        ).fit_transform(scaled_data)

    return scaled_data

In [11]:
from sklearn.linear_model import Lasso, ElasticNet,Ridge
from sklearn.model_selection import cross_val_score
def get_linear_reg_eval(model_name, params=None, X_data_n=None, y_target_n=None, verbose=True):
    """Cross-validate a regularised linear model over several alpha values.

    For every alpha in ``params`` the model is scored with 5-fold
    cross-validation (negative MSE converted to RMSE), the mean RMSE is
    printed, and the model is then refit on the full ``X_data_n`` /
    ``y_target_n`` to collect its coefficients.

    Parameters
    ----------
    model_name : str
        "Ridge", "Lasso" or "ElasticNet". The legacy misspelling
        "ElasicNet" is still accepted for backward compatibility.
    params : iterable
        Alpha (regularisation strength) values to evaluate.
    X_data_n : DataFrame or array-like
        Feature matrix used for both cross-validation and the final fit.
    y_target_n : array-like
        Target values.
    verbose : bool
        When true, print a header with the model name. The per-alpha RMSE
        line is printed regardless of this flag.

    Returns
    -------
    pandas.DataFrame
        One column per alpha (named ``'alpha:<value>'``) holding the fitted
        coefficients. Rows are labelled with ``X_data_n.columns`` when the
        input has column labels, otherwise with positional integers.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported model names.
    """
    if verbose:
        print(f"##### {model_name} #####")

    # Label coefficients with the columns of the data that is actually fitted.
    # Plain arrays (e.g. scaler / PolynomialFeatures output) have no labels,
    # in which case pandas falls back to a positional index.
    feature_names = getattr(X_data_n, "columns", None)

    coeff_by_alpha = {}
    for param in params:
        if model_name == "Ridge":
            model = Ridge(alpha=param)
        elif model_name == "Lasso":
            model = Lasso(alpha=param)
        elif model_name in ("ElasticNet", "ElasicNet"):
            model = ElasticNet(alpha=param, l1_ratio=0.7)
        else:
            # Fail loudly instead of hitting an unbound / stale `model`.
            raise ValueError(
                f"Unsupported model_name {model_name!r}; "
                "expected 'Ridge', 'Lasso' or 'ElasticNet'."
            )

        neg_mse_scores = cross_val_score(
            model, X_data_n, y_target_n, scoring="neg_mean_squared_error", cv=5
        )
        avg_rmse = np.mean(np.sqrt(-1 * neg_mse_scores))
        print(f"alpha : {param}, cv : 5, 평균 RMSE : {avg_rmse}")

        # cross_val_score only returns scores, so refit to obtain coefficients.
        # Fit on the arguments, not on notebook-level globals, so that the
        # coefficients correspond to the data that was just evaluated.
        model.fit(X_data_n, y_target_n)
        coeff_by_alpha['alpha:' + str(param)] = pd.Series(
            data=model.coef_, index=feature_names
        )

    return pd.DataFrame(coeff_by_alpha)

    

In [12]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy import stats
from sklearn.datasets import load_boston
%matplotlib inline

# Load the Boston housing dataset.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the pinned scikit-learn version before re-running this cell.
boston = load_boston()
# Feature matrix as a DataFrame, one column per dataset feature name.
boston_df = pd.DataFrame(boston.data,columns=boston.feature_names)

# Append the regression target as the last column, "PRICE".
boston_df["PRICE"] = boston.target
# NOTE(review): this is not the last expression of the cell, so its result is
# presumably not displayed — confirm whether it is still wanted here.
boston_df.head()
from sklearn.model_selection import train_test_split 

# Target is the PRICE column; features are every column except the last one
# (PRICE, appended above).
y_target = boston_df["PRICE"]
X_data = boston_df.iloc[:,:-1]
# Hold out 30% of the rows as a test set; random_state fixes the shuffle.
X_train,X_test,y_train,y_test = train_test_split(X_data,y_target,test_size=0.3,random_state=156)


In [14]:
# Each entry is (scaling method, polynomial degree); None skips that step.
scale_methods = [
    (None, None),
    ("Standard", None),
    ("Standard", 2),
    ("MinMax", None),
    ("MinMax", 2),
    ("Log", None),
]
# Ridge regularisation strengths evaluated for every preprocessing variant.
alphas = [0.07, 0.1, 0.5, 1, 3]

for scale_method in scale_methods:
    # Transform the raw features, then report the 5-fold RMSE per alpha.
    X_data_scaled = get_scaled_data(
        method=scale_method[0],
        p_degree=scale_method[1],
        input_data=X_data,
    )
    get_linear_reg_eval(
        "Ridge",
        params=alphas,
        X_data_n=X_data_scaled,
        y_target_n=y_target,
        verbose=False,
    )

alpha : 0.07, cv : 5, 평균 RMSE : 5.799313946209059
alpha : 0.1, cv : 5, 평균 RMSE : 5.788486627032412
alpha : 0.5, cv : 5, 평균 RMSE : 5.700420357493769
alpha : 1, cv : 5, 평균 RMSE : 5.6525709656135446
alpha : 3, cv : 5, 평균 RMSE : 5.588796474090638
alpha : 0.07, cv : 5, 평균 RMSE : 5.826790492164019
alpha : 0.1, cv : 5, 평균 RMSE : 5.825992799389448
alpha : 0.5, cv : 5, 평균 RMSE : 5.815529963369995
alpha : 1, cv : 5, 평균 RMSE : 5.802889517257744
alpha : 3, cv : 5, 평균 RMSE : 5.756736133142123
alpha : 0.07, cv : 5, 평균 RMSE : 9.011156936594821
alpha : 0.1, cv : 5, 평균 RMSE : 8.827235873000875
alpha : 0.5, cv : 5, 평균 RMSE : 7.507289665421058
alpha : 1, cv : 5, 평균 RMSE : 6.871287525964218
alpha : 3, cv : 5, 평균 RMSE : 6.162714978605982
alpha : 0.07, cv : 5, 평균 RMSE : 5.781974139879262
alpha : 0.1, cv : 5, 평균 RMSE : 5.763570225288312
alpha : 0.5, cv : 5, 평균 RMSE : 5.584502575163953
alpha : 1, cv : 5, 평균 RMSE : 5.465045081564942
alpha : 3, cv : 5, 평균 RMSE : 5.375178904313875
alpha : 0.07, cv : 5, 평균 RMSE :