In [20]:
from sklearn.linear_model import Lasso, ElasticNet,Ridge
from sklearn.model_selection import cross_val_score
def _linear_model_factory(model_name):
    """Return a callable ``alpha -> estimator`` for the requested model name.

    Raises ``ValueError`` when ``model_name`` is not one of the supported keys.
    """
    factories = {
        "Ridge": lambda alpha: Ridge(alpha=alpha),
        "Lasso": lambda alpha: Lasso(alpha=alpha),
        "ElasticNet": lambda alpha: ElasticNet(alpha=alpha, l1_ratio=0.7),
    }
    # "ElasicNet" is the historical (misspelled) key; keep accepting it so
    # existing calls continue to work alongside the correct spelling.
    factories["ElasicNet"] = factories["ElasticNet"]
    try:
        return factories[model_name]
    except KeyError:
        supported = ", ".join(repr(name) for name in factories)
        raise ValueError(
            f"Unknown model_name {model_name!r}; expected one of: {supported}"
        ) from None


def get_linear_reg_eval(model_name, params=None, X_data_n=None, y_target_n=None, verbose=True):
    """Evaluate a regularized linear model over several alphas and collect its coefficients.

    For every alpha in ``params`` the estimator is scored with 5-fold
    cross-validation (``neg_mean_squared_error``), the mean of the per-fold
    RMSE values is printed, and the estimator is then fitted on the full
    ``X_data_n`` / ``y_target_n`` so that its ``coef_`` can be recorded.

    Parameters
    ----------
    model_name : str
        ``"Ridge"``, ``"Lasso"`` or ``"ElasticNet"`` (the legacy spelling
        ``"ElasicNet"`` is also accepted). ElasticNet is built with
        ``l1_ratio=0.7``.
    params : iterable
        Alpha values to evaluate, in order.
    X_data_n : DataFrame or array-like
        Feature matrix. If it has a ``columns`` attribute, those labels are
        used as the row index of the result.
    y_target_n : Series or array-like
        Target values.
    verbose : bool, default True
        When true, print a header line with the model name. The per-alpha
        RMSE line is always printed because it is the only place the score
        is reported.

    Returns
    -------
    pandas.DataFrame
        One column per alpha, named ``"alpha:<value>"``, one row per feature.

    Raises
    ------
    ValueError
        If ``model_name`` is not a supported key.
    TypeError
        If ``params`` is ``None``.
    """
    make_model = _linear_model_factory(model_name)
    if params is None:
        raise TypeError("params must be an iterable of alpha values, got None")

    if verbose:
        print(f"##### {model_name} #####")

    # Label coefficients by feature name when the input carries column labels;
    # plain arrays fall back to a positional index.
    feature_names = getattr(X_data_n, "columns", None)

    coeff_by_alpha = {}
    for param in params:
        model = make_model(param)
        neg_mse_scores = cross_val_score(
            model, X_data_n, y_target_n, scoring="neg_mean_squared_error", cv=5
        )
        # Scores are negated MSE values: flip the sign, then take the root per fold.
        avg_rmse = np.mean(np.sqrt(-1 * neg_mse_scores))
        print(f"alpha : {param}, cv : 5, 평균 RMSE : {avg_rmse}")

        # Only scores came back from cross-validation, so fit on the full data
        # to read coefficients. Use the arguments, not notebook-level globals.
        model.fit(X_data_n, y_target_n)
        coeff_by_alpha["alpha:" + str(param)] = pd.Series(
            data=model.coef_, index=feature_names
        )

    return pd.DataFrame(coeff_by_alpha)

    

In [21]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy import stats
from sklearn.datasets import load_boston
%matplotlib inline

# Load the Boston housing dataset and put features and target in one frame.
# NOTE(review): load_boston is deprecated/removed in newer scikit-learn
# releases — confirm the scikit-learn version this notebook is pinned to.
boston = load_boston()
boston_df = pd.DataFrame(
    data=boston.data,
    columns=boston.feature_names,
).assign(PRICE=boston.target)
boston_df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [22]:
from sklearn.model_selection import train_test_split

# Every column except the last one (PRICE) is a feature; PRICE is the target.
X_data, y_target = boston_df.iloc[:, :-1], boston_df["PRICE"]

# Hold out 30% of the rows, with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    y_target,
    test_size=0.3,
    random_state=156,
)

In [23]:
# Alpha values to compare for Lasso; the call prints the mean CV RMSE per
# alpha and returns the fitted coefficients, one column per alpha.
alphas = [0.07, 0.1, 0.5, 1, 3]
coeff_lasso_df = get_linear_reg_eval(
    model_name="Lasso",
    params=alphas,
    X_data_n=X_data,
    y_target_n=y_target,
)

##### Lasso #####
alpha : 0.07, cv : 5, 평균 RMSE : 5.612284267526675
alpha : 0.1, cv : 5, 평균 RMSE : 5.615116035266936
alpha : 0.5, cv : 5, 평균 RMSE : 5.669123409594897
alpha : 1, cv : 5, 평균 RMSE : 5.776020813823375
alpha : 3, cv : 5, 평균 RMSE : 6.1887632108009045


In [24]:
# Order the Lasso coefficients by the column for the first alpha, largest first.
sort_column = f"alpha:{alphas[0]}"
coeff_lasso_df.sort_values(by=sort_column, ascending=False)

Unnamed: 0,alpha:0.07,alpha:0.1,alpha:0.5,alpha:1,alpha:3
RM,3.789725,3.703202,2.498212,0.949811,0.0
CHAS,1.434343,0.95519,0.0,0.0,0.0
RAD,0.270936,0.274707,0.277451,0.264206,0.061864
ZN,0.049059,0.049211,0.049544,0.049165,0.037231
B,0.010248,0.010249,0.009469,0.008247,0.00651
NOX,-0.0,-0.0,-0.0,-0.0,0.0
AGE,-0.011706,-0.010037,0.003604,0.02091,0.042495
TAX,-0.01429,-0.01457,-0.015442,-0.015212,-0.008602
INDUS,-0.04212,-0.036619,-0.005253,-0.0,-0.0
CRIM,-0.098193,-0.097894,-0.083289,-0.063437,-0.0


In [25]:
# Same alpha grid, evaluated with ElasticNet.
# "ElasicNet" (sic) is the exact key get_linear_reg_eval matches on, so the
# spelling is kept as-is.
alphas = [0.07, 0.1, 0.5, 1, 3]
coeff_elastic_df = get_linear_reg_eval(
    model_name="ElasicNet",
    params=alphas,
    X_data_n=X_data,
    y_target_n=y_target,
)

##### ElasicNet #####
alpha : 0.07, cv : 5, 평균 RMSE : 5.541654347348141
alpha : 0.1, cv : 5, 평균 RMSE : 5.52592849629491
alpha : 0.5, cv : 5, 평균 RMSE : 5.466748649445586
alpha : 1, cv : 5, 평균 RMSE : 5.596874445109748
alpha : 3, cv : 5, 평균 RMSE : 6.068121638621163


In [26]:
# Order the ElasticNet coefficients by the column for the first alpha, largest first.
sort_column = f"alpha:{alphas[0]}"
coeff_elastic_df.sort_values(by=sort_column, ascending=False)

Unnamed: 0,alpha:0.07,alpha:0.1,alpha:0.5,alpha:1,alpha:3
RM,3.574162,3.414154,1.918419,0.938789,0.0
CHAS,1.330724,0.979706,0.0,0.0,0.0
RAD,0.27888,0.283443,0.300761,0.289299,0.146846
ZN,0.050107,0.050617,0.052878,0.052136,0.038268
B,0.010122,0.010067,0.009114,0.00832,0.00702
AGE,-0.010116,-0.008276,0.00776,0.020348,0.043446
TAX,-0.014522,-0.014814,-0.016046,-0.016218,-0.011417
INDUS,-0.044855,-0.042719,-0.023252,-0.0,-0.0
CRIM,-0.099468,-0.099213,-0.08907,-0.073577,-0.019058
NOX,-0.175072,-0.0,-0.0,-0.0,-0.0
