In [None]:
# Ridge Regression
    # overfitting 방지
    # 다중공선성 방지
    # scaling 필수
    # -- parameters --
        # alpha : L2-norm penalty term
        # fit_intercept : whether to fit an intercept term (set False only if the data is already centered)
        # max_iter : maximum number of iterations

# package
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression, Ridge, RidgeCV
from sklearn.metrics import mean_squared_error

# option 1 : manual sweep over candidate alphas
# Candidate L2 penalty strengths, from almost no shrinkage up to heavy shrinkage.
penalty = [.00001, .00005, .0001, .001, .01, .1, .3, .5, .6, .7, .9, 1, 10]

# The train / validation rows are the same for every alpha, so slice them once.
train_X, train_Y = X_scal.iloc[train_idx], Y.iloc[train_idx]
valid_X, valid_Y = X_scal.iloc[valid_idx], Y.iloc[valid_idx]

report = "Alpha : {:.5f} --> R2 : {:.7f} / MSE : {:.7f} /RMSE : {:.7f}\n"

for a in penalty:
    # Fit on the training rows, then evaluate on the held-out validation rows.
    model = Ridge(alpha=a)
    model.fit(train_X, train_Y)
    pred_y = model.predict(valid_X)
    mse = mean_squared_error(valid_Y, pred_y)
    score = model.score(valid_X, valid_Y)  # R2 on the validation rows
    print(report.format(a, score, mse, np.sqrt(mse)))

# option 2 : built-in cross-validated search (RidgeCV, not GridSearchCV)
# Every alpha in `penalty` is scored with 5-fold cross-validation on the training rows.
ridge_cv = RidgeCV(cv=5, alphas=penalty)
ridge_cv.fit(X_scal.iloc[train_idx], Y.iloc[train_idx])
model = ridge_cv  # fit() returns the estimator itself, so both names refer to the fitted model

# alpha_ is the selected penalty; best_score_ is its cross-validated score
# (R2 here, assuming RidgeCV's default scoring -- no `scoring` argument is passed).
best_alpha, best_cv_score = model.alpha_, model.best_score_
print('best alpha : {:.5f}, R2 : {:.4f}'.format(best_alpha, best_cv_score))

In [None]:
# Lasso Regression
    # scaling 필수
    # feature selection 가능
    # 다중공선성이 있는 데이터에 대해서는 좋은 성능 X
    # -- parameters --
        # alpha : L1-norm penalty term
        # fit_intercept : whether to fit an intercept term (set False only if the data is already centered)
        # max_iter : maximum number of iterations

# package
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import Lasso, LassoCV
from sklearn.metrics import mean_squared_error

# option 1 : manual sweep over candidate alphas
# Candidate L1 penalty strengths to try, smallest to largest.
penalty = [.00001, .00005, .0001, .001, .01, .1, .3, .5, .6, .7, .9, 1, 10]

# The train / validation rows are the same for every alpha, so slice them once.
train_X, train_Y = X_scal.iloc[train_idx], Y.iloc[train_idx]
valid_X, valid_Y = X_scal.iloc[valid_idx], Y.iloc[valid_idx]

report = "Alpha : {:.5f} --> R2 : {:.7f} / MSE : {:.7f} /RMSE : {:.7f}\n"

for a in penalty:
    # Fit on the training rows, then evaluate on the held-out validation rows.
    model = Lasso(alpha=a)
    model.fit(train_X, train_Y)
    pred_y = model.predict(valid_X)
    mse = mean_squared_error(valid_Y, pred_y)
    score = model.score(valid_X, valid_Y)  # R2 on the validation rows
    print(report.format(a, score, mse, np.sqrt(mse)))

# option 2 : built-in cross-validated search (LassoCV, not GridSearchCV)
# Every alpha in `penalty` is evaluated with 5-fold cross-validation on the training rows.
lasso_cv = LassoCV(alphas=penalty, cv=5)
model = lasso_cv.fit(X_scal.iloc[train_idx], Y.iloc[train_idx])

# Unlike RidgeCV, LassoCV does not expose a `best_score_` attribute, so reading it
# raises AttributeError. Report R2 on the held-out validation rows instead -- the
# same metric the manual alpha sweep prints.
valid_r2 = model.score(X_scal.iloc[valid_idx], Y.iloc[valid_idx])
print('best alpha : {:.5f}, R2 : {:.4f}'.format(model.alpha_, valid_r2))

In [None]:
# ElasticNet
    # scaling 필수
    # L2-norm (Ridge)와 L1-norm (Lasso)을 섞어놓음
    # -- parameters --
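        # alpha : overall penalty strength, multiplying both the L1 and L2 terms -> 0 : Linear Regression
        # l1_ratio : share of the penalty given to the L1-norm -> 0 <= l1_ratio <= 1, 0 : Ridge Regression, 1 : Lasso Regression
        # fit_intercept : whether to fit an intercept term (set False only if the data is already centered)
        # max_iter : maximum number of iterations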

# package
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV

# option 1 : manual sweep over every (alpha, l1_ratio) pair

# alpha : overall penalty strength
# close to 0 -> linear regression
alphas = [.000001, .000005, .00001, .00005, .0001, .001, .005, .01, .05]

# l1_ratio : mix between the L1 and L2 penalties
# close to 1 -> Lasso, close to 0 -> Ridge
l1_ratio = [.9, .7, .5, .3, .1]

# The train / validation rows are the same for every combination, so slice them once.
train_X, train_Y = X_scal.iloc[train_idx], Y.iloc[train_idx]
valid_X, valid_Y = X_scal.iloc[valid_idx], Y.iloc[valid_idx]

for a in alphas:
    for b in l1_ratio:
        # Fit on the training rows, then evaluate on the held-out validation rows.
        model = ElasticNet(alpha=a, l1_ratio=b).fit(train_X, train_Y)
        score = model.score(valid_X, valid_Y)  # R2 on the validation rows
        pred_y = model.predict(valid_X)
        mse = mean_squared_error(valid_Y, pred_y)
        # The label reads "l1_ratio" (previously misspelled "lr_ratio") to match the parameter name.
        print("Alpha : {:.7f}, l1_ratio : {:.7f} --> R2 : {:.7f} / MSE : {:.7f} /RMSE : {:.7f}".format(a, b, score, mse, np.sqrt(mse)))

# option 2 : GridSearchCV

# Parameter grid: every (alpha, l1_ratio) combination is cross-validated.
grid = dict()
grid['alpha'] = alphas
grid['l1_ratio'] = l1_ratio

# model and grid search preparation
model = ElasticNet()
search = GridSearchCV(model, grid, scoring='neg_root_mean_squared_error', cv=5, n_jobs=-1)
# Tune on the TRAINING rows, as the other cross-validated searches in this notebook do,
# so the validation rows stay unseen. Both X and Y are sliced positionally with .iloc:
# mixing .iloc for X with .loc for Y can select different rows when the index of Y is
# not 0..n-1.
results = search.fit(X_scal.iloc[train_idx], Y.iloc[train_idx])

# summarize
# With 'neg_root_mean_squared_error' the best score is the NEGATED RMSE (higher is better),
# so flip the sign to print the RMSE itself.
print('RMSE : {:.4f}'.format(-results.best_score_))
print('Config : {}'.format(results.best_params_))
