In [1]:
import numpy as np
import pandas as pd

# 선형회귀, ridge, lasso 관련 sklearn 라이브러리 불러오기
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.metrics import mean_squared_error

# K-Fold 교차검증 관련 라이브러리
from sklearn.model_selection import KFold

In [2]:
# Create the Ridge regression estimator.
# alpha is the tuning parameter (lambda) that sets the penalty strength.
reg = Ridge(alpha=0.1)

In [4]:
# Training.
# train_data packs the toy dataset into one list: the first three entries are
# passed to fit() as the feature rows and the last entry as the target values.
train_data = [[0, 0], [0, 0], [1, 1], [0, .1, 1]]
features, targets = train_data[:3], train_data[-1]

reg.fit(features, targets)

Ridge(alpha=0.1)

In [7]:
# Predict the response for a single test point.
pred_test = reg.predict([[0, 1]])
pred_test

array([0.51395349])

In [8]:
# Display the coefficients held by the fitted estimator `reg`.
reg.coef_

array([0.44186047, 0.44186047])

In [14]:
# Create the Ridge regression estimator again, this time with alpha = 1.
# alpha is the tuning parameter (lambda).
reg = Ridge(alpha=1)

# Training: the first three entries of train_data are passed to fit() as the
# feature rows and the last entry as the target values.
train_data = [[0, 0], [0, 0], [1, 1], [0, .1, 1]]
features, targets = train_data[:3], train_data[-1]
reg.fit(features, targets)

# Predict on the test point and print the result.
pred_test = reg.predict([[0, 1]])
print(pred_test)

# As alpha (the lambda tuning variable) gets larger, the coefficient values get smaller.
reg.coef_


[0.45714286]


array([0.27142857, 0.27142857])

In [15]:
# A first taste of Lasso (L1).
reg = Lasso(alpha=0.1)

In [16]:
# Training.
# train_data packs the toy dataset into one list: the first three entries are
# passed to fit() as the feature rows and the last entry as the target values.
train_data = [[0, 0], [0, 0], [1, 1], [0, .1, 1]]
features, targets = train_data[:3], train_data[-1]

reg.fit(features, targets)

Lasso(alpha=0.1)

In [17]:
# Test: predict the response for a single test point.
pred_test = reg.predict([[0, 1]])
pred_test

array([0.2])

In [18]:
reg.coef_

# Check that the second coefficient becomes 0.

array([0.5, 0. ])

In [19]:
# Create the Lasso estimator with alpha = 1.
# alpha is the tuning parameter (lambda).
reg = Lasso(alpha=1)

# Training: the first three entries of train_data are passed to fit() as the
# feature rows and the last entry as the target values.
train_data = [[0, 0], [0, 0], [1, 1], [0, .1, 1]]
features, targets = train_data[:3], train_data[-1]
reg.fit(features, targets)

# Predict on the test point and print the result.
pred_test = reg.predict([[0, 1]])
print(pred_test)

reg.coef_




[0.36666667]


array([0., 0.])

In [21]:
# Load the advertising data; the first CSV column is used as the row index.
ad = pd.read_csv('./Advertising.csv', index_col=0)
ad

Unnamed: 0,TV,Radio,Newspaper,Sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9
...,...,...,...,...
196,38.2,3.7,13.8,7.6
197,94.2,4.9,8.1,9.7
198,177.0,9.3,6.4,12.8
199,283.6,42.0,66.2,25.5


In [26]:
# Apply Lasso and pick the best lambda (alpha) by comparing the average
# validation MSE from k-fold cross-validation for each candidate value.

# Number of folds for the k-fold CV.
n_fold = 5

# The splitter is created without shuffling, so it yields the same folds for
# every lambda; build it once instead of once per candidate.
kf = KFold(n_splits=n_fold)

for t_param in (0.01, 1, 1000):  # candidate lambda values: 0.01, 1, 1000

    print('Lambda :', t_param)

    # Running total of the per-fold validation MSE for this lambda.
    sum_val_mse = 0

    for idx, (train, val) in enumerate(kf.split(ad), start=1):
        print('Fold :', idx)

        # Split by position: every column except the last is a feature,
        # the last column is the response.
        train_X = ad.iloc[train, :-1]
        train_y = ad.iloc[train, -1]

        val_X = ad.iloc[val, :-1]
        val_y = ad.iloc[val, -1]

        # Create a Lasso estimator and fit it on the training part.
        regr = Lasso(alpha=t_param)
        regr.fit(train_X, train_y)

        # Predict on the validation part.
        pred_y = regr.predict(val_X)

        # Print the fitted coefficients.
        print('Coefficients :', regr.coef_)

        # Validation MSE for this fold, accumulated into the running total.
        val_mse = mean_squared_error(val_y, pred_y)
        sum_val_mse += val_mse

        print('-'*100)

    print('Average Validation MSE : %.3f'%(sum_val_mse / n_fold))
    print('*'*100)

Lambda : 0.01
Fold : 1


Lasso(alpha=0.01)

Coefficients : [0.04585721 0.18786616 0.0035984 ]
----------------------------------------------------------------------------------------------------
Fold : 2


Lasso(alpha=0.01)

Coefficients : [0.04513011 0.18790415 0.00140072]
----------------------------------------------------------------------------------------------------
Fold : 3


Lasso(alpha=0.01)

Coefficients : [ 0.04697919  0.18866641 -0.00232364]
----------------------------------------------------------------------------------------------------
Fold : 4


Lasso(alpha=0.01)

Coefficients : [ 0.0431589   0.20006652 -0.00754594]
----------------------------------------------------------------------------------------------------
Fold : 5


Lasso(alpha=0.01)

Coefficients : [ 0.04725115  0.17985369 -0.00090631]
----------------------------------------------------------------------------------------------------
Average Validation MSE : 3.073
****************************************************************************************************
Lambda : 1
Fold : 1


Lasso(alpha=1)

Coefficients : [0.04582866 0.18396308 0.00216856]
----------------------------------------------------------------------------------------------------
Fold : 2


Lasso(alpha=1)

Coefficients : [0.04501995 0.1840591  0.00038057]
----------------------------------------------------------------------------------------------------
Fold : 3


Lasso(alpha=1)

Coefficients : [ 0.04686138  0.18301965 -0.        ]
----------------------------------------------------------------------------------------------------
Fold : 4


Lasso(alpha=1)

Coefficients : [ 0.04313154  0.19314056 -0.0035882 ]
----------------------------------------------------------------------------------------------------
Fold : 5


Lasso(alpha=1)

Coefficients : [0.04715708 0.17475785 0.        ]
----------------------------------------------------------------------------------------------------
Average Validation MSE : 3.041
****************************************************************************************************
Lambda : 1000
Fold : 1


Lasso(alpha=1000)

Coefficients : [0. 0. 0.]
----------------------------------------------------------------------------------------------------
Fold : 2


Lasso(alpha=1000)

Coefficients : [0. 0. 0.]
----------------------------------------------------------------------------------------------------
Fold : 3


Lasso(alpha=1000)

Coefficients : [0. 0. 0.]
----------------------------------------------------------------------------------------------------
Fold : 4


Lasso(alpha=1000)

Coefficients : [0. 0. 0.]
----------------------------------------------------------------------------------------------------
Fold : 5


Lasso(alpha=1000)

Coefficients : [0. 0. 0.]
----------------------------------------------------------------------------------------------------
Average Validation MSE : 27.283
****************************************************************************************************
