In [1]:
import pandas as pd

# 선형회귀, ridge regression, lasso 관련 scikit-learn 라이브러리
from sklearn import linear_model
from sklearn.metrics import mean_squared_error 

# K-fold cross validation 관련 라이브러리
from sklearn.model_selection import KFold

In [8]:

# Toy data: three training samples with two features each, their responses,
# and a single sample to predict on
toy_X = [[0, 0], [0, 0], [1, 1]]
toy_y = [0, .1, 1]
toy_X_new = [[0, 1]]

# Create the ridge regression estimator (alpha is the penalty strength)
reg = linear_model.Ridge(alpha=1.0)

# Fit on the toy training data
reg.fit(toy_X, toy_y)

# Predict the response for the held-out sample
pred_test = reg.predict(toy_X_new)

In [9]:
# Display the ridge model's prediction for the test sample [[0, 1]]
pred_test

array([0.45714286])

In [10]:
# Display the fitted ridge coefficients (one per input feature)
reg.coef_

array([0.27142857, 0.27142857])

In [18]:
# Create the Lasso estimator (alpha is the penalty strength) and fit it on the
# toy training data in one step; fit() returns the fitted estimator itself
reg = linear_model.Lasso(alpha=1.0).fit(
    [[0, 0], [0, 0], [1, 1]],
    [0, .1, 1],
)

# Predict the response for a single test sample
pred_test = reg.predict([[0, 1]])

In [19]:
# Predicted value for the test data
pred_test

array([0.36666667])

In [20]:
# Fitted coefficients (one per input feature)
reg.coef_

array([0., 0.])

In [21]:
# Mount Google Drive; the dataset is read from a path under the mount point
from google.colab import drive
drive.mount('/content/drive')

# Load the Advertising data, using the first CSV column as the row index
ad_csv_path = "/content/drive/MyDrive/Colab Notebooks/data/Advertising.csv"
ad = pd.read_csv(ad_csv_path, index_col=0)

Mounted at /content/drive


In [22]:
# Display the loaded Advertising DataFrame
ad

Unnamed: 0,TV,Radio,Newspaper,Sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9
...,...,...,...,...
196,38.2,3.7,13.8,7.6
197,94.2,4.9,8.1,9.7
198,177.0,9.3,6.4,12.8
199,283.6,42.0,66.2,25.5


In [24]:
# Number of folds for k-fold cross-validation
n_fold = 5

# Feature and response columns, named once instead of being repeated per use
feature_cols = ["TV", "Radio", "Newspaper"]
response_cols = ["Sales"]

# KFold without shuffling is deterministic and holds no state between calls to
# split(), so one splitter is shared by every penalty strength; each value is
# therefore evaluated on exactly the same folds.
kf = KFold(n_splits=n_fold)

for t_param in (0.001, 1, 2):
  # t_param is the Lasso penalty strength (passed as `alpha` below)
  print("Lambda: ", t_param)

  # Running total of the validation MSE over the folds
  sum_val_mse = 0
  for idx, (train, val) in enumerate(kf.split(ad), start=1):
    print("Fold: #", idx)

    # Slice each fold's rows once, then separate features from the response
    train_rows = ad.iloc[train]
    val_rows = ad.iloc[val]

    train_X = train_rows[feature_cols]
    train_y = train_rows[response_cols]

    val_X = val_rows[feature_cols]
    val_y = val_rows[response_cols]

    # Create the Lasso estimator for this penalty strength
    regr = linear_model.Lasso(alpha=t_param)

    # Fit on the training fold
    regr.fit(train_X, train_y)

    # Print the fitted coefficients
    print("Coefficients: \n", regr.coef_)

    # Predict on the validation fold
    val_y_pred = regr.predict(val_X)

    # Validation MSE for this fold
    val_mse = mean_squared_error(val_y, val_y_pred)

    # Accumulate the validation MSE
    sum_val_mse += val_mse

    print("------------------------------")
  # Average over the folds
  print("Average Validation MSE: %.3f" % (sum_val_mse / n_fold))
  print("******************************")

Lambda:  0.001
Fold: # 1
Coefficients: 
 [0.04585753 0.18790125 0.00361145]
------------------------------
Fold: # 2
Coefficients: 
 [0.04513111 0.1879391  0.00140999]
------------------------------
Fold: # 3
Coefficients: 
 [ 0.04698029  0.18872464 -0.00235756]
------------------------------
Fold: # 4
Coefficients: 
 [ 0.04315907  0.20013028 -0.00758214]
------------------------------
Fold: # 5
Coefficients: 
 [ 0.047252    0.17991513 -0.00094029]
------------------------------
Average Validation MSE: 3.073
******************************
Lambda:  1
Fold: # 1
Coefficients: 
 [0.04582866 0.18396308 0.00216856]
------------------------------
Fold: # 2
Coefficients: 
 [0.04501995 0.1840591  0.00038057]
------------------------------
Fold: # 3
Coefficients: 
 [ 0.04686138  0.18301965 -0.        ]
------------------------------
Fold: # 4
Coefficients: 
 [ 0.04313154  0.19314056 -0.0035882 ]
------------------------------
Fold: # 5
Coefficients: 
 [0.04715708 0.17475785 0.        ]
---------