# REGRESI LINEAR DENGAN RIDGE, LASSO, ELASTICNET

In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso, Ridge, ElasticNet
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt

# NORM (panjang atau jarak)


$$|x| = abs(x)$$
$$||x||_1 = |x_1|+|x_2|+...+|x_p| $$


$$||x||_2=\sqrt{x_1^2+x_2^2+...+x_p^2}$$

$$||x||_2^2=x_1^2+x_2^2+...+x_p^2 = \sum_{i=1}^px_i^2$$

$$||x-y||_2^2=(x_1-y_1)^2+(x_1-y_1)_2^2+...+(x_1-y_1)_p^2$$

$$||Xw - y||_2^2 + \alpha ||w||_2^2$$

$$\sum  ((Xw)_i -y_i)^2 + \alpha \sum w_i^2$$

In [2]:
from sklearn.datasets import fetch_california_housing

data = fetch_california_housing()

In [3]:
dir(data)

['DESCR', 'data', 'feature_names', 'frame', 'target', 'target_names']

In [4]:
data.feature_names

['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']

In [5]:
X = data.data
y = data.target

In [6]:
y

array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894])

In [7]:
# Scaling data per kolom
scaler = StandardScaler()
Xc = scaler.fit_transform(X)

NameError: name 'StandardScaler' is not defined

In [9]:
np.max(Xc, axis=0)

array([  5.85828581,   1.85618152,  55.16323628,  69.57171326,
        30.25033022, 119.41910319,   2.95806762,   2.62528006])

In [10]:
np.max(X, axis=0)

array([ 1.50001000e+01,  5.20000000e+01,  1.41909091e+02,  3.40666667e+01,
        3.56820000e+04,  1.24333333e+03,  4.19500000e+01, -1.14310000e+02])

In [11]:
(X[:,0]-np.mean(X[:,0]))/np.std(X[:,0])

array([ 2.34476576,  2.33223796,  1.7826994 , ..., -1.14259331,
       -1.05458292, -0.78012947])

In [12]:
Xc[:,0]

array([ 2.34476576,  2.33223796,  1.7826994 , ..., -1.14259331,
       -1.05458292, -0.78012947])

In [13]:
(X - np.mean(X, axis=0))/np.std(X, axis=0)

array([[ 2.34476576,  0.98214266,  0.62855945, ..., -0.04959654,
         1.05254828, -1.32783522],
       [ 2.33223796, -0.60701891,  0.32704136, ..., -0.09251223,
         1.04318455, -1.32284391],
       [ 1.7826994 ,  1.85618152,  1.15562047, ..., -0.02584253,
         1.03850269, -1.33282653],
       ...,
       [-1.14259331, -0.92485123, -0.09031802, ..., -0.0717345 ,
         1.77823747, -0.8237132 ],
       [-1.05458292, -0.84539315, -0.04021111, ..., -0.09122515,
         1.77823747, -0.87362627],
       [-0.78012947, -1.00430931, -0.07044252, ..., -0.04368215,
         1.75014627, -0.83369581]])

In [14]:
alpha=0.1
ridge = Ridge(alpha=alpha)
lasso = Lasso(alpha=alpha)
elnet = ElasticNet(alpha=alpha, l1_ratio=0.5)

In [15]:
X_train, X_test, y_train, y_test = train_test_split(Xc,
                                                    y, 
                                                    test_size=0.33, 
                                                    random_state=42)

In [16]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)

(13828, 8)
(6812, 8)
(13828,)


In [17]:
ridge.fit(X_train, y_train)

In [18]:
lasso.fit(X_train, y_train)

In [19]:
elnet.fit(X_train, y_train)

In [20]:
ypred_r = ridge.predict(X_test)
ypred_l = lasso.predict(X_test)
ypred_e = elnet.predict(X_test)

mse_r = mean_squared_error(y_test, ypred_r) 
mse_l = mean_squared_error(y_test, ypred_l)
mse_e = mean_squared_error(y_test, ypred_e) #np.mean((y_test-ypred_r)**2)

print('MSE Ridge :', mse_r)
print('MSE Lasso :', mse_l)
print('MSE Elasticnet :', mse_e)


MSE Ridge : 0.5369661838855766
MSE Lasso : 0.6766256251412097
MSE Elasticnet : 0.6307004819859948


In [21]:
data.feature_names

['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']

In [1]:
ridge.coef_

NameError: name 'ridge' is not defined

In [23]:
lasso.coef_

array([ 0.70472826,  0.10464912, -0.        ,  0.        , -0.        ,
       -0.        , -0.00883159, -0.        ])

In [24]:
[data.feature_names[i] for i in np.where(ridge.coef_!=0)[0]]

['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']

In [25]:
[data.feature_names[i] for i in np.where(lasso.coef_!=0)[0]]

['MedInc', 'HouseAge', 'Latitude']

In [26]:
[data.feature_names[i] for i in np.where(elnet.coef_!=0)[0]]

['MedInc', 'HouseAge', 'AveOccup', 'Latitude', 'Longitude']

# PR 
Kumpulkan 6 Mei 2024 jam 15.00 WIB ke suryasatriya@uksw.edu

1. Masukkan data sklearn California Housing ke sqlite
2. Baca data tsb
3. Pakailah Ridge, Lasso dan Elastic-Net dengan nilai $\alpha$ dan L1_ratio berbeda2. Buat tabel perbandingan MSE masing2. Buat table features mana yg mempunyai coef_ tidak nol