# Kodekladd

According to Week 36:

OLS:
$$
\nabla_{\theta} C(\theta) = \frac{2}{n}X^T(X\theta - \mathbf{y})
$$

Ridge:
$$
\frac{\partial C(\boldsymbol{X},\boldsymbol{\theta})}{\partial \boldsymbol{\theta}}=-\frac{2}{n}\boldsymbol{X}^T(\boldsymbol{y}-\boldsymbol{X}\boldsymbol{\theta})+2\lambda \theta
$$

Lasso:
$$
\frac{\partial C(\boldsymbol{X},\boldsymbol{\theta})}{\partial \boldsymbol{\theta}}=-\frac{2}{n}\boldsymbol{X}^T(\boldsymbol{y}-\boldsymbol{X}\boldsymbol{\theta})+\lambda sgn(\boldsymbol{\theta})
$$

In [4]:
# Imports
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from functions import runge, MSE, R2, Ridge_parameters, OLS_parameters
from functions import Lasso_gradient, OLS_gradient, Ridge_gradient, polynomial_features
from sklearn import linear_model

In [None]:
# Comparison Lasso, Ridge and OLS regression with GD and momentum

n = 1000

np.random.seed(42)

x = np.linspace(-1,1, n)
y = runge(x) + 0.1*np.random.normal(0,1)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = polynomial_features(x_train, 10)
X_test = polynomial_features(x_test, 10)
scaler.fit(X_train)
X_train_s = scaler.transform(X_train)
X_test_s = scaler.transform(X_test)
y_offset = np.mean(y_train)

lmbda = 0.001

beta_r = Ridge_parameters(X_train_s, y_train, lmbda)
beta_o = OLS_parameters(X_train_s, y_train)
print('Ridge parameters:', beta_r)
print('OLS parameters:', beta_o)
y_ols = X_test_s @ beta_o
y_ridge = X_test_s @ beta_r
mse_ols = MSE(y_test, y_ols)
mse_ridge = MSE(y_test, y_ridge)
r2_ols = R2(y_test, y_ols)
r2_ridge = R2(y_test, y_ridge)

lr = 0.2
num_iters = 1000000000
momentum = 0.3

# Initialize weights for gradient descent
beta_gd_r = np.zeros(len(beta_r))
beta_gd_o = np.zeros(len(beta_o))
beta_gd_lasso = np.zeros(len(beta_o))

stopping_criteria = [1e-10]*len(beta_r)
change = 0.0

# Gradient descent loop
# for t in range(num_iters):
#     # Compute gradients for Ridge
#     grad_Ridge = Ridge_gradient(X_train_s, y_train, beta_gd_r, lmbda)
#     # Calculate change with momentum
#     new_change = lr * grad_Ridge + momentum * change
#     # Update parameters beta
#     beta_gd_r = beta_gd_r - new_change
#     # Save change for next iteration
#     change = new_change
#     # Check for convergence
#     if (np.abs(- lr * grad_Ridge) < stopping_criteria).all():
#         print("Convergence reached at iteration for Ridge", t)
#         break

# change = 0.0
# for t in range(num_iters):
#     # Compute gradients for OLS
#     grad_OLS = OLS_gradient(X_train_s, y_train, beta_gd_o)
#     # Calculate change with momentum
#     new_change = lr * grad_OLS + momentum * change
#     # Update parameters beta
#     beta_gd_o = beta_gd_o - new_change
#     # Save change for next iteration
#     change = new_change
#     # Check for convergence
#     if (np.abs(- lr * grad_OLS) < stopping_criteria).all():
#         print("Convergence reached at iteration for OLS", t)
#         break

change = 0.0
for t in range(num_iters):
    # Compute gradients for Lasso
    grad_Lasso = Lasso_gradient(X_train_s, y_train, beta_gd_lasso, lmbda)
    # Calculate change with momentum
    new_change = lr * grad_Lasso + momentum * change
    # Update parameters beta
    beta_gd_lasso = beta_gd_lasso - new_change
    # Save change for next iteration
    change = new_change
    # Check for convergence
    if (np.abs(- lr * grad_Lasso) < stopping_criteria).all():
        print("Convergence reached at iteration for Lasso", t)
        break

# y_gd_ols = X_test_s @ beta_gd_o + y_offset
# y_gd_ridge = X_test_s @ beta_gd_r + y_offset
y_gd_lasso = X_test_s @ beta_gd_lasso + y_offset

print(f"Learning rate: {lr}")
print(f"MSE OLS: {mse_ols}, MSE Ridge: {mse_ridge},")
#print(f"MSE GD OLS: {MSE(y_test, y_gd_ols)}, MSE GD Ridge: {MSE(y_test, y_gd_ridge)}, ")
print(f"MSE GD Lasso: {MSE(y_test, y_gd_lasso)}")
print(f"R2 OLS: {r2_ols}, R2 Ridge: {r2_ridge},")
#print(f"R2 GD OLS: {R2(y_test, y_gd_ols)}, R2 GD Ridge: {R2(y_test, y_gd_ridge)}, ")
print(f"R2 GD Lasso: {R2(y_test, y_gd_lasso)}")
#print(f"Beta GD OLS: {beta_gd_o}, Beta GD Ridge: {beta_gd_r}, ")
print(f"Beta GD Lasso: {beta_gd_lasso}")
print("--------------------------------------------------")

# include lasso using Scikit-Learn
RegLasso = linear_model.Lasso(lmbda,fit_intercept=False)
RegLasso.fit(X_train_s,y_train)
y_lasso_sklearn = RegLasso.predict(X_test_s) + y_offset
mse_lasso_sklearn = MSE(y_test, y_lasso_sklearn)
r2_lasso_sklearn = R2(y_test, y_lasso_sklearn)
print(f"MSE Lasso Scikit-Learn: {mse_lasso_sklearn}")
print(f"R2 Lasso Scikit-Learn: {r2_lasso_sklearn}")
print(f"Beta Lasso Scikit-Learn: {RegLasso.coef_} ")
print("--------------------------------------------------")

Ridge parameters: [ 0.00000000e+00  5.54300134e-03 -2.59074814e+00 -7.44146292e-03
  9.69505866e+00 -9.50339784e-03 -1.68243883e+01  2.65540645e-02
  1.37520890e+01 -1.42341245e-02 -4.27508408e+00]
OLS parameters: [ 0.00000000e+00  3.74356233e-03 -2.97426630e+00  9.69057072e-03
  1.25136327e+01 -7.10156122e-02 -2.40999002e+01  1.12354140e-01
  2.15580712e+01 -5.44668224e-02 -7.24703004e+00]


In [9]:
# include lasso using Scikit-Learn
RegLasso = linear_model.Lasso(lmbda,fit_intercept=False, max_iter=10000000)
RegLasso.fit(X_train_s,y_train)
y_lasso_sklearn = RegLasso.predict(X_test_s) + y_offset
mse_lasso_sklearn = MSE(y_test, y_lasso_sklearn)
r2_lasso_sklearn = R2(y_test, y_lasso_sklearn)
print(f"MSE Lasso Scikit-Learn: {mse_lasso_sklearn}")
print(f"R2 Lasso Scikit-Learn: {r2_lasso_sklearn}")
print(f"Beta Lasso Scikit-Learn: {RegLasso.coef_} ")
print("--------------------------------------------------")

MSE Lasso Scikit-Learn: 0.009931854710884884
R2 Lasso Scikit-Learn: 0.8665146819730404
Beta Lasso Scikit-Learn: [ 0.00000000e+00 -4.06494043e-04 -1.00759002e+00 -0.00000000e+00
  1.16444503e+00 -0.00000000e+00 -0.00000000e+00 -0.00000000e+00
 -3.81997637e-01 -0.00000000e+00 -0.00000000e+00] 
--------------------------------------------------
