# Kodekladd

According to Week 36:

OLS:
$$
\nabla_{\theta} C(\theta) = \frac{2}{n}X^T(X\theta - \mathbf{y})
$$

Ridge:
$$
\frac{\partial C(\boldsymbol{X},\boldsymbol{\theta})}{\partial \boldsymbol{\theta}}=-\frac{2}{n}\boldsymbol{X}^T(\boldsymbol{y}-\boldsymbol{X}\boldsymbol{\theta})+2\lambda \theta
$$

Lasso:
$$
\frac{\partial C(\boldsymbol{X},\boldsymbol{\theta})}{\partial \boldsymbol{\theta}}=-\frac{2}{n}\boldsymbol{X}^T(\boldsymbol{y}-\boldsymbol{X}\boldsymbol{\theta})+\lambda sgn(\boldsymbol{\theta})
$$

In [1]:
# Imports
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from functions import runge, MSE, R2, Ridge_parameters, OLS_parameters
from functions import Lasso_gradient, OLS_gradient, Ridge_gradient, polynomial_features
from sklearn import linear_model
import json

In [None]:
data = np.load('data/data_arrays_last.npz')
X_train_s = data['X_train_s']
X_test_s = data['X_test_s']
y_test = data['y_test']
y_train = data['y_train']
y_offset = data['y_offset']

lmbda = 0.001
rho = 0.9
lr = 0.01

# Calculate parameters using OLS and Ridge closed form solutions
beta_r = Ridge_parameters(X_train_s, y_train, lmbda)
beta_o = OLS_parameters(X_train_s, y_train)

In [None]:
#Lasso with GD

# Initialize weights for gradient descent
beta_gd_lasso = np.zeros(len(beta_r))

# Initialize hyperparameters
num_iters = 100000000
stopping_criteria = [1e-10]*len(beta_r)

for t in range(num_iters):
    # Compute gradients for Lasso
    grad_Lasso = Lasso_gradient(X_train_s, y_train, beta_gd_lasso, lmbda)
    # Update parameters beta
    beta_gd_lasso = beta_gd_lasso - lr * grad_Lasso
    # Check for convergence
    if (np.abs(- lr * grad_Lasso) < stopping_criteria).all():
        print("Convergence reached at iteration for Lasso", t)
        break

y_gd_lasso = X_test_s @ beta_gd_lasso + y_offset

print(f"Learning rate: {lr}")
print(f"MSE GD Lasso: {MSE(y_test, y_gd_lasso)}")
print(f"R2 GD Lasso: {R2(y_test, y_gd_lasso)}")
print(f"Beta GD Lasso: {beta_gd_lasso}")
print("--------------------------------------------------")

dict_lasso_plain_gd = {'MSE GD Lasso': MSE(y_test, y_gd_lasso),
                       'R2 GD Lasso': R2(y_test, y_gd_lasso),
                       'Beta GD Lasso': beta_gd_lasso,}
with open('data/lasso_plain_gd_results.json', 'w') as f:
    json.dump(dict_lasso_plain_gd, f, indent=4, default=lambda x: x.tolist() if hasattr(x, 'tolist') else x)

# include lasso using Scikit-Learn
RegLasso = linear_model.Lasso(lmbda,fit_intercept=False, max_iter=10000000)
RegLasso.fit(X_train_s,y_train)
y_lasso_sklearn = RegLasso.predict(X_test_s) + y_offset
mse_lasso_sklearn = MSE(y_test, y_lasso_sklearn)
r2_lasso_sklearn = R2(y_test, y_lasso_sklearn)
print(f"MSE Lasso Scikit-Learn: {mse_lasso_sklearn}")
print(f"R2 Lasso Scikit-Learn: {r2_lasso_sklearn}")
print(f"Beta Lasso Scikit-Learn: {RegLasso.coef_} ")
print("--------------------------------------------------")

dict_lasso_sklearn = {'MSE': mse_lasso_sklearn,
                      'R2': r2_lasso_sklearn,
                      'Beta': RegLasso.coef_,}
with open('data/lasso_sklearn_results.json', 'w') as f:
    json.dump(dict_lasso_sklearn, f, indent=4, default=lambda x: x.tolist() if hasattr(x, 'tolist') else x)

Learning rate: 0.01
MSE GD Lasso: 0.007935647615323806
R2 GD Lasso: 0.8932715756804516
Beta GD Lasso: [ 0.00000000e+00 -1.11129748e-05 -1.21672643e+00 -5.51574249e-04
  1.93544325e+00 -1.57784376e-05 -9.49709007e-01 -1.95245519e-05
 -1.73734070e-05 -2.73017772e-04  1.53601352e-05]
--------------------------------------------------
MSE Lasso Scikit-Learn: 0.009931854710884884
R2 Lasso Scikit-Learn: 0.8665146819730404
Beta Lasso Scikit-Learn: [ 0.00000000e+00 -4.06494043e-04 -1.00759002e+00 -0.00000000e+00
  1.16444503e+00 -0.00000000e+00 -0.00000000e+00 -0.00000000e+00
 -3.81997637e-01 -0.00000000e+00 -0.00000000e+00] 
--------------------------------------------------


In [None]:
#Lasso with GD and momentum

# Initialize weights for gradient descent
beta_gd_lasso = np.zeros(len(beta_r))

# Initialize hyperparameters
num_iters = 100000000
momentum = 0.3
stopping_criteria = [1e-10]*len(beta_r)
change = 0.0

for t in range(num_iters):
    # Compute gradients for Lasso
    grad_Lasso = Lasso_gradient(X_train_s, y_train, beta_gd_lasso, lmbda)
    # Calculate change with momentum
    new_change = lr * grad_Lasso + momentum * change
    # Update parameters beta
    beta_gd_lasso = beta_gd_lasso - new_change
    # Save change for next iteration
    change = new_change
    # Check for convergence
    if (np.abs(new_change) < stopping_criteria).all():
        print("Convergence reached at iteration for Lasso", t)
        break

y_gd_lasso = X_test_s @ beta_gd_lasso + y_offset

print(f"Learning rate: {lr}")
print(f"MSE GD Lasso: {MSE(y_test, y_gd_lasso)}")
print(f"R2 GD Lasso: {R2(y_test, y_gd_lasso)}")
print(f"Beta GD Lasso: {beta_gd_lasso}")
print("--------------------------------------------------")

dict_lasso_momentum = {'MSE GD Lasso': MSE(y_test, y_gd_lasso),
                       'R2 GD Lasso': R2(y_test, y_gd_lasso),
                       'Beta GD Lasso': beta_gd_lasso,}
with open('data/lasso_momentum_results.json', 'w') as f:
    json.dump(dict_lasso_momentum, f, indent=4, default=lambda x: x.tolist() if hasattr(x, 'tolist') else x)

Learning rate: 0.01
MSE GD Lasso: 0.007935440271519616
R2 GD Lasso: 0.8932744293428048
Beta GD Lasso: [ 0.00000000e+00 -1.33359334e-05 -1.21672627e+00 -5.41103174e-04
  1.93544471e+00 -1.17704250e-05 -9.49709099e-01 -1.57072195e-05
 -1.18485563e-05 -2.65642710e-04 -2.48185351e-06]
--------------------------------------------------


In [None]:
#Lasso with GD and ADAgrad

# Initialize weights for gradient descent
beta_gd_lasso = np.zeros(len(beta_r))

# Initialize hyperparameters
num_iters = 100000000
stopping_criteria = [1e-10]*len(beta_r)
delta = 1e-8
G_iter = 0.0

for t in range(num_iters):
    # Compute gradients for Lasso
    grad_Lasso = Lasso_gradient(X_train_s, y_train, beta_gd_lasso, lmbda)
    G_iter += grad_Lasso*grad_Lasso
    # Update parameters beta
    update = (lr / (np.sqrt(G_iter) + delta)) * grad_Lasso
    beta_gd_lasso = beta_gd_lasso - update
    # Check for convergence
    if (np.abs(update) < stopping_criteria).all():
        print("Convergence reached at iteration for Lasso", t)
        break

y_gd_lasso = X_test_s @ beta_gd_lasso + y_offset

print(f"Learning rate: {lr}")
print(f"MSE GD Lasso: {MSE(y_test, y_gd_lasso)}")
print(f"R2 GD Lasso: {R2(y_test, y_gd_lasso)}")
print(f"Beta GD Lasso: {beta_gd_lasso}")
print("--------------------------------------------------")

dict_lasso_adagrad = {'MSE GD Lasso': MSE(y_test, y_gd_lasso),
                       'R2 GD Lasso': R2(y_test, y_gd_lasso),
                       'Beta GD Lasso': beta_gd_lasso,}
with open('data/lasso_adagrad_results.json', 'w') as f:
    json.dump(dict_lasso_adagrad, f, indent=4, default=lambda x: x.tolist() if hasattr(x, 'tolist') else x)

Learning rate: 0.01
MSE GD Lasso: 0.007935349500569516
R2 GD Lasso: 0.8932756476237397
Beta GD Lasso: [ 0.00000000e+00  3.15620603e-07 -1.21672492e+00 -5.64916873e-04
  1.93543697e+00 -1.53175044e-06 -9.49704717e-01 -1.73155353e-06
 -2.33669793e-07 -2.74752685e-04  2.00224677e-06]
--------------------------------------------------


In [None]:
#Lasso with GD and RMSprop

# Initialize weights for gradient descent
beta_gd_lasso = np.zeros(len(beta_r))

# Initialize hyperparameters
num_iters = 100000000
stopping_criteria = [1e-10]*len(beta_r)
delta = 1e-8
G_iter = np.zeros(len(beta_r))
rho = 0.9

for t in range(num_iters):
    # Compute gradients for Lasso
    grad_Lasso = Lasso_gradient(X_train_s, y_train, beta_gd_lasso, lmbda)
    G_iter = (rho*G_iter + (1-rho)*grad_Lasso*grad_Lasso)
    # Update parameters beta
    update = (lr / (np.sqrt(G_iter) + delta)) * grad_Lasso
    beta_gd_lasso -= update
    # Check for convergence
    if (np.abs(update) < stopping_criteria).all():
        print("Convergence reached at iteration for Lasso", t)
        break

y_gd_lasso = X_test_s @ beta_gd_lasso + y_offset

print(f"Learning rate: {lr}")
print(f"MSE GD Lasso: {MSE(y_test, y_gd_lasso)}")
print(f"R2 GD Lasso: {R2(y_test, y_gd_lasso)}")
print(f"Beta GD Lasso: {beta_gd_lasso}")
print("--------------------------------------------------")

dict_lasso_rmsprop = {'MSE GD Lasso': MSE(y_test, y_gd_lasso),
                       'R2 GD Lasso': R2(y_test, y_gd_lasso),
                       'Beta GD Lasso': beta_gd_lasso,}
with open('data/lasso_rmsprop_results.json', 'w') as f:
    json.dump(dict_lasso_rmsprop, f, indent=4, default=lambda x: x.tolist() if hasattr(x, 'tolist') else x)

Learning rate: 0.01
MSE GD Lasso: 0.008647651527696799
R2 GD Lasso: 0.8837785702656817
Beta GD Lasso: [ 0.00000000e+00  5.98784184e-03 -1.22337161e+00  1.38673373e-04
  1.93704997e+00  7.65733176e-03 -9.59437264e-01  9.76294502e-03
 -9.94940853e-03  1.93866259e-05  4.39352084e-05]
--------------------------------------------------


In [None]:
#Lasso with GD and ADAM

# Initialize weights for gradient descent
beta_gd_lasso = np.zeros(len(beta_r))

# Initialize hyperparameters
num_iters = 100000000
stopping_criteria = [1e-10]*len(beta_r)
delta = 1e-8
rho_1 = 0.9
rho_2 = 0.99
first_moment = 0.0
second_moment = 0.0

for t in range(num_iters):
    t += 1
    # Compute gradients for Lasso
    grad_Lasso = Lasso_gradient(X_train_s, y_train, beta_gd_lasso, lmbda)
    # Computing moments first
    first_moment = rho_1 * first_moment + (1 - rho_1) * grad_Lasso
    second_moment = rho_2 * second_moment + (1 - rho_2) * grad_Lasso * grad_Lasso
    first_term = first_moment / (1 - rho_1**(t))
    second_term = second_moment / (1 - rho_2**(t))
    # Update parameters beta
    update = (lr / (np.sqrt(second_term) + delta)) * first_term
    beta_gd_lasso = beta_gd_lasso - update
    # Check for convergence
    if (np.abs(update) < stopping_criteria).all():
        print("Convergence reached at iteration for Lasso", t)
        break

y_gd_lasso = X_test_s @ beta_gd_lasso + y_offset

print(f"Learning rate: {lr}")
print(f"MSE GD Lasso: {MSE(y_test, y_gd_lasso)}")
print(f"R2 GD Lasso: {R2(y_test, y_gd_lasso)}")
print(f"Beta GD Lasso: {beta_gd_lasso}")
print("--------------------------------------------------")

dict_lasso_adam = {'MSE GD Lasso': MSE(y_test, y_gd_lasso),
                   'R2 GD Lasso': R2(y_test, y_gd_lasso),
                   'Beta GD Lasso': beta_gd_lasso,}
with open('data/lasso_adam_results.json', 'w') as f:
    json.dump(dict_lasso_adam, f, indent=4, default=lambda x: x.tolist() if hasattr(x, 'tolist') else x)

Learning rate: 0.01
MSE GD Lasso: 0.007959630651241063
R2 GD Lasso: 0.8929437432944295
Beta GD Lasso: [ 0.00000000e+00 -7.95422451e-04 -1.21745529e+00 -8.84745178e-04
  1.93648322e+00  9.59189860e-04 -9.48975389e-01 -5.96204216e-04
 -1.25524243e-03 -1.78463690e-03  3.97872212e-04]
--------------------------------------------------
