# Kodekladd

According to Week 36:

OLS:
$$
\nabla_{\theta} C(\theta) = \frac{2}{n}X^T(X\theta - \mathbf{y})
$$

Ridge:
$$
\frac{\partial C(\boldsymbol{X},\boldsymbol{\theta})}{\partial \boldsymbol{\theta}}=-\frac{2}{n}\boldsymbol{X}^T(\boldsymbol{y}-\boldsymbol{X}\boldsymbol{\theta})+2\lambda \theta
$$

Lasso:
$$
\frac{\partial C(\boldsymbol{X},\boldsymbol{\theta})}{\partial \boldsymbol{\theta}}=-\frac{2}{n}\boldsymbol{X}^T(\boldsymbol{y}-\boldsymbol{X}\boldsymbol{\theta})+\lambda sgn(\boldsymbol{\theta})
$$

In [8]:
# Imports
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from functions import runge, MSE, R2, Ridge_parameters, OLS_parameters
from functions import Lasso_gradient, OLS_gradient, Ridge_gradient, polynomial_features
from sklearn import linear_model
import json

In [9]:
data = np.load('data_arrays_last.npz')
X_train_s = data['X_train_s']
X_test_s = data['X_test_s']
y_test = data['y_test']
y_train = data['y_train']
y_offset = data['y_offset']

lmbda = 0.001
rho = 0.9
# Calculate parameters using OLS and Ridge closed form solutions
beta_r = Ridge_parameters(X_train_s, y_train, lmbda)
beta_o = OLS_parameters(X_train_s, y_train)

# Hessian matrix
H = (2.0/len(X_train_s))* X_train_s.T @ X_train_s 
EigValues, EigVectors = np.linalg.eig(H)

In [3]:
#Lasso with GD

# Initialize weights for gradient descent
beta_gd_lasso = np.zeros(len(beta_r))

# Initialize hyperparameters
lr = 1.0 / np.max(EigValues)
num_iters = 100000000
stopping_criteria = [1e-10]*len(beta_r)

for t in range(num_iters):
    # Compute gradients for Lasso
    grad_Lasso = Lasso_gradient(X_train_s, y_train, beta_gd_lasso, lmbda)
    # Update parameters beta
    beta_gd_lasso = beta_gd_lasso - lr * grad_Lasso
    # Check for convergence
    if (np.abs(- lr * grad_Lasso) < stopping_criteria).all():
        print("Convergence reached at iteration for Lasso", t)
        break

y_gd_lasso = X_test_s @ beta_gd_lasso + y_offset

print(f"Learning rate: {lr}")
print(f"MSE GD Lasso: {MSE(y_test, y_gd_lasso)}")
print(f"R2 GD Lasso: {R2(y_test, y_gd_lasso)}")
print(f"Beta GD Lasso: {beta_gd_lasso}")
print("--------------------------------------------------")

dict_lasso_plain_gd = {'MSE GD Lasso': MSE(y_test, y_gd_lasso),
                       'R2 GD Lasso': R2(y_test, y_gd_lasso),
                       'Beta GD Lasso': beta_gd_lasso,}
with open('lasso_plain_gd_results.json', 'w') as f:
    json.dump(dict_lasso_plain_gd, f, indent=4, default=lambda x: x.tolist() if hasattr(x, 'tolist') else x)

# include lasso using Scikit-Learn
RegLasso = linear_model.Lasso(lmbda,fit_intercept=False, max_iter=10000000)
RegLasso.fit(X_train_s,y_train)
y_lasso_sklearn = RegLasso.predict(X_test_s) + y_offset
mse_lasso_sklearn = MSE(y_test, y_lasso_sklearn)
r2_lasso_sklearn = R2(y_test, y_lasso_sklearn)
print(f"MSE Lasso Scikit-Learn: {mse_lasso_sklearn}")
print(f"R2 Lasso Scikit-Learn: {r2_lasso_sklearn}")
print(f"Beta Lasso Scikit-Learn: {RegLasso.coef_} ")
print("--------------------------------------------------")

dict_lasso_sklearn = {'MSE': mse_lasso_sklearn,
                      'R2': r2_lasso_sklearn,
                      'Beta': RegLasso.coef_,}
with open('lasso_sklearn_results.json', 'w') as f:
    json.dump(dict_lasso_sklearn, f, indent=4, default=lambda x: x.tolist() if hasattr(x, 'tolist') else x)

Learning rate: 0.10232951838712687
MSE GD Lasso: 0.007936074929453201
R2 GD Lasso: 0.8932660121834779
Beta GD Lasso: [ 0.00000000e+00 -1.56368408e-06 -1.21676335e+00 -4.17382287e-04
  1.93548651e+00 -8.38969049e-05 -9.49785778e-01 -9.47570514e-05
 -1.83661618e-04 -2.20410477e-04  4.42187738e-05]
--------------------------------------------------
MSE Lasso Scikit-Learn: 0.009931854710884884
R2 Lasso Scikit-Learn: 0.8665146819730404
Beta Lasso Scikit-Learn: [ 0.00000000e+00 -4.06494043e-04 -1.00759002e+00 -0.00000000e+00
  1.16444503e+00 -0.00000000e+00 -0.00000000e+00 -0.00000000e+00
 -3.81997637e-01 -0.00000000e+00 -0.00000000e+00] 
--------------------------------------------------


In [4]:
#Lasso with GD and momentum

# Initialize weights for gradient descent
beta_gd_lasso = np.zeros(len(beta_r))

# Initialize hyperparameters
lr = 1.0 / np.max(EigValues)
num_iters = 100000000
momentum = 0.3
stopping_criteria = [1e-10]*len(beta_r)
change = 0.0

for t in range(num_iters):
    # Compute gradients for Lasso
    grad_Lasso = Lasso_gradient(X_train_s, y_train, beta_gd_lasso, lmbda)
    # Calculate change with momentum
    new_change = lr * grad_Lasso + momentum * change
    # Update parameters beta
    beta_gd_lasso = beta_gd_lasso - new_change
    # Save change for next iteration
    change = new_change
    # Check for convergence
    if (np.abs(new_change) < stopping_criteria).all():
        print("Convergence reached at iteration for Lasso", t)
        break

y_gd_lasso = X_test_s @ beta_gd_lasso + y_offset

print(f"Learning rate: {lr}")
print(f"MSE GD Lasso: {MSE(y_test, y_gd_lasso)}")
print(f"R2 GD Lasso: {R2(y_test, y_gd_lasso)}")
print(f"Beta GD Lasso: {beta_gd_lasso}")
print("--------------------------------------------------")

dict_lasso_momentum = {'MSE GD Lasso': MSE(y_test, y_gd_lasso),
                       'R2 GD Lasso': R2(y_test, y_gd_lasso),
                       'Beta GD Lasso': beta_gd_lasso,}
with open('lasso_momentum_results.json', 'w') as f:
    json.dump(dict_lasso_momentum, f, indent=4, default=lambda x: x.tolist() if hasattr(x, 'tolist') else x)

Learning rate: 0.10232951838712687
MSE GD Lasso: 0.007936154830549234
R2 GD Lasso: 0.8932649399507046
Beta GD Lasso: [ 0.00000000e+00 -3.17512438e-05 -1.21677082e+00 -4.13853054e-04
  1.93548408e+00 -6.06833232e-05 -9.49796530e-01 -3.77266403e-05
 -1.55312974e-04 -2.82572622e-04  3.51955363e-05]
--------------------------------------------------


In [5]:
#Lasso with GD and ADAgrad

# Initialize weights for gradient descent
beta_gd_lasso = np.zeros(len(beta_r))

# Initialize hyperparameters
lr = 1.0 / np.max(EigValues)
num_iters = 100000000
stopping_criteria = [1e-10]*len(beta_r)
delta = 1e-8
G_iter = 0.0

for t in range(num_iters):
    # Compute gradients for Lasso
    grad_Lasso = Lasso_gradient(X_train_s, y_train, beta_gd_lasso, lmbda)
    G_iter += grad_Lasso*grad_Lasso
    # Update parameters beta
    update = (lr / (np.sqrt(G_iter) + delta)) * grad_Lasso
    beta_gd_lasso = beta_gd_lasso - update
    # Check for convergence
    if (np.abs(update) < stopping_criteria).all():
        print("Convergence reached at iteration for Lasso", t)
        break

y_gd_lasso = X_test_s @ beta_gd_lasso + y_offset

print(f"Learning rate: {lr}")
print(f"MSE GD Lasso: {MSE(y_test, y_gd_lasso)}")
print(f"R2 GD Lasso: {R2(y_test, y_gd_lasso)}")
print(f"Beta GD Lasso: {beta_gd_lasso}")
print("--------------------------------------------------")

dict_lasso_adagrad = {'MSE GD Lasso': MSE(y_test, y_gd_lasso),
                       'R2 GD Lasso': R2(y_test, y_gd_lasso),
                       'Beta GD Lasso': beta_gd_lasso,}
with open('lasso_adagrad_results.json', 'w') as f:
    json.dump(dict_lasso_adagrad, f, indent=4, default=lambda x: x.tolist() if hasattr(x, 'tolist') else x)

Learning rate: 0.10232951838712687
MSE GD Lasso: 0.007935236216839241
R2 GD Lasso: 0.8932771867908823
Beta GD Lasso: [ 0.00000000e+00 -6.55182416e-06 -1.21672494e+00 -5.08337194e-04
  1.93544800e+00 -6.04865978e-05 -9.49708157e-01 -2.98676618e-05
 -6.76531910e-06 -2.29621487e-04 -1.44015892e-06]
--------------------------------------------------


In [10]:
#Lasso with GD and RMSprop

# Initialize weights for gradient descent
beta_gd_lasso = np.zeros(len(beta_r))

# Initialize hyperparameters
lr = 1.0 / np.max(EigValues)
num_iters = 100000000
stopping_criteria = [1e-10]*len(beta_r)
delta = 1e-8
G_iter = np.zeros(len(beta_r))
rho = 0.99

for t in range(num_iters):
    # Compute gradients for Lasso
    grad_Lasso = Lasso_gradient(X_train_s, y_train, beta_gd_lasso, lmbda)
    G_iter = (rho*G_iter + (1-rho)*grad_Lasso*grad_Lasso)
    # Update parameters beta
    update = (lr / (np.sqrt(G_iter) + delta)) * grad_Lasso
    beta_gd_lasso -= update
    # Check for convergence
    if (np.abs(update) < stopping_criteria).all():
        print("Convergence reached at iteration for Lasso", t)
        break

y_gd_lasso = X_test_s @ beta_gd_lasso + y_offset

print(f"Learning rate: {lr}")
print(f"MSE GD Lasso: {MSE(y_test, y_gd_lasso)}")
print(f"R2 GD Lasso: {R2(y_test, y_gd_lasso)}")
print(f"Beta GD Lasso: {beta_gd_lasso}")
print("--------------------------------------------------")

dict_lasso_rmsprop = {'MSE GD Lasso': MSE(y_test, y_gd_lasso),
                       'R2 GD Lasso': R2(y_test, y_gd_lasso),
                       'Beta GD Lasso': beta_gd_lasso,}
with open('lasso_rmsprop_results.json', 'w') as f:
    json.dump(dict_lasso_rmsprop, f, indent=4, default=lambda x: x.tolist() if hasattr(x, 'tolist') else x)

Learning rate: 0.10232951838712687
MSE GD Lasso: 0.10029331082790165
R2 GD Lasso: -0.3480019063569775
Beta GD Lasso: [ 0.00000000e+00 -4.83169925e-02 -1.18184367e+00 -5.74619878e-02
  2.04930952e+00 -7.00480340e-02 -9.44774882e-01 -2.42379608e-04
 -5.28371593e-06 -8.10821084e-02  1.02441702e-01]
--------------------------------------------------


In [7]:
#Lasso with GD and ADAM

# Initialize weights for gradient descent
beta_gd_lasso = np.zeros(len(beta_r))

# Initialize hyperparameters
lr = 1.0 / np.max(EigValues)
num_iters = 100000000
stopping_criteria = [1e-10]*len(beta_r)
delta = 1e-8
rho_1 = 0.9
rho_2 = 0.99
first_moment = 0.0
second_moment = 0.0

for t in range(num_iters):
    t += 1
    # Compute gradients for Lasso
    grad_Lasso = Lasso_gradient(X_train_s, y_train, beta_gd_lasso, lmbda)
    # Computing moments first
    first_moment = rho_1 * first_moment + (1 - rho_1) * grad_Lasso
    second_moment = rho_2 * second_moment + (1 - rho_2) * grad_Lasso * grad_Lasso
    first_term = first_moment / (1 - rho_1**(t))
    second_term = second_moment / (1 - rho_2**(t))
    # Update parameters beta
    update = (lr / (np.sqrt(second_term) + delta)) * first_term
    beta_gd_lasso = beta_gd_lasso - update
    # Check for convergence
    if (np.abs(update) < stopping_criteria).all():
        print("Convergence reached at iteration for Lasso", t)
        break

y_gd_lasso = X_test_s @ beta_gd_lasso + y_offset

print(f"Learning rate: {lr}")
print(f"MSE GD Lasso: {MSE(y_test, y_gd_lasso)}")
print(f"R2 GD Lasso: {R2(y_test, y_gd_lasso)}")
print(f"Beta GD Lasso: {beta_gd_lasso}")
print("--------------------------------------------------")

dict_lasso_adam = {'MSE GD Lasso': MSE(y_test, y_gd_lasso),
                   'R2 GD Lasso': R2(y_test, y_gd_lasso),
                   'Beta GD Lasso': beta_gd_lasso,}
with open('lasso_adam_results.json', 'w') as f:
    json.dump(dict_lasso_adam, f, indent=4, default=lambda x: x.tolist() if hasattr(x, 'tolist') else x)

Learning rate: 0.10232951838712687
MSE GD Lasso: 0.008353793710590302
R2 GD Lasso: 0.8876018234893229
Beta GD Lasso: [ 0.00000000e+00 -4.36256680e-03 -1.21474397e+00 -1.89644900e-03
  1.94133100e+00 -6.04465321e-03 -9.49571294e-01 -2.74912382e-03
 -5.05651743e-03 -3.39683797e-03  8.78749616e-03]
--------------------------------------------------
