# Kodekladd

According to Week 36:

OLS:
$$
\nabla_{\theta} C(\theta) = \frac{2}{n}X^T(X\theta - \mathbf{y})
$$

Ridge:
$$
\frac{\partial C(\boldsymbol{X},\boldsymbol{\theta})}{\partial \boldsymbol{\theta}}=-\frac{2}{n}\boldsymbol{X}^T(\boldsymbol{y}-\boldsymbol{X}\boldsymbol{\theta})+2\lambda \theta
$$

Lasso:
$$
\frac{\partial C(\boldsymbol{X},\boldsymbol{\theta})}{\partial \boldsymbol{\theta}}=-\frac{2}{n}\boldsymbol{X}^T(\boldsymbol{y}-\boldsymbol{X}\boldsymbol{\theta})+\lambda sgn(\boldsymbol{\theta})
$$

In [1]:
# Imports
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from functions import runge, MSE, R2, Ridge_parameters, OLS_parameters
from functions import Lasso_gradient, OLS_gradient, Ridge_gradient, polynomial_features
from sklearn import linear_model
import json

In [None]:
#Lasso with GD

# Data generation
n = 1000
np.random.seed(42)
x = np.linspace(-1,1, n)
y = runge(x) + 0.1*np.random.normal(0,1)

# Split into training and test sets, scale data and create polynomial features
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = polynomial_features(x_train, 10)
X_test = polynomial_features(x_test, 10)
scaler.fit(X_train)
X_train_s = scaler.transform(X_train)
X_test_s = scaler.transform(X_test)
y_offset = np.mean(y_train)

lmbda = 0.001

# Calculate parameters using OLS and Ridge closed form solutions
beta_r = Ridge_parameters(X_train_s, y_train, lmbda)
beta_o = OLS_parameters(X_train_s, y_train)

# Initialize weights for gradient descent
beta_gd_lasso = np.zeros(len(beta_r))

# Hessian matrix
H = (2.0/n)* X_train_s.T @ X_train_s
EigValues, EigVectors = np.linalg.eig(H)
print(f"Eigenvalues of Hessian Matrix:{EigValues}")

# Initialize hyperparameters
lr = 1.0 / np.max(EigValues)
num_iters = 100000000
stopping_criteria = [1e-10]*len(beta_r)

for t in range(num_iters):
    # Compute gradients for Lasso
    grad_Lasso = Lasso_gradient(X_train_s, y_train, beta_gd_lasso, lmbda)
    # Update parameters beta
    beta_gd_lasso = beta_gd_lasso - lr * grad_Lasso
    # Check for convergence
    if (np.abs(- lr * grad_Lasso) < stopping_criteria).all():
        print("Convergence reached at iteration for Lasso", t)
        break

y_gd_lasso = X_test_s @ beta_gd_lasso + y_offset

print(f"Learning rate: {lr}")
print(f"MSE GD Lasso: {MSE(y_test, y_gd_lasso)}")
print(f"R2 GD Lasso: {R2(y_test, y_gd_lasso)}")
print(f"Beta GD Lasso: {beta_gd_lasso}")
print("--------------------------------------------------")

dict_lasso_plain_gd = {'MSE GD Lasso': MSE(y_test, y_gd_lasso),
                       'R2 GD Lasso': R2(y_test, y_gd_lasso),
                       'Beta GD Lasso': beta_gd_lasso,}
with open('lasso_plain_gd_results.json', 'w') as f:
    json.dump(dict_lasso_plain_gd, f, indent=4, default=lambda x: x.tolist() if hasattr(x, 'tolist') else x)

# include lasso using Scikit-Learn
RegLasso = linear_model.Lasso(lmbda,fit_intercept=False, max_iter=10000000)
RegLasso.fit(X_train_s,y_train)
y_lasso_sklearn = RegLasso.predict(X_test_s) + y_offset
mse_lasso_sklearn = MSE(y_test, y_lasso_sklearn)
r2_lasso_sklearn = R2(y_test, y_lasso_sklearn)
print(f"MSE Lasso Scikit-Learn: {mse_lasso_sklearn}")
print(f"R2 Lasso Scikit-Learn: {r2_lasso_sklearn}")
print(f"Beta Lasso Scikit-Learn: {RegLasso.coef_} ")
print("--------------------------------------------------")

dict_lasso_sklearn = {'MSE': mse_lasso_sklearn,
                      'R2': r2_lasso_sklearn,
                      'Beta': RegLasso.coef_,}
with open('lasso_sklearn_results.json', 'w') as f:
    json.dump(dict_lasso_sklearn, f, indent=4, default=lambda x: x.tolist() if hasattr(x, 'tolist') else x)

Eigenvalues of Hessian Matrix:[7.81788102e+00 7.04451821e+00 6.43820952e-01 4.32618809e-01
 4.14213818e-02 1.79862028e-02 1.31440644e-03 4.18975683e-04
 4.13804044e-06 1.58989315e-05 0.00000000e+00]


In [2]:
#Lasso with GD and momentum

# Data generation
n = 1000
np.random.seed(42)
x = np.linspace(-1,1, n)
y = runge(x) + 0.1*np.random.normal(0,1)

# Split into training and test sets, scale data and create polynomial features
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = polynomial_features(x_train, 10)
X_test = polynomial_features(x_test, 10)
scaler.fit(X_train)
X_train_s = scaler.transform(X_train)
X_test_s = scaler.transform(X_test)
y_offset = np.mean(y_train)

lmbda = 0.001

# Calculate parameters using OLS and Ridge closed form solutions
beta_r = Ridge_parameters(X_train_s, y_train, lmbda)
beta_o = OLS_parameters(X_train_s, y_train)

# Initialize weights for gradient descent
beta_gd_lasso = np.zeros(len(beta_r))

# Hessian matrix
H = (2.0/n)* X_train_s.T @ X_train_s 
EigValues, EigVectors = np.linalg.eig(H)
print(f"Eigenvalues of Hessian Matrix:{EigValues}")

# Initialize hyperparameters
lr = 1.0 / np.max(EigValues)
num_iters = 100000000
momentum = 0.3
stopping_criteria = [1e-10]*len(beta_r)
change = 0.0

for t in range(num_iters):
    # Compute gradients for Lasso
    grad_Lasso = Lasso_gradient(X_train_s, y_train, beta_gd_lasso, lmbda)
    # Calculate change with momentum
    new_change = lr * grad_Lasso + momentum * change
    # Update parameters beta
    beta_gd_lasso = beta_gd_lasso - new_change
    # Save change for next iteration
    change = new_change
    # Check for convergence
    if (np.abs(- lr * grad_Lasso) < stopping_criteria).all():
        print("Convergence reached at iteration for Lasso", t)
        break

y_gd_lasso = X_test_s @ beta_gd_lasso + y_offset

print(f"Learning rate: {lr}")
print(f"MSE GD Lasso: {MSE(y_test, y_gd_lasso)}")
print(f"R2 GD Lasso: {R2(y_test, y_gd_lasso)}")
print(f"Beta GD Lasso: {beta_gd_lasso}")
print("--------------------------------------------------")

dict_lasso_momentum = {'MSE GD Lasso': MSE(y_test, y_gd_lasso),
                       'R2 GD Lasso': R2(y_test, y_gd_lasso),
                       'Beta GD Lasso': beta_gd_lasso,}
with open('lasso_momentum_results.json', 'w') as f:
    json.dump(dict_lasso_momentum, f, indent=4, default=lambda x: x.tolist() if hasattr(x, 'tolist') else x)

Eigenvalues of Hessian Matrix:[7.81788102e+00 7.04451821e+00 6.43820952e-01 4.32618809e-01
 4.14213818e-02 1.79862028e-02 1.31440644e-03 4.18975683e-04
 4.13804044e-06 1.58989315e-05 0.00000000e+00]
Learning rate: 0.12791189798390853
MSE GD Lasso: 0.007934224673617768
R2 GD Lasso: 0.8932906495136685
Beta GD Lasso: [ 0.00000000e+00 -7.57911899e-06 -1.21670308e+00 -3.68643214e-04
  1.93558629e+00 -1.05223176e-04 -9.49707198e-01 -9.74198273e-05
 -1.44871451e-04 -2.46502864e-04  4.15480986e-05]
--------------------------------------------------


In [3]:
#Lasso with GD and ADAgrad

# Data generation
n = 1000
np.random.seed(42)
x = np.linspace(-1,1, n)
y = runge(x) + 0.1*np.random.normal(0,1)

# Split into training and test sets, scale data and create polynomial features
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = polynomial_features(x_train, 10)
X_test = polynomial_features(x_test, 10)
scaler.fit(X_train)
X_train_s = scaler.transform(X_train)
X_test_s = scaler.transform(X_test)
y_offset = np.mean(y_train)

lmbda = 0.001

# Calculate parameters using OLS and Ridge closed form solutions
beta_r = Ridge_parameters(X_train_s, y_train, lmbda)
beta_o = OLS_parameters(X_train_s, y_train)

# Initialize weights for gradient descent
beta_gd_lasso = np.zeros(len(beta_r))

# Hessian matrix
H = (2.0/n)* X_train_s.T @ X_train_s 
EigValues, EigVectors = np.linalg.eig(H)
print(f"Eigenvalues of Hessian Matrix:{EigValues}")

# Initialize hyperparameters
lr = 1.0 / np.max(EigValues)
num_iters = 100000000
stopping_criteria = [1e-10]*len(beta_r)
delta = 1e-8
G_iter = 0.0

for t in range(num_iters):
    # Compute gradients for Lasso
    grad_Lasso = Lasso_gradient(X_train_s, y_train, beta_gd_lasso, lmbda)
    G_iter += grad_Lasso*grad_Lasso
    # Update parameters beta
    beta_gd_lasso = beta_gd_lasso - (lr / (np.sqrt(G_iter) + delta)) * grad_Lasso
    # Check for convergence
    if (np.abs(- lr * grad_Lasso) < stopping_criteria).all():
        print("Convergence reached at iteration for Lasso", t)
        break

y_gd_lasso = X_test_s @ beta_gd_lasso + y_offset

print(f"Learning rate: {lr}")
print(f"MSE GD Lasso: {MSE(y_test, y_gd_lasso)}")
print(f"R2 GD Lasso: {R2(y_test, y_gd_lasso)}")
print(f"Beta GD Lasso: {beta_gd_lasso}")
print("--------------------------------------------------")

dict_lasso_adagrad = {'MSE GD Lasso': MSE(y_test, y_gd_lasso),
                       'R2 GD Lasso': R2(y_test, y_gd_lasso),
                       'Beta GD Lasso': beta_gd_lasso,}
with open('lasso_adagrad_results.json', 'w') as f:
    json.dump(dict_lasso_adagrad, f, indent=4, default=lambda x: x.tolist() if hasattr(x, 'tolist') else x)

Eigenvalues of Hessian Matrix:[7.81788102e+00 7.04451821e+00 6.43820952e-01 4.32618809e-01
 4.14213818e-02 1.79862028e-02 1.31440644e-03 4.18975683e-04
 4.13804044e-06 1.58989315e-05 0.00000000e+00]
Learning rate: 0.12791189798390853
MSE GD Lasso: 0.007935177537664118
R2 GD Lasso: 0.8932779672450609
Beta GD Lasso: [ 0.00000000e+00  5.86753916e-07 -1.21672941e+00 -4.89667917e-04
  1.93544673e+00 -7.92736280e-05 -9.49713742e-01 -4.89517761e-05
 -1.24026829e-05 -2.14103633e-04  1.98979754e-05]
--------------------------------------------------


In [None]:
#Lasso with GD and RMSprop

# Data generation
n = 1000
np.random.seed(42)
x = np.linspace(-1,1, n)
y = runge(x) + 0.1*np.random.normal(0,1)

# Split into training and test sets, scale data and create polynomial features
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = polynomial_features(x_train, 10)
X_test = polynomial_features(x_test, 10)
scaler.fit(X_train)
X_train_s = scaler.transform(X_train)
X_test_s = scaler.transform(X_test)
y_offset = np.mean(y_train)

lmbda = 0.001

# Calculate parameters using OLS and Ridge closed form solutions
beta_r = Ridge_parameters(X_train_s, y_train, lmbda)
beta_o = OLS_parameters(X_train_s, y_train)

# Initialize weights for gradient descent
beta_gd_lasso = np.zeros(len(beta_r))

# Hessian matrix
H = (2.0/n)* X_train_s.T @ X_train_s 
EigValues, EigVectors = np.linalg.eig(H)
print(f"Eigenvalues of Hessian Matrix:{EigValues}")

# Initialize hyperparameters
lr = 1.0 / np.max(EigValues)
num_iters = 100000000
stopping_criteria = [1e-10]*len(beta_r)
delta = 1e-8
G_iter = 0.0
rho = 0.99

for t in range(num_iters):
    # Compute gradients for Lasso
    grad_Lasso = Lasso_gradient(X_train_s, y_train, beta_gd_lasso, lmbda)
    G_iter = (rho*G_iter + (1-rho)*grad_Lasso*grad_Lasso)
    # Update parameters beta
    beta_gd_lasso = beta_gd_lasso - (lr / (np.sqrt(G_iter) + delta)) * grad_Lasso
    # Check for convergence
    if (np.abs(- lr * grad_Lasso) < stopping_criteria).all():
        print("Convergence reached at iteration for Lasso", t)
        break

y_gd_lasso = X_test_s @ beta_gd_lasso + y_offset

print(f"Learning rate: {lr}")
print(f"MSE GD Lasso: {MSE(y_test, y_gd_lasso)}")
print(f"R2 GD Lasso: {R2(y_test, y_gd_lasso)}")
print(f"Beta GD Lasso: {beta_gd_lasso}")
print("--------------------------------------------------")

dict_lasso_rmsprop = {'MSE GD Lasso': MSE(y_test, y_gd_lasso),
                       'R2 GD Lasso': R2(y_test, y_gd_lasso),
                       'Beta GD Lasso': beta_gd_lasso,}
with open('lasso_rmsprop_results.json', 'w') as f:
    json.dump(dict_lasso_rmsprop, f, indent=4, default=lambda x: x.tolist() if hasattr(x, 'tolist') else x)

Eigenvalues of Hessian Matrix:[7.81788102e+00 7.04451821e+00 6.43820952e-01 4.32618809e-01
 4.14213818e-02 1.79862028e-02 1.31440644e-03 4.18975683e-04
 4.13804044e-06 1.58989315e-05 0.00000000e+00]


In [None]:
#Lasso with GD and ADAM

# Data generation
n = 1000
np.random.seed(42)
x = np.linspace(-1,1, n)
y = runge(x) + 0.1*np.random.normal(0,1)

# Split into training and test sets, scale data and create polynomial features
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = polynomial_features(x_train, 10)
X_test = polynomial_features(x_test, 10)
scaler.fit(X_train)
X_train_s = scaler.transform(X_train)
X_test_s = scaler.transform(X_test)
y_offset = np.mean(y_train)

lmbda = 0.001

# Calculate parameters using OLS and Ridge closed form solutions
beta_r = Ridge_parameters(X_train_s, y_train, lmbda)
beta_o = OLS_parameters(X_train_s, y_train)

# Initialize weights for gradient descent
beta_gd_lasso = np.zeros(len(beta_r))

# Hessian matrix
H = (2.0/n)* X_train_s.T @ X_train_s 
EigValues, EigVectors = np.linalg.eig(H)
print(f"Eigenvalues of Hessian Matrix:{EigValues}")

# Initialize hyperparameters
lr = 1.0 / np.max(EigValues)
num_iters = 100000000
stopping_criteria = [1e-10]*len(beta_r)
delta = 1e-8
rho_1 = 0.9
rho_2 = 0.999
first_moment = 0.0
second_moment = 0.0

for t in range(num_iters):
    t += 1
    # Compute gradients for Lasso
    grad_Lasso = Lasso_gradient(X_train_s, y_train, beta_gd_lasso, lmbda)
    # Computing moments first
    first_moment = rho_1 * first_moment + (1 - rho_1) * grad_Lasso
    second_moment = rho_2 * second_moment + (1 - rho_2) * grad_Lasso * grad_Lasso
    first_term = first_moment / (1 - rho_1**(t+1))
    second_term = second_moment / (1 - rho_2**(t+1))
    # Update parameters beta
    beta_gd_lasso = beta_gd_lasso - (lr / (np.sqrt(second_term) + delta)) * first_term
    # Check for convergence
    if (np.abs(- lr * grad_Lasso) < stopping_criteria).all():
        print("Convergence reached at iteration for Lasso", t)
        break

y_gd_lasso = X_test_s @ beta_gd_lasso + y_offset

print(f"Learning rate: {lr}")
print(f"MSE GD Lasso: {MSE(y_test, y_gd_lasso)}")
print(f"R2 GD Lasso: {R2(y_test, y_gd_lasso)}")
print(f"Beta GD Lasso: {beta_gd_lasso}")
print("--------------------------------------------------")

dict_lasso_adam = {'MSE GD Lasso': MSE(y_test, y_gd_lasso),
                   'R2 GD Lasso': R2(y_test, y_gd_lasso),
                   'Beta GD Lasso': beta_gd_lasso,}
with open('lasso_adam_results.json', 'w') as f:
    json.dump(dict_lasso_adam, f, indent=4, default=lambda x: x.tolist() if hasattr(x, 'tolist') else x)

NameError: name 'np' is not defined