**installing and importing useful libraries**

In [5]:
import matplotlib.pyplot as plt
# Exploratory check: print the style-sheet names that this matplotlib installation
# ships, so a valid name can be passed to plt.style.use() in the import cell.
print(plt.style.available)

['Solarize_Light2', '_classic_test_patch', '_mpl-gallery', '_mpl-gallery-nogrid', 'bmh', 'classic', 'dark_background', 'fast', 'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn', 'seaborn-bright', 'seaborn-colorblind', 'seaborn-dark', 'seaborn-dark-palette', 'seaborn-darkgrid', 'seaborn-deep', 'seaborn-muted', 'seaborn-notebook', 'seaborn-paper', 'seaborn-pastel', 'seaborn-poster', 'seaborn-talk', 'seaborn-ticks', 'seaborn-white', 'seaborn-whitegrid', 'tableau-colorblind10']


In [6]:
# Standard libraries
import os
import sys
import math
import random
import datetime

# Numerical computing
import numpy as np

# Visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-<name>' and later
# releases dropped the old names, so pick whichever spelling this installation provides.
plt.style.use(
    'seaborn-v0_8-whitegrid'
    if 'seaborn-v0_8-whitegrid' in plt.style.available
    else 'seaborn-whitegrid'
)
# Applied after the matplotlib style, so seaborn's default theme settings take precedence
# wherever the two overlap.
sns.set_theme()

#importing optimization techniques
from implementations import *

**importing data**

In [None]:
# TODO: load x_train and y_train from the data-processing section.
# IMPORTANT: both must already be preprocessed (outliers cleaned), and the labels in
# y_train must be remapped from {-1, 1} to {0, 1}.


**Hyperparameter tuning for regularized logistic regression, trained with gradient descent (GD) and with Adam, using k-fold cross-validation**

In [None]:
def make_balanced_subset(x_train_filtered, y_train, majority_class=-1, minority_class=1,
                         seed_major=0, seed_minor=42, seed_shuffle=7):
    """Build a class-balanced subset of the data by undersampling.

    The same number of rows is drawn, without replacement, from the rows labelled
    ``majority_class`` and from the rows labelled ``minority_class``; the combined
    selection is then shuffled. Each of the three random steps uses its own seeded
    ``np.random.RandomState``, so the result is reproducible for fixed seeds.

    Parameters
    ----------
    x_train_filtered : np.ndarray
        Feature rows, indexed along the first axis with the selected row indices.
    y_train : np.ndarray
        Labels, compared element-wise with ``majority_class`` / ``minority_class``.
    majority_class, minority_class : scalar
        Label values identifying the two classes.
    seed_major, seed_minor, seed_shuffle : int
        Seeds for sampling the majority rows, sampling the minority rows, and
        shuffling the combined indices.

    Returns
    -------
    x_bal, y_bal : np.ndarray
        ``x_train_filtered`` and ``y_train`` restricted to the selected rows.
    balanced_idx : np.ndarray
        The selected row indices, in shuffled order.
    """
    # Row indices of each class
    maj_idx = np.nonzero(y_train == majority_class)[0]
    min_idx = np.nonzero(y_train == minority_class)[0]

    # Per-class sample size is the size of the smaller class. When the "majority" class
    # really is the larger one this equals len(min_idx); taking the minimum keeps
    # choice(..., replace=False) from raising if the two classes are passed the other
    # way round or the data happens to be skewed in the opposite direction.
    n = min(len(maj_idx), len(min_idx))

    # Sample without replacement, one independent generator per class
    rs_maj = np.random.RandomState(seed_major)
    rs_min = np.random.RandomState(seed_minor)
    sampled_maj = rs_maj.choice(maj_idx, size=n, replace=False)
    sampled_min = rs_min.choice(min_idx, size=n, replace=False)

    # Combine and shuffle so the two classes are interleaved
    balanced_idx = np.concatenate([sampled_maj, sampled_min])
    rs_shuf = np.random.RandomState(seed_shuffle)
    rs_shuf.shuffle(balanced_idx)

    # Slice arrays
    x_bal = x_train_filtered[balanced_idx]
    y_bal = y_train[balanced_idx]
    return x_bal, y_bal, balanced_idx

# Build the undersampled subset with an equal number of rows from each class.
# NOTE(review): `x_train_filtered` is not defined in any cell visible in this notebook
# section (the data cell mentions only x_train / y_train) — confirm it exists before this
# cell runs on a fresh kernel.
# NOTE(review): the classes are selected here by the labels -1 / 1, while the data cell
# asks for y_train in {0, 1}; if y_train has already been remapped, no row matches
# majority_class=-1 — confirm the intended order of these steps.
# NOTE(review): the cross-validation cells below read x_train / y_train, not the balanced
# arrays produced here — confirm which data set the tuning is meant to use.
x_train_balanced, y_train_balanced, balanced_indices = make_balanced_subset(
    x_train_filtered, y_train,
    majority_class=-1, minority_class=1,
    seed_major=0, seed_minor=42, seed_shuffle=7
)

# Quick sanity check (uncomment to display): x_train_balanced.shape, y_train_balanced.shape


In [None]:
#### Hyperparameter tuning: regularized logistic regression with GD
# For every candidate lambda: fit on k-1 contiguous folds, score on the held-out fold,
# and average the metrics over the folds.

param_grid = np.array([0.001, 0.01, 0.1, 1, 10, 100, 1000])  # candidate lambdas
N = len(y_train)
d = x_train.shape[1]
k = 10  # folds
dim = int(np.ceil(N / k))  # length of a held-out fold (the last one may be shorter)
initial_w = np.zeros(d, )
max_iters = 10000
gamma = 0.01

# Contiguous, unshuffled folds as half-open row ranges [start, stop). Because `dim` is
# rounded up, trailing folds can be empty when N is small relative to k; those are dropped
# and the averages below divide by the number of folds actually evaluated.
fold_bounds = [(i * dim, min((i + 1) * dim, N)) for i in range(k)]
fold_bounds = [(start, stop) for start, stop in fold_bounds if start < stop]
n_folds = len(fold_bounds)
if n_folds == 0:
    raise ValueError("cannot cross-validate: y_train is empty")

#### metrics, one entry per lambda (mean over folds)
logistic_loss = np.zeros(len(param_grid),)  # loss returned by the fit (training folds)
MSE = np.zeros(len(param_grid),)            # MSE on the training folds
AUC = np.zeros(len(param_grid),)            # AUC on the held-out fold

####  k-fold cross validation
for p, lam in enumerate(param_grid):
    fold_loss = 0
    fold_MSE = 0
    fold_AUC = 0

    for start, stop in fold_bounds:
        # held-out fold; slicing only the first axis works for 1-D and 2-D y_train
        # (the former `y_train[a:b, :]` raises IndexError on a 1-D label vector)
        x_test_i = x_train[start:stop]
        y_test_i = y_train[start:stop]

        # training folds: everything outside [start, stop)
        x_train_i = np.concatenate([x_train[:start], x_train[stop:]], axis=0)
        y_train_i = np.concatenate([y_train[:start], y_train[stop:]], axis=0)

        # train with the current lambda; a fresh copy of initial_w guarantees that every
        # fit starts from zeros even if the optimizer updates its weights in place
        w, loss = reg_logistic_regression(y_train_i, x_train_i, lam, initial_w.copy(), max_iters, gamma)

        # accumulate fold results
        fold_loss += loss
        # train error — open question from the author: see if it is better to use
        # np.mean((y_train_i - y_pred_train)**2)
        fold_MSE += compute_mse_loss(y_train_i, x_train_i, w)
        predictions = sigmoid(x_test_i @ w)
        fold_AUC += compute_auc(y_test_i, predictions)

    #### mean of results
    logistic_loss[p] = fold_loss / n_folds
    MSE[p] = fold_MSE / n_folds
    AUC[p] = fold_AUC / n_folds

# Keep the lambda with the highest mean held-out AUC (the only metric above that is
# computed on data the model was not fitted on). The model-comparison cell reads
# `lambda_gd`.
lambda_gd = param_grid[np.argmax(AUC)]
print("Best lambda for GD (highest mean validation AUC):", lambda_gd)


#### Plotting results per lambda

num_par = len(param_grid)
bar_width = 0.3  # own name, so the fitted weights `w` are not overwritten
pos = np.arange(num_par)
plt.bar(pos - bar_width, AUC, width=bar_width, label='AUC (validation)')
plt.bar(pos, MSE, width=bar_width, label='MSE (train)')
plt.bar(pos + bar_width, logistic_loss, width=bar_width, label='logistic loss (train)')

plt.xticks(pos, param_grid)
plt.xlabel('Different regularization hyperparameter values')
plt.title('Finding the best regularization hyperparamter - GD case')
plt.legend()

plt.show()
    
     





In [None]:

#### Hyperparameter tuning: regularized logistic regression with Adam
# For every candidate lambda: fit on k-1 contiguous folds, score on the held-out fold,
# and average the metrics over the folds.

param_grid = np.array([0.001, 0.01, 0.1, 1, 10, 100, 1000])  # candidate lambdas
N = len(y_train)
d = x_train.shape[1]
k = 10  # folds
dim = int(np.ceil(N / k))  # length of a held-out fold (the last one may be shorter)
initial_w = np.zeros(d, )
max_iters = 10000
gamma = 0.01
beta_1 = 0.9 #using Adam paper as benchmark
beta_2 = 0.999 #using Adam paper as benchmark
mini_batch_size = 700 #using Adam paper as benchmark (NOTE(review): confirm this value)

# Contiguous, unshuffled folds as half-open row ranges [start, stop). Because `dim` is
# rounded up, trailing folds can be empty when N is small relative to k; those are dropped
# and the averages below divide by the number of folds actually evaluated.
fold_bounds = [(i * dim, min((i + 1) * dim, N)) for i in range(k)]
fold_bounds = [(start, stop) for start, stop in fold_bounds if start < stop]
n_folds = len(fold_bounds)
if n_folds == 0:
    raise ValueError("cannot cross-validate: y_train is empty")

#### metrics, one entry per lambda (mean over folds)
logistic_loss = np.zeros(len(param_grid),)  # loss returned by the fit (training folds)
MSE = np.zeros(len(param_grid),)            # MSE on the training folds
AUC = np.zeros(len(param_grid),)            # AUC on the held-out fold

####  k-fold cross validation
for p, lam in enumerate(param_grid):
    fold_loss = 0
    fold_MSE = 0
    fold_AUC = 0

    for start, stop in fold_bounds:
        # held-out fold; slicing only the first axis works for 1-D and 2-D y_train
        # (the former `y_train[a:b, :]` raises IndexError on a 1-D label vector)
        x_test_i = x_train[start:stop]
        y_test_i = y_train[start:stop]

        # training folds: everything outside [start, stop)
        x_train_i = np.concatenate([x_train[:start], x_train[stop:]], axis=0)
        y_train_i = np.concatenate([y_train[:start], y_train[stop:]], axis=0)

        # train with the current lambda; a fresh copy of initial_w guarantees that every
        # fit starts from zeros even if the optimizer updates its weights in place
        w, loss = reg_logistic_regression_adam(
            y_train_i, x_train_i, lam, initial_w.copy(), max_iters,
            beta_1, beta_2, gamma, mini_batch_size,
        )

        # accumulate fold results
        fold_loss += loss
        # train error — open question from the author: see if it is better to use
        # np.mean((y_train_i - y_pred_train)**2)
        fold_MSE += compute_mse_loss(y_train_i, x_train_i, w)
        predictions = sigmoid(x_test_i @ w)
        fold_AUC += compute_auc(y_test_i, predictions)

    #### mean of results
    logistic_loss[p] = fold_loss / n_folds
    MSE[p] = fold_MSE / n_folds
    AUC[p] = fold_AUC / n_folds

# Keep the lambda with the highest mean held-out AUC (the only metric above that is
# computed on data the model was not fitted on). The model-comparison cell reads
# `lambda_adam`.
lambda_adam = param_grid[np.argmax(AUC)]
print("Best lambda for Adam (highest mean validation AUC):", lambda_adam)


#### Plotting results per lambda

num_par = len(param_grid)
bar_width = 0.3  # own name, so the fitted weights `w` are not overwritten
pos = np.arange(num_par)
plt.bar(pos - bar_width, AUC, width=bar_width, label='AUC (validation)')
plt.bar(pos, MSE, width=bar_width, label='MSE (train)')
plt.bar(pos + bar_width, logistic_loss, width=bar_width, label='logistic loss (train)')

plt.xticks(pos, param_grid)
plt.xlabel('Different regularization hyperparameter values')
plt.title('Finding the best regularization hyperparamter - Adam case')
plt.legend()

plt.show()
    
     






AUC calcolata manualmente: 0.64
AUC con scikit-learn: 0.64
Differenza: 0.0


Now that we have found the best regularization hyperparameter for regularized logistic regression trained with Adam and with GD, let's use k-fold cross-validation to compare all the models and see which one makes the best predictions.

In [None]:
##### COMPARING ALL THE MODELS WITH K-FOLD CROSS VALIDATION
# Every model is fitted on k-1 contiguous folds and scored on the held-out fold; the
# metrics are averaged over the folds.

N = len(y_train)
d = x_train.shape[1]
k = 10  # folds
dim = int(np.ceil(N / k))  # length of a held-out fold (the last one may be shorter)
initial_w = np.zeros(d, )
max_iters = 10000
gamma = 0.01
beta_1 = 0.9 #using Adam paper as benchmark
beta_2 = 0.999 #using Adam paper as benchmark
mini_batch_size = 700 #using Adam paper as benchmark (NOTE(review): confirm this value)

# `lambda_gd` and `lambda_adam` are not assigned in this cell: they must already hold the
# regularization strengths chosen from the two tuning runs when this cell is executed.
# TODO(review): the ridge lambda has not been tuned anywhere visible in this notebook
# (original note: "understand which lambda here"); 0.1 is an untuned placeholder so the
# cell can run — replace it with a tuned value before trusting the ridge result.
lambda_ridge = 0.1

# Each entry is (display name, fit function). A fit function takes the training labels and
# features of one fold and returns (w, loss). A fresh copy of initial_w is passed on every
# call so each fit starts from zeros even if an optimizer updates its weights in place.
models = [
    ("MSE GD", lambda y, tx: mean_squared_error_gd(y, tx, initial_w=initial_w.copy(), max_iters=max_iters, gamma=gamma)),
    ("MSE SGD", lambda y, tx: mean_squared_error_sgd(y, tx, initial_w=initial_w.copy(), max_iters=max_iters, gamma=gamma, mini_batch_size=mini_batch_size)),
    ("Least Squares", lambda y, tx: least_squares(y, tx)),
    ("Ridge Regression", lambda y, tx: ridge_regression(y, tx, lambda_ridge)),
    ("Logistic Regression GD", lambda y, tx: logistic_regression(y, tx, initial_w=initial_w.copy(), max_iters=max_iters, gamma=gamma)),
    ("Reg Logistic ADAM", lambda y, tx: reg_logistic_regression_adam(y, tx, lambda_adam, initial_w=initial_w.copy(), max_iters=max_iters, beta_1=beta_1, beta_2=beta_2, gamma=gamma, mini_batch_size=mini_batch_size)),
    ("Reg Logistic GD", lambda y, tx: reg_logistic_regression(y, tx, lambda_gd, initial_w=initial_w.copy(), max_iters=max_iters, gamma=gamma)),
]  ### open question from the author: does it make sense to include all the models?

# Contiguous, unshuffled folds as half-open row ranges [start, stop). Because `dim` is
# rounded up, trailing folds can be empty when N is small relative to k; those are dropped
# and the averages below divide by the number of folds actually evaluated.
fold_bounds = [(i * dim, min((i + 1) * dim, N)) for i in range(k)]
fold_bounds = [(start, stop) for start, stop in fold_bounds if start < stop]
n_folds = len(fold_bounds)
if n_folds == 0:
    raise ValueError("cannot cross-validate: y_train is empty")

#### metrics, one entry per model (mean over folds)
logistic_loss = np.zeros(len(models),)  # loss returned by each model's fit (training folds)
MSE = np.zeros(len(models),)            # MSE on the training folds
AUC = np.zeros(len(models),)            # AUC on the held-out fold

####  k-fold cross validation
for p, (name, model_fn) in enumerate(models):
    fold_loss = 0
    fold_MSE = 0
    fold_AUC = 0

    for start, stop in fold_bounds:
        # held-out fold; slicing only the first axis works for 1-D and 2-D y_train
        # (the former `y_train[a:b, :]` raises IndexError on a 1-D label vector)
        x_test_i = x_train[start:stop]
        y_test_i = y_train[start:stop]

        # training folds: everything outside [start, stop)
        x_train_i = np.concatenate([x_train[:start], x_train[stop:]], axis=0)
        y_train_i = np.concatenate([y_train[:start], y_train[stop:]], axis=0)

        # train the model on this fold
        w, loss = model_fn(y_train_i, x_train_i)

        # accumulate fold results
        fold_loss += loss
        # train error — open question from the author: see if it is better to use
        # np.mean((y_train_i - y_pred_train)**2)
        fold_MSE += compute_mse_loss(y_train_i, x_train_i, w)
        # scores go through sigmoid for every model, including the least-squares ones
        # (sigmoid is monotonic; assumes compute_auc depends only on the score ranking)
        predictions = sigmoid(x_test_i @ w)
        fold_AUC += compute_auc(y_test_i, predictions)

    #### mean of results
    logistic_loss[p] = fold_loss / n_folds
    MSE[p] = fold_MSE / n_folds
    AUC[p] = fold_AUC / n_folds


#### Plotting results per model
# TODO(review): the original cell carried a "FIX HERE" marker on this section without
# saying what was wrong — confirm the figure is as intended.

num_models = len(models)
bar_width = 0.3  # own name, so the fitted weights `w` are not overwritten
pos = np.arange(num_models)
labels = [name for name, _ in models]

plt.bar(pos - bar_width, AUC, width=bar_width, label='AUC (validation)')
plt.bar(pos, MSE, width=bar_width, label='MSE (train)')
# the loss each fit returns is that model's own objective, so it is not a logistic loss
# for the least-squares models
plt.bar(pos + bar_width, logistic_loss, width=bar_width, label='train loss (returned by model)')

plt.xticks(pos, labels, rotation=45)
plt.xlabel('Different models')
plt.title('Finding the best model')
plt.legend()
plt.show()

     




