In [1]:
# STEP 1: IMPORT FUNZIONI DAL FILE .py E RELOAD
import importlib
import Functions_22_Avino_Lombardi
importlib.reload(Functions_22_Avino_Lombardi)
from Functions_22_Avino_Lombardi import *

# STEP 2: CARICAMENTO DATI E PREPROCESSING
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load dataset
df = pd.read_csv("../dataset/GENDER_CLASSIFICATION.csv")

# X: features, y: labels
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values
y = np.where(y == 0, -1, 1)  # Converti etichette in {-1, +1}

# Suddivisione in training e test
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=123
)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# STEP 3: DEFINIZIONE DELLA GRIGLIA DEGLI IPERPARAMETRI
grid = {
    "C": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    "p": [2, 3, 4, 5]  # Se usi RBF, sostituisci "p" con "gamma"
}

# STEP 4: CROSS-VALIDATION PER SELEZIONARE I MIGLIORI IPERPARAMETRI
params_comb, val_acc = crossval_svm(
    X=X_train,
    y=y_train,
    k=5,
    grid=grid,
    kernel_func=polynomial_kernel,
    kernel_param_name="p",
    use_seed=True
)

# Selezione dei migliori parametri
best_idx = int(np.argmax(val_acc))
print(val_acc)
print(params_comb)
print(val_acc[best_idx])
print(max(val_acc))
best_params = params_comb[best_idx]
print(best_params)
best_C = best_params["C"]
best_p = best_params["p"]
print(f"Best hyperparameters found: C = {best_C}, p = {best_p}")

# STEP 5: TRAINING COMPLETO CON I MIGLIORI PARAMETRI
K_final = compute_kernel_matrix(X_train, y_train, polynomial_kernel, p=best_p)
lambdas, num_iter, cpu_time, _ = solve_dual_svm(K_final, y_train, best_C)
dual_obj_value = compute_dual_objective(lambdas, K_final)
b_final = compute_b(X_train, y_train, lambdas, polynomial_kernel, C=best_C, p=best_p)

print(f"🔹 Final value of dual objective: {dual_obj_value:.4f}")
print(f"🔹 Iterations: {num_iter}")
print(f"🔹 CPU time: {cpu_time:.4f} seconds")

# STEP 6: PREDIZIONI E ACCURACY
y_train_pred = predict(X_train, y_train, lambdas, b_final, X_train, polynomial_kernel, p=best_p)
y_test_pred = predict(X_train, y_train, lambdas, b_final, X_test, polynomial_kernel, p=best_p)

train_acc = compute_accuracy(y_train, y_train_pred)
test_acc = compute_accuracy(y_test, y_test_pred)

# STEP 7: REPORT FINALE
summary = pd.DataFrame([{
    "KERNEL": "polynomial",
    "C": best_C,
    "p (or lambda)": best_p,
    "TRAIN ACCURACY": round(train_acc, 4),
    "TEST ACCURACY": round(test_acc, 4),
    "NUMBER OF ITERATIONS": num_iter,
    "CPU TIME (s)": round(cpu_time, 4),
    "DUAL OBJ VALUE": round(dual_obj_value, 4)
}])

print("\n FINAL REPORT - QUESTION 2")
display(summary)


[0.89875, 0.71, 0.72375, 0.6512500000000001, 0.88125, 0.6937500000000001, 0.7050000000000001, 0.625, 0.8825, 0.7, 0.6975, 0.6325000000000001, 0.8825, 0.7224999999999999, 0.6725, 0.6300000000000001, 0.8799999999999999, 0.69375, 0.6824999999999999, 0.6449999999999999, 0.8787499999999999, 0.665, 0.6925000000000001, 0.66125, 0.8787499999999999, 0.70125, 0.6975, 0.67125, 0.8774999999999998, 0.68875, 0.6812499999999999, 0.6662500000000001, 0.87625, 0.6874999999999999, 0.6912499999999999, 0.66, 0.87375, 0.6875, 0.6849999999999999, 0.65125]
[{'C': 1, 'p': 2}, {'C': 1, 'p': 3}, {'C': 1, 'p': 4}, {'C': 1, 'p': 5}, {'C': 2, 'p': 2}, {'C': 2, 'p': 3}, {'C': 2, 'p': 4}, {'C': 2, 'p': 5}, {'C': 3, 'p': 2}, {'C': 3, 'p': 3}, {'C': 3, 'p': 4}, {'C': 3, 'p': 5}, {'C': 4, 'p': 2}, {'C': 4, 'p': 3}, {'C': 4, 'p': 4}, {'C': 4, 'p': 5}, {'C': 5, 'p': 2}, {'C': 5, 'p': 3}, {'C': 5, 'p': 4}, {'C': 5, 'p': 5}, {'C': 6, 'p': 2}, {'C': 6, 'p': 3}, {'C': 6, 'p': 4}, {'C': 6, 'p': 5}, {'C': 7, 'p': 2}, {'C': 7, '

Unnamed: 0,KERNEL,C,p (or lambda),TRAIN ACCURACY,TEST ACCURACY,NUMBER OF ITERATIONS,CPU TIME (s),DUAL OBJ VALUE
0,polynomial,1,2,0.9288,0.895,15,0.8343,-122.4522


In [4]:
# STEP 1: IMPORT FUNZIONI DAL FILE .py E RELOAD
import importlib
import Functions_22_Avino_Lombardi
importlib.reload(Functions_22_Avino_Lombardi)
from Functions_22_Avino_Lombardi import *

# STEP 2: CARICAMENTO DATI E PREPROCESSING
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Caricamento dataset
df = pd.read_csv("../dataset/GENDER_CLASSIFICATION.csv")

# Separazione features e labels
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values
y = np.where(y == 0, -1, 1)  # Etichette in {-1, +1}

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=123
)

# Normalizzazione
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# STEP 3: DEFINIZIONE DELLA GRIGLIA IPERPARAMETRI PER RBF (gamma = 1 / (2 * sigma^2))
grid = {
    "C": np.logspace(-5, 3, 9),         # es. da 1e-5 a 1e3
    "gamma": np.logspace(-8, 4, 13)     # es. da 1e-8 a 1e4
}

# STEP 4: CROSS-VALIDATION PER SELEZIONARE I MIGLIORI IPERPARAMETRI
params_comb, val_acc = crossval_svm(
    X=X_train,
    y=y_train,
    k=5,
    grid=grid,
    kernel_func=gaussian_kernel,
    kernel_param_name="gamma",
    use_seed=True
)

# Selezione dei migliori parametri
best_idx = int(np.argmax(val_acc))
best_params = params_comb[best_idx]
best_C = best_params["C"]
best_gamma = best_params["gamma"]

print(f"Best hyperparameters found: C = {best_C}, gamma = {best_gamma}")

# STEP 5: TRAINING COMPLETO CON I MIGLIORI PARAMETRI
K_final = compute_kernel_matrix(X_train, y_train, gaussian_kernel, gamma=best_gamma)
lambdas, num_iter, cpu_time, _ = solve_dual_svm(K_final, y_train, best_C)
dual_obj_value = compute_dual_objective(lambdas, K_final)
b_final = compute_b(X_train, y_train, lambdas, gaussian_kernel, C=best_C, gamma=best_gamma)

# STEP 6: PREDIZIONE SU TRAIN E TEST SET
y_train_pred = predict(X_train, y_train, lambdas, b_final, X_train, gaussian_kernel, gamma=best_gamma)
y_test_pred = predict(X_train, y_train, lambdas, b_final, X_test, gaussian_kernel, gamma=best_gamma)

train_acc = compute_accuracy(y_train, y_train_pred)
test_acc = compute_accuracy(y_test, y_test_pred)

# STEP 7: REPORT FINALE
summary = pd.DataFrame([{
    "KERNEL": "gaussian",
    "C": best_C,
    "gamma": best_gamma,
    "TRAIN ACCURACY": round(train_acc, 4),
    "TEST ACCURACY": round(test_acc, 4),
    "NUMBER OF ITERATIONS": num_iter,
    "CPU TIME (s)": round(cpu_time, 4),
    "DUAL OBJ VALUE": round(dual_obj_value, 4)
}])

print("\n FINAL REPORT - QUESTION 2")
display(summary)


Best hyperparameters found: C = 0.1, gamma = 0.1

 FINAL REPORT - QUESTION 2


Unnamed: 0,KERNEL,C,gamma,TRAIN ACCURACY,TEST ACCURACY,NUMBER OF ITERATIONS,CPU TIME (s),DUAL OBJ VALUE
0,gaussian,0.1,0.1,0.9225,0.905,17,0.9109,-17.1021


In [1]:
# STEP 1: IMPORT FUNZIONI DAL FILE .py E RELOAD
import importlib
import Functions_22_Avino_Lombardi
importlib.reload(Functions_22_Avino_Lombardi)
from Functions_22_Avino_Lombardi import *

# STEP 2: CARICAMENTO DATI E PREPROCESSING
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Caricamento dataset
df = pd.read_csv("../dataset/GENDER_CLASSIFICATION.csv")

# Separazione features e labels
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values
y = np.where(y == 0, -1, 1)  # Etichette in {-1, +1}

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=123
)

# Normalizzazione
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# STEP 3: DEFINIZIONE DELLA GRIGLIA IPERPARAMETRI PER RBF (gamma = 1 / (2 * sigma^2))
grid = {
    "C": np.logspace(-5, 3, 9),         # es. da 1e-5 a 1e3
    "gamma": np.logspace(-8, 4, 13)     # es. da 1e-8 a 1e4
}

# STEP 4: CROSS-VALIDATION PER SELEZIONARE I MIGLIORI IPERPARAMETRI
params_comb, val_acc = crossval_svm(
    X=X_train,
    y=y_train,
    k=5,
    grid=grid,
    kernel_func=gaussian_kernel,
    kernel_param_name="gamma",
    use_seed=True
)

# Selezione dei migliori parametri
best_idx = int(np.argmax(val_acc))
best_params = params_comb[best_idx]
best_C = best_params["C"]
best_gamma = best_params["gamma"]

print(f"Best hyperparameters found: C = {best_C}, gamma = {best_gamma}")

# STEP 5: TRAINING COMPLETO CON I MIGLIORI PARAMETRI
K_final = compute_kernel_matrix(X_train, y_train, gaussian_kernel, gamma=best_gamma)
lambdas, num_iter, cpu_time, _ = solve_dual_svm(K_final, y_train, best_C)
dual_obj_value = compute_dual_objective(lambdas, K_final)
b_final = compute_b(X_train, y_train, lambdas, gaussian_kernel, C=best_C, gamma=best_gamma)

# STEP 6: PREDIZIONE SU TRAIN E TEST SET
y_train_pred = predict(X_train, y_train, lambdas, b_final, X_train, gaussian_kernel, gamma=best_gamma)
y_test_pred = predict(X_train, y_train, lambdas, b_final, X_test, gaussian_kernel, gamma=best_gamma)

train_acc = compute_accuracy(y_train, y_train_pred)
test_acc = compute_accuracy(y_test, y_test_pred)

# STEP 7: REPORT FINALE
summary = pd.DataFrame([{
    "KERNEL": "gaussian",
    "C": best_C,
    "gamma": best_gamma,
    "TRAIN ACCURACY": round(train_acc, 4),
    "TEST ACCURACY": round(test_acc, 4),
    "NUMBER OF ITERATIONS": num_iter,
    "CPU TIME (s)": round(cpu_time, 4),
    "DUAL OBJ VALUE": round(dual_obj_value, 4)
}])

print("\n FINAL REPORT - QUESTION 2")
display(summary)


Best hyperparameters found: C = 100.0, gamma = 0.001

 FINAL REPORT - QUESTION 2


Unnamed: 0,KERNEL,C,gamma,TRAIN ACCURACY,TEST ACCURACY,NUMBER OF ITERATIONS,CPU TIME (s),DUAL OBJ VALUE
0,gaussian,100.0,0.001,0.9238,0.9,23,1.1468,-13365.4401
