## Import

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import time
import numpy as np
import pandas as pd

## Import dei dataset

In [2]:
# Carica i dati
path1 = 'dataset/train_data.csv'
train_data = pd.read_csv(path1)

path2 = 'dataset/test_data.csv'
test_data = pd.read_csv(path2)

target_column = 'income'  # Nome della colonna del target (modifica in base ai tuoi dati)

# Separazione delle feature (X) e dei target (y) per il training set
X_train = train_data.drop(columns=[target_column])  # Rimuove la colonna target dal DataFrame per ottenere le feature
y_train = train_data[target_column]  # Ottiene la colonna target

# Separazione delle feature (X) e dei target (y) per il test set
X_test = test_data.drop(columns=[target_column])  # Rimuove la colonna target dal DataFrame per ottenere le feature
y_test = test_data[target_column]  # Ottiene la colonna target


## Conversione in tensori

In [3]:
# Convertiamo i dati in tensori PyTorch usando NumPy
X_train = torch.tensor(X_train.values, dtype=torch.float32)  # Usa .values per ottenere l'array NumPy
y_train = torch.tensor(y_train.values, dtype=torch.float32)  # Assicurati che y sia un array 1D
X_test = torch.tensor(X_test.values, dtype=torch.float32)
y_test = torch.tensor(y_test.values, dtype=torch.float32)

## Creazione del Dataloader

In [4]:
# Creazione del DataLoader
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

## Definizione del modello

In [5]:
# Definizione del modello della rete neurale
class NeuralNetwork(nn.Module):
    def __init__(self, input_size):
        super(NeuralNetwork, self).__init__()
        self.layer1 = nn.Linear(input_size, 128)
        self.layer2 = nn.Linear(128, 64)
        self.output_layer = nn.Linear(64, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.sigmoid(self.output_layer(x))
        return x

## Funzione per addestrare e valutare il modello 

In [6]:
# Funzione per addestrare e valutare il modello
def train_and_evaluate(model, optimizer, train_loader, test_loader, criterion, epochs=20):
    train_losses, test_losses = [], []
    start_time = time.time()

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        
        # Training
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs).squeeze()
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        avg_train_loss = running_loss / len(train_loader)
        train_losses.append(avg_train_loss)
        
        # Validation
        model.eval()
        test_loss = 0.0
        correct = 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                outputs = model(inputs).squeeze()
                loss = criterion(outputs, labels)
                test_loss += loss.item()
                preds = (outputs > 0.5).float()
                correct += (preds == labels).sum().item()
        
        avg_test_loss = test_loss / len(test_loader)
        test_losses.append(avg_test_loss)
        accuracy = correct / len(test_dataset)

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {avg_train_loss:.4f}, Test Loss: {avg_test_loss:.4f}, Accuracy: {accuracy:.4f}")
    
    training_time = time.time() - start_time
    return train_losses, test_losses, accuracy, training_time

In [9]:
# Inizializzazione del modello
input_size = X_train.shape[1]  # Numero di feature dopo la feature engineering
model = NeuralNetwork(input_size)

# Funzione di perdita: Sparse Categorical Cross-Entropy
criterion = nn.CrossEntropyLoss()

# Lista degli ottimizzatori da confrontare
optimizers = {
    'SGD': optim.SGD(model.parameters(), lr=0.001),
    'SGD+Momentum': optim.SGD(model.parameters(), lr=0.001, momentum=0.9),
    'Adam': optim.Adam(model.parameters(), lr=0.001),
    'Adagrad': optim.Adagrad(model.parameters(), lr=0.001),
    'Adamax': optim.Adamax(model.parameters(), lr=0.002),
    'RMSProp': optim.RMSprop(model.parameters(), lr=0.001),
    'Adadelta': optim.Adadelta(model.parameters(), lr=1.0),
    'Nadam': optim.NAdam(model.parameters(), lr=0.002)  # Aggiunto Nadam
 }

# Ciclo su ogni ottimizzatore per addestrare e valutare il modello
results = {}

for opt_name, optimizer in optimizers.items():
    print(f"\nTraining with {opt_name} optimizer:")
    model = NeuralNetwork(input_size)  # Reset del modello per ogni ottimizzatore
    train_losses, test_losses, accuracy, training_time = train_and_evaluate(
        model, optimizer, train_loader, test_loader, criterion, epochs=20
    )
    results[opt_name] = {
        'train_losses': train_losses,
        'test_losses': test_losses,
        'accuracy': accuracy,
        'training_time': training_time
    }

# Analisi dei risultati
for opt_name, res in results.items():
    print(f"\nOptimizer: {opt_name}")
    print(f"Final Test Accuracy: {res['accuracy']:.4f}")
    print(f"Training Time: {res['training_time']:.2f} seconds")
    print(f"Final Train Loss: {res['train_losses'][-1]:.4f}")
    print(f"Final Test Loss: {res['test_losses'][-1]:.4f}")



Training with SGD optimizer:
Epoch 1/20, Train Loss: 63.9038, Test Loss: 62.1400, Accuracy: 0.7674
Epoch 2/20, Train Loss: 63.8997, Test Loss: 62.1400, Accuracy: 0.7674
Epoch 3/20, Train Loss: 63.9003, Test Loss: 62.1400, Accuracy: 0.7674
Epoch 4/20, Train Loss: 63.9036, Test Loss: 62.1400, Accuracy: 0.7674
Epoch 5/20, Train Loss: 63.9018, Test Loss: 62.1400, Accuracy: 0.7674
Epoch 6/20, Train Loss: 63.9060, Test Loss: 62.1400, Accuracy: 0.7674
Epoch 7/20, Train Loss: 63.9051, Test Loss: 62.1400, Accuracy: 0.7674
Epoch 8/20, Train Loss: 63.9023, Test Loss: 62.1400, Accuracy: 0.7674
Epoch 9/20, Train Loss: 63.9023, Test Loss: 62.1400, Accuracy: 0.7674
Epoch 10/20, Train Loss: 63.9094, Test Loss: 62.1400, Accuracy: 0.7674
Epoch 11/20, Train Loss: 63.8969, Test Loss: 62.1400, Accuracy: 0.7674
Epoch 12/20, Train Loss: 63.8961, Test Loss: 62.1400, Accuracy: 0.7674
Epoch 13/20, Train Loss: 63.9007, Test Loss: 62.1400, Accuracy: 0.7674
Epoch 14/20, Train Loss: 63.9051, Test Loss: 62.1400, Ac

Epoch 15/20, Train Loss: 63.9868, Test Loss: 62.2374, Accuracy: 0.2339
Epoch 16/20, Train Loss: 63.9874, Test Loss: 62.2374, Accuracy: 0.2339
Epoch 17/20, Train Loss: 63.9840, Test Loss: 62.2374, Accuracy: 0.2339
Epoch 18/20, Train Loss: 63.9875, Test Loss: 62.2374, Accuracy: 0.2339
Epoch 19/20, Train Loss: 63.9876, Test Loss: 62.2374, Accuracy: 0.2339
Epoch 20/20, Train Loss: 63.9851, Test Loss: 62.2374, Accuracy: 0.2339

Training with Adadelta optimizer:
Epoch 1/20, Train Loss: 63.9526, Test Loss: 62.2095, Accuracy: 0.7656
Epoch 2/20, Train Loss: 63.9570, Test Loss: 62.2095, Accuracy: 0.7656
Epoch 3/20, Train Loss: 63.9537, Test Loss: 62.2095, Accuracy: 0.7656
Epoch 4/20, Train Loss: 63.9559, Test Loss: 62.2095, Accuracy: 0.7656
Epoch 5/20, Train Loss: 63.9505, Test Loss: 62.2095, Accuracy: 0.7656
Epoch 6/20, Train Loss: 63.9505, Test Loss: 62.2095, Accuracy: 0.7656
Epoch 7/20, Train Loss: 63.9526, Test Loss: 62.2095, Accuracy: 0.7656
Epoch 8/20, Train Loss: 63.9548, Test Loss: 62.20