In [None]:
def evaluate_pytorch_model(model, dataloaders, device):
    """Print and return the classification accuracy of a model per data split.

    Args:
        model: ``torch.nn.Module`` whose output holds one score per class
            along dimension 1; the highest score is taken as the prediction.
        dataloaders: Mapping from phase name (``'train'``, ``'val'``,
            ``'test'``) to an iterable of ``(inputs, labels)`` batches.
            Phases that are absent from the mapping are skipped.
        device: Device each batch is moved to before the forward pass.

    Returns:
        dict mapping ``'<phase>_accuracy'`` to the fraction of samples whose
        predicted class equals the label (``nan`` if the split yielded no
        samples).
    """
    model.eval()  # Set the model to evaluation mode
    performance = {}
    with torch.no_grad():  # No need to track gradients
        for phase in ('train', 'val', 'test'):
            if phase not in dataloaders:  # Skip if DataLoader is not provided for the phase
                continue
            correct, total = 0, 0
            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            # An empty split has no defined accuracy; report nan instead of
            # raising ZeroDivisionError.
            accuracy = correct / total if total else float('nan')
            performance[phase + '_accuracy'] = accuracy
            print(f'Accuracy of the {model.__class__.__name__} model on the {phase} set: {accuracy:.2f}')
    return performance

def evaluate_sklearn_model(model, X_train, Y_train, X_test, Y_test):
    """Print and collect train/test accuracy for a fitted estimator.

    Args:
        model: Object exposing ``predict(X)``.
        X_train, Y_train: Features and true labels of the training split.
        X_test, Y_test: Features and true labels of the test split.

    Returns:
        dict with keys ``'train_accuracy'`` and ``'test_accuracy'``, each the
        mean of the element-wise comparison between predictions and labels.
    """
    splits = {'train': (X_train, Y_train), 'test': (X_test, Y_test)}
    model_name = model.__class__.__name__
    performance = {}
    for split_name, (features, targets) in splits.items():
        hits = model.predict(features) == targets
        score = np.mean(hits)
        performance[split_name + '_accuracy'] = score
        print(f'Accuracy of the {model_name} model on the {split_name} set: {score:.2f}')
    return performance



In [ ]:
def map_predictions_to_genres(predictions, label_encoder):
    """Translate encoded class predictions back to their genre labels.

    Args:
        predictions: Encoded predictions, passed through unchanged to the
            encoder.
        label_encoder: Object exposing ``inverse_transform``.

    Returns:
        Whatever ``label_encoder.inverse_transform(predictions)`` returns.
    """
    return label_encoder.inverse_transform(predictions)

def make_predictions_pytorch(model, X_test_tensor, device):
    """Return the predicted class index for each row of ``X_test_tensor``.

    The model is switched to evaluation mode, run once on the whole tensor
    without gradient tracking, and the index of the highest score along
    dimension 1 is taken as the prediction.

    Returns:
        NumPy array of predicted class indices (moved to the CPU first).
    """
    model.eval()  # Evaluation mode for inference
    batch = X_test_tensor.to(device)
    with torch.no_grad():  # Inference only; gradients are not needed
        scores = model(batch)
        best = torch.max(scores, 1)
    # NumPy conversion requires the tensor to live on the CPU
    return best.indices.cpu().numpy()

def make_predictions_sklearn(model, X_test_scaled):
    """Return ``model.predict(X_test_scaled)`` unchanged.

    Thin wrapper mirroring ``make_predictions_pytorch`` for estimators that
    expose a ``predict`` method.
    """
    return model.predict(X_test_scaled)


In [ ]:
# Pick the fastest available backend: CUDA GPU, then Apple-silicon MPS, then CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

In [ ]:
class CustomNN(nn.Module):
    """Fully connected classifier: ``num_layers`` Linear+ReLU pairs and a linear head.

    Args:
        input_size: Width of the input features.
        num_classes: Number of outputs produced by the final linear layer.
        num_layers: Number of hidden Linear+ReLU pairs. With 0 the model is a
            single linear layer applied directly to the input.
        hidden_size: Width of every hidden layer.
    """

    def __init__(self, input_size, num_classes, num_layers=2, hidden_size=100):
        super().__init__()
        self.layers = nn.ModuleList()

        # Track the width feeding the next layer so the head stays correctly
        # sized even when there are no hidden layers.
        in_features = input_size
        for _ in range(num_layers):
            self.layers.append(nn.Linear(in_features, hidden_size))
            self.layers.append(nn.ReLU())
            in_features = hidden_size

        self.output_layer = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Apply the hidden stack, then the output layer; returns raw scores."""
        for layer in self.layers:
            x = layer(x)
        return self.output_layer(x)


In [ ]:
def train_and_validate(model, train_loader, val_loader, device, optimizer, criterion, epochs=10):
    """Train ``model`` for ``epochs`` passes, then measure validation accuracy.

    Args:
        model: Network to optimise; left in evaluation mode on return.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        device: Device every batch is moved to.
        optimizer: Optimizer stepped once per training batch.
        criterion: Loss called as ``criterion(outputs, labels)``.
        epochs: Number of passes over ``train_loader``.

    Returns:
        Fraction of validation samples whose highest-scoring class (along
        dimension 1) equals the label.
    """
    # ---- training ----
    model.train()
    for _ in range(epochs):
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(batch_x), batch_y)
            batch_loss.backward()
            optimizer.step()

    # ---- validation (single pass after all epochs) ----
    model.eval()
    n_seen = 0
    n_correct = 0
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            scores = model(batch_x)
            predicted = torch.max(scores.data, 1).indices
            n_seen += batch_y.size(0)
            n_correct += (predicted == batch_y).sum().item()

    return n_correct / n_seen


In [ ]:
import optuna
# Every model trained during the search, with its trial id and validation
# accuracy, so the best one can be retrieved after the study finishes.
all_models = []


def objective(trial):
    """Optuna objective for the basic MLP: train one ``CustomNN`` and score it.

    Samples a learning rate (log scale), the number of hidden layers and the
    hidden width from ``trial``, trains the resulting model for 10 epochs on
    the notebook-level ``train_loader`` and records it in ``all_models``.

    Args:
        trial: Optuna trial used to sample hyperparameters.

    Returns:
        Validation accuracy reported by ``train_and_validate``.
    """
    # Hyperparameters to tune. suggest_float(log=True) replaces the deprecated
    # suggest_loguniform and matches advanced_objective.
    lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)
    num_layers = trial.suggest_int('num_layers', 1, 3)
    hidden_size = trial.suggest_int('hidden_size', 50, 500)

    # Model initialization: input width from X_train's second dimension, one
    # output per distinct value in Y_train.
    model = CustomNN(input_size=X_train.shape[1], num_classes=len(np.unique(Y_train)),
                     num_layers=num_layers, hidden_size=hidden_size).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # Train and validate
    val_accuracy = train_and_validate(model, train_loader, val_loader, device, optimizer, criterion, epochs=10)

    # Save the model and trial information
    all_models.append({'trial_id': trial.number, 'model': model, 'val_accuracy': val_accuracy})

    return val_accuracy


In [ ]:
# Run the hyperparameter search for the basic MLP; the study maximizes the
# value returned by `objective`.
study = optuna.create_study(direction='maximize', study_name='MLP')
study.optimize(objective, n_trials=50)  # Adjust the number of trials as needed

# Report the best trial's objective value and its sampled hyperparameters
print("Best trial:")
trial = study.best_trial

print(f"Value: {trial.value}")
print("Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")


In [ ]:
# Select the recorded trial with the highest validation accuracy
best_model_info = max(all_models, key=lambda entry: entry['val_accuracy'])
best_model = best_model_info['model']

# Report the selected model's accuracy on the train, validation and test splits
evaluate_pytorch_model(
    best_model,
    dataloaders={'train': train_loader, 'val': val_loader, 'test': test_loader},
    device=device,
)

## Try a more complex MLP that adds batch norm, dropout layers, and weight decay

In [ ]:
class AdvancedCustomNN(nn.Module):
    """MLP classifier with optional batch normalisation and dropout.

    Each hidden block is Linear -> [BatchNorm1d] -> ReLU -> [Dropout]; the
    blocks are followed by a final linear layer producing ``num_classes``
    outputs.

    Args:
        input_size: Width of the input features.
        num_classes: Number of outputs of the final linear layer.
        num_layers: Number of hidden blocks. With 0 the model is a single
            linear layer applied directly to the input.
        hidden_size: Width of every hidden block.
        dropout_rate: Dropout probability; a Dropout layer is only added
            when this is greater than 0.
        use_batch_norm: Whether each block includes a BatchNorm1d layer.
    """

    def __init__(self, input_size, num_classes, num_layers=2, hidden_size=100, dropout_rate=0.0, use_batch_norm=False):
        super().__init__()
        layers = []

        # Track the width feeding the next Linear so the head is sized
        # correctly even when there are no hidden blocks.
        in_features = input_size
        for _ in range(num_layers):
            layers.append(nn.Linear(in_features, hidden_size))
            if use_batch_norm:
                layers.append(nn.BatchNorm1d(hidden_size))
            layers.append(nn.ReLU())
            if dropout_rate > 0:
                layers.append(nn.Dropout(dropout_rate))
            in_features = hidden_size

        layers.append(nn.Linear(in_features, num_classes))
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the raw scores."""
        return self.layers(x)


In [ ]:
# Every AdvancedCustomNN trained during the search, with trial id and score
all_advanced_mlps = []
def advanced_objective(trial):
    """Optuna objective for ``AdvancedCustomNN``: train one model and score it.

    Samples learning rate, depth, width, dropout rate, batch-norm usage and
    weight decay from ``trial``, trains for 10 epochs on the notebook-level
    ``train_loader`` and records the model in ``all_advanced_mlps``.

    Returns:
        Validation accuracy reported by ``train_and_validate``.
    """
    # Sample the search space; the call order matches the parameter order
    # lr, num_layers, hidden_size, dropout_rate, use_batch_norm, weight_decay.
    learning_rate = trial.suggest_float('lr', 1e-5, 1e-1, log=True)
    arch_kwargs = {
        'num_layers': trial.suggest_int('num_layers', 2, 8),
        'hidden_size': trial.suggest_int('hidden_size', 50, 600),
        'dropout_rate': trial.suggest_float('dropout_rate', 0.0, 0.5),
        'use_batch_norm': trial.suggest_categorical('use_batch_norm', [True, False]),
    }
    decay = trial.suggest_float('weight_decay', 1e-5, 1e-1, log=True)

    # Build the network: input width from X_train, one output per distinct label
    n_features = X_train.shape[1]
    n_classes = len(np.unique(Y_train))
    model = AdvancedCustomNN(input_size=n_features, num_classes=n_classes, **arch_kwargs).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=decay)
    criterion = nn.CrossEntropyLoss()

    val_accuracy = train_and_validate(model, train_loader, val_loader, device, optimizer, criterion, epochs=10)

    # Keep the trained model so the best one can be retrieved after the study
    record = {'trial_id': trial.number, 'model': model, 'val_accuracy': val_accuracy}
    all_advanced_mlps.append(record)

    return val_accuracy


In [ ]:
# Run the hyperparameter search for the advanced MLP; the study maximizes the
# value returned by `advanced_objective`. This rebinds `study` and `trial`.
study = optuna.create_study(direction='maximize', study_name="MLP Advanced")
study.optimize(advanced_objective, n_trials=100)  # Adjust n_trials based on computational resources

# Report the best trial's objective value and its sampled hyperparameters
print("Best trial:")
trial = study.best_trial
print(f"Value: {trial.value}")
print("Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

In [ ]:
# Select the recorded advanced-MLP trial with the highest validation accuracy
best_adv_mlp_info = max(all_advanced_mlps, key=lambda entry: entry['val_accuracy'])
best_adv_mlp = best_adv_mlp_info['model']

# Report its accuracy on the train, validation and test splits
evaluate_pytorch_model(
    best_adv_mlp,
    dataloaders={'train': train_loader, 'val': val_loader, 'test': test_loader},
    device=device,
)

## Manually experiment with MLP Architectures

In [ ]:
import time
import matplotlib.pyplot as plt
import torch.nn.functional as F
def train_and_validate_with_plot(model, train_loader, val_loader, device, optimizer, criterion, epochs=10):
    """Train with per-epoch validation, plot both loss curves, return accuracy.

    Each epoch runs one optimisation pass over ``train_loader`` followed by
    one evaluation pass over ``val_loader``. The mean batch loss of each pass
    is recorded and plotted after training, and the elapsed wall-clock time
    is printed in minutes.

    Returns:
        Validation accuracy measured during the final epoch.
    """
    history = {'train': [], 'val': []}
    started = time.time()

    for _ in range(epochs):
        # ---- training pass ----
        model.train()
        epoch_train_loss = 0.0
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(batch_x), batch_y)
            batch_loss.backward()
            optimizer.step()
            epoch_train_loss += batch_loss.item()
        history['train'].append(epoch_train_loss / len(train_loader))

        # ---- validation pass ----
        model.eval()
        epoch_val_loss = 0.0
        total, correct = 0, 0  # reset every epoch; the last epoch's counts are returned
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x = batch_x.to(device)
                batch_y = batch_y.to(device)
                scores = model(batch_x)
                epoch_val_loss += criterion(scores, batch_y).item()
                predicted = torch.max(scores.data, 1).indices
                total += batch_y.size(0)
                correct += (predicted == batch_y).sum().item()
        history['val'].append(epoch_val_loss / len(val_loader))

    # Stop the clock before plotting so the figure is not counted as training time
    elapsed_time = (time.time() - started) / 60  # minutes

    # Loss curves, one point per epoch
    plt.figure(figsize=(10, 6))
    plt.plot(history['train'], label='Training Loss')
    plt.plot(history['val'], label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

    val_accuracy = correct / total
    print(f"Training completed in: {elapsed_time:.2f} minutes")
    return val_accuracy


In [ ]:
class ManualNN(nn.Module):
    """MLP with batch norm after every hidden linear layer and a linear head.

    The input layer is followed by BatchNorm1d and ReLU. Every further hidden
    layer is followed by BatchNorm1d, ReLU and dropout. Dropout is not applied
    after the input layer.

    Args:
        input_size: Width of the input features.
        num_classes: Number of outputs of the final linear layer.
        dropout_rate: Dropout probability used after each hidden layer.
        hidden_units: Sequence of hidden-layer widths; must contain at least
            one entry. The first entry is the output width of the input layer.

    Raises:
        ValueError: If ``hidden_units`` is empty.
    """

    def __init__(self, input_size=518, num_classes=8, dropout_rate=0.3, hidden_units=(256, 128, 64, 32)):
        super().__init__()

        # Work on a private list copy; the default is an immutable tuple so it
        # cannot be shared and mutated across instances.
        hidden_units = list(hidden_units)
        if not hidden_units:
            raise ValueError("hidden_units must contain at least one layer width")

        # Define the layers (attribute names and creation order are kept so
        # state_dict keys stay the same)
        self.dropout = nn.Dropout(p=dropout_rate)
        self.bn_layers = nn.ModuleList([nn.BatchNorm1d(width) for width in hidden_units])

        self.input_layer = nn.Linear(input_size, hidden_units[0])
        self.hidden_layers = nn.ModuleList(
            [nn.Linear(n_in, n_out) for n_in, n_out in zip(hidden_units, hidden_units[1:])]
        )
        self.output_layer = nn.Linear(hidden_units[-1], num_classes)

    def forward(self, x):
        """Return the raw output-layer scores for ``x``."""
        x = F.relu(self.bn_layers[0](self.input_layer(x)))
        # bn_layers[0] belongs to the input layer, so hidden layer k pairs
        # with bn_layers[k + 1].
        for idx, layer in enumerate(self.hidden_layers, start=1):
            x = F.relu(self.bn_layers[idx](layer(x)))
            x = self.dropout(x)
        return self.output_layer(x)

In [ ]:
# Build a deep ManualNN: four layers at each of six widths, then 32 and 16
NN = ManualNN(
    hidden_units=[width for width in (2056, 1028, 512, 256, 128, 64) for _ in range(4)] + [32, 16],
    dropout_rate=0.4,
).to(device)
# SGD with momentum and cross-entropy loss (Adam with lr=0.001 was the earlier alternative)
optimizer = torch.optim.SGD(NN.parameters(), momentum=0.9, lr=0.01)
criterion = nn.CrossEntropyLoss()

# Train with per-epoch loss tracking, then report accuracy on every split
epochs = 400
val_accuracy = train_and_validate_with_plot(NN, train_loader, val_loader, device, optimizer, criterion, epochs=epochs)
evaluate_pytorch_model(
    NN,
    dataloaders={'train': train_loader, 'val': val_loader, 'test': test_loader},
    device=device,
)

In [ ]:
def train_and_validate_with_plot_es(model, train_loader, val_loader, device, optimizer, criterion, epochs=10, patience=5):
    """Train with early stopping on validation loss, then plot both loss curves.

    After every epoch the mean validation batch loss is compared with the best
    value seen so far. Training stops once the number of consecutive epochs
    without improvement equals ``patience``. The model keeps the weights of
    the last epoch that ran; the best-scoring weights are not restored.

    Args:
        model: Network to optimise.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        device: Device every batch is moved to.
        optimizer: Optimizer stepped once per training batch.
        criterion: Loss called as ``criterion(outputs, labels)``.
        epochs: Maximum number of epochs.
        patience: Consecutive non-improving epochs tolerated before stopping.

    Returns:
        Tuple ``(model, train_losses, val_losses)`` where the two lists hold
        the mean batch loss of each completed epoch.
    """
    train_losses = []
    val_losses = []
    best_val_loss = float('inf')
    epochs_no_improve = 0

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        train_losses.append(running_loss / len(train_loader))

        model.eval()
        running_loss = 0.0
        # Validation needs no gradients; skipping autograd bookkeeping saves
        # memory and matches the other training helpers in this notebook.
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                running_loss += loss.item()
        average_val_loss = running_loss / len(val_loader)
        val_losses.append(average_val_loss)

        # Early Stopping: reset the counter on improvement, otherwise count
        # another stagnant epoch.
        if average_val_loss < best_val_loss:
            best_val_loss = average_val_loss
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
        if epochs_no_improve == patience:
            print('Early stopping!')
            break

    # Plot losses (one point per completed epoch)
    plt.figure(figsize=(10, 6))
    plt.plot(train_losses, label='Training Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

    return model, train_losses, val_losses


In [ ]:
# Try varying batch sizes:
bs = 64
train_loader_vbs = DataLoader(train_dataset, batch_size=bs, shuffle=True)
val_loader_vbs = DataLoader(val_dataset, batch_size=bs, shuffle=False)
test_loader_vbs = DataLoader(test_dataset, batch_size=bs, shuffle=False)
# Instantiate #10360 works fairly well
# The `* 0` terms evaluate to empty lists, so the widths are [268324, 128, 64, 32, 16].
NN = ManualNN(hidden_units=[268324] + [33540]*0 + [256]*0 + [128] + [64]*1 + [32]* 1 + [16], dropout_rate=0.5).to(device)
# Define optimizer and loss function
# optimizer = torch.optim.Adam(NN.parameters(), lr=0.001)
optimizer = torch.optim.SGD(NN.parameters(), lr=0.01, momentum=0.9)
# `nn.MSE` does not exist in torch.nn (AttributeError). The labels are compared
# against arg-max class predictions elsewhere in this notebook, so they are
# class indices and cross-entropy is the matching loss, as in the other cells.
criterion = nn.CrossEntropyLoss()

# Train and evaluate the model
epochs = 500
patience = 50
# NOTE: despite its name, `val_acc` holds the (model, train_losses, val_losses)
# tuple returned by train_and_validate_with_plot_es, not an accuracy.
val_acc = train_and_validate_with_plot_es(NN, train_loader_vbs, val_loader_vbs, device, optimizer, criterion, epochs, patience)
evaluate_pytorch_model(NN, dataloaders={'train': train_loader, 'val': val_loader, 'test': test_loader}, device=device)

In [ ]:
# Scratch check of a candidate layer-size list (terms multiplied by 0 drop out)
print([518, 10360, 512, 128, 64, 16])

In [ ]:
# Scratch check of a candidate layer-size list (terms multiplied by 0 drop out)
print([518 * 1000, 32])