In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import pandas as pd
X_train = pd.read_csv('/content/drive/My Drive/SFSU/CSC671/project/features_train_scaled.csv')
X_test = pd.read_csv('/content/drive/My Drive/SFSU/CSC671/project/features_test_scaled.csv')
y_train = pd.read_csv('/content/drive/My Drive/SFSU/CSC671/project/target_train.csv')
y_test = pd.read_csv('/content/drive/My Drive/SFSU/CSC671/project/target_test.csv')

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, f1_score, classification_report

X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)

X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

class MLP(nn.Module):
    def __init__(self, input_dim, hidden1=128, hidden2=64, hidden3=None, dropout=0.3):
        super(MLP, self).__init__()

        layers = [
            nn.Linear(input_dim, hidden1),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden1, hidden2),
            nn.ReLU(),
            nn.Dropout(dropout),
        ]

        if hidden3 is not None:
            layers.append(nn.Linear(hidden2, hidden3))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            layers.append(nn.Linear(hidden3, 1))
        else:
            layers.append(nn.Linear(hidden2, 1))

        layers.append(nn.Sigmoid())

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)


def train_and_evaluate(config):

    # only some configs have hidden3
    if 'hidden3' in config:
        model = MLP(
            input_dim=X_train.shape[1],
            hidden1=config['hidden1'],
            hidden2=config['hidden2'],
            hidden3=config['hidden3'],
            dropout=config['dropout']
        )
    else:
        model = MLP(
            input_dim=X_train.shape[1],
            hidden1=config['hidden1'],
            hidden2=config['hidden2'],
            dropout=config['dropout']
        )

    criterion = nn.BCELoss()

    # optimizers
    optimizer_type = config.get('optimizer', 'adam').lower()
    lr = config.get('lr', 0.001)
    weight_decay = config.get('weight_decay', 0.0)

    if optimizer_type == 'sgd':
        momentum = config.get('momentum', 0.9)
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
    elif optimizer_type == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=lr, weight_decay=weight_decay)
    else:
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # train
    train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
    model.train()
    for epoch in range(config['epochs']):
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()

    # evaluate
    model.eval()
    with torch.no_grad():
        predictions = model(X_test_tensor)
        predicted_labels = (predictions > 0.5).float()
        acc = accuracy_score(y_test_tensor.numpy(), predicted_labels.numpy())
        f1 = f1_score(y_test_tensor.numpy(), predicted_labels.numpy())

    print(f"\nConfig: {config}")
    print(f"Accuracy: {acc:.4f}, F1 Score: {f1:.4f}")
    print(classification_report(y_test_tensor.numpy(), predicted_labels.numpy()))
    return acc, f1


configs = [
    #{"hidden1": 128, "hidden2": 64, "dropout": 0.3, "lr": 0.001, "batch_size": 32, "epochs": 20},
    #{"hidden1": 256, "hidden2": 128, "dropout": 0.3, "lr": 0.001, "batch_size": 32, "epochs": 20},
    #{"hidden1": 128, "hidden2": 64, "dropout": 0.5, "lr": 0.001, "batch_size": 32, "epochs": 20},
    #{"hidden1": 128, "hidden2": 64, "dropout": 0.3, "lr": 0.0005, "batch_size": 32, "epochs": 20},

    # deeper or wider
    {"hidden1": 256, "hidden2": 128, "hidden3": 64, "dropout": 0.4, "lr": 0.001, "batch_size": 64, "epochs": 30},
    {"hidden1": 512, "hidden2": 256, "dropout": 0.3, "lr": 0.0005, "batch_size": 64, "epochs": 50},

    # sgd / rmsprop
    {"hidden1": 128, "hidden2": 64, "dropout": 0.3, "lr": 0.001, "batch_size": 32, "epochs": 20, "optimizer": "sgd", "momentum": 0.9},
    {"hidden1": 128, "hidden2": 64, "dropout": 0.3, "lr": 0.001, "batch_size": 32, "epochs": 20, "optimizer": "rmsprop"},

    # weight decay
    {"hidden1": 128, "hidden2": 64, "dropout": 0.3, "lr": 0.001, "batch_size": 32, "epochs": 20, "weight_decay": 0.0001}
]

for cfg in configs:
    train_and_evaluate(cfg)



Config: {'hidden1': 256, 'hidden2': 128, 'hidden3': 64, 'dropout': 0.4, 'lr': 0.001, 'batch_size': 64, 'epochs': 30}
Accuracy: 0.7166, F1 Score: 0.7271
              precision    recall  f1-score   support

         0.0       0.72      0.69      0.71       970
         1.0       0.71      0.74      0.73      1006

    accuracy                           0.72      1976
   macro avg       0.72      0.72      0.72      1976
weighted avg       0.72      0.72      0.72      1976


Config: {'hidden1': 512, 'hidden2': 256, 'dropout': 0.3, 'lr': 0.0005, 'batch_size': 64, 'epochs': 50}
Accuracy: 0.6943, F1 Score: 0.6912
              precision    recall  f1-score   support

         0.0       0.68      0.72      0.70       970
         1.0       0.71      0.67      0.69      1006

    accuracy                           0.69      1976
   macro avg       0.69      0.69      0.69      1976
weighted avg       0.70      0.69      0.69      1976


Config: {'hidden1': 128, 'hidden2': 64, 'dropout': 0.