In [11]:
"Assignment 2 - MLP for classification problem"

import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error,accuracy_score,classification_report
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import pandas

In [12]:
"Import datasets for both problems"

#binary classification dataset
data_b = datasets.fetch_openml("diabetes",version=1, as_frame=True)

#regression dataset
data_r = datasets.load_diabetes(as_frame=True)

#user input to select the dataset/task
task = input("Choose binary (b) or regression (r) dataset: ")

if task == "b":
    data = data_b
    y = data.target.map({'tested_negative': 0, 'tested_positive': 1}).values #convert the text data to binary
elif task == "r":
    data = data_r
    y = data.target.values
else:
    sys.exit()

X = data.data.values

#verifying the data
print(X.shape)
#print(X)
#print(y.shape)
#print(y)

(768, 8)


In [13]:
"Train test spliting"
#choosing the percentage of data used for testing 
#and spliting the train and test data into different variables

test_size=0.2
Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=test_size, random_state=42)

In [14]:
"Standardizing the features"
#each feature will have mean=0 and std=1
#improves numerical stability and distance-based methods

scaler=StandardScaler()
Xtr = scaler.fit_transform(Xtr)
Xte = scaler.transform(Xte)

In [15]:
class MLP(nn.Module):
    def __init__(self, input_size, output_size=1, dropout_prob=0.5):
        super(MLP, self).__init__()
        
        self.fc1 = nn.Linear(input_size, 64)   #input_size=10 (regression) input_size=8 (classification)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 64)
        self.fc4 = nn.Linear(64, 64)
        self.out = nn.Linear(64, output_size)
        
        self.dropout = nn.Dropout(p=dropout_prob)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        
        x = F.relu(self.fc3(x))
        x = self.dropout(x)
        
        x = F.relu(self.fc4(x))
        x = self.dropout(x)
        
        x = self.out(x)
        return x

This cell defines a Multilayer Perceptron (MLP) neural network. The network consists of four hidden layers with 64 neurons each, using ReLU activation functions and dropout for regularization. The final layer outputs a single value.

Inputs for this class:

input_size – Number of input features

output_size – Number of output neurons (default is 1 for regression)

dropout_prob – Probability of dropping a neuron during training for regularization

forward(x) – Defines the forward pass: input x is passed through hidden layers with ReLU activations and dropout, and finally through the output layer to produce predictions.


In [16]:
num_epochs=250
lr=0.002
dropout=0.1
batch_size=64

num_epochs – Number of training epochs, how many times the network will see the entire dataset.

lr – Learning rate for gradient descent; should be slow because the network has many parameters to deal with.

dropout – each neuron has this probability of being temporarily "turned off" in a forward pass, which forces the network to not rely too much on any single neuron and helps prevent overfitting; usually up to 0.3 is recommended, never above 0.5 to avoid underfitting.

batch_size – Number of samples processed in one forward/backward pass.

In [17]:
#convert datasets to PyTorch tensors
Xtr = torch.tensor(Xtr, dtype=torch.float32)
ytr = torch.tensor(ytr, dtype=torch.float32)
Xte = torch.tensor(Xte, dtype=torch.float32)
yte = torch.tensor(yte, dtype=torch.float32)

# Wrap Xtr and ytr into a dataset
train_dataset = TensorDataset(Xtr, ytr)

# Create DataLoader
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [18]:
# Model, Loss, Optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = MLP(input_size=Xtr.shape[1], dropout_prob=dropout).to(device)

if task == "b":
    criterion = nn.BCEWithLogitsLoss() #classification
elif task == "r":
    criterion = nn.MSELoss() #regression
else:
    sys.exit()

optimizer = optim.Adam(model.parameters(), lr=lr)

model – Creates an MLP instance with the given input size and dropout probability, and moves it to the specified device (CPU or GPU).

criterion – Defines the loss function used to measure prediction error; BCEWithLogitsLoss is for binary classification, MSELoss is for regression.

optimizer – Sets up the Adam optimizer to update the model's parameters during training using the specified learning rate.


In [19]:
# Training loop
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0

    for batch_x, batch_y in train_dataloader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        logits = model(batch_x)
        loss = criterion(logits, batch_y.view(-1, 1))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    avg_loss = epoch_loss / len(train_dataloader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

Epoch [1/250], Loss: 0.6715
Epoch [2/250], Loss: 0.6038
Epoch [3/250], Loss: 0.5131
Epoch [4/250], Loss: 0.4715
Epoch [5/250], Loss: 0.4845
Epoch [6/250], Loss: 0.4725
Epoch [7/250], Loss: 0.4591
Epoch [8/250], Loss: 0.4494
Epoch [9/250], Loss: 0.4326
Epoch [10/250], Loss: 0.4555
Epoch [11/250], Loss: 0.4163
Epoch [12/250], Loss: 0.4266
Epoch [13/250], Loss: 0.4237
Epoch [14/250], Loss: 0.4287
Epoch [15/250], Loss: 0.4147
Epoch [16/250], Loss: 0.4113
Epoch [17/250], Loss: 0.3990
Epoch [18/250], Loss: 0.4089
Epoch [19/250], Loss: 0.4020
Epoch [20/250], Loss: 0.3936
Epoch [21/250], Loss: 0.4172
Epoch [22/250], Loss: 0.3912
Epoch [23/250], Loss: 0.3950
Epoch [24/250], Loss: 0.3870
Epoch [25/250], Loss: 0.3745
Epoch [26/250], Loss: 0.3742
Epoch [27/250], Loss: 0.3835
Epoch [28/250], Loss: 0.3759
Epoch [29/250], Loss: 0.3550
Epoch [30/250], Loss: 0.3670
Epoch [31/250], Loss: 0.3638
Epoch [32/250], Loss: 0.3526
Epoch [33/250], Loss: 0.3369
Epoch [34/250], Loss: 0.3351
Epoch [35/250], Loss: 0

This cell defines the training loop for the MLP model.

model.train() – Sets the model to training mode (enables dropout and other training behaviors).

batch_x = batch_x.to(device), batch_y = batch_y.to(device) – Moves the batch to the specified device (CPU or GPU).

logits = model(batch_x) – Performs a forward pass through the network to get predictions.

loss = criterion(logits, batch_y.view(-1, 1)) – Computes the loss between predictions and targets.

optimizer.zero_grad() – Clears previous gradients before backpropagation.

loss.backward() – Computes gradients of the loss with respect to model parameters.

optimizer.step() – Updates model parameters using the optimizer.

epoch_loss += loss.item() – Adds the batch loss to the epoch's cumulative loss.

avg_loss = epoch_loss / len(train_dataloader) – Computes the average loss for the epoch.

In [20]:
y_pred=model(Xte)

"Performance metrics: Accuracy for classification problem and MSE for regression"

if task == "b":
    print(f'ACC:{accuracy_score(yte.detach().numpy(),y_pred.detach().numpy()>0.5)}') #classification
elif task == "r":
    print(f'MSE:{mean_squared_error(yte.detach().numpy(),y_pred.detach().numpy())}') #regression
else:
    sys.exit()

ACC:0.7012987012987013
