In [6]:
# import necessary libraries
import torch 
import torch.nn as nn # import neural network module
import torch.optim as optim # import optimizer 
from sklearn.datasets import load_breast_cancer # use premade dataset to practice training and finetuning model and hyperparameters
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

In [7]:
# Breast-cancer dataset bundled with scikit-learn. It is purely numerical (tabular),
# so it is far cheaper to process than an image dataset.
data = load_breast_cancer()
X, y = data.data, data.target  # feature matrix and class labels (benign vs malignant)

In [8]:
# Hold out 20% of the samples for validation; the remaining 80% are used for training.
# X_train / y_train hold the training features and labels, X_val / y_val the validation ones.
# random_state pins the shuffle so the same split is produced on every run.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Standardize every feature to zero mean and unit variance.
# The scaler's statistics are learned from the training split only and then reused
# for the validation split, so no validation information leaks into preprocessing.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)

In [10]:
# Pick the compute device explicitly. No visible earlier cell defines `device`, so
# without this line a fresh kernel (Restart & Run All) fails with NameError here.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Convert the arrays to float32 PyTorch tensors placed on `device`.
# torch.as_tensor also accepts an existing tensor, so re-running this cell does not
# trigger the "copy construct from a tensor" warning that torch.tensor would emit.
X_train = torch.as_tensor(X_train, dtype=torch.float32, device=device)
y_train = torch.as_tensor(y_train, dtype=torch.float32, device=device)
X_val = torch.as_tensor(X_val, dtype=torch.float32, device=device)
y_val = torch.as_tensor(y_val, dtype=torch.float32, device=device)
# note: a tensor is an n-dimensional array; a matrix (rows x columns) is the 2-D case

In [11]:
# define a neural network class with additional layers, batch normalization, and dropout - FNN setup given non spatial dataset - numerical setup
class NeuralNet(nn.Module):
    """Feed-forward classifier with two hidden blocks and a sigmoid output.

    Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout. The second
    hidden layer has ``hidden_size // 2`` units. The final Linear layer is
    followed by a sigmoid, so outputs lie in [0, 1] and pair with ``nn.BCELoss``.

    Args:
        input_size: number of input features per sample.
        hidden_size: width of the first hidden layer; must be at least 2 so the
            second hidden layer (``hidden_size // 2``) has at least one unit.
        output_size: number of output units (1 for a single probability).
        dropout_p: dropout probability used after each hidden block. Defaults to
            0.5, the value that was previously hard-coded.

    Raises:
        ValueError: if ``hidden_size`` is smaller than 2.
    """

    def __init__(self, input_size, hidden_size, output_size, dropout_p=0.5):
        # Reject widths that would silently create a zero-unit second layer.
        if hidden_size < 2:
            raise ValueError(
                f"hidden_size must be >= 2 so that hidden_size // 2 >= 1, got {hidden_size}"
            )
        super().__init__()
        half_hidden = hidden_size // 2

        # first hidden block
        self.fc1 = nn.Linear(input_size, hidden_size)  # first fully connected layer
        self.bn1 = nn.BatchNorm1d(hidden_size)  # batch normalization layer
        self.relu1 = nn.ReLU()  # ReLU activation function
        self.dropout1 = nn.Dropout(dropout_p)  # dropout layer to prevent overfitting

        # second hidden block (half the width of the first)
        self.fc2 = nn.Linear(hidden_size, half_hidden)  # second fully connected layer
        self.bn2 = nn.BatchNorm1d(half_hidden)  # batch normalization layer
        self.relu2 = nn.ReLU()  # ReLU activation function
        self.dropout2 = nn.Dropout(dropout_p)  # dropout layer to prevent overfitting

        # output head
        self.fc3 = nn.Linear(half_hidden, output_size)  # output layer
        self.sigmoid = nn.Sigmoid()  # sigmoid activation function for binary classification

    def forward(self, x):
        """Map a batch of feature rows to sigmoid outputs.

        Args:
            x: float tensor whose last dimension is ``input_size``.

        Returns:
            Tensor of sigmoid activations with last dimension ``output_size``.

        Note:
            In training mode BatchNorm1d normalizes with batch statistics and
            dropout is active; call ``model.eval()`` before inference.
        """
        out = self.fc1(x)
        out = self.bn1(out)
        out = self.relu1(out)
        out = self.dropout1(out)

        out = self.fc2(out)
        out = self.bn2(out)
        out = self.relu2(out)
        out = self.dropout2(out)

        out = self.fc3(out)
        out = self.sigmoid(out)
        return out

In [12]:
# Hyperparameters.
# -- architecture --
input_size = X_train.shape[1]  # one input unit per feature column
hidden_size = 128  # width of the first hidden layer
output_size = 1  # a single output unit for binary classification
# -- optimization --
learning_rate = 0.001  # initial step size handed to the optimizer
num_epochs = 100  # upper bound on the number of training epochs
patience = 10  # epochs without improvement tolerated before early stopping

In [13]:
# Seed PyTorch's RNG before building the model so weight initialization and dropout
# masks are reproducible across Restart & Run All (42 matches the split's random_state).
torch.manual_seed(42)

# initialize the neural network and move it to the appropriate device
model = NeuralNet(input_size, hidden_size, output_size).to(device)

In [14]:
# Loss, optimizer and learning-rate schedule.
criterion = nn.BCELoss()  # binary cross-entropy on probabilities
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)  # Adam over all model parameters
# Scale the learning rate by 0.1 when the metric passed to scheduler.step() has
# stopped decreasing for 5 epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=5,
)

In [15]:
# Early-stopping bookkeeping.
best_loss = float("inf")  # best loss so far; starts at infinity so any real loss is lower
early_stop_count = 0  # number of consecutive epochs without improvement

In [16]:
# Train with full-batch gradient steps, learning-rate scheduling on the validation
# loss, early stopping, and checkpointing of the best model to 'best_model.pth'.
for epoch in range(num_epochs):
    # --- training step: one gradient update on the whole training set ---
    model.train()  # enable dropout and batch-statistics normalization
    optimizer.zero_grad()  # clear gradients left over from the previous epoch
    outputs = model(X_train)  # forward pass
    loss = criterion(outputs, y_train.view(-1, 1))  # labels reshaped to a column to match the outputs
    loss.backward()  # backward pass
    optimizer.step()  # update model parameters

    # Training accuracy is taken from the train-mode forward pass above (dropout
    # active), so it is not directly comparable with the eval-mode validation accuracy.
    with torch.no_grad():
        predicted_train = outputs.round()  # round probabilities to 0/1 predictions
        correct_train = (predicted_train == y_train.view(-1, 1)).float().sum()
        train_accuracy = correct_train / y_train.size(0)

    # --- validation step ---
    model.eval()  # disable dropout, use running batch-norm statistics
    with torch.no_grad():
        val_outputs = model(X_val)
        val_loss = criterion(val_outputs, y_val.view(-1, 1))
        predicted_val = val_outputs.round()
        correct_val = (predicted_val == y_val.view(-1, 1)).float().sum()
        val_accuracy = correct_val / y_val.size(0)

    # Convert once to a Python float: avoids keeping a tensor alive in best_loss
    # and avoids repeated implicit tensor-to-bool/float conversions below.
    val_loss_value = val_loss.item()

    scheduler.step(val_loss_value)  # lower the learning rate if validation loss plateaus

    # Early stopping: checkpoint on improvement, otherwise count the stale epoch.
    if val_loss_value < best_loss:
        best_loss = val_loss_value
        early_stop_count = 0
        torch.save(model.state_dict(), 'best_model.pth')  # keep the best weights on disk
    else:
        early_stop_count += 1

    if early_stop_count >= patience:
        print("Early stopping")
        break

    # Print training and validation metrics every 10 epochs.
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}, Train Accuracy: {train_accuracy.item() * 100:.2f}%, "
              f"Val Loss: {val_loss_value:.4f}, Val Accuracy: {val_accuracy.item() * 100:.2f}%")
        # Read the rate from the optimizer: ReduceLROnPlateau only gained
        # get_last_lr() in newer PyTorch releases, while param_groups always exists.
        print(f"Current learning rate: {optimizer.param_groups[0]['lr']}")

Epoch [10/100], Loss: 0.4011, Train Accuracy: 89.89%, Val Loss: 0.4238, Val Accuracy: 94.74%
Current learning rate: 0.001
Epoch [20/100], Loss: 0.2924, Train Accuracy: 93.85%, Val Loss: 0.2549, Val Accuracy: 95.61%
Current learning rate: 0.001
Epoch [30/100], Loss: 0.2301, Train Accuracy: 95.60%, Val Loss: 0.1933, Val Accuracy: 96.49%
Current learning rate: 0.001
Epoch [40/100], Loss: 0.1897, Train Accuracy: 97.14%, Val Loss: 0.1693, Val Accuracy: 96.49%
Current learning rate: 0.001
Epoch [50/100], Loss: 0.1569, Train Accuracy: 97.80%, Val Loss: 0.1537, Val Accuracy: 96.49%
Current learning rate: 0.001
Epoch [60/100], Loss: 0.1333, Train Accuracy: 98.46%, Val Loss: 0.1407, Val Accuracy: 95.61%
Current learning rate: 0.001
Epoch [70/100], Loss: 0.1089, Train Accuracy: 98.24%, Val Loss: 0.1281, Val Accuracy: 95.61%
Current learning rate: 0.001
Epoch [80/100], Loss: 0.0974, Train Accuracy: 98.46%, Val Loss: 0.1177, Val Accuracy: 95.61%
Current learning rate: 0.001
Epoch [90/100], Loss: 0.

In [17]:
# Restore the checkpoint saved at the lowest validation loss during training.
# map_location makes the load independent of the device the checkpoint was saved from.
model.load_state_dict(torch.load('best_model.pth', map_location=device))

# Report accuracy for both splits through one shared evaluation path.
# NOTE: the split labelled "test" is the validation split (X_val / y_val) that also
# drove early stopping and learning-rate scheduling, so this number is not measured
# on untouched held-out data.
model.eval()  # disable dropout, use running batch-norm statistics
with torch.no_grad():
    for split_name, features, targets in (("training", X_train, y_train), ("test", X_val, y_val)):
        outputs = model(features)
        predicted = outputs.round()  # round probabilities to 0/1 predictions
        correct = (predicted == targets.view(-1, 1)).float().sum()
        accuracy = correct / targets.size(0)
        print(f"Accuracy on {split_name} data: {accuracy.item() * 100:.2f}%")

Accuracy on training data: 99.12%
Accuracy on test data: 95.61%
