In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

import configparser
config = configparser.ConfigParser()
config.read('config.ini')

import torch
from torch import nn

print(torch.__version__)
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using {device} device")

import wandb
wandb.login()


1.12.1
Using mps device


True

## Data loading and train-test split

First, we load cleaned data and created two datasets - labels (y) and predictors (X) - which were further split into train (80%) and test (20%) sets. To ensure the split will always be the same, we also set the random state seed.

In [2]:
df = pd.read_csv('Data/cleaned.csv', index_col=0)
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,0.352941,0.670968,0.489796,0.304348,0.186899,0.314928,0.234415,0.483333,1
1,0.058824,0.264516,0.428571,0.239130,0.106370,0.171779,0.116567,0.166667,0
2,0.470588,0.896774,0.408163,0.271739,0.186899,0.104294,0.253629,0.183333,1
3,0.058824,0.290323,0.428571,0.173913,0.096154,0.202454,0.038002,0.000000,0
4,0.000000,0.600000,0.163265,0.304348,0.185096,0.509202,0.943638,0.200000,1
...,...,...,...,...,...,...,...,...,...
763,0.588235,0.367742,0.530612,0.445652,0.199519,0.300613,0.039710,0.700000,0
764,0.117647,0.503226,0.469388,0.217391,0.106370,0.380368,0.111870,0.100000,0
765,0.294118,0.496774,0.489796,0.173913,0.117788,0.163599,0.071307,0.150000,0
766,0.058824,0.529032,0.367347,0.271739,0.186899,0.243354,0.115713,0.433333,1


In [3]:
X, y = df.drop('Outcome', axis=1).values, df.Outcome.values

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, shuffle=True, random_state=42)

X_train = torch.FloatTensor(X_train)
X_test = torch.FloatTensor(X_test)
y_train = torch.FloatTensor(y_train).unsqueeze(1)
y_test = torch.FloatTensor(y_test).unsqueeze(1)

Our neural network consists of input layer, one hidden and an output layer. All layers except output have ReLU activation function and the output layer has Sigmoid activation function, because we need the output to be in interval <0, 1>. Because we are doing binary classification, we used binary cross entropy loss (BCELoss) and as an optimizer we used Adam.

Additionally, learning rate scheduler has been used to make the learning rate variable - higher at the beginning of the training and getting gradually smaller as we are nearing the minimum.

In [21]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_features=8, hidden1=8, hidden2=4, out_features=1):
        super().__init__()
        self.f_connected1 = nn.Linear(input_features, hidden1)
        self.f_connected2 = nn.Linear(hidden1, hidden2)
        self.out = nn.Linear(hidden2, out_features)

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self,x):
        x = self.relu(self.f_connected1(x))
        x = self.relu(self.f_connected2(x))
        x = self.sigmoid(self.out(x))
        
        return x


model = NeuralNetwork()
print(model)

loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=config['torch'].getfloat('start_lr'))

scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, 
                                                max_lr=config['torch'].getfloat('max_lr'),
                                                #base_lr=config['torch'].getfloat('start_lr'),
                                                epochs=config['default'].getint('epochs'),
                                                steps_per_epoch=1,
                                                anneal_strategy=config['torch']['strategy'],
                                                cycle_momentum=False)

run = wandb.init(project="basic-nn-torch", id="onecycle-cos-1")
wandb.config.update(config)
wandb.watch(model)

NeuralNetwork(
  (f_connected1): Linear(in_features=8, out_features=8, bias=True)
  (f_connected2): Linear(in_features=8, out_features=4, bias=True)
  (out): Linear(in_features=4, out_features=1, bias=True)
  (relu): ReLU()
  (sigmoid): Sigmoid()
)


0,1
f1_macro,▁▁▁▁▁▁▁▁▅▇▇▇▇▇▇▇████████████████████████
f1_none/0,▁▁▁▁▁▁▁▁▄▄▄▄▃▄▄▆▆▆▇▇▇███████████████████
f1_none/1,▁▁▁▁▁▁▁▁▆▇▇▇▇▇▇█████████████████████████
learning_rate,▁▁▂▂▃▄▅▆▇▇███████▇▇▇▇▆▆▅▅▅▄▄▃▃▃▂▂▂▂▁▁▁▁▁
train_accuracy,▁▁▁▁▁▁▁▁▃▅▅▅▅▅▆▆▆▇▇█████████████████████
training_loss,██████▇▇▆▄▄▄▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▅▆▅▅▅▅▆▇▇▇██████████████████████
validation_loss,█████▇▇▆▅▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
f1_macro,0.83405
f1_none/0,0.875
f1_none/1,0.7931
learning_rate,0.0
train_accuracy,0.88925
training_loss,0.28607
val_accuracy,0.84416
validation_loss,0.42198


[]

During training is calculated loss and accuracy and the learning rate is adjusted by the scheduler. During testing is calculated the testing loss and accuracy, as well as F1-score, and all metrics are logged to wandb.

In [22]:
train_losses, val_losses = [], []

for i in range(config['default'].getint('epochs')):
    i += 1
    y_pred = model.forward(X_train)
    train_loss = loss_fn(y_pred, y_train)
    train_losses.append(train_loss)

    y_pred = (y_pred > 0.5).int()
    train_accuracy = accuracy_score(y_train.squeeze(1).int(), y_pred)

    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()

    wandb.log({'learning_rate': optimizer.param_groups[0]['lr']})
    scheduler.step()
    
    with torch.no_grad():
        y_pred = model(X_test)
        val_loss = loss_fn(y_pred, y_test)
        val_losses.append(val_loss)

        y_pred = (y_pred > 0.5).int()
        wandb.log({'training_loss': train_loss, 'validation_loss': val_loss}, commit=False)

        f1_none = f1_score(y_test.squeeze(1).int(), y_pred, average=None)
        f1_none = {'f1_none/' + str(e): v for e,v in enumerate(f1_none)}
        wandb.log(f1_none, commit=False)

        f1_macro = f1_score(y_test.squeeze(1).int(), y_pred, average='macro')
        wandb.log({'f1_macro': f1_macro}, commit=False)
        
        val_accuracy = accuracy_score(y_test.squeeze(1).int(), y_pred)
        wandb.log({'train_accuracy': train_accuracy, 'val_accuracy': val_accuracy})

    if i % 10 == 0:
        print(f'Epoch {i}')
        print('-' * 40)
        print(f'Training loss: {train_loss}, validation loss: {val_loss}', end='\n\n')

Epoch 10
----------------------------------------
Training loss: 0.7008240222930908, validation loss: 0.6999808549880981

Epoch 20
----------------------------------------
Training loss: 0.6997541785240173, validation loss: 0.6990656852722168

Epoch 30
----------------------------------------
Training loss: 0.698097825050354, validation loss: 0.6974795460700989

Epoch 40
----------------------------------------
Training loss: 0.6955265998840332, validation loss: 0.6948810815811157

Epoch 50
----------------------------------------
Training loss: 0.6917133927345276, validation loss: 0.6909738779067993

Epoch 60
----------------------------------------
Training loss: 0.686138391494751, validation loss: 0.6852716207504272

Epoch 70
----------------------------------------
Training loss: 0.6774418354034424, validation loss: 0.6754773855209351

Epoch 80
----------------------------------------
Training loss: 0.6470667719841003, validation loss: 0.6417847871780396

Epoch 90
-----------------

In [24]:
torch.save(model.state_dict(), "models/model.pth")
wandb.save('runs/pima_run_2023-03-22')
wandb.finish()

0,1
f1_macro,▁▁▁▁▄▃▃▄▇▇▇▇▇▇▇▇▇███████████████████████
f1_none/0,▁▁▁▁▇▇▇▇████▇▇██████████████████████████
f1_none/1,▅▅▅▅▂▁▁▃▇▇▇▇▇▇▇▇▇███████████████████████
learning_rate,▁▁▂▂▃▄▅▆▇▇███████▇▇▇▇▆▆▅▅▅▄▄▃▃▃▂▂▂▂▁▁▁▁▁
train_accuracy,▁▁▁▁▅▅▅▅▆▇▇▇▇▇▇▇▇▇▇█████████████████████
training_loss,██████▇▆▄▄▃▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▅▅▅▅▇▇▇▇▇▇▇▇▇█▇█████████████████████
validation_loss,██████▇▅▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
f1_macro,0.83405
f1_none/0,0.875
f1_none/1,0.7931
learning_rate,0.0
train_accuracy,0.88436
training_loss,0.29379
val_accuracy,0.84416
validation_loss,0.42358


At the end of training, we can calculate the final testing accuracy.

In [25]:
predictions=[]

with torch.no_grad():
    for i, data in enumerate(X_test):
        y_pred = model(data)
        predictions.append((y_pred > 0.5).int().item())

score = accuracy_score(y_test, predictions)
score

0.8441558441558441