In [165]:
import numpy as np
import pandas as pd
import torch
from torch import nn
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from pathlib import Path

data = pd.read_csv("/home/kadenw/Workspace/lab/data/wisnconsin_breast_cancer/wdbc.data") # loads data
data.iloc[:, 1] = data.iloc[:, 1].map({'M' : 1, 'B' : 0}) # replaces malignant and benign to boolean values
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42) # split into training and testing data

# loads data into pytorch tensor for training
train_tensor_data = torch.from_numpy(train_data.iloc[:,2 : 32].values)
train_tensor_data = train_tensor_data.float()
train_tensor_results = torch.from_numpy(train_data.iloc[:, 1].values.astype(float))
train_tensor_results = train_tensor_results.float()
train_tensor_results = train_tensor_results.unsqueeze(dim = 1)

test_tensor_data = torch.from_numpy(test_data.iloc[:,2 : 32].values)
test_tensor_data = test_tensor_data.float()
test_tensor_results = torch.from_numpy(test_data.iloc[:, 1].values.astype(float))
test_tensor_results = test_tensor_results.float()
test_tensor_results = test_tensor_results.unsqueeze(dim = 1)

# neural network implementation
class breast_cancer_nn(nn.Module):
    def __init__(self):
        super().__init__()
        self.double()
        self.fc1 = nn.Linear(in_features = 30,
                          out_features = 10)
        self.fc2 = nn.Linear(in_features = 10,
                          out_features = 5)
        self.output = nn.Linear(in_features = 5,
                          out_features = 1)
        self.relu = nn.ReLU()
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = torch.sigmoid(self.output(x))
        return x

breast_cancer_nn = breast_cancer_nn()
loss_bce = nn.BCELoss()  
optimizer = torch.optim.Adam(breast_cancer_nn.parameters(), lr=0.001)

In [167]:
epochs = 2000
for epoch in range(epochs):
    breast_cancer_nn.train()
    pred = breast_cancer_nn(train_tensor_data)
    loss = loss_bce(pred, train_tensor_results)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    breast_cancer_nn.eval()
    with torch.inference_mode():
        test_pred = breast_cancer_nn(test_tensor_data)
        test_loss = loss_bce(test_pred, test_tensor_results)
        if epoch % 100 == 0:
            print(f"Epoch: {epoch} | Train loss: {loss} | Test loss: {test_loss}")
            # Convert tensors to numpy arrays for metric computation
            test_pred_np = ((test_pred > 0.5).float()).numpy()
            test_labels_np = test_tensor_results.numpy()

            # Calculate metrics
            accuracy = accuracy_score(test_labels_np, test_pred_np)
            precision = precision_score(test_labels_np, test_pred_np)
            recall = recall_score(test_labels_np, test_pred_np)
            f1 = f1_score(test_labels_np, test_pred_np)

            print(f'Test Accuracy: {accuracy:.4f}')
            print(f'Test Precision: {precision:.4f}')
            print(f'Test Recall: {recall:.4f}')
            print(f'Test F1 Score: {f1:.4f}')
            if test_loss < 0.2:
                break

Epoch: 0 | Train loss: 0.5983840823173523 | Test loss: 0.5602752566337585
Test Accuracy: 0.5263
Test Precision: 0.4565
Test Recall: 0.9130
Test F1 Score: 0.6087
Epoch: 100 | Train loss: 0.33586227893829346 | Test loss: 0.31355613470077515
Test Accuracy: 0.9123
Test Precision: 0.9737
Test Recall: 0.8043
Test F1 Score: 0.8810
Epoch: 200 | Train loss: 0.252197265625 | Test loss: 0.2512303292751312
Test Accuracy: 0.9123
Test Precision: 0.9500
Test Recall: 0.8261
Test F1 Score: 0.8837
Epoch: 300 | Train loss: 0.2179548591375351 | Test loss: 0.22647219896316528
Test Accuracy: 0.9123
Test Precision: 0.9500
Test Recall: 0.8261
Test F1 Score: 0.8837
Epoch: 400 | Train loss: 0.19605572521686554 | Test loss: 0.2086145430803299
Test Accuracy: 0.9211
Test Precision: 0.9512
Test Recall: 0.8478
Test F1 Score: 0.8966
Epoch: 500 | Train loss: 0.1783408671617508 | Test loss: 0.19401642680168152
Test Accuracy: 0.9211
Test Precision: 0.9512
Test Recall: 0.8478
Test F1 Score: 0.8966


In [169]:
breast_cancer_nn.state_dict()

OrderedDict([('fc1.weight',
              tensor([[ 0.0274,  0.1347,  0.1354,  0.0499, -0.1184, -0.0371, -0.0034,  0.1289,
                        0.0496,  0.0845, -0.0568,  0.1593,  0.1096, -0.0311,  0.0845,  0.1250,
                        0.0318, -0.0635,  0.0811,  0.1668,  0.1623,  0.1160,  0.0222, -0.1637,
                        0.0850, -0.1623,  0.0117,  0.0889,  0.1621, -0.0146],
                      [ 0.3296,  0.0187,  0.0461,  0.0505,  0.0314, -0.2509, -0.4381, -0.2413,
                        0.0773,  0.2708,  0.3204,  0.0334,  0.2285, -0.0678,  0.1725, -0.3738,
                       -0.4579, -0.2591,  0.2309, -0.0687,  0.3179,  0.0241,  0.1457,  0.0347,
                       -0.1004, -0.4787, -0.5576, -0.3667,  0.0646,  0.0383],
                      [ 0.1870, -0.1557,  0.1574,  0.1087,  0.1892, -0.3348, -0.3104, -0.4469,
                        0.2519,  0.1639,  0.2451, -0.0016,  0.1715, -0.1271,  0.0015, -0.2245,
                       -0.2504, -0.0195,  0.0518,  0.155

In [171]:
torch.save(obj=breast_cancer_nn.state_dict(), # only saving the state_dict() only saves the models learned parameters
           f="models/breast_cancer_nn.pth") 

RuntimeError: Parent directory models does not exist.