In [1]:
import torch

import os

import numpy as np
import statistics

import torch.nn as nn
import torch.nn.functional as F

In [2]:
from preprocess import preprocess

The saved dataset was loaded, and a custom preprocessing function (preprocess.py) was applied to simplify the data loading process. The function takes the raw data, converts it into a Dataset object, and then splits it into separate DataLoader instances for train, test, and validation. The function can also apply additional transformations, but for this basic test it was used without any additional processing.

### Preprocessing

In [3]:
# Load the previously saved dataset object from disk.
# NOTE: weights_only=False allows torch.load to unpickle arbitrary Python
# objects, so this must only be used on files from a trusted source.
dataset = torch.load("dataset.pth", weights_only=False)

In [4]:
# Load the saved weights; they are later moved to the device and passed as the
# `weight` argument of nn.CrossEntropyLoss.
weights = torch.load("weights.pth")

The resampled data was split into train, test, and validation sets and wrapped in DataLoader instances.

In [5]:
# Build the three loaders with the project-local preprocess() helper.
# NOTE(review): 128 is presumably the batch size — confirm against preprocess.py.
trainloader,testloader,valloader = preprocess(dataset, 128)

In [6]:
# Pull a single batch from the training loader to experiment with layer shapes.
dataiter = iter(trainloader)
images, labels = next(dataiter)

In [7]:
# One image from the batch (index 5), used as the sample input when sizing layers.
tensor_example = images[5]

### Model

In the next step, the network architecture was copied from the PyTorch tutorial on training a classifier (https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#training-a-classifier) to keep the architecture relatively simple and use it as a baseline.

The layer dimensions were adjusted by testing the input and output shapes step by step while gradually adding layers from the example network. While it’s possible to calculate the dimensions of the convolutional layers manually, I find this approach simpler and less prone to errors.

In [8]:
# Stand-alone layers used to probe the feature-map shapes before building the model.
conv_1 = nn.Conv2d(1, 6, 2)  # 1 input channel -> 6 feature maps, 2x2 kernel
pool = nn.MaxPool2d(kernel_size=2, stride=2)
conv_2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=4)

In [9]:
# Push the sample through conv -> pool -> conv -> pool (no activations) and
# display the shape of the resulting feature map.
pool(conv_2(pool(conv_1(tensor_example)))).shape

torch.Size([16, 30, 30])

In [10]:
# Same pipeline, flattened to one dimension: its length is the number of input
# features the first fully connected layer has to accept.
torch.flatten(pool(conv_2(pool(conv_1(tensor_example))))).shape

torch.Size([14400])

In [11]:
# Sanity check: channels * height * width of the pooled feature map.
16*30*30

14400

In [12]:
# Dry run of the complete layer stack on a single (unbatched) sample, to confirm
# that every layer's input size matches the previous layer's output.
conv1 = nn.Conv2d(1, 6, 2)
pool = nn.MaxPool2d(kernel_size=2, stride=2)
conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=4)
fc1 = nn.Linear(in_features=16 * 30 * 30, out_features=256)
fc3 = nn.Linear(in_features=256, out_features=64)
fc4 = nn.Linear(in_features=64, out_features=4)

# Convolutional part: conv -> ReLU -> max-pool, twice.
x = pool(F.relu(conv1(tensor_example)))
x = pool(F.relu(conv2(x)))

# Flatten everything (there is no batch dimension here) and run the linear head.
x = torch.flatten(x)
x = fc4(F.relu(fc3(F.relu(fc1(x)))))

# Displayed as the cell output: one value per class.
x.shape

torch.Size([4])

In [13]:
class Net(nn.Module):
    """Small CNN baseline: two conv/ReLU/max-pool stages followed by three linear layers.

    The first linear layer expects 16*30*30 flattened features per sample and
    the last one produces 4 values per sample.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor; the pooling layer is shared by both stages.
        self.conv1 = nn.Conv2d(1, 6, 2)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=4)
        # Fully connected head: 14400 -> 256 -> 64 -> 4.
        self.fc1 = nn.Linear(in_features=16 * 30 * 30, out_features=256)
        self.fc3 = nn.Linear(in_features=256, out_features=64)
        self.fc4 = nn.Linear(in_features=64, out_features=4)

    def forward(self, x):
        """Run a batch ``x`` through the network and return the unnormalised class scores."""
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        # Keep the batch dimension, flatten everything else.
        flat = torch.flatten(features, start_dim=1)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc3(hidden))
        return self.fc4(hidden)


net = Net()

After the layer dimensions were calculated, a new model class was created and used for training. The optimizer and loss function from the tutorial were adopted. 

A custom training loop was implemented with an added early stopping mechanism to break training when the loss on the test dataset no longer improves.

In [14]:
# Resolve the compute device from the DEVICE environment variable.
# os.getenv returns None when the variable is unset, which torch.device rejects,
# so fall back to CUDA when it is available and to the CPU otherwise.
device_name = os.getenv("DEVICE") or ("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device(device_name)

# Move the model parameters to that device; the returned module is the cell output.
net.to(device)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(2, 2), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(4, 4), stride=(1, 1))
  (fc1): Linear(in_features=14400, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=4, bias=True)
)

### Training

In [15]:
import torch.optim as optim

# Cross-entropy loss with per-class weights (the tensor loaded from weights.pth),
# moved to the same device as the model.
criterion = nn.CrossEntropyLoss(weight=weights.to(device))
# SGD with momentum over all model parameters.
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

In [16]:
# Training loop: one optimisation pass over trainloader per epoch, followed by an
# evaluation pass over testloader. The weights with the lowest mean test loss are
# checkpointed to ./base_net.pth, and training stops early when the test loss
# stops improving.
best_result = np.inf    # lowest mean test loss seen so far
test_loss_array = []    # mean test loss of every finished epoch (Python floats)
n_epochs = 15
patience = 3            # number of preceding epochs the newest test loss is compared with

for epoch in range(n_epochs):

    # ---- training pass ----
    net.train()
    total_loss = 0
    for X_batch, y_batch in trainloader:

        X_batch = X_batch.to(device, dtype=torch.float32)
        y_batch = y_batch.to(device)

        optimizer.zero_grad()

        # The model lives on `device`, so its outputs are already there.
        outputs = net(X_batch)
        batch_loss = criterion(outputs, y_batch)

        batch_loss.backward()
        optimizer.step()

        total_loss += batch_loss.item()

    # ---- evaluation pass (no gradients) ----
    net.eval()
    test_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in testloader:

            X_batch = X_batch.to(device, dtype=torch.float32)
            y_batch = y_batch.to(device)

            outputs = net(X_batch)
            # .item() keeps plain floats in the history instead of device tensors.
            test_loss += criterion(outputs, y_batch).item()

    # Mean loss per batch for this epoch.
    loss = total_loss / len(trainloader)
    loss_test = test_loss / len(testloader)

    test_loss_array.append(loss_test)

    # Checkpoint only when the test loss improves on the best value so far;
    # best_result must be updated here, otherwise every epoch overwrites the file.
    if loss_test < best_result:
        best_result = loss_test
        torch.save(net.state_dict(), "./base_net.pth")

    print(f"Epoch: {epoch}, Train loss: {loss}, Test loss: {loss_test}")

    # Early stopping: once more than patience + 1 epochs are recorded, stop when
    # none of the `patience` preceding test losses is higher than the newest one.
    if len(test_loss_array) > patience + 1:
        previous_losses = test_loss_array[-patience - 1:-1]
        if not any(prev > test_loss_array[-1] for prev in previous_losses):
            break

Epoch: 0, Train loss: 1.3491527549922466, Test loss: 1.2519595623016357
Epoch: 1, Train loss: 1.11840146407485, Test loss: 1.025646448135376
Epoch: 2, Train loss: 0.9153427947312593, Test loss: 0.8508386015892029
Epoch: 3, Train loss: 0.7443269863724709, Test loss: 0.7712833881378174
Epoch: 4, Train loss: 0.6505684684962034, Test loss: 0.6088495254516602
Epoch: 5, Train loss: 0.5756431622430682, Test loss: 0.5529963970184326
Epoch: 6, Train loss: 0.5227594068273902, Test loss: 0.5518519878387451
Epoch: 7, Train loss: 0.496339812874794, Test loss: 0.5747254490852356
Epoch: 8, Train loss: 0.48608595319092274, Test loss: 0.4834904372692108
Epoch: 9, Train loss: 0.4240690218284726, Test loss: 0.4513201415538788
Epoch: 10, Train loss: 0.3842144990339875, Test loss: 0.43257296085357666
Epoch: 11, Train loss: 0.34341618325561285, Test loss: 0.4053606688976288
Epoch: 12, Train loss: 0.33919435925781727, Test loss: 0.42551738023757935
Epoch: 13, Train loss: 0.2704439852386713, Test loss: 0.3617

The best model was loaded from the saved path and used for prediction.

In [17]:
# Restore the checkpoint written during training.
# map_location places the tensors on the current device even if the file was
# saved from a different one; weights_only=True restricts unpickling to plain
# tensor data, which is all a state_dict contains.
net.load_state_dict(torch.load("base_net.pth", map_location=device, weights_only=True))

<All keys matched successfully>

In [18]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

In [19]:
# Evaluate the restored model on valloader.
# Per-batch scores are collected in the four metric lists; the flat `pred` and
# `actual` lists hold every prediction and label for the per-class analysis.
# NOTE: the metrics are computed per batch (macro-averaged over the classes
# present in that batch), so their mean only approximates the dataset-level score.
accuracy = []
f1_scr = []
precision = []
recall = []

pred = []
actual = []

net.eval()
with torch.no_grad():

    for X_batch, y_batch in valloader:
        # Same float32 cast as in the training loop.
        outputs_pred = net(X_batch.to(device, dtype=torch.float32))

        # Index of the highest score per sample, shape (batch, 1), moved to the CPU.
        topi = outputs_pred.topk(1).indices.cpu()
        batch_pred = topi.squeeze(-1)

        acc = accuracy_score(y_batch, batch_pred)
        f1 = f1_score(y_batch, batch_pred, average="macro")
        pr = precision_score(y_batch, batch_pred, average="macro")
        rec = recall_score(y_batch, batch_pred, average="macro")

        pred += batch_pred.tolist()
        actual += y_batch.tolist()

        accuracy.append(float(acc))
        f1_scr.append(float(f1))
        precision.append(float(pr))
        recall.append(float(rec))

### Results

The final result is quite good, especially since:
- a simple architecture was used with only minor changes
- the dataset is quite small

In [20]:
# Mean of the per-batch scores, printed in the order:
# accuracy, F1, precision, recall.
for batch_scores in (accuracy, f1_scr, precision, recall):
    print(statistics.mean(batch_scores))

0.8628615702479339
0.8556276517616439
0.8607067042189462
0.8606469326833457


In [21]:
import pandas as pd

In [32]:
# One row per evaluated sample: predicted class next to the true class.
results_df = pd.DataFrame({"pred":pred, "actual":actual})

In [33]:
# Flag correct predictions and add a constant column so that both can be summed per class.
results_df["correct"] = results_df["pred"].eq(results_df["actual"])
results_df["total"] = 1

In [34]:
# Per true class: number of correct predictions and number of samples.
# Uses the groupby .sum() reduction instead of passing the builtin `sum` to
# .agg(), which pandas >= 2.1 deprecates.
result = results_df.groupby("actual")[["correct", "total"]].sum().reset_index()

In [35]:
# Share of correctly predicted samples within each true class.
result["acc"] = result["correct"].div(result["total"])

In [36]:
# Display the per-class summary table (correct, total, acc).
result

Unnamed: 0,actual,correct,total,acc
0,0,325,336,0.967262
1,1,294,332,0.885542
2,2,215,313,0.686901
3,3,333,365,0.912329


The group most difficult to identify was class 2 (meningioma), which was often mistaken for glioma or even for a negative sample. Glioma cases, on the other hand, were sometimes mistaken for a different type of tumor but never for a negative sample, which means this pathology was always spotted in the validation dataset.

In [43]:
# Distribution of predicted classes for samples whose true class is 0.
results_df.loc[results_df["actual"] == 0, "pred"].value_counts()

pred
0    325
2      6
3      3
1      2
Name: count, dtype: int64

In [44]:
# Distribution of predicted classes for samples whose true class is 1.
results_df.loc[results_df["actual"] == 1, "pred"].value_counts()

pred
1    294
2     35
3      3
Name: count, dtype: int64

In [46]:
# Distribution of predicted classes for samples whose true class is 2.
results_df.loc[results_df["actual"] == 2, "pred"].value_counts()

pred
2    215
1     48
0     33
3     17
Name: count, dtype: int64

In [47]:
# Distribution of predicted classes for samples whose true class is 3.
results_df.loc[results_df["actual"] == 3, "pred"].value_counts()

pred
3    333
2     20
1      9
0      3
Name: count, dtype: int64