In [5]:
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, transforms
from PIL import Image
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from torch.utils.data import Dataset
from pathlib import Path
from my_net import Net

In [42]:
database_path = r'C:\Users\lzuni\Documents\Tesis\database'

# Dataset reader

In [None]:
class PatchesDataset(Dataset):
    def __init__(
        self, 
        csv_path: str | Path,
        image_folder: str | Path,
        
        
    ) -> None:
        
        if not isinstance(image_folder, Path):
            image_folder = Path(image_folder)

        self._transforms = transforms.Compose([
            transforms.ToTensor(),
            transforms.Lambda(lambda x: self.normalize_ct(x)),  
            transforms.Resize((61, 61)),
        ])
        self.labels_from_file =  list(pd.read_csv(csv_path)["Label"])
        self.labels = torch.zeros((len(self.labels_from_file), max(self.labels_from_file)))
        for i, l in enumerate(self.labels_from_file):
            self.labels[i, l-1] = 1

        
        images = [Image.open(image_folder / f) for f in os.listdir(image_folder) if ".tiff" in f]
        self.data = [self._transforms(i) for i in images]
        
    def normalize_ct(self,image, min_val=-1000, max_val=100):
        image = torch.clamp(image, min_val, max_val)
        return (image - min_val) / (max_val - min_val)

    def __len__(self) -> int:
        return len(self.data)

    def __getitem__(self, idx: int) -> tuple[torch.Tensor,torch.Tensor]:
        return self.data[idx], self.labels[idx]

# Funciones

In [256]:
def get_labels_str(labels):
    str_labels = []

    for l in labels:
        if torch.equal(l, torch.tensor([1., 0., 0.])):
            str_labels.append('NT')
        elif torch.equal(l, torch.tensor([0., 1., 0.])):
            str_labels.append('CLE')
        elif torch.equal(l, torch.tensor([0., 0., 1.])):
            str_labels.append('PSE')

    return str_labels


def imshow(images, labels, label_names):

    fig, axes = plt.subplots(1, len(images), figsize=(3*len(images)+1, 4))
    axes = np.atleast_1d(axes)
    for idx, (img, label) in enumerate(zip(images, labels)):
        npimg = np.array(img)
        axes[idx].imshow(np.transpose(npimg, (1, 2, 0)), aspect='equal', cmap='gray')
        axes[idx].set_title(label_names[idx])
        axes[idx].axis('off')
    plt.tight_layout()
    plt.show()

def dataset_reader(csv_path, folder_path, dataset_type):

    dataset = PatchesDataset(csv_path, folder_path)
    ratio = 0.8
    indices = []
    aux = []

    for label in range(1,4):
        
        aux = [i for i,j in enumerate(dataset.labels_from_file) if j == label]
        
        if dataset_type == 'train':
            indices += aux[:int(len(aux)*ratio)]
            print(aux[:int(len(aux)*ratio)])
        elif dataset_type == 'test':
            indices += aux[int(len(aux)*ratio):]
            print(aux[int(len(aux)*ratio):])


    loader = torch.utils.data.DataLoader(torch.utils.data.Subset(dataset, indices), batch_size=len(indices), shuffle=True)
    dataiter = iter(loader)
    images, labels = next(dataiter)
    
    #imshow(images, labels, get_labels_str(labels))
    print(f'{dataset_type.capitalize()} dataset count: ', len(indices))
    return dataset, loader

def normalize_ct(image, min_val=-1000, max_val=100):
    image = torch.clamp(image, min_val, max_val)
    return (image - min_val) / (max_val - min_val)


# Train and test dataset

In [257]:
database_path = r'C:\Users\lzuni\Documents\Tesis\database\patches'
patch_labels_path = r'C:\Users\lzuni\Documents\Tesis\database\patch_labels.csv'

train_dataset, train_loader = dataset_reader(patch_labels_path, database_path, 'train')
test_dataset, test_loader = dataset_reader(patch_labels_path, database_path, 'test')

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46]
[59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98]
[109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155]
Train dataset count:  134
[47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58]
[99, 100, 101, 102, 103, 104, 105, 106, 107, 108]
[156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167]
Test dataset count:  34


# Training the net

In [None]:
net = Net(61)

criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

for epoch in range(100):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_loader):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 34 == 33 and epoch%5==4:   # print every 40 images
            print(f'[Epoch: {epoch + 1}] [Analized images: {(i+1)*4}] [Loss: {running_loss/34 :.3f}]')
            running_loss = 0.0

print('Finished training')

[Epoch: 5] [Analized images: 136] [Loss: 0.900]
[Epoch: 10] [Analized images: 136] [Loss: 0.588]
[Epoch: 15] [Analized images: 136] [Loss: 0.151]
[Epoch: 20] [Analized images: 136] [Loss: 0.022]
[Epoch: 25] [Analized images: 136] [Loss: 0.004]
[Epoch: 30] [Analized images: 136] [Loss: 0.001]
[Epoch: 35] [Analized images: 136] [Loss: 0.000]
[Epoch: 40] [Analized images: 136] [Loss: 0.000]
[Epoch: 45] [Analized images: 136] [Loss: 0.000]
[Epoch: 50] [Analized images: 136] [Loss: 0.000]
[Epoch: 55] [Analized images: 136] [Loss: 0.000]
[Epoch: 60] [Analized images: 136] [Loss: 0.000]
[Epoch: 65] [Analized images: 136] [Loss: 0.000]
[Epoch: 70] [Analized images: 136] [Loss: 0.000]
[Epoch: 75] [Analized images: 136] [Loss: 0.000]
[Epoch: 80] [Analized images: 136] [Loss: 0.000]
[Epoch: 85] [Analized images: 136] [Loss: 0.000]
[Epoch: 90] [Analized images: 136] [Loss: 0.000]
[Epoch: 95] [Analized images: 136] [Loss: 0.000]
[Epoch: 100] [Analized images: 136] [Loss: 0.000]
Finished training


In [258]:

test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=34, shuffle=True)
dataiter = iter(test_loader)
images, labels = next(dataiter)
#imshow(images, labels, label_names)

outputs = net(images)
prob_outputs = torch.nn.functional.softmax(outputs, dim=1)
predicted = torch.argmax(prob_outputs, dim=1)
label_names = get_labels_str(labels)

accuracy_count = 0
nt_count = 0
cle_count = 0
pse_count = 0

for idx, p in enumerate(predicted):
    
    
    if p == 0:
        lab = 'NT'
        nt_count += 1
    if p == 1:
        lab = 'CLE'
        cle_count += 1
    if p == 2:
        lab = 'PSE'
        pse_count += 1
    
    if label_names[idx] == lab:
        accuracy_count += 1
    if label_names[idx] != lab:
        print('Mismatch: ground truth = ', label_names[idx],', predicted ', lab)

print('Accuracy: ', f'{accuracy_count*100/len(predicted):.3f}' , '%')
print('NT predictions: ', nt_count)
print('CLE predictions: ', cle_count)
print('PSE predictions: ', pse_count)


Mismatch: ground truth =  NT , predicted  PSE
Mismatch: ground truth =  CLE , predicted  NT
Mismatch: ground truth =  NT , predicted  CLE
Mismatch: ground truth =  CLE , predicted  PSE
Mismatch: ground truth =  CLE , predicted  NT
Accuracy:  85.294 %
NT predictions:  11
CLE predictions:  10
PSE predictions:  13
