In [None]:
from __future__ import print_function, division

import copy
import glob
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
#import torchvision.transforms
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader,Dataset
from torch.utils.data import random_split, RandomSampler
from torchvision import datasets, models, transforms


In [None]:
# PIL images and tensors have separate transform implementations. Everything
# after ToTensor() below runs on tensors, so only tensor-compatible transforms
# may be added here.
def get_train_transform():
    """Build the augmentation pipeline applied to training images.

    Returns:
        torchvision.transforms.Compose: converts the input to a tensor, resizes
        it to 256x256, applies blur / autocontrast / flip augmentation and
        finally normalizes each channel.
    """
    return torchvision.transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((256, 256)),
        transforms.GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 5)),
        # Autocontrast has to see the un-normalized [0, 1] tensor: on float
        # tensors it stretches every channel back to the [0, 1] range, so running
        # it after Normalize would undo the normalization for the samples it
        # randomly selects.
        # (Author's note: autocontrast may not be a good choice for this dataset;
        # RandomRotation, RandomInvert and RandomAdjustSharpness were tried and
        # left out.)
        transforms.RandomAutocontrast(),
        transforms.RandomVerticalFlip(),
        transforms.RandomHorizontalFlip(),
        # Normalize last so every sample reaches the model with the same
        # per-channel statistics.
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

def get_test_transform():
    """Build the deterministic preprocessing pipeline for evaluation images.

    Returns:
        torchvision.transforms.Compose: converts the input to a tensor, resizes
        it to 256x256 and normalizes each channel. No random augmentation is
        applied.
    """
    return torchvision.transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((256, 256)),
        # Use the same mean/std as get_train_transform(); without this the model
        # is evaluated on inputs with a different value range than it was
        # trained on.
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

In [None]:
# Dataset of 512x512 px images, one sub-directory per class. Every class is
# split 70:30 into train/test with a fixed seed.

data_dir = './dataset/OPM/OPM_512 _original'

data_list = {}

# os.listdir() returns entries in arbitrary order. Sorting keeps the seeded
# split and the class order of data_list identical across machines and runs.
for f in sorted(os.listdir(data_dir)):
    class_dir = os.path.join(data_dir, f)
    if not os.path.isdir(class_dir):
        # Stray files next to the class folders are not classes.
        continue
    file_names = [name for name in sorted(os.listdir(class_dir))
                  if os.path.isfile(os.path.join(class_dir, name))]
    data_list[f] = {
        'img': [f'{data_dir}/{f}/{files}' for files in file_names],
        'label': [f] * len(file_names),
    }

# Split class by class so that each class keeps the 70:30 ratio.
X_train, X_test, y_train, y_test = [], [], [], []
for key in data_list.keys():
    X_tr, X_ts, y_tr, y_ts = train_test_split(data_list[key]['img'], data_list[key]['label'],
                                              test_size=0.30, random_state=102)
    X_train.append(X_tr)
    X_test.append(X_ts)
    y_train.append(y_tr)
    y_test.append(y_ts)

# Flatten the per-class lists; paths and labels stay aligned because both are
# flattened in the same class order.
train_data = [x for part in X_train for x in part]
print(f'train data: {len(train_data)}')

test_data = [x for part in X_test for x in part]
print(f'test data: {len(test_data)}')

train_label = [x for part in y_train for x in part]
print(f'trian_label: {len(train_label)}')

test_label = [x for part in y_test for x in part]
print(f'test_label: {len(test_label)}')

In [None]:
# Unique class names; np.unique returns them sorted, so the mapping below is
# deterministic.
classes = np.unique(train_label)

# Map every class name to its integer index. Built once: the previous outer
# loop rebuilt the same dict once per class and left the name undefined when
# there were no classes.
class_to_int = {name: index for index, name in enumerate(classes)}
class_to_int

In [None]:
#data loader
class CancerDataset(Dataset):
    """Map-style dataset that loads images from file paths.

    Args:
        data_set: sequence of image file paths.
        class_labels: sequence of class names, aligned index-by-index with
            ``data_set``.
        class_to_int: mapping from class name to integer class index.
        transforms: optional callable applied to the loaded PIL image.

    Raises:
        ValueError: if ``data_set`` and ``class_labels`` differ in length.
    """

    def __init__(self, data_set, class_labels, class_to_int, transforms = None):
        super().__init__()
        if len(data_set) != len(class_labels):
            raise ValueError(
                'data_set and class_labels must have the same length, got '
                f'{len(data_set)} and {len(class_labels)}'
            )
        self.data_set = data_set
        self.class_to_int = class_to_int
        self.class_labels = class_labels
        self.transforms = transforms

    def __getitem__(self, index):
        """Return ``(image, label_index)`` for the sample at ``index``."""
        # Open inside a context manager so the file handle is released once the
        # pixels are decoded (Image.open is lazy and otherwise keeps the file
        # open). convert('RGB') forces the load and guarantees three channels,
        # which the 3-channel normalization and the ResNet input layer require.
        with Image.open(self.data_set[index]) as img:
            image = img.convert('RGB')

        label = self.class_to_int[self.class_labels[index]]

        # Apply the optional transform pipeline to the image only.
        if self.transforms:
            image = self.transforms(image)

        return image, label

    def __len__(self):
        """Number of samples; samplers and DataLoader rely on this."""
        return len(self.data_set)

In [None]:
# Wrap each split in a CancerDataset, each with its own transform pipeline.
train_dataset = CancerDataset(
    data_set=train_data,
    class_labels=train_label,
    class_to_int=class_to_int,
    transforms=get_train_transform(),
)
test_dataset = CancerDataset(
    data_set=test_data,
    class_labels=test_label,
    class_to_int=class_to_int,
    transforms=get_test_transform(),
)

In [None]:
# Samplers that pick the sample indices of each split in random order.
train_random_sampler = RandomSampler(train_dataset)
test_random_sampler = RandomSampler(test_dataset)

# Batch loaders (8 samples per batch, loading in the main process).
train_data_loader = DataLoader(
    train_dataset,
    batch_size=8,
    sampler=train_random_sampler,
    num_workers=0,
)
test_data_loader = DataLoader(
    test_dataset,
    batch_size=8,
    sampler=test_random_sampler,
    num_workers=0,
)

In [None]:
def get_default_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)
    
def to_device(data, device):
    """Move a tensor, or every tensor in a nested list/tuple, to ``device``.

    Lists and tuples are processed recursively and always come back as lists.
    Anything else is moved with ``.to(device, non_blocking=True)``.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)

    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

class DeviceDataLoader():
    """Iterable wrapper that moves every batch of a dataloader to a device."""

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __iter__(self):
        """Yield each batch of the wrapped loader after moving it to the device."""
        yield from (to_device(batch, self.device) for batch in self.dl)

    def __len__(self):
        """Number of batches in the wrapped loader."""
        batch_count = len(self.dl)
        return batch_count

In [None]:
# Select the compute device once; later cells move the model and the batches onto it.
device = get_default_device()

In [None]:
# Wrap the loaders so that every batch is moved to `device` while iterating
# (CUDA when available, otherwise CPU).
# NOTE(review): the `dataloaders` dict defined in the following cell uses the
# unwrapped loaders, so train_dl / valid_dl look unused - confirm they are still needed.
train_dl = DeviceDataLoader(train_data_loader, device)
valid_dl = DeviceDataLoader(test_data_loader, device)

In [None]:
# Lookup tables read as module-level globals by train_model / visualize_model:
#   dataloaders   - phase name -> DataLoader (unwrapped; the consumer moves batches to the device)
#   dataset_sizes - phase name -> number of samples, used to average loss/accuracy per epoch
dataloaders = {'train':train_data_loader, 'test':test_data_loader}
dataset_sizes = {'train':len(train_data), 'test':len(test_data)}

In [None]:
from tqdm import tqdm
def train_model(model, criterion, optimizer, scheduler, grad_clip = None, num_epochs=25,
                loaders=None, sizes=None, run_device=None):
    """Train ``model`` and keep the weights with the best test accuracy.

    Every epoch runs a 'train' phase followed by a 'test' phase.

    Args:
        model: the network to optimize (modified in place).
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer bound to the model parameters.
        scheduler: learning-rate scheduler, stepped once per epoch after the
            training phase.
        grad_clip: if truthy, gradients are clipped element-wise to
            ``[-grad_clip, grad_clip]`` before every optimizer step.
        num_epochs: number of epochs to run.
        loaders: optional mapping with 'train' and 'test' iterables of
            ``(inputs, labels)`` batches. Defaults to the module-level
            ``dataloaders``.
        sizes: optional mapping with the sample count of each phase. Defaults to
            the module-level ``dataset_sizes``.
        run_device: optional device the batches are moved to. Defaults to the
            module-level ``device``.

    Returns:
        tuple: ``(model, result)``. ``model`` is the object passed in, loaded
        with the best weights seen. ``result`` maps 'train_acc', 'train_loss',
        'valid_acc' and 'valid_loss' to lists of Python floats, one per epoch.
    """
    # Fall back to the notebook-level globals only when nothing was passed in.
    loaders = dataloaders if loaders is None else loaders
    sizes = dataset_sizes if sizes is None else sizes
    run_device = device if run_device is None else run_device

    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    result = {'train_acc':[], 'train_loss':[], 'valid_acc':[], 'valid_loss':[]}

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and a validation phase.
        for phase in ['train', 'test']:
            is_train = phase == 'train'
            model.train(is_train)  # train mode for 'train', eval mode for 'test'

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in loaders[phase]:
                inputs = inputs.to(run_device)
                labels = labels.to(run_device)

                if is_train:
                    optimizer.zero_grad()

                # Track gradients only while training; the evaluation phase
                # does not need an autograd graph.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds_indc = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        # Clip between backward() and step(): clipping after the
                        # step (or after zero_grad) has no effect on the update.
                        if grad_clip:
                            nn.utils.clip_grad_value_(model.parameters(), grad_clip)
                        optimizer.step()

                # criterion returns the batch mean, so weight it by batch size.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds_indc == labels).sum().item()

            if is_train:
                scheduler.step()
                # get_last_lr() is the supported way to read the current rate.
                print(f'last lr: {scheduler.get_last_lr()}')

            epoch_loss = running_loss / sizes[phase]
            epoch_acc = running_corrects / sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if is_train:
                result['train_acc'].append(epoch_acc)
                result['train_loss'].append(epoch_loss)
            else:
                result['valid_acc'].append(epoch_acc)
                result['valid_loss'].append(epoch_loss)

            # Remember the weights of the best epoch on the test split.
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # Load the best model weights before handing the model back.
    model.load_state_dict(best_model_wts)
    return model, result

In [None]:
# Visualize model predictions on test images

def visualize_model(model, num_images=6, class_names=None):
    """Plot up to ``num_images`` test images with predicted and actual class.

    Batches come from the module-level ``dataloaders['test']`` and are moved to
    the module-level ``device``. The model's training/eval mode is restored
    before returning, including when an exception is raised.

    Args:
        model: classifier returning one score per class.
        num_images: maximum number of images to draw (two per row).
        class_names: optional sequence mapping class index to display name.
            Defaults to the module-level ``classes``.
    """
    if num_images <= 0:
        return
    if class_names is None:
        class_names = classes

    was_training = model.training
    model.eval()
    images_so_far = 0
    n_rows = (num_images + 1) // 2  # round up so an odd count still fits the grid
    fig = plt.figure()

    try:
        with torch.no_grad():
            for inputs, labels in dataloaders['test']:
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

                for j in range(inputs.size(0)):
                    images_so_far += 1
                    ax = fig.add_subplot(n_rows, 2, images_so_far)
                    ax.axis('off')
                    ax.set_title('predicted: {} / actual: {}'.format(
                        class_names[preds[j].item()], class_names[labels[j].item()]))

                    # Channels-last copy on the CPU, rescaled to [0, 1] so the
                    # image is displayable whether or not it was normalized.
                    picture = inputs[j].detach().cpu().permute(1, 2, 0)
                    low, high = picture.min(), picture.max()
                    if high > low:
                        picture = (picture - low) / (high - low)
                    else:
                        picture = torch.zeros_like(picture)
                    ax.imshow(picture.numpy())

                    if images_so_far == num_images:
                        return
    finally:
        model.train(mode=was_training)

In [None]:
# Start from a pretrained ResNet-50 backbone.
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in favour of
# the `weights=` argument - confirm against the installed version.
model_ft = models.resnet50(pretrained=True)

In [None]:
# Input width of the backbone's existing `fc` layer (read before fc is replaced below).
num_ftrs = model_ft.fc.in_features
print(num_ftrs)

In [None]:
# Freeze pass: switch gradients off for the first two child modules.
ct = 0
for ct, child in enumerate(model_ft.children(), start=1):
    if ct < 3:
        child.requires_grad_(False)

# Training with no frozen layers: switch gradients back on for every child,
# which overrides the freeze pass above.
for child in model_ft.children():
    child.requires_grad_(True)

In [None]:
# Replace the backbone's classifier with a two-layer head sized for this dataset.
# The input width comes from `num_ftrs` (read from the original fc layer) rather
# than a hard-coded 2048, so the head still fits if the backbone is swapped.
# NOTE(review): there is no activation between the two Linear layers, so apart
# from dropout the head is equivalent to a single linear map - confirm this is intended.
model_ft.fc = nn.Sequential(
    nn.Linear(num_ftrs, 512),
    nn.Dropout(p=0.5),
    nn.Linear(512, len(classes))
)

model_ft = model_ft.to(device)

In [None]:
## Calculating a weight for each class (inverse class frequency, normalized to sum to 1)
# Iterate in `classes` order so that position i of the weight vector belongs to
# class index i of class_to_int; data_list's own key order follows the directory
# listing and is not guaranteed to match.
samples = torch.tensor([len(data_list[str(name)]['img']) for name in classes])
sam_weights = samples/torch.sum(samples)   # share of each class in the dataset
inv_samples = 1/sam_weights                # rarer classes get larger values
loss_weight = inv_samples/torch.sum(inv_samples)

In [None]:
# Move the class weights to the compute device and display them.
loss_weight = loss_weight.to(device)
loss_weight

# Earlier hard-coded alternative (five weights):
#loss_weight = torch.tensor([0.1009, 0.1589, 0.2593, 0.2776, 0.2033]).to(device)

In [None]:
# Train and evaluate (first stage)
# Hard-coded per-class loss weights.
# NOTE(review): `wts` has two entries while the model head has len(classes)
# outputs and the confusion-matrix cell assumes four labels - confirm the class count.
wts = torch.tensor([0.8, 0.2]).to(device)

criterion = nn.CrossEntropyLoss(weight=wts)  # alternative: weight=loss_weight
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.5, weight_decay= 0.001, nesterov=True)
# Passed to train_model, which uses it for nn.utils.clip_grad_value_.
grad_clip = 0.01

# Decay LR by a factor of 0.9 (gamma) every 7 scheduler steps; train_model steps
# the scheduler once per epoch.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.9)
#exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=5, gamma=0.5)
num_epochs=150
model_ft, result = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       grad_clip=grad_clip, num_epochs=num_epochs)

In [None]:
# Save the whole model object (not only its state_dict) after the first training stage.
# NOTE(review): this runs before the second training stage below, so the file
# loaded later for evaluation does not contain the second-stage weights - confirm intended.
torch.save(model_ft, 'model_name.pth')

In [None]:
# Second stage: continue training the same model with a much smaller learning
# rate (no weight decay / Nesterov momentum this time).
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.00001, momentum=0.5)
grad_clip = 0.01

# Decay LR by a factor of 0.9 (gamma) every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.9)
#exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=5, gamma=0.5)
num_epochs=10
# train_model returns the model object it was given, so model_ft2 and model_ft
# refer to the same network.
model_ft2, result2 = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       grad_clip=grad_clip, num_epochs=num_epochs)

In [None]:
# Turn the first run's per-epoch history into tensors, then echo every series.
tr_acc, ts_acc, tr_ls, ts_ls = (
    torch.tensor(result[key]).detach()
    for key in ('train_acc', 'valid_acc', 'train_loss', 'valid_loss')
)

print(f'trainacc: {tr_acc}')
print(f'tsacc: {ts_acc}')
print(f'trainls: {tr_ls}')
print(f'tsls: {ts_ls}')

In [None]:
# Overlay the first run's accuracy and loss curves on a single set of axes.
for curve, curve_label in (
    (tr_acc, 'trainacc'),
    (ts_acc, 'test acc'),
    (tr_ls, 'train loss'),
    (ts_ls, 'test loss'),
):
    plt.plot(curve, label=curve_label)
plt.legend()

In [None]:
# Same conversion for the second (fine-tuning) run.
tr_acc2, ts_acc2, tr_ls2, ts_ls2 = (
    torch.tensor(result2[key]).detach()
    for key in ('train_acc', 'valid_acc', 'train_loss', 'valid_loss')
)

print(f'trainacc: {tr_acc2}')
print(f'tsacc: {ts_acc2}')
print(f'trainls: {tr_ls2}')
print(f'tsls: {ts_ls2}')

In [None]:
# Concatenate both runs into one continuous per-epoch history
# (first run followed by the second run).
tot_tr_acc = [*tr_acc, *tr_acc2]
tot_ts_acc = [*ts_acc, *ts_acc2]
tot_ts_ls = [*ts_ls, *ts_ls2]
tot_tr_ls = [*tr_ls, *tr_ls2]

In [None]:
import matplotlib.pyplot as plt
# Accuracy and loss over both training runs. Labels and a legend are added so
# the four curves can be told apart, matching the earlier per-run plot.
plt.plot(tot_tr_acc, label='trainacc')
plt.plot(tot_ts_acc, label='test acc')
plt.plot(tot_tr_ls, label='train loss')
plt.plot(tot_ts_ls, label='test loss')
plt.xlabel('epoch')
plt.legend()
plt.grid()

In [None]:
# Reload the saved model onto the active device (so a checkpoint written on a
# GPU also loads on a CPU-only machine) and put it explicitly in inference mode.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# which rejects fully pickled modules - pass weights_only=False there if this
# file is trusted.
model = torch.load('./model_name.pth', map_location=device).eval()

In [None]:
## Calculating the F1 score for the validation dataset

def predict_image2(img, model, positive_class='cancer'):
    """Classify a single image tensor.

    Args:
        img: image tensor without a batch dimension.
        model: classifier returning one score per class.
        positive_class: class name (a key of the module-level ``class_to_int``)
            whose softmax probability is reported.

    Returns:
        tuple: ``(probability, preds_indx)``. ``probability`` is the softmax
        probability of ``positive_class`` formatted as a string with four
        decimals. ``preds_indx`` is a one-element tensor holding the predicted
        class index.
    """
    # Convert to a batch of 1 on the active device.
    xb = to_device(img.unsqueeze(0), get_default_device())

    # One forward pass without gradient tracking; the probability and the
    # predicted index are both derived from the same scores.
    with torch.no_grad():
        outputs = model(xb)

    pred_sf = outputs.softmax(dim=1)
    probability = format(pred_sf[0][class_to_int[positive_class]].item(), '.4f')
    pred_values, preds_indx = torch.max(outputs, 1)

    return probability, preds_indx

In [None]:
# Collect the true and predicted class indices for the whole test split.
y_true, y_pred = [], []


for i in range(len(test_dataset)):
    # Index the dataset once per sample: every lookup re-opens and re-transforms the image.
    image, label = test_dataset[i]
    prediction, pred_indx = predict_image2(image, model)

    y_true.append(label)
    y_pred.append(pred_indx.item())

In [None]:
from sklearn.metrics import precision_recall_fscore_support


# Precision / recall / F-score over the test predictions, averaged across
# classes with average='weighted'.
precision_recall_fscore_support(y_true, y_pred, average='weighted')

In [None]:
# Same metrics without averaging (average=None): one value per class.
precision_recall_fscore_support(y_true, y_pred, average=None)

In [None]:
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

# Per-class scores (average=None returns one value per class).
print('f1_score: {}'.format(f1_score(y_true, y_pred, average=None)))
print('precision_score: {}'.format(precision_score(y_true, y_pred, average=None)))
print('recall_score: {}'.format(recall_score(y_true, y_pred, average=None)))
# normalize=False reports the number of correctly classified samples rather than a fraction.
print('accuracy score: {}'.format(accuracy_score(y_true, y_pred, normalize=False)))

In [None]:
from sklearn.metrics import confusion_matrix

# Size the matrix from the actual class list instead of assuming four classes.
n_classes = len(classes)
class_ids = list(range(n_classes))
clr_pl = confusion_matrix(y_true, y_pred, labels=class_ids)

fig, ax = plt.subplots()
clmap = ax.imshow(clr_pl, cmap='viridis')
# Write each count into its cell (rows = true class, columns = predicted class).
for i in class_ids:
    for j in class_ids:
        text = ax.text(j, i, clr_pl[i, j], ha='center', va='center', color='w')
ax.set_xticks(class_ids)
ax.set_yticks(class_ids)
ax.set_xticklabels(classes)
ax.set_yticklabels(classes)
ax.set_xlabel('predicted')
ax.set_ylabel('true')
plt.colorbar(clmap)