### Setup:

#### Model: ResNet-50, pretrained
#### Action: Finetune
#### Visualisation:  GradCAM
#### Dataset: Full, unbalanced
#### Uses Weighted Loss: True
#### Uses Oversampling: False
#### Attention Module: None

In [1]:
#imports
import os
import time
import torch
import torchvision
import numpy as np
import torch.nn as nn
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch import optim
from torch.optim import lr_scheduler
from torchvision import datasets, transforms, models
from torch.utils.tensorboard import SummaryWriter 
from torch.utils.data.sampler import SubsetRandomSampler
from helper_fns import *
from resnet_models import ResNet50_GradCam

from sklearn.metrics import roc_curve, roc_auc_score, auc, precision_recall_fscore_support, confusion_matrix, accuracy_score

# Turn on cuDNN benchmark mode so cuDNN can auto-select convolution algorithms.
# NOTE(review): this trades bit-exact reproducibility for speed — confirm that is acceptable.
torch.backends.cudnn.benchmark = True

In [2]:
# data preprocessing
data = '/workspace/loid/images/old_uk_not_uk/train/'

# Per-channel statistics handed to transforms.Normalize below.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

begin = time.time()

# Pipeline applied to every image read through `tr_dataset`:
# random 224px resized crop -> random horizontal flip -> tensor -> normalisation.
# NOTE(review): the train/val/test subsets are later carved out of this single
# dataset, so these random augmentations are applied to evaluation images too.
tr_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])
#test_transform = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize(mean, std)])

tr_dataset = datasets.ImageFolder(data, transform=tr_transform)

# Elapsed wall-clock time for building the dataset index.
end = time.time() - begin
class_names = tr_dataset.classes
class_names, f'loading complete in {end // 60:.0f}m {end % 60:.0f}s'

(['not_uk', 'uk'], 'loading complete in 0m 6s')

In [3]:
# Number of samples carrying label 0 and label 1 across the whole dataset.
all_targets = np.array(tr_dataset.targets)
tr_class_counts = tuple(np.sum(all_targets == label) for label in (0, 1))

print(tr_class_counts)

(978827, 30701)


In [4]:
# Split the dataset 75% / 15% / remainder into train / validation / test subsets.
# The split is driven by a seeded generator so that subset membership is identical
# after every kernel restart; with an unseeded split, a checkpoint reloaded in a
# later session could be evaluated on images it was trained on.
SPLIT_SEED = 42
train_count, val_count = int(0.75 * len(tr_dataset)), int(0.15 * len(tr_dataset))
# Whatever is left after the two rounded-down shares goes to the test subset.
test_count = len(tr_dataset) - (train_count + val_count)
train_set, val_set, test_set = torch.utils.data.random_split(
    tr_dataset,
    [train_count, val_count, test_count],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
dataset_sizes = len(train_set), len(val_set), len(test_set)
dataset_sizes

(757146, 151429, 100953)

In [5]:
#loaders
batchsize = 32
workers = 1
pinmemory = True

# Only the training loader is shuffled.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batchsize, num_workers=workers, shuffle=True, pin_memory=pinmemory)

# Evaluation loaders keep a fixed sample order: the class-weighted, mean-reduced
# cross-entropy used below is normalised per batch, so reshuffling would change the
# batch composition (and therefore the reported loss) from one epoch to the next,
# adding noise to the "validation loss decreased" checkpoint decision.
valid_loader = torch.utils.data.DataLoader(val_set, batch_size=batchsize, num_workers=workers, shuffle=False, pin_memory=pinmemory)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batchsize, num_workers=workers, shuffle=False, pin_memory=pinmemory)

In [6]:
def train(model, criterion_set, optimizer, epochs, scheduler, class_names, device = 'cpu', board_writer=None, save_folder = None):
    """Train `model`, running one training pass and one validation pass per epoch.

    Batches come from the notebook-level `train_loader` and `valid_loader`.
    After each phase the epoch metrics are handed to `log_metrics`, and whenever
    the validation loss is less than or equal to the best value seen so far the
    model's state dict is written to `<save_folder>/checkpoint.pt`.

    Args:
        model: network to optimise; it is moved to `device`.
        criterion_set: indexable pair of loss functions; element 0 is used for the
            training phase and element 1 for the validation phase.
        optimizer: optimizer stepped after every training batch.
        epochs: number of epochs to run.
        scheduler: learning-rate scheduler, stepped once per epoch after the
            training phase.
        class_names: sequence of class names; one PR curve is logged per entry.
        device: device on which the model and batches are placed.
        board_writer: optional TensorBoard writer. When given it receives PR curves
            and loss/accuracy scalars, and is flushed and closed before returning.
        save_folder: directory (as a string) that receives `train.csv`, `val.csv`
            and `checkpoint.pt`. Paths are built by string concatenation, so the
            default of None is not usable and a real path must be supplied.

    Returns:
        The model after the final epoch (not necessarily the checkpointed weights).
    """
    model = model.to(device)
    # `np.Inf` was removed in NumPy 2.0; `np.inf` is the supported spelling.
    valid_loss_min = np.inf
    start = time.time()
    for epoch in range(epochs):

        print(f'Epoch {epoch}/{epochs-1}')
        print('-'*15)

        for phase in ['train','val']:
            is_training = phase == 'train'

            running_loss = 0.0
            running_correct = 0

            # Per-batch pieces are collected in lists and concatenated once per
            # phase; growing an array batch-by-batch is quadratic in epoch size.
            prob_batches = []
            pred_batches = []
            target_batches = []

            if is_training:
                model.train()  # Set model to training mode
                loader = train_loader
                filename = save_folder+'/train.csv'
                criterion = criterion_set[0]
            else:
                model.eval()
                loader = valid_loader
                filename = save_folder+'/val.csv'
                criterion = criterion_set[1]
            # Take the size from the loader itself instead of a separate global.
            dataset_size = len(loader.dataset)

            batch_idx = 0
            begin_phase = time.time()
            for inputs, labels in loader:
                # Keep a float64 host copy of the targets (the dtype the metrics
                # previously received) before the labels are moved to the device.
                target_batches.append(labels.cpu().numpy().astype(np.float64))
                inputs, labels = inputs.to(device), labels.to(device)
                # zero the parameter gradients
                optimizer.zero_grad()

                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                # Class probabilities are only needed for logging: detach them from
                # the autograd graph and move them to the host so that an epoch's
                # worth of batches does not pin device memory or graph nodes.
                prob_batches.append(F.softmax(outputs.detach(), dim=1).cpu())
                pred_batches.append(preds.cpu())

                # The criterion returns a batch mean, so re-weight by batch size.
                running_loss += loss.item() * inputs.size(0)
                running_correct += torch.sum(preds == labels.data)

                end = time.time() - begin_phase
                batch_idx += 1
                if batch_idx%1000 == 0:
                    print(f'Phase: {phase} Batch {batch_idx} complete in {end // 60:.0f}m {end % 60:.0f}s')

            if is_training:
                scheduler.step()

            epoch_probs = torch.cat(prob_batches)
            epoch_preds = torch.cat(pred_batches)
            y_target = np.concatenate(target_batches) if target_batches else np.array([])

            # Guarded like the scalar logging further down: there is nothing to
            # draw PR curves on when no writer was supplied.
            # NOTE(review): the predictions (not `y_target`) are passed alongside the
            # probabilities here; if the helper expects ground-truth labels this
            # should be `y_target` — confirm against helper_fns.
            if board_writer is not None:
                for class_idx in range(len(class_names)):
                    add_pr_curve_tensorboard(board_writer, phase, class_idx, epoch_probs, epoch_preds, class_names, global_step=epoch)

            epoch_loss = running_loss / dataset_size
            epoch_accuracy = running_correct.double() / dataset_size

            # save model if validation loss has decreased
            if phase == 'val' and epoch_loss <= valid_loss_min:
                print('Validation loss decreased ({:.6f} --> {:.6f}).  Saved updated model.'.format(valid_loss_min, epoch_loss))
                torch.save(model.state_dict(), save_folder+'/checkpoint.pt')
                valid_loss_min = epoch_loss

            # In the validation phase the flag is True exactly when a checkpoint was
            # just written (valid_loss_min has already been updated above).
            log_metrics(filename, y_target, epoch_preds, epoch_loss, epoch_loss <= valid_loss_min, writer=board_writer, epoch=epoch)

            time_elapsed = time.time() - start

            print(f'{phase} Loss: {epoch_loss:.4f}; Accuracy: {epoch_accuracy:.4f}; Completed in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')

            if board_writer is not None:
                board_writer.add_scalar(f'{phase}/loss', epoch_loss, epoch)
                board_writer.add_scalar(f'{phase}/accuracy', epoch_accuracy, epoch)

        print()

    if board_writer is not None:
        board_writer.flush()
        board_writer.close()

    time_elapsed = time.time() - start
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')

    return model

In [7]:
#model settings
model_name = 'resnet50_finetuned_gradcam'
results_folder = './results/'
save_folder = results_folder + model_name
train_results = save_folder +'/train.csv'
val_results = save_folder +'/val.csv'
checkpoint_path = save_folder +'/checkpoint.pt'

logs = SummaryWriter(f'./logs/{model_name}')

# Create ./results/<model_name> in one call; existing directories are left alone.
os.makedirs(save_folder, exist_ok=True)

# Columns shared by both result files; the validation file additionally records
# whether a checkpoint was saved. The trailing '\n' element keeps the header
# text (including its trailing comma) exactly as it was written before.
metric_columns = ['loss', 'accuracy', 'tn', 'fp', 'fn', 'tp', 'precision_0', 'precision_1', 'recall_0', 'recall_1', 'f1_0', 'f1_1', 'count_0', 'count_1','auroc']
csv_headers = (
    (train_results, ",".join(metric_columns + ['\n'])),
    (val_results, ",".join(metric_columns + ['saved', '\n'])),
)

# Write each header only when its file is new or empty, so that re-running this
# cell does not append a second header row in the middle of existing results.
for results_path, header in csv_headers:
    if not os.path.isfile(results_path) or os.path.getsize(results_path) == 0:
        with open(results_path, 'a') as results_file:
            results_file.write(header)

In [8]:
#visualise batch

# Measure how long it takes to pull one batch out of the validation loader.
begin = time.time()
first_batch = next(iter(valid_loader))
end = time.time() - begin

# Make a grid from batch
inputs, classes = first_batch
img_out = torchvision.utils.make_grid(inputs)

#imshow(img_out, title=[class_names[x] for x in classes])

#add to tensorboard
#logs.add_image('Sample input image',img_out)

f'loading complete in {end // 60:.0f}m {end % 60:.0f}s'

'loading complete in 0m 3s'

In [9]:
#calculate loss weights
def _inverse_class_counts(subset):
    """Return a 2-element tensor holding 1/count of label 0 and label 1 in `subset`.

    `subset` must expose `.dataset.targets` and `.indices`; only the targets at
    the subset's own indices are counted.
    """
    subset_targets = np.array(subset.dataset.targets)[subset.indices]
    per_class = [np.sum(subset_targets == 0), np.sum(subset_targets == 1)]
    return 1.0/torch.Tensor(per_class)


tr_loss_weights = _inverse_class_counts(train_set)
valid_loss_weights = _inverse_class_counts(val_set)
test_loss_weights = _inverse_class_counts(test_set)
tr_loss_weights, valid_loss_weights, test_loss_weights

(tensor([1.3623e-06, 4.3350e-05]),
 tensor([6.8082e-06, 2.1988e-04]),
 tensor([1.0218e-05, 3.2415e-04]))

In [10]:
# Build the network with one output per class.
model = ResNet50_GradCam(
    num_classes=len(class_names),
    visualise=False,
    pretrained=True,
    finetune=True,
)

# Every parameter returned by model.parameters() is handed to the optimizer.
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 10 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

#set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# When True, each criterion is weighted by the inverse class counts of its own subset.
balance_loss = True

print("Using scaled loss function" if balance_loss else "Using unscaled loss function")
train_weight = tr_loss_weights.to(device) if balance_loss else None
valid_weight = valid_loss_weights.to(device) if balance_loss else None
train_criterion = nn.CrossEntropyLoss(weight=train_weight)
valid_criterion = nn.CrossEntropyLoss(weight=valid_weight)

# Order matters: index 0 is the training loss, index 1 the validation loss.
criterion_set = [train_criterion, valid_criterion]

'''
# Get a batch of data
time1 = time.time()
images, classes = next(iter(valid_loader))
model.to(device)
images = images.to(device)
out = model(images)
time2 = time.time() - time1


logs.add_graph(model, images)
logs.close()

print(f'{time2 // 60:.0f}m {time2 % 60:.0f}s')
'''

Using scaled loss function


"\n# Get a batch of data\ntime1 = time.time()\nimages, classes = next(iter(valid_loader))\nmodel.to(device)\nimages = images.to(device)\nout = model(images)\ntime2 = time.time() - time1\n\n\nlogs.add_graph(model, images)\nlogs.close()\n\nprint(f'{time2 // 60:.0f}m {time2 % 60:.0f}s')\n"

In [11]:
# Show which device was selected ("cuda" when available, otherwise "cpu").
print(device)

cuda


In [None]:
# Start the 90-epoch run; metrics go to TensorBoard via `logs` and to the CSV /
# checkpoint files under `save_folder`.
train(
    model,
    criterion_set,
    optimizer,
    epochs=90,
    scheduler=exp_lr_scheduler,
    class_names=class_names,
    device=device,
    board_writer=logs,
    save_folder=save_folder,
)

Epoch 0/89
---------------
Phase: train Batch 1000 complete in 5m 6s
Phase: train Batch 2000 complete in 10m 19s
Phase: train Batch 3000 complete in 15m 34s
Phase: train Batch 4000 complete in 20m 49s
Phase: train Batch 5000 complete in 26m 4s
Phase: train Batch 6000 complete in 31m 20s
Phase: train Batch 7000 complete in 36m 36s
Phase: train Batch 8000 complete in 41m 52s
Phase: train Batch 9000 complete in 47m 9s
Phase: train Batch 10000 complete in 52m 25s
Phase: train Batch 11000 complete in 57m 42s
Phase: train Batch 12000 complete in 62m 59s
Phase: train Batch 13000 complete in 68m 15s
Phase: train Batch 14000 complete in 73m 32s
Phase: train Batch 15000 complete in 78m 49s
Phase: train Batch 16000 complete in 84m 6s
Phase: train Batch 17000 complete in 89m 23s
Phase: train Batch 18000 complete in 94m 40s
Phase: train Batch 19000 complete in 99m 57s
Phase: train Batch 20000 complete in 105m 15s
Phase: train Batch 21000 complete in 110m 32s
Phase: train Batch 22000 complete in 115