In [1]:
#imports
import os
import time
import torch
import torchvision
import numpy as np
import torch.nn as nn
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch import optim
from torch.optim import lr_scheduler
from torchvision import datasets, transforms, models
from torch.utils.tensorboard import SummaryWriter 
from torch.utils.data.sampler import SubsetRandomSampler

from sklearn.metrics import roc_curve, roc_auc_score, auc, precision_recall_fscore_support, confusion_matrix, accuracy_score

# Let cuDNN benchmark its convolution algorithms and reuse the fastest one;
# this pays off when batches keep the same input shape (the transforms below crop to 224).
torch.backends.cudnn.benchmark = True

In [2]:
!pip install tensorboard

Collecting tensorboard
  Downloading tensorboard-2.4.1-py3-none-any.whl (10.6 MB)
[K     |████████████████████████████████| 10.6 MB 11.3 MB/s eta 0:00:01
[?25hCollecting google-auth<2,>=1.6.3
  Downloading google_auth-1.24.0-py2.py3-none-any.whl (114 kB)
[K     |████████████████████████████████| 114 kB 11.2 MB/s eta 0:00:01
[?25hCollecting werkzeug>=0.11.15
  Downloading Werkzeug-1.0.1-py2.py3-none-any.whl (298 kB)
[K     |████████████████████████████████| 298 kB 11.6 MB/s eta 0:00:01
Collecting protobuf>=3.6.0
  Downloading protobuf-3.14.0-cp38-cp38-manylinux1_x86_64.whl (1.0 MB)
[K     |████████████████████████████████| 1.0 MB 10.9 MB/s eta 0:00:01
[?25hCollecting tensorboard-plugin-wit>=1.6.0
  Downloading tensorboard_plugin_wit-1.8.0-py3-none-any.whl (781 kB)
[K     |████████████████████████████████| 781 kB 11.8 MB/s eta 0:00:01
Collecting markdown>=2.6.8
  Downloading Markdown-3.3.3-py3-none-any.whl (96 kB)
[K     |████████████████████████████████| 96 kB 3.1 MB/s  eta 0:0

In [None]:
# data preprocessing
data = '/workspace/loid/images/old_uk_not_uk/train/'

# Per-channel statistics handed to transforms.Normalize.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

begin = time.time()

# Augmentation pipeline: random 224px crop, random horizontal flip, tensor conversion,
# then channel-wise normalisation.
tr_transform = transforms.Compose(
    [
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]
)
# A deterministic evaluation pipeline (Resize(256) -> CenterCrop(224) -> ToTensor -> Normalize)
# and a separate test ImageFolder are not used in this notebook.

tr_dataset = datasets.ImageFolder(root=data, transform=tr_transform)

end = time.time() - begin
class_names = tr_dataset.classes
loading_summary = f'loading complete in {end // 60:.0f}m {end % 60:.0f}s'
class_names, loading_summary

In [None]:
# Number of samples labelled 0 and labelled 1 across the whole image folder.
tr_targets_arr = np.array(tr_dataset.targets)
tr_class_counts = tuple(np.sum(tr_targets_arr == label) for label in (0, 1))

print(tr_class_counts)

In [None]:
# Split the image folder 75% / 15% / remainder into train / validation / test subsets.
# The split generator is seeded so every run assigns the same images to the same subset;
# an unseeded split changes after each kernel restart, so images that were held out in one
# run can end up in the training subset of the next.
# NOTE(review): all three subsets wrap tr_dataset and therefore share its random
# augmentation transform - validation/test images are randomly cropped and flipped too.
split_seed = 42
train_count, val_count = int(0.75 * len(tr_dataset)), int(0.15 * len(tr_dataset))
test_count = len(tr_dataset) - (train_count + val_count)
train_set, val_set, test_set = torch.utils.data.random_split(
    tr_dataset,
    [train_count, val_count, test_count],
    generator=torch.Generator().manual_seed(split_seed),
)
dataset_sizes = len(train_set), len(val_set), len(test_set)
dataset_sizes

In [None]:
#loaders
batchsize = 64
workers = 1
pinmemory = True

# The three loaders share identical batching, worker, shuffling and pinning options.
loader_options = dict(batch_size=batchsize, num_workers=workers, shuffle=True, pin_memory=pinmemory)

train_loader = torch.utils.data.DataLoader(train_set, **loader_options)
valid_loader = torch.utils.data.DataLoader(val_set, **loader_options)
test_loader = torch.utils.data.DataLoader(test_set, **loader_options)

In [None]:
# helper functions
def add_pr_curve_tensorboard(writer, phase, targets, probs, preds, global_step=0, labels=None):
    '''
    Log the precision-recall curve of a single class to TensorBoard.

    Args:
        writer: object exposing ``add_pr_curve`` and ``flush`` (the notebook passes a
            ``SummaryWriter``). ``None`` disables logging and the call does nothing.
        phase: tag prefix for the curve, e.g. 'train' or 'val'.
        targets: integer index of the class to plot (a single class, despite the
            plural name). It selects a column of ``probs`` and an entry of the
            module-level ``class_names``.
        probs: 2-D tensor indexed as ``probs[sample, class]``.
        preds: 1-D tensor of predicted class indices, one per sample.
        global_step: step value stored with the curve.
        labels: optional 1-D tensor of ground-truth class indices. When given, the
            binary truth passed to TensorBoard is ``labels == targets``. When omitted
            the function keeps its historical behaviour and uses ``preds == targets``.
    '''
    if writer is None:
        return

    reference = preds if labels is None else labels
    tensorboard_truth = reference == targets
    tensorboard_probs = probs[:, targets]

    writer.add_pr_curve(f'{phase}/pr_curve/{class_names[targets]}', tensorboard_truth, tensorboard_probs, global_step=global_step)
    # Flush only. The writer belongs to the caller and is reused for later curves and
    # scalars; closing it here after every curve forces it to be reopened (starting a
    # new event file) on the next add_* call.
    writer.flush()
    
def imshow(inp, title=None):
    """Display a normalised channel-first image tensor with matplotlib.

    The tensor is converted to a NumPy array, moved to channel-last layout,
    de-normalised with the fixed per-channel mean/std below and clipped to [0, 1]
    before being drawn. ``title`` is set on the plot when it is not ``None``.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # Axis 0 moves to the end: (C, H, W) -> (H, W, C).
    image = np.transpose(inp.numpy(), (1, 2, 0))
    # Invert the normalisation, then clamp to the displayable range.
    image = np.clip(channel_std * image + channel_mean, 0, 1)

    plt.imshow(image)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated
    
def log_metrics(filename, y_target, y_pred, loss, saved=False):
    """Append one CSV row of binary-classification metrics to ``filename``.

    Row layout (comma separated, newline terminated):
    loss, accuracy, tn, fp, fn, tp, precision_0, precision_1, recall_0, recall_1,
    f1_0, f1_1, count_0, count_1, auroc, saved.

    Args:
        filename: path of the CSV file; opened in append mode.
        y_target: ground-truth class labels (0 or 1), one per sample.
        y_pred: predicted class labels (0 or 1), one per sample.
        loss: loss value written as the first column.
        saved: flag written as the last column.

    Notes:
        * Metrics are computed for the fixed label set [0, 1], so the row always has
          the same number of fields even when one class is absent from the data.
        * ``auroc`` is computed from the hard predictions in ``y_pred`` because no
          scores are passed in; it is written as ``nan`` when ``roc_auc_score``
          cannot compute it (it raises ValueError, e.g. for single-class targets).
    """
    binary_labels = [0, 1]

    # Pinning the labels keeps the confusion matrix 2x2 regardless of which classes
    # actually occur; ravel() yields tn, fp, fn, tp in row-major order.
    tn, fp, fn, tp = confusion_matrix(y_target, y_pred, labels=binary_labels).ravel()
    precision, recall, f1, support = precision_recall_fscore_support(y_target, y_pred, labels=binary_labels)
    accuracy = accuracy_score(y_target, y_pred)
    try:
        auroc = roc_auc_score(y_target, y_pred)
    except ValueError:
        auroc = float('nan')

    fields = [loss, accuracy, tn, fp, fn, tp, *precision, *recall, *f1, *support, auroc, saved]

    # All metrics are computed before the file is touched, so a metric error cannot
    # leave the file open or half-written.
    with open(filename, 'a') as result:
        result.write(",".join(str(field) for field in fields) + '\n')



In [None]:
def train(model, criterion_set, optimizer, epochs, scheduler, device = 'cpu', board_writer=None, hook=False,  save_folder = None):
    """Train ``model`` for ``epochs`` epochs, validating after every epoch.

    Each epoch runs a 'train' phase over the module-level ``train_loader`` and a
    'val' phase over ``valid_loader``. Per phase it accumulates loss and accuracy,
    appends a metrics row through ``log_metrics`` and, when a writer is supplied,
    logs PR curves, loss and accuracy to TensorBoard. The model's ``state_dict`` is
    written to ``<save_folder>/checkpoint.pt`` whenever the validation loss is less
    than or equal to the best value seen so far.

    Args:
        model: network to train; moved to ``device``.
        criterion_set: pair ``[train_criterion, valid_criterion]``.
        optimizer: optimizer stepped after every training batch.
        epochs: number of epochs to run.
        scheduler: learning-rate scheduler, stepped once after each training phase.
        device: device the model and batches are moved to.
        board_writer: optional TensorBoard writer; ``None`` disables TensorBoard
            logging. When given, it is flushed and closed before returning.
        hook: currently unused.
        save_folder: directory that receives ``train.csv``, ``val.csv`` and
            ``checkpoint.pt``. Required despite the ``None`` default.

    Returns:
        The trained model (the object returned by ``model.to(device)``).

    Raises:
        TypeError: if ``save_folder`` is ``None``.

    Also reads the module-level names ``train_loader``, ``valid_loader``,
    ``dataset_sizes`` and ``class_names``.
    """
    if save_folder is None:
        # Fail before any work is done instead of on the first path concatenation.
        raise TypeError("save_folder is required: train() writes train.csv, val.csv and checkpoint.pt into it")

    model = model.to(device)
    valid_loss_min = np.inf
    start = time.time()
    for epoch in range(epochs):

        print(f'Epoch {epoch}/{epochs-1}')
        print('-'*15)

        for phase in ['train','val']:
            is_train = phase == 'train'

            running_loss = 0.0
            running_correct = 0

            # Per-batch CPU tensors, concatenated once at the end of the phase.
            class_probs = []
            class_preds = []
            class_targets = []

            if is_train:
                model.train()  # Set model to training mode
                loader = train_loader
                dataset_size = dataset_sizes[0]
                filename = save_folder+'/train.csv'
                criterion = criterion_set[0]
            else:
                model.eval()
                loader = valid_loader
                dataset_size = dataset_sizes[1]
                filename = save_folder+'/val.csv'
                criterion = criterion_set[1]

            begin_phase = time.time()
            for batch_idx, (inputs, labels) in enumerate(loader, start=1):
                inputs, labels = inputs.to(device), labels.to(device)
                # zero the parameter gradients
                optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Keep only detached CPU copies. Storing the raw softmax outputs would
                # keep every batch's autograd graph and device memory alive until the
                # end of the epoch.
                class_probs.append(F.softmax(outputs.detach(), dim=1).cpu())
                class_preds.append(preds.cpu())
                class_targets.append(labels.cpu())

                running_loss += loss.item() * inputs.size(0)
                running_correct += torch.sum(preds == labels).item()

                if batch_idx % 1000 == 0:
                    end = time.time() - begin_phase
                    print(f'Phase: {phase} Batch {batch_idx} complete in {end // 60:.0f}m {end % 60:.0f}s')

            if is_train:
                scheduler.step()

            epoch_probs = torch.cat(class_probs)
            epoch_preds = torch.cat(class_preds)
            epoch_targets = torch.cat(class_targets)

            # board_writer is optional, so PR curves are only logged when one is given.
            if board_writer is not None:
                for class_index in range(len(class_names)):
                    add_pr_curve_tensorboard(board_writer, phase, class_index, epoch_probs, epoch_preds, global_step=epoch)

            epoch_loss = running_loss / dataset_size
            epoch_accuracy = running_correct / dataset_size

            # save model if validation loss has decreased
            saved = False
            if phase == 'val' and epoch_loss <= valid_loss_min:
                print('Validation loss decreased ({:.6f} --> {:.6f}).  Saved updated model.'.format(valid_loss_min, epoch_loss))
                torch.save(model.state_dict(), save_folder+'/checkpoint.pt')
                valid_loss_min = epoch_loss
                saved = True

            # `saved` is True only for a validation phase that wrote a checkpoint;
            # training rows always record False.
            log_metrics(filename, epoch_targets.numpy(), epoch_preds.numpy(), epoch_loss, saved)

            time_elapsed = time.time() - start

            print(f'{phase} Loss: {epoch_loss:.4f}; Accuracy: {epoch_accuracy:.4f}; Completed in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')

            if board_writer is not None:
                board_writer.add_scalar(f'{phase}/loss', epoch_loss, epoch)
                board_writer.add_scalar(f'{phase}/accuracy', epoch_accuracy, epoch)

        print()

    if board_writer is not None:
        board_writer.flush()
        board_writer.close()

    time_elapsed = time.time() - start
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')

    return model

In [None]:
#model settings
model_name = 'pretrained_resnet50'
results_folder = './results/'
save_folder = results_folder + model_name
train_results = save_folder +'/train.csv'
val_results = save_folder +'/val.csv'
checkpoint_path = save_folder +'/checkpoint.pt'
balance_loss = True

# Create the whole output tree up front; exist_ok makes the cell safe to re-run.
os.makedirs(save_folder, exist_ok=True)

logs = SummaryWriter(f'{save_folder}/logs/{model_name}')

# One column per value that log_metrics writes, in the same order. Both result files
# use the same layout, and no trailing comma is emitted.
metric_columns = ['loss', 'accuracy', 'tn', 'fp', 'fn', 'tp', 'precision_0', 'precision_1', 'recall_0', 'recall_1', 'f1_0', 'f1_1', 'count_0', 'count_1', 'auroc', 'saved']

for results_path in (train_results, val_results):
    # Write the header only into a new or empty file, so re-running this cell does not
    # append a second header line in the middle of existing results.
    if not os.path.isfile(results_path) or os.path.getsize(results_path) == 0:
        with open(results_path, 'w') as results_file:
            results_file.write(",".join(metric_columns) + '\n')

In [None]:
#visualise batch

# Take one batch from the validation loader and tile its images into a single grid.
inputs, classes = next(iter(valid_loader))
img_out = torchvision.utils.make_grid(inputs)

# Title the figure with the class name of every image in the batch.
batch_titles = [class_names[label] for label in classes]
imshow(img_out, title=batch_titles)

#add to tensorboard
logs.add_image('Sample input image', img_out)

In [None]:
#calculate loss weights
def _inverse_class_counts(subset):
    """Return a tensor holding 1/count for labels 0 and 1 among the samples of ``subset``."""
    subset_targets = np.array(subset.dataset.targets)[subset.indices]
    per_class = [np.sum(subset_targets == label) for label in (0, 1)]
    return 1.0 / torch.Tensor(per_class)


tr_loss_weights = _inverse_class_counts(train_set)
valid_loss_weights = _inverse_class_counts(val_set)
test_loss_weights = _inverse_class_counts(test_set)
tr_loss_weights, valid_loss_weights, test_loss_weights

In [None]:
model = models.resnet50(pretrained=True)

# Freeze the pretrained backbone. The optimizer below only receives the parameters of
# the new head, so gradients of every other layer would be computed on each backward
# pass and never used or zeroed.
for backbone_param in model.parameters():
    backbone_param.requires_grad = False

num_ftrs = model.fc.in_features
# Replace the classification head with a new (trainable) layer that outputs one logit per class.
model.fc = nn.Linear(num_ftrs, len(class_names))

# Only the parameters of the new head are optimized.
optimizer = optim.SGD(model.fc.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 10 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

#set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The class weights are moved to the training device so they match the logits' device.
if balance_loss:
    print("Using scaled loss function")
    train_criterion = nn.CrossEntropyLoss(weight=tr_loss_weights.to(device))
    valid_criterion = nn.CrossEntropyLoss(weight=valid_loss_weights.to(device))
else:
    print("Using unscaled loss function")
    train_criterion = nn.CrossEntropyLoss()
    valid_criterion = nn.CrossEntropyLoss()

# Order matters: train() uses index 0 for the training phase and index 1 for validation.
criterion_set = [train_criterion, valid_criterion]

# Optional: to record the model graph in TensorBoard, move the model and one batch of
# images to `device` and call logs.add_graph(model, images).

In [None]:
# Show which device was selected above ("cuda" when available, otherwise "cpu").
print(device)

In [None]:
# Run the training loop for 90 epochs. train() returns the model it was given, so
# binding the result keeps `model` pointing at the trained network and stops the
# notebook from echoing the whole network repr as this cell's output.
model = train(
    model,
    criterion_set,
    optimizer,
    epochs=90,
    scheduler=exp_lr_scheduler,
    device=device,
    board_writer=logs,
    hook=False,
    save_folder=save_folder,
)