In [1]:
# !pip install efficientnet-pytorch sklearn

In [2]:
import argparse
import os
import random
import time
from collections import OrderedDict

import numpy as np
import torch
import torchvision
from PIL import Image
from torch import nn
from torch import optim
from torch.utils.data import DataLoader
from torch.utils.data.sampler import WeightedRandomSampler
from torchvision import transforms

import models
from dataset_generator import DatasetGenerator

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
# this is an enhancement to first resize to larger image and then crop
image_resize = 256
image_size = 224

class_to_idx = {
    'normal': 0,
    'pneumonia': 1,
    'COVID-19': 2
}

In [3]:
print(torch.__version__)
print(torchvision.__version__)

SEED = 1234

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

1.4.0
0.5.0


In [4]:
image_dir = 'data'
# we will use train/test split only to get more COVID-19 data
train_csv_file = 'train_split.txt'
test_csv_file = 'test_split.txt'

In [5]:
train_transforms = transforms.Compose([
    transforms.RandomOrder([
        transforms.ColorJitter(hue=.05, saturation=.05),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15, resample=Image.BILINEAR),
        transforms.RandomResizedCrop(image_size, scale=(0.9, 1.0)),
    ]),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

test_transforms = transforms.Compose([
    transforms.Resize(image_resize),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

In [6]:
train_dir = os.path.join(image_dir, 'train')
train_dataset = DatasetGenerator(train_csv_file, train_dir, transform=train_transforms)
image, label = next(iter(train_dataset))

In [7]:
test_dir = os.path.join(image_dir, 'test')
test_dataset = DatasetGenerator(test_csv_file, test_dir, transform=test_transforms)
image, label = next(iter(test_dataset))

In [8]:
def load_pretrained_model(arch):
    model_func = getattr(models, arch)
    model = model_func()
    model.arch = arch
    
    return model

In [9]:
# convert labels to tensor
def labels_to_tensor(labels):
    label_indices = [class_to_idx[label] for label in labels]
    label_indices = np.array(label_indices, int)
    label_indices = torch.LongTensor(label_indices)
    return label_indices

labels_to_tensor(['normal', 'pneumonia', 'COVID-19'])

tensor([0, 1, 2])

In [10]:
def validate(model, valid_dataloader, loss_func, device):
    #track accuracy and loss 
    accuracy = 0
    test_loss = 0
    
    with torch.no_grad(): #deactivates requires_grad flag, disables tracking of gradients 
        for images, labels in valid_dataloader: #iterate over images and labels in valid dataset
            labels = labels_to_tensor(labels)
            images, labels = images.to(device), labels.to(device) #move a tensor to a device
            log_ps = model.forward(images) #log form of probabilities for each label
            test_loss += loss_func(log_ps, labels).item() #.item() returns loss value as float, compare prob to actual 
            
            ps = torch.exp(log_ps) #gets rid of log 
            equality = (labels.data == ps.max(dim=1)[1]) #takes highest probability
            accuracy += torch.mean(equality.type(torch.FloatTensor))

    return test_loss, accuracy

In [11]:
# Do validation on the test set
def test_model(model, test_loader, device):
    #track accuracy, move to device, switch on eval mode
    accuracy = 0
    model.to(device)
    model.eval()
    
    with torch.no_grad():
        for images, labels in iter(test_loader):
            labels = labels_to_tensor(labels)
            images, labels = images.to(device), labels.to(device) #move a tensor to a device
            log_ps = model.forward(images)
            ps = torch.exp(log_ps)
            
            equality = (labels.data == ps.max(dim=1)[1])
            accuracy += equality.type(torch.FloatTensor).mean()
        model_accuracy = accuracy/len(test_loader)
        model.accuracy = model_accuracy
    
    return model.accuracy           

In [12]:
# Save the checkpoint
def save_checkpoint(checkpoint_path, model):
    checkpoint = {
        # Save the model arch, accuracy, class_to_idx
        'arch':model.arch,
        'accuracy':model.accuracy,
        'class_to_idx':class_to_idx,
        'state_dict':model.state_dict()
    }
    torch.save(checkpoint, checkpoint_path)
    print('Saved the trained model: %s' % checkpoint_path)

In [24]:
train_label_cnt = [0, 0, 0]
train_labels = [] 

# TBD shuffle the training data 
# TBD this is best done inside the DatasetGenerator class
# train_dataset.csv_df = train_dataset.csv_df.sample(frac=1)
for label in train_dataset.csv_df[2]:
    train_label_cnt[class_to_idx[label]] += 1
    train_labels.append(class_to_idx[label])
    
train_num_samples = sum(train_label_cnt)
train_class_weights = [train_num_samples/train_label_cnt[i] for i in range(len(train_label_cnt))]
train_weights = [train_class_weights[train_labels[i]] for i in range(int(train_num_samples))] 

# TBD create WeightedRandomSampler to balance the training data set
train_sampler = WeightedRandomSampler(torch.DoubleTensor(train_weights), int(train_num_samples))

#train_dataset.csv_df.head(20)
train_class_weights

[1.7493095656540296, 2.550329428989751, 27.594059405940595]

In [14]:
# Using the image datasets and the transforms, define the dataloaders
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=16, shuffle=False, sampler=train_sampler)

dataloaders = {"train": train_loader,
               "test": DataLoader(test_dataset, batch_size=batch_size, num_workers=16, shuffle=True)}

In [15]:
device = 'cpu'
cuda = torch.cuda.is_available()
if cuda:
    device = 'cuda'
device

'cuda'

In [16]:
available_models = [m for m in dir(models) if 'Net' in m and 'Model' not in m]
available_models

['DenseNet121',
 'DenseNet169',
 'DenseNet201',
 'EfficientNet4',
 'EfficientNet5',
 'EfficientNet6',
 'ResNet101',
 'ResNet34',
 'ResNet50']

In [17]:
arch = 'DenseNet169'
pretrained_model = load_pretrained_model(arch)

Downloading: "https://download.pytorch.org/models/densenet169-b2777c0a.pth" to /home/ubuntu/.cache/torch/checkpoints/densenet169-b2777c0a.pth


HBox(children=(FloatProgress(value=0.0, max=57365526.0), HTML(value='')))




In [18]:
# !ls -ltrh ../pretrain

In [19]:
def convert_prior_weights(pretrained_model, state_dict, prefix='module.'):
    old_state_dict = pretrained_model.state_dict()
    new_state_dict = state_dict

    if prefix is not None:
        new_state_dict = OrderedDict()
        # make sure the keys of the state dict match those of old_state_dict
        for k in state_dict:
            short_key = k.replace(prefix, '')
            # make sure the shape of the weight tensors matches
            if state_dict[k].shape != old_state_dict[short_key].shape:
                print('Unmatched key {} in state_dict: {} vs. {}'.format(short_key, 
                                                                         state_dict[k].shape,
                                                                         old_state_dict[short_key].shape))
            else:
                new_state_dict[short_key] = state_dict[k]

    for k in new_state_dict:
        if k not in old_state_dict:
            print('Unexpected key %s in new_state_dict' % k)

    for k in old_state_dict:
        if k not in new_state_dict:
            print('Missing key %s in old_state_dict' % k)

    return new_state_dict


In [20]:
# # TBD this is to load previously saved checkpoints from pretraining
# ckp_path = '../pretrain/DenseNet169-17082020-063151.pth.tar'
# model_checkpoint = torch.load(ckp_path, map_location=torch.device(device))
# state_dict = model_checkpoint['state_dict']

# # TBD this step is for converting the variable names from pretraining
# # TBD so it is not necessary when loading saved checkpoints from previous txfer learning
# new_state_dict = convert_prior_weights(pretrained_model, state_dict)

# pretrained_model.load_state_dict(new_state_dict, strict=False)

In [21]:
optimizer = optim.Adam(pretrained_model.get_optimizer_parameters(),
                       lr=0.00001, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-5)
criterion = nn.NLLLoss()

In [25]:
def train(model, loss_func, optimizer, dataloaders, device, epochs, checkpoint_prefix, print_every=20):
    device = torch.device(device)
    model.to(device)

    train_loader = dataloaders['train']
    test_loader = dataloaders['test']

    epoch_start = time.time()
    max_acc = 0.0

    # loop to train for number of epochs
    for e in range(epochs):
        running_loss = 0
        batch_start = time.time()
        steps = 0

        for images, labels in train_loader:
            # within each loop, iterate train_loader, and print loss
            label_indices = labels_to_tensor(labels)
            images, labels = images.to(device), label_indices.to(device)
            optimizer.zero_grad()
            log_ps = model.forward(images)
            loss = loss_func(log_ps, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            steps += 1

            if steps % print_every == 0:
                model.eval()
                valid_loss, valid_accuracy = validate(model, test_loader, loss_func, device)
                model.train()
                batch_time = time.time() - batch_start
                print(
                    "Epoch: {}/{}..".format(e + 1, epochs),
                    "Step: {}..".format(steps),
                    "Training Loss: {:.3f}..".format(running_loss / len(train_loader)),
                    "Test Loss: {:.3f}..".format(valid_loss / len(test_loader)),
                    "Test Accuracy: {:.3f}..".format(valid_accuracy / len(test_loader)),
                    "Batch Time: {:.3f}, avg: {:.3f}".format(batch_time, batch_time / steps)
                )

        model.eval()
        test_accuracy = test_model(model, test_loader, device)
        model.train()
        epoch_time = time.time() - epoch_start

        if test_accuracy > max_acc:
            max_acc = test_accuracy
            model.accuracy = test_accuracy
            save_checkpoint('%s.pth.tar' % checkpoint_prefix, model)
            print ('Epoch [{}] [save] Accuracy={:.3f} time: {:.3f}, avg: {:.3f}'
                   .format(e + 1, test_accuracy, epoch_time, epoch_time / (e + 1)))
        else:
            print ('Epoch [{}] [----] Accuracy={:.3f} time: {:.3f}, avg: {:.3f}'
                   .format(e + 1, test_accuracy, epoch_time, epoch_time / (e + 1)))

    return model

In [26]:
#device = 'cpu'
epochs = 2
training_start = time.time()
checkpoint_prefix = os.path.join('checkpoints','%s-%d'%(arch, training_start))
    
trained_model = train(pretrained_model, criterion, optimizer, dataloaders, device, epochs, checkpoint_prefix, print_every=20)
print('%.2f seconds taken for model training' % (time.time() - training_start))

Epoch: 1/2.. Step: 20.. Training Loss: 0.096.. Test Loss: 0.985.. Test Accuracy: 0.750.. Batch Time: 287.100, avg: 14.355
Epoch: 1/2.. Step: 40.. Training Loss: 0.191.. Test Loss: 0.967.. Test Accuracy: 0.778.. Batch Time: 379.257, avg: 9.481
Epoch: 1/2.. Step: 60.. Training Loss: 0.283.. Test Loss: 0.956.. Test Accuracy: 0.778.. Batch Time: 481.504, avg: 8.025
Epoch: 1/2.. Step: 80.. Training Loss: 0.373.. Test Loss: 0.940.. Test Accuracy: 0.779.. Batch Time: 619.689, avg: 7.746
Epoch: 1/2.. Step: 100.. Training Loss: 0.461.. Test Loss: 0.901.. Test Accuracy: 0.802.. Batch Time: 780.732, avg: 7.807
Epoch: 1/2.. Step: 120.. Training Loss: 0.547.. Test Loss: 0.884.. Test Accuracy: 0.804.. Batch Time: 874.772, avg: 7.290
Epoch: 1/2.. Step: 140.. Training Loss: 0.631.. Test Loss: 0.879.. Test Accuracy: 0.788.. Batch Time: 988.440, avg: 7.060
Epoch: 1/2.. Step: 160.. Training Loss: 0.713.. Test Loss: 0.843.. Test Accuracy: 0.802.. Batch Time: 1098.013, avg: 6.863
Epoch: 1/2.. Step: 180.. T

In [None]:
test_loader = dataloaders['test']
test_accuracy = test_model(trained_model, test_loader, device)
print(test_accuracy)