In [75]:
# imports
import torch
from torch import nn
from torchvision.models import resnet18
from torch.utils.data import Dataset
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import platform


# vis
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
import sklearn.metrics
from math import ceil

In [50]:
# import dataset
train = pd.read_csv('../input/train.csv')

In [51]:
class Config:
    train_pcent = 0.8
    TRAIN_BS = 32
    VALID_BS = 16
    NB_EPOCHS = 1
    model_name = 'NN'

# Dataset

In [64]:
class SIIM(Dataset):
    def __init__(self, df, is_train=True, augments=None):
        super().__init__()
        # random sample data
        self.df = df.sample(frac=1).reset_index(drop=True)
        # training or validation
        self.is_train = is_train
        # augmentations
        self.augments = augments
    
    @staticmethod
    def get_image(image_path):
        return ''
    
    def __getitem__(self, idx):
        # retrieve image
        image_id = self.df['StudyInstanceUID'].values[idx]
        image_path = '' # self.df['path'].values[idx]
        # get image (obviously change in real implementation)
        image = self.get_image(image_path)
        
        # Augments
        if self.augments:
            image = self.augments(image=image)
        else:
            image = torch.tensor(image)
            
        # if train
        if self.is_train:
            label = self.df[self.df['StudyInstanceUID'] == image_id].values.tolist()[0][4:-1]
            return image, torch.tensor(label)

        return image
    
    def __len__(self):
        return(len(self.df.id))

# Model

Fake model function that should output a value of 1 for either:

<code>'negative', 'typical', 'indeterminate', 'atypical'</code>

In [53]:
class NN(nn.Module):
    def __init__(self, num_classes=4):
        super(NN, self).__init__()
        # model spec
        self.out = nn.Sigmoid()
        self.num_classes = num_classes
    
    def forward(self, X):
        # batch size
        batch_size = X.shape[0]
        # create fake data
        rand = torch.randn([batch_size, 4])
        return self.out(rand)

# Trainer

In [77]:
class Trainer:
    def __init__(self, train_dataloader, valid_dataloader, model,
                optimiser, loss_fn, val_loss_fn, device='cpu'):
        """
        Constructor for Trainer class
        """
        self.train = train_dataloader
        self.valid = valid_dataloader
        self.model = model
        self.optim = optimiser
        self.loss_fn = loss_fn
        self.val_loss_fn = val_loss_fn
        self.device = device
        
    def train_one_cycle(self):
        """
        Run one epoch of training, backpropogation and optimisation.
        """
            
        # model train mode
        model.train()
        
        # progress bar
        train_prog_bar = tqdm(self.train, total=len(self.train))
        
        # stats
        all_train_labels = []
        all_train_preds = []
        running_loss = 0
        
        with torch.set_grad_enabled(True):
            for xtrain, ytrain in train_prog_bar:
                # send to devices
                xtrain = xtrain.to(self.device).float()
                ytrain = ytrain.to(self.device).float()

                # get predictions
                pred = model(xtrain)

                # training
                train_loss = self.loss_fn(pred, ytrain)

                # Backpropogation
                if self.optimiser:
                    self.optimiser.zero_grad()
                    self.optimiser.step()
                train_loss.backward()
                # For averaging and reporting later
                running_loss += train_loss
                
                # convert predictions to numpy
                train_predictions = torch.argmax(pred, 1).detach().cpu().numpy()
                train_labels = ytrain.detach().cpu().numpy()
                
                # append to stats
                all_train_labels += [train_predictions]
                all_train_preds += [train_labels]

                # show the current loss to the progress bar
                train_pbar_desc = f'loss: {train_loss.item():.4f}'
                train_prog_bar.set_description(desc=train_pbar_desc)
                
                # average the running loss over all batches and return
                train_running_loss = running_loss / len(self.train)
                print(f"Final Training Loss: {train_running_loss:.4f}")
                
                # free memory
                del all_train_labels, all_train_preds, train_predictions, train_labels, xtrain, ytrain, pred
                # free up cache
                torch.cuda.empty_cache()
                
                return(train_running_loss)
        

    def valid_one_cycle(self):
        """
        Run one epoch of prediction.
        """
            
        # model eval mode
        model.eval()
        
        # progress bar
        valid_prog_bar = tqdm(self.valid, total=len(self.train))
        
        # stats
        all_valid_labels = []
        all_valid_preds = []
        running_loss = 0
        
        with torch.no_grad():
            for xtrain, ytrain in valid_prog_bar:
                # send to devices
                xtrain = xtrain.to(self.device).float()
                ytrain = ytrain.to(self.device).float()

                # get predictions
                pred = model(xtrain)

                # training
                val_loss = self.val_loss_fn(pred, ytrain)

                # Backpropogation
                if self.optimiser:
                    self.optimiser.zero_grad()
                    self.optimiser.step()
                    
                train_loss.backward()
    

                # For averaging and reporting later
                running_loss += val_loss.item()
                
                # convert predictions to numpy
                val_pred = torch.argmax(pred, 1).detach().cpu().numpy()
                val_label = ytrain.detach().cpu().numpy()
                
                # append to stats
                all_valid_labels += [val_label]
                all_valid_preds += [val_pred]

                # show the current loss to the progress bar
                valid_pbar_desc = f'loss: {val_loss.item():.4f}'
                valid_prog_bar.set_description(desc=valid_pbar_desc)
                
                # average the running loss over all batches and return
                final_loss_val = running_loss / len(self.train)
                
                # Get Validation Accuracy
                all_valid_labels = np.concatenate(all_valid_labels)
                all_valid_preds = np.concatenate(all_valid_preds)
                
                
                
                print(f"Final Training Loss: {train_running_loss:.4f}")
                
                # Free up memory
                del all_valid_labels, all_valid_preds, val_label, val_pred, xval, yval, pred
                # free cache
                torch.cuda.empty_cache()
                
                return(final_loss_val, model)            
            

# Cycle

In [78]:
# Training Cycle
nb_training_samples = int(Config.train_pcent * train.id.shape[0])
train_data = train[:nb_training_samples]
valid_data = train[nb_training_samples:]

print(f"[INFO] Training on {train_data.shape[0]} samples ({int(Config.train_pcent*100)}%) and validation on {valid_data.shape[0]} ({ceil(abs(1 - Config.train_pcent) * 100)}%) samples")

if torch.cuda.is_available():
        print("[INFO] Using GPU: {}\n".format(torch.cuda.get_device_name()))
        DEVICE = torch.device('cuda:0')
else:
    print("\n[INFO] GPU not found. Using CPU: {}\n".format(platform.processor()))
    DEVICE = torch.device('cpu')
        

# Make Training and Validation Datasets
training_set = SIIM(
    df=train_data
)

validation_set = SIIM(
    df=valid_data
)

train_loader = DataLoader(
    training_set,
    batch_size=Config.TRAIN_BS,
    shuffle=True,
    num_workers=8,
    pin_memory=True
)

valid_loader = DataLoader(
    validation_set,
    batch_size=Config.VALID_BS,
    shuffle=False,
    num_workers=8
)

if "NN" in Config.model_name in Config.model_name:        
    model = NN().to(DEVICE)
    
else:
    raise RuntimeError("Must specify a valid model type to train.")

print(f"Training Model: {Config.model_name}")

optim = None # torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=0.001)
loss_fn_train = nn.BCELoss()
loss_fn_val = nn.BCELoss()

trainer = Trainer(
    train_dataloader=train_loader,
    valid_dataloader=valid_loader,
    model=model,
    optimiser=optim,
    loss_fn=loss_fn_train,
    val_loss_fn=loss_fn_val,
    device=DEVICE,
)

train_losses_eff = []
valid_losses_eff = []

for epoch in range(Config.NB_EPOCHS):
    print(f"{'-'*20} EPOCH: {epoch+1}/{Config.NB_EPOCHS} {'-'*20}")

    # Run one training epoch
    current_train_loss = trainer.train_one_cycle()
    train_losses_eff.append(current_train_loss)

    # Run one validation epoch
    current_val_loss, op_model = trainer.valid_one_cycle()
    valid_losses_eff.append(current_val_loss)

    # Empty CUDA cache
    torch.cuda.empty_cache()

  0%|          | 0/159 [00:00<?, ?it/s]

[INFO] Training on 5067 samples (80%) and validation on 1267 (20%) samples

[INFO] GPU not found. Using CPU: i386

Training Model: NN
-------------------- EPOCH: 1/1 --------------------


  0%|          | 0/159 [00:35<?, ?it/s]


KeyboardInterrupt: 