## Imports and Constant Definitions

In [1]:
import numpy as np
import os
import pandas as pd
import torch
import torchvision

from joblib import dump
from PIL import Image
from tqdm.auto import tqdm

In [2]:
# Global configuration shared by the cells below.
SEED = 42                  # seed passed to torch's random number generator
IMG_SIZE = (256, 256)      # size every image is resized to before use
IMG_DIR = './data/data'    # directory that contains the image files

# Lookups between class names and the integer targets used for training.
LABEL_TO_INT = dict(NORMAL=0, COVID=1, PNEUMONIA=2)
INT_TO_LABEL = {index: name for name, index in LABEL_TO_INT.items()}

# Seed torch for reproducibility; assigning to `_` keeps the returned
# generator out of the cell output.
_ = torch.manual_seed(SEED)

## Data Loading

Load the training images into memory, together with their integer labels.

In [3]:
class Dataset(torch.utils.data.Dataset):
    """In-memory image classification dataset.

    Every image listed in ``metadata`` is opened, resized to ``img_size`` and
    kept in memory as a PIL image next to its integer label. Images that do
    not have exactly 3 bands are skipped and reported with a printed message.

    Args:
        metadata: DataFrame with an ``image_id`` column (file name relative to
            ``img_dir``) and a ``label`` column (a key of ``LABEL_TO_INT``).
        img_dir: directory that contains the image files.
        img_size: target size passed to ``PIL.Image.Image.resize``.
    """

    def __init__(self, metadata, img_dir, img_size):
        super().__init__()

        # Load images in memory
        self.x, self.y = [], []
        for _, row in tqdm(metadata.iterrows(), total=len(metadata)):
            path = os.path.join(img_dir, row['image_id'])
            # The context manager releases the file handle for every image,
            # including the skipped ones whose pixel data is never read.
            with Image.open(path) as img:
                if len(img.getbands()) != 3:
                    print(f"Skipping {row['image_id']} because it does not have 3 channels.")
                    continue
                # resize() returns a new in-memory image that stays valid
                # after the source file has been closed.
                resized = img.resize(img_size, Image.Resampling.BILINEAR)
            self.x.append(resized)
            self.y.append(LABEL_TO_INT[row['label']])

        # Define the transforms used during training
        self.transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.RandomHorizontalFlip(),
        ])

    def __len__(self):
        """Return the number of images that were kept."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return ``(transformed image tensor, integer label)`` for item ``idx``."""
        return self.transforms(self.x[idx]), self.y[idx]

In [4]:
# Read the training metadata, then load every listed image into memory.
train = pd.read_csv('./data/train.csv')
train_set = Dataset(metadata=train, img_dir=IMG_DIR, img_size=IMG_SIZE)

  0%|          | 0/3659 [00:00<?, ?it/s]

Skipping 4210714.png because it does not have 3 channels.


## Model Definition

We use ImageNet-pretrained backbones (MobileNetV2, GoogLeNet and EfficientNet-B2) and only replace the final classification layer so that it outputs one score per label.

In [5]:
class Net(torch.nn.Module):
    """Pretrained torchvision backbone with a freshly initialised final layer.

    The backbone is created with its ``IMAGENET1K_V1`` weights and its last
    linear layer is replaced by a new ``torch.nn.Linear`` with ``num_classes``
    outputs.

    Args:
        backbone_name: one of ``'mobilenet_v2'``, ``'googlenet'`` or
            ``'efficientnet_b2'``.
        num_classes: number of outputs of the replacement final layer.
        bias: whether the replacement final layer has a bias term.

    Raises:
        ValueError: if ``backbone_name`` is not one of the supported names.
    """

    def __init__(self, backbone_name, num_classes, bias):
        super().__init__()

        def new_head(in_features):
            # Every backbone gets the same kind of replacement layer; only the
            # number of input features differs.
            return torch.nn.Linear(in_features=in_features, out_features=num_classes, bias=bias)

        if backbone_name == 'mobilenet_v2':
            print('Using mobilenet_v2 as backbone.')
            self.backbone = torchvision.models.mobilenet_v2(
                weights=torchvision.models.MobileNet_V2_Weights.IMAGENET1K_V1,
                progress=False,
            )
            self.backbone.classifier[-1] = new_head(self.backbone.classifier[-1].in_features)
        elif backbone_name == 'googlenet':
            print('Using googlenet as backbone.')
            self.backbone = torchvision.models.googlenet(
                weights=torchvision.models.GoogLeNet_Weights.IMAGENET1K_V1,
                progress=False,
            )
            self.backbone.fc = new_head(self.backbone.fc.in_features)
        elif backbone_name == 'efficientnet_b2':
            print('Using efficientnet_b2 as backbone.')
            self.backbone = torchvision.models.efficientnet_b2(
                weights=torchvision.models.EfficientNet_B2_Weights.IMAGENET1K_V1,
                progress=False,
            )
            self.backbone.classifier[-1] = new_head(self.backbone.classifier[-1].in_features)
        else:
            # ValueError is a subclass of Exception, so callers that caught the
            # previous generic Exception keep working.
            raise ValueError(f"Unknown backbone {backbone_name}.")

    def forward(self, inputs):
        """Return the backbone's output for ``inputs``."""
        return self.backbone(inputs)

The final model is an ensemble of the trained models: it averages their softmax probabilities (soft voting).

In [6]:
class Stacking(torch.nn.Module):
    """Ensemble that averages the softmax outputs of several models.

    Args:
        models: non-empty iterable of ``torch.nn.Module`` instances that all
            accept the same inputs and return logits of the same shape.

    Raises:
        ValueError: if ``models`` is empty.
    """

    def __init__(self, models):
        super().__init__()
        # A ModuleList registers the sub-models, so .to(device), .parameters()
        # and state_dict() on the ensemble reach them. A plain Python list
        # would be invisible to torch.
        self.models = torch.nn.ModuleList(models)
        self.n_models = len(self.models)
        if self.n_models == 0:
            raise ValueError("Stacking needs at least one model.")

    def forward(self, inputs):
        """Return the mean of the models' softmax outputs for ``inputs``.

        Each sub-model is switched to eval mode and run without gradient
        tracking.
        """
        total = None
        with torch.no_grad():
            for model in self.models:
                model.eval()
                probs = torch.softmax(model(inputs), dim=-1)
                total = probs if total is None else total + probs
        return total / self.n_models

## Training

This is where we train the models: each backbone is fine-tuned separately, then the trained models are combined into the final ensemble.

In [7]:
def fit(dataset, backbone_name, device, batch_size, n_epochs, lr, bias):
    """Train a ``Net`` on ``dataset`` and return it.

    Uses cross-entropy loss and Adam. The learning rate is multiplied by 0.1
    once 90% of the epochs have run.

    Args:
        dataset: dataset yielding ``(image tensor, integer label)`` pairs.
        backbone_name: backbone identifier forwarded to ``Net``.
        device: torch device the model and the batches are moved to.
        batch_size: number of samples per batch.
        n_epochs: number of passes over ``dataset``.
        lr: initial learning rate for Adam.
        bias: whether the new final layer of ``Net`` has a bias term.

    Returns:
        The trained ``Net``, left in training mode on ``device``.
    """
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
    net = Net(backbone_name=backbone_name, num_classes=len(LABEL_TO_INT), bias=bias).to(device)
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam([p for p in net.parameters() if p.requires_grad], lr=lr)
    # int(.9 * n_epochs) is 0 when n_epochs == 1, and StepLR divides by
    # step_size, so clamp it to at least one epoch.
    step_size = max(1, int(.9 * n_epochs))
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=.1)

    for _ in tqdm(range(n_epochs)):
        net.train()
        for batch_x, batch_y in loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            batch_loss = loss_fn(net(batch_x), batch_y)
            batch_loss.backward()
            optimizer.step()
        # The scheduler advances once per epoch, after the optimizer updates.
        scheduler.step()

    return net

In [8]:
# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Fine-tune each backbone with the same hyper-parameters and save it to disk.
models = []
for backbone_name in ('mobilenet_v2', 'googlenet', 'efficientnet_b2'):
    model = fit(
        dataset=train_set,
        backbone_name=backbone_name,
        device=device,
        batch_size=64,
        n_epochs=40,
        lr=1e-4,
        bias=False,
    )
    dump(model, backbone_name + ".joblib")
    models.append(model)

# Combine the trained models into the ensemble and save it as well.
final_model = Stacking(models)
dump(final_model, "final_model.joblib")

Using mobilenet_v2 as backbone.


  0%|          | 0/40 [00:00<?, ?it/s]

Using googlenet as backbone.


  0%|          | 0/40 [00:00<?, ?it/s]

Using efficientnet_b2 as backbone.


  0%|          | 0/40 [00:00<?, ?it/s]

['final_model.joblib']

## Predictions

In [9]:
# Predict a label for every test image and write the submission file.
test = pd.read_csv('./data/test.csv')

transforms = torchvision.transforms.ToTensor()
predictions = []
for fname in tqdm(test['image_id']):
    path = os.path.join(IMG_DIR, fname)
    # The context manager closes the file once the pixels have been read.
    with Image.open(path) as src:
        # The models take 3-channel inputs (training skipped every image
        # without 3 bands). Every test row still needs a prediction, so such
        # images are converted to RGB. 3-band images pass through unchanged.
        rgb = src if len(src.getbands()) == 3 else src.convert('RGB')
        img = rgb.resize(IMG_SIZE, Image.Resampling.BILINEAR)
    img = transforms(img).to(device)
    img = torch.unsqueeze(img, dim=0) # add batch dim
    # final_model returns averaged class probabilities; take the most likely class.
    y_pred = torch.argmax(final_model(img)[0]).cpu().item()
    predictions.append(INT_TO_LABEL[y_pred])

test['label'] = predictions
test[['trustii_id', 'label']].to_csv('submission.csv', index=False)

  0%|          | 0/1569 [00:00<?, ?it/s]