In [3]:
!kaggle competitions download -c 'Chest-x-ray-image-classification'
!unzip Chest-x-ray-image-classification.zip

In [36]:
%matplotlib inline
import random
from IPython.display import clear_output
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import densenet169
from torch.utils.data import Dataset
from torchvision.io import read_image
import torchvision.transforms as T
from torch.optim.lr_scheduler import StepLR

import wandb
from torchmetrics import AUROC
from tqdm.notebook import tqdm

def set_random_seed(seed):
    torch.backends.cudnn.deterministic = True
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    random.seed(seed)

In [7]:
names_of_deceases = ['Cardiomegaly', 'Edema', 'Consolidation', 'Atelectasis', 'Pleural Effusion']

class CustomImageDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None, train=True):
        df = pd.read_csv(annotations_file)
        self.img_labels = df[names_of_deceases].values
        self.img_locations = df['Image'].values
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return self.img_labels.shape[0]

    def __getitem__(self, idx):
        image = read_image(f'{self.img_dir}/{self.img_locations[idx]}') / 255
        label = self.img_labels[idx]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label



mean, std = 0.506, 0.289
transform_train = T.Compose([
    T.RandomResizedCrop(size=(224, 224), scale=(0.8, 1.0)),
    T.RandomHorizontalFlip(p=0.5),
    T.Normalize(mean=(mean), std=(std)),
    T.Lambda(lambda x: x.repeat(3, 1, 1))
])

training_data = CustomImageDataset(
    annotations_file='data/train.csv',
    img_dir='data',
    transform=transform_train
)

batch_size = 128
train_loader = torch.utils.data.DataLoader(
    training_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=8,
    pin_memory=True,
)

In [20]:
criterion = nn.BCEWithLogitsLoss()
aucroc = AUROC(num_labels=5, task='multilabel')
scaler = torch.cuda.amp.GradScaler()

def train_epoch(model, optimizer, record):
    model.train()
    for batch_num, (data, target) in tqdm(enumerate(train_loader), total=len(train_loader)):
        data = data.to(device)
        target = target.to(device)
        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            output = model(data)
        preds = torch.sigmoid(output).detach()
        auc_roc = aucroc(preds, target.to(torch.int)).item()
        loss = criterion(output, target)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        loss = loss.item()
        if record:
            wandb.log({
                "train/loss": loss,
                "train/auc_roc": auc_roc
            })
    
def train(model, opt, n_epochs, scheduler=None, record=False, config=None, run_name=None, PATH='model'):
    if record:
        wandb.login()
        wandb.init(
            project="HSE_DL_BHW1",
            config=config
        )
        if run_name is not None:
            wandb.run.name = run_name
    for epoch in range(n_epochs):
        print("Epoch {0} of {1}".format(epoch, n_epochs))
        train_epoch(model, opt, record)
        torch.save(model.state_dict(), f'{PATH}_{epoch}.pth')
        scheduler.step()
    if record:
        wandb.finish()

In [14]:
set_random_seed(3407)
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
model = densenet169(weights=None)
model.classifier = nn.Linear(1664, 5)
model = model.to(device)
print('model params:', sum(p.numel() for p in model.parameters()))

model params: 12492805


In [15]:
num_epochs = 5
lr = 1e-3
weight_decay = 1e-4
opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
scheduler = StepLR(opt, 3)

config = {
    "model": "densenet169",
    "epochs": num_epochs,
    "batch_size": batch_size,
    "optimizer": "Adam",
    "lr": lr,
    "weight_decay": weight_decay,
    "sheduler": "StepLR"
}

In [20]:
train(model, opt, num_epochs, scheduler, record=True, config=config)

  0%|          | 0/1343 [00:00<?, ?it/s]



Epoch 3 of 5
1343
Epoch 4 of 5
1343


  0%|          | 0/1343 [00:00<?, ?it/s]

0,1
train/auc_roc,▂▁▄▆▁▃▃▄▄▄▅▆▄▄▅▅▃▄▃▄▆▄▄▄▆▅▆▅▆▅▄▆▆█▇▆▆▄▄▆
train/loss,▇▆▅▅▇▅▅▄▇▇▅▄▄█▄█▆▇▆▄▄▆▂▄▄▄▃▄▂▄▆▂▆▁▄▃▂▅▅▅

0,1
train/auc_roc,0.82857
train/loss,0.613


# Composer

In [27]:
import composer.functional as cf
from composer.algorithms.colout import ColOutTransform
from composer.algorithms.augmix import AugmentAndMixTransform


augmix_transform = AugmentAndMixTransform(severity=3,
                                          width=3,
                                          depth=-1,
                                          alpha=1.0,
                                          augmentation_set="all")
colout_transform = ColOutTransform(p_row=0.15, p_col=0.15)

transform_train = T.Compose([
    T.RandomResizedCrop(size=(224, 224), scale=(0.8, 1.0)),
    T.RandomHorizontalFlip(p=0.5),
    T.Normalize(mean=(mean), std=(std)),
    augmix_transform,
    colout_transform,
    T.Lambda(lambda x: x.repeat(3, 1, 1))
])

training_data = CustomImageDataset(
    annotations_file='data/train.csv',
    img_dir='data',
    transform=transform_train
)

In [12]:
set_random_seed(3407)
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
model = densenet169(weights=None)
model.classifier = nn.Linear(1664, 5)
model = model.to(device)

num_epochs = 5
lr = 1e-3
weight_decay = 1e-4
opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
scheduler = StepLR(opt, 3)

config = {
    "model": "densenet169",
    "epochs": num_epochs,
    "batch_size": batch_size,
    "optimizer": "Adam",
    "lr": lr,
    "weight_decay": weight_decay,
    "sheduler": "StepLR"
}

cf.apply_squeeze_excite(
    model,
    optimizers=opt,
    min_channels=128,
    latent_channels=64
)

cf.apply_blurpool(
    model,
    optimizers=opt,
    replace_convs=True,
    replace_maxpools=True,
    blur_first=True
)

cf.apply_channels_last(model)

cf.apply_gyro_dropout(
    model,
    iters_per_epoch = 196,
    max_epoch = 100,
    p = 0.5,
    sigma = 256,
    tau = 16,
)



In [22]:
train(model, opt, num_epochs, scheduler, record=True, config=config, PATH='compose')

  0%|          | 0/1343 [00:00<?, ?it/s]

  0%|          | 0/1343 [00:00<?, ?it/s]

  0%|          | 0/1343 [00:00<?, ?it/s]

  0%|          | 0/1343 [00:00<?, ?it/s]

Epoch 0 of 5
Epoch 1 of 5
Epoch 2 of 5
Epoch 3 of 5
Epoch 4 of 5


  0%|          | 0/1343 [00:00<?, ?it/s]



0,1
train/auc_roc,▁▃▃▂▄▄▄▄▄▄▄▆▅▅▅▃▅▃▅▄▄▅▅▆▅▅▇▅▆▆▇▅▆▃▆▆█▅▆▆
train/loss,██▇█▆▅█▇▅▅▆▄▇▂▆▆▅▆▃▃▆▂▂▄▄▂▂▆▃▅▄▄▃▅▄▃▁▂▄▂

0,1
train/auc_roc,0.57357
train/loss,0.49094


# Predictions

In [23]:
import os

class CustomImageDatasetTest(Dataset):
    def __init__(self, img_dir, transform=None, target_transform=None):
        self.img_locations = os.listdir('data/valid')
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_locations)

    def __getitem__(self, idx):
        image = read_image(f'{self.img_dir}/{self.img_locations[idx]}') / 255
        # print(image.shape)
        label = self.img_locations[idx]
        if self.transform:
            image = self.transform(image)
        return image, label
        
transform_test = T.Compose([
    T.CenterCrop(size=(224, 224)),
    T.Normalize(mean=(mean), std=(std)),
    T.Lambda(lambda x: x.repeat(3, 1, 1))
])


testing_data_real = CustomImageDatasetTest(
    img_dir='data/valid',
    transform=transform_test
)

In [24]:
test_loader_real = torch.utils.data.DataLoader(
    testing_data_real,
    batch_size=batch_size,
    shuffle=False,
    num_workers=8,
    pin_memory=True,
)

In [25]:
def make_preds(name):
    model.eval()
    Image, Cardiomegaly, Edema, Consolidation, Atelectasis, Pleural_Effusion = [], [], [], [], [], []
    for batch_num, (data, label) in tqdm(enumerate(test_loader_real), total=len(test_loader_real)):
        data = data.to(device)
        with torch.no_grad():
            with torch.cuda.amp.autocast():
                output = model(data)
                pred = torch.nn.Sigmoid()(output)

        Image.extend(label)
        Cardiomegaly.extend(pred[:, 0].tolist())
        Edema.extend(pred[:, 1].tolist())
        Consolidation.extend(pred[:, 2].tolist())
        Atelectasis.extend(pred[:, 3].tolist())
        Pleural_Effusion.extend(pred[:, 4].tolist())
    df = pd.DataFrame({
        'Image': [f'valid/{name}' for name in Image],
        'Cardiomegaly': Cardiomegaly,
        'Edema': Edema,
        'Consolidation': Consolidation,
        'Atelectasis': Atelectasis,
        'Pleural Effusion': Pleural_Effusion
    })
    df.to_csv(f'{name}.csv', index=False)

In [26]:
make_preds('composed')
!kaggle competitions submit -c chest-x-ray-image-classification -f composed.csv -m "composed"

  0%|          | 0/151 [00:00<?, ?it/s]

Successfully submitted to Chest x-ray image classification

100%|██████████| 1.69M/1.69M [00:02<00:00, 864kB/s] 


# Results

* Code can be updated in 5 minutes

* https://wandb.ai/messlav/HSE_DL_BHW1

* The same time

* Similar train metrics

* Increased AUROC on test by 0.03 (huge improvement)