In [1]:
# Enumerate GPUs by PCI bus ID so the index below refers to a fixed physical device.
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
# Expose only device 0 to this kernel.
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=0


# Transfer learning experiments

This notebook:
1) Implements transfer learning along the chain ImageNet > 13 diseases > 1 disease.
2) Compares its performance against ImageNet > 1 disease and ImageNet > MURA > 1 disease.

Results are reported for the disease at index 10 (Emphysema).

## Imports & global variables

We chose `idx=10` because previous experiments showed a strong signal and a sufficient number of positive cases for this disease.

In [2]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import sys; sys.path.append("..")
import warnings; warnings.filterwarnings('ignore')

from core import * 
from data_manipulation import Transform, RandomRotation, Flip, RandomCrop, multi_label_2_binary, balance_obs, DataBatches
from utils import save_model, load_model, lr_loss_plot, resize
from architectures import DenseNet121
from train_functions import OptimizerWrapper, TrainingPolicy, FinderPolicy, validate_multilabel, lr_finder, validate_binary, TTA_binary

# --- Reproducibility / augmentation -------------------------------------
seed = 42
r_pix = 8  # handed to RandomCrop here and to every DataBatches in this notebook
TRANSFORMATIONS = [
    RandomRotation(arc_width=20),
    Flip(),
    RandomCrop(r_pix=r_pix),
]

# --- Training switches --------------------------------------------------
BATCH_SIZE, EPOCHS = 16, 10
PRETRAINED = True
NORMALIZE = True  # ImageNet
FREEZE = True
GRADUAL_UNFREEZING = True
DATA = '14diseases'
N_SAMPLES = 1100

# --- Locations ----------------------------------------------------------
BASE_PATH = Path('../..')
PATH = BASE_PATH/'data'
IMG_FOLDER = PATH/'ChestXRay-250'
SAVE_RESULTS = './results/'
SAVE_MODELS = './models'

# --- Target disease and label-name -> column-index lookup ----------------
DISEASE = 'Emphysema'
tgt2idx = {
    label: position
    for position, label in enumerate((
        'Atelectasis',
        'Cardiomegaly',
        'Effusion',
        'Infiltration',
        'Mass',
        'Nodule',
        'Pneumonia',
        'Pneumothorax',
        'Consolidation',
        'Edema',
        'Emphysema',
        'Fibrosis',
        'Pleural_Thickening',
        'Hernia',
    ))
}

# Downstream task - 1 label (small data)

Balance training and testing.

In [3]:
def train(n_epochs, train_dl, valid_dl, model, max_lr=.01, wd=0, alpha=1./ 3,
          save_path=None, unfreeze_during_loop:tuple=None):
    """Fine-tune a single-logit binary classifier, validating after every epoch.

    Args:
        n_epochs: number of passes over `train_dl`.
        train_dl: training batches; must support `len()`, `set_random_choices()`
            and iteration yielding `(x, y)` pairs.
        valid_dl: batches passed to `validate_binary` at the end of each epoch.
        model: network called as `model(x)`; it must expose `unfreeze(i)` when
            `unfreeze_during_loop` is given.
        max_lr: forwarded to `TrainingPolicy`.
        wd: forwarded to `OptimizerWrapper`.
        alpha: forwarded to `OptimizerWrapper`.
        save_path: when truthy, the model is saved there every time the
            validation loss improves on the best value seen so far.
        unfreeze_during_loop: optional pair of fractions of the total number of
            iterations; `model.unfreeze(1)` is called at the first one and
            `model.unfreeze(0)` at the second one.

    Returns:
        None. Per-epoch train loss, validation loss and AUC are printed.
    """

    if unfreeze_during_loop:
        # Translate the fractions into absolute iteration counts.
        total_iter = n_epochs*len(train_dl)
        first_unfreeze = int(total_iter*unfreeze_during_loop[0])
        second_unfreeze = int(total_iter*unfreeze_during_loop[1])

    best_loss = np.inf
    cnt = 0  # optimizer steps taken so far, across all epochs

    policy = TrainingPolicy(n_epochs=n_epochs, dl=train_dl, max_lr=max_lr)
    optimizer = OptimizerWrapper(model, policy, wd=wd, alpha=alpha)

    for epoch in tqdm_notebook(range(n_epochs), ):
        # Re-enter training mode each epoch (validation may have changed it — TODO confirm).
        model.train()
        agg_div = 0
        agg_loss = 0
        train_dl.set_random_choices()
        for x, y in tqdm_notebook(train_dl, leave=False):

            if unfreeze_during_loop:
                if cnt == first_unfreeze: model.unfreeze(1)
                if cnt == second_unfreeze: model.unfreeze(0)

            out = model(x)
            # Give the logits exactly the target's shape. A bare `squeeze()` also
            # drops the batch axis when a batch holds a single sample, and the
            # loss then rejects the input/target shape mismatch.
            logits = out.reshape(y.shape)
            loss = F.binary_cross_entropy_with_logits(input=logits, target=y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight the batch-mean loss by the batch size so the epoch figure
            # is a per-sample average even when the last batch is smaller.
            batch = y.shape[0]
            agg_loss += batch*loss.item()
            agg_div += batch
            cnt += 1


        val_loss, measure, _ = validate_binary(model, valid_dl)
        print(f'Ep. {epoch+1} - train loss {agg_loss/agg_div:.4f} -  val loss {val_loss:.4f} AUC {measure:.4f}')

        # Checkpoint only on improvement of the validation loss.
        if save_path and val_loss < best_loss:
            save_model(model, save_path)
            best_loss = val_loss

# Comparison on the Downstream task

In [4]:
# Each CSV split is passed through multi_label_2_binary with the column index of DISEASE.

# Training split: every row kept, order shuffled with a fixed random_state.
train_df = (
    multi_label_2_binary(pd.read_csv(PATH/'train_df.csv'), tgt2idx[DISEASE])
    .sample(frac=1, random_state=42)
)

# Validation and test splits: handed to balance_obs with rate_positive=.5.
valid_df = balance_obs(
    multi_label_2_binary(pd.read_csv(PATH/"val_df.csv"), tgt2idx[DISEASE]),
    amt=None,
    rate_positive=.5,
)

test_df = balance_obs(
    multi_label_2_binary(pd.read_csv(PATH/"test_df.csv"), tgt2idx[DISEASE]),
    amt=None,
    rate_positive=.5,
)

In [5]:
# Split-size check, left disabled: `sample_train_df` is only created in a later cell.
# len(sample_train_df),len(valid_df),len(test_df)

## Without Mixup

In [6]:
# Positive rate requested from balance_obs for the small training sample.
p_positive = .30
# Percentage tag used in the checkpoint file names. round() instead of int():
# products such as .29*100 evaluate to 28.999999999999996 and int() would
# truncate them to 28.
N = round(p_positive*100)
# Draw the small training set (amt=50) at the requested positive rate.
sample_train_df = balance_obs(train_df, amt=50, rate_positive=p_positive)

In [7]:

# Keyword arguments common to the three loaders of the "without mixup" runs.
_shared = dict(
    img_folder_path=IMG_FOLDER,
    batch_size=BATCH_SIZE,
    data='Pneumonia',
    r_pix=r_pix,
    normalize=NORMALIZE,
    seed=seed,
    mixup=False,
)

# Training: augmented and shuffled.
train_dl = DataBatches(df=sample_train_df, transforms=TRANSFORMATIONS, shuffle=True, **_shared)

# Validation: no transforms, fixed order.
valid_dl = DataBatches(df=valid_df, transforms=None, shuffle=False, **_shared)

# Test: fixed order but with the transforms kept (the loader is passed to TTA_binary later).
test_dl = DataBatches(df=test_df, transforms=TRANSFORMATIONS, shuffle=False, **_shared)

del _shared  # scratch name only; keep the notebook namespace unchanged

### ImageNet

In [9]:
# Run under the "ImageNet" heading: DenseNet121 built with pretrained=True.
epochs = 4
drop_rate = 0  # Not good for CNN
model_p = f'./models/best_{N}_emphysema_imagenet.pth'  # unused here: save_path=None below

model = DenseNet121(
    out_size=1,
    pretrained=True,
    freeze=True,
    drop_rate=drop_rate,
)
model = model.cuda()

# unfreeze(1) at 10% and unfreeze(0) at 20% of the total iterations.
train(
    n_epochs=epochs, train_dl=train_dl, valid_dl=valid_dl, model=model,
    max_lr=1e-3, save_path=None, unfreeze_during_loop=(.1, .2),
)
TTA_binary(model, test_dl)

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 1 - train loss 0.7087 -  val loss 0.8219 AUC 0.6151


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 2 - train loss 0.5210 -  val loss 1.0829 AUC 0.7092


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 3 - train loss 0.3589 -  val loss 1.1945 AUC 0.7385


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 4 - train loss 0.2846 -  val loss 1.1146 AUC 0.7528

TTA loss 1.0447  auc 0.6321  accuracy 0.5014


(1.0447034310389307, 0.6321103520783091, 0.5013723696248856)

### MURA

In [17]:
# Run under the "MURA" heading: DenseNet121 built with pretrained='MURA'.
epochs = 5
drop_rate = 0  # Not good for CNN
model_p = f'./models/best_{N}_emphysema_MURA.pth'  # unused here: save_path=None below

model = DenseNet121(
    out_size=1,
    pretrained='MURA',
    freeze=True,
    drop_rate=drop_rate,
)
model = model.cuda()

# unfreeze(1) at 10% and unfreeze(0) at 20% of the total iterations.
train(
    n_epochs=epochs, train_dl=train_dl, valid_dl=valid_dl, model=model,
    max_lr=1e-3, save_path=None, unfreeze_during_loop=(.1, .2),
)
TTA_binary(model, test_dl)

HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 1 - train loss 0.6224 -  val loss 0.7037 AUC 0.5595


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 2 - train loss 0.5787 -  val loss 0.6381 AUC 0.7470


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 3 - train loss 0.3324 -  val loss 0.6640 AUC 0.7712


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 4 - train loss 0.1848 -  val loss 0.7291 AUC 0.7746


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 5 - train loss 0.1457 -  val loss 0.7101 AUC 0.7779

TTA loss 0.7848  auc 0.6273  accuracy 0.5489


(0.7847948747892393, 0.6272796444813498, 0.5489478499542544)

### CheXpert

In [29]:
# Run under the "CheXpert" heading: pretrained='chexpert', 15 epochs, max_lr=7e-4.
epochs = 15
drop_rate = 0  # Not good for CNN
model_p = f'./models/best_{N}_emphysema_chexpert.pth'  # unused here: save_path=None below

model = DenseNet121(
    out_size=1,
    pretrained='chexpert',
    freeze=True,
    drop_rate=drop_rate,
)
model = model.cuda()

# unfreeze(1) at 10% and unfreeze(0) at 20% of the total iterations.
train(
    n_epochs=epochs, train_dl=train_dl, valid_dl=valid_dl, model=model,
    max_lr=7e-4, save_path=None, unfreeze_during_loop=(.1, .2),
)
TTA_binary(model, test_dl)

HBox(children=(IntProgress(value=0, max=15), HTML(value='')))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 1 - train loss 0.6888 -  val loss 0.7128 AUC 0.4941


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 2 - train loss 0.6889 -  val loss 0.6969 AUC 0.5502


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 3 - train loss 0.6765 -  val loss 0.6814 AUC 0.6857


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 4 - train loss 0.6543 -  val loss 0.6705 AUC 0.7162


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 5 - train loss 0.6227 -  val loss 0.6651 AUC 0.7365


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 6 - train loss 0.5945 -  val loss 0.6626 AUC 0.7535


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 7 - train loss 0.5697 -  val loss 0.6637 AUC 0.7706


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 8 - train loss 0.5435 -  val loss 0.6711 AUC 0.7714


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 9 - train loss 0.5401 -  val loss 0.6733 AUC 0.7731


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 10 - train loss 0.5236 -  val loss 0.6811 AUC 0.7759


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 11 - train loss 0.5065 -  val loss 0.6701 AUC 0.7871


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 12 - train loss 0.5112 -  val loss 0.6641 AUC 0.7944


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 13 - train loss 0.5001 -  val loss 0.6630 AUC 0.7958


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 14 - train loss 0.5068 -  val loss 0.6625 AUC 0.7983


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 15 - train loss 0.5005 -  val loss 0.6622 AUC 0.7976

TTA loss 0.6764  auc 0.7283  accuracy 0.5137


(0.6764278869261353, 0.7282854629267675, 0.5137236962488564)

In [37]:
# Same CheXpert configuration as the previous cell (15 epochs, max_lr=7e-4), run again.
epochs = 15
drop_rate = 0  # Not good for CNN
model_p = f'./models/best_{N}_emphysema_chexpert.pth'  # unused here: save_path=None below

model = DenseNet121(
    out_size=1,
    pretrained='chexpert',
    freeze=True,
    drop_rate=drop_rate,
)
model = model.cuda()

# unfreeze(1) at 10% and unfreeze(0) at 20% of the total iterations.
train(
    n_epochs=epochs, train_dl=train_dl, valid_dl=valid_dl, model=model,
    max_lr=7e-4, save_path=None, unfreeze_during_loop=(.1, .2),
)
TTA_binary(model, test_dl)

HBox(children=(IntProgress(value=0, max=15), HTML(value='')))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 1 - train loss 0.6851 -  val loss 0.6965 AUC 0.5302


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 2 - train loss 0.6783 -  val loss 0.6917 AUC 0.5932


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 3 - train loss 0.6687 -  val loss 0.6818 AUC 0.6709


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 4 - train loss 0.6506 -  val loss 0.6756 AUC 0.6895


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 5 - train loss 0.6321 -  val loss 0.6762 AUC 0.7056


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 6 - train loss 0.6097 -  val loss 0.6763 AUC 0.7211


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 7 - train loss 0.5952 -  val loss 0.6770 AUC 0.7258


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 8 - train loss 0.5759 -  val loss 0.6830 AUC 0.7248


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 9 - train loss 0.5543 -  val loss 0.6899 AUC 0.7209


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 10 - train loss 0.5502 -  val loss 0.6940 AUC 0.7287


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 11 - train loss 0.5350 -  val loss 0.6974 AUC 0.7366


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 12 - train loss 0.5291 -  val loss 0.6974 AUC 0.7391


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 13 - train loss 0.5422 -  val loss 0.6990 AUC 0.7397


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 14 - train loss 0.5317 -  val loss 0.6951 AUC 0.7419


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 15 - train loss 0.5248 -  val loss 0.6914 AUC 0.7411

TTA loss 0.7158  auc 0.6213  accuracy 0.5000


(0.7157682032688212, 0.6213364762369533, 0.5)

In [36]:
# Shorter CheXpert run: pretrained='chexpert', 3 epochs, max_lr=9e-4.
epochs = 3
drop_rate = 0  # Not good for CNN
model_p = f'./models/best_{N}_emphysema_chexpert.pth'  # unused here: save_path=None below

model = DenseNet121(
    out_size=1,
    pretrained='chexpert',
    freeze=True,
    drop_rate=drop_rate,
)
model = model.cuda()

# unfreeze(1) at 10% and unfreeze(0) at 20% of the total iterations.
train(
    n_epochs=epochs, train_dl=train_dl, valid_dl=valid_dl, model=model,
    max_lr=9e-4, save_path=None, unfreeze_during_loop=(.1, .2),
)
TTA_binary(model, test_dl)

HBox(children=(IntProgress(value=0, max=3), HTML(value='')))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 1 - train loss 0.7097 -  val loss 0.6614 AUC 0.5887


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 2 - train loss 0.6813 -  val loss 0.6629 AUC 0.6251


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 3 - train loss 0.6547 -  val loss 0.6658 AUC 0.6464

TTA loss 0.6892  auc 0.6085  accuracy 0.5531


(0.6892017307542162, 0.6084958845652573, 0.5530649588289113)

It is important to remember that data is the best regularization technique. When only a small amount of data is available, the sensitivity of the model's performance to hyper-parameters and random components increases. For instance, during our experiments we saw a jump in performance from .62 to .72 AUC when running the same code twice.

When training manually, we advise looking at the general trend of the target validation metric; it will most likely not increase / decrease monotonically.

### 13 diseases

In [None]:
# Disabled run for the '13diseases' pretrained weights (no mixup). The whole cell
# is commented out; the output stored below it comes from an earlier execution.
# drop_rate = 0 # Not good for CNN
# model = DenseNet121(out_size=1, pretrained='13diseases', freeze=True, drop_rate=drop_rate).cuda()
# model_p = f'./models/best_{N}_emphysema_13diseases.pth'
# epochs = 7
# train(epochs, train_dl, valid_dl, model, max_lr=1e-3, save_path=None, 
#       unfreeze_during_loop = (.1,.2))
# TTA_binary(model, test_dl)

HBox(children=(IntProgress(value=0, max=7), HTML(value='')))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 1 - train loss 0.6256 -  val loss 0.6406 AUC 0.7954


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 2 - train loss 0.5775 -  val loss 0.6467 AUC 0.7996


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 3 - train loss 0.5075 -  val loss 0.7184 AUC 0.8019


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 4 - train loss 0.4973 -  val loss 0.7646 AUC 0.8120


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 5 - train loss 0.4279 -  val loss 0.7152 AUC 0.8148


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

Ep. 6 - train loss 0.4019 -  val loss 0.6766 AUC 0.8141


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

## With Mixup

In [None]:
# Keyword arguments common to the three loaders of the "with mixup" runs.
_shared = dict(
    img_folder_path=IMG_FOLDER,
    batch_size=BATCH_SIZE,
    data='Pneumonia',
    r_pix=r_pix,
    normalize=NORMALIZE,
    seed=seed,
)

# Training: augmented, shuffled, and the only loader with mixup switched on.
train_dl = DataBatches(df=sample_train_df, transforms=TRANSFORMATIONS, shuffle=True,
                       mixup=True, **_shared)

# Validation: no transforms, fixed order, no mixup.
valid_dl = DataBatches(df=valid_df, transforms=None, shuffle=False,
                       mixup=False, **_shared)

# Test: fixed order, transforms kept (the loader is passed to TTA_binary later), no mixup.
test_dl = DataBatches(df=test_df, transforms=TRANSFORMATIONS, shuffle=False,
                      mixup=False, **_shared)

del _shared  # scratch name only; keep the notebook namespace unchanged

### ImageNet

In [None]:
# Mixup run under the "ImageNet" heading: pretrained=True, 6 epochs.
epochs = 6
drop_rate = 0  # Not good for CNN
model_p = f'./models/best_{N}_emphysema_imagenet_mixup.pth'  # unused here: save_path=None below

model = DenseNet121(
    out_size=1,
    pretrained=True,
    freeze=True,
    drop_rate=drop_rate,
)
model = model.cuda()

# unfreeze(1) at 10% and unfreeze(0) at 20% of the total iterations.
train(
    n_epochs=epochs, train_dl=train_dl, valid_dl=valid_dl, model=model,
    max_lr=1e-3, save_path=None, unfreeze_during_loop=(.1, .2),
)
TTA_binary(model, test_dl)

### MURA

In [None]:
# Mixup run under the "MURA" heading: pretrained='MURA', 6 epochs.
epochs = 6
drop_rate = 0  # Not good for CNN
model_p = f'./models/best_{N}_emphysema_MURA_mixup.pth'  # unused here: save_path=None below

model = DenseNet121(
    out_size=1,
    pretrained='MURA',
    freeze=True,
    drop_rate=drop_rate,
)
model = model.cuda()

# unfreeze(1) at 10% and unfreeze(0) at 20% of the total iterations.
train(
    n_epochs=epochs, train_dl=train_dl, valid_dl=valid_dl, model=model,
    max_lr=1e-3, save_path=None, unfreeze_during_loop=(.1, .2),
)
TTA_binary(model, test_dl)

### CheXpert

In [None]:
# Mixup run under the "CheXpert" heading: pretrained='chexpert', 6 epochs, max_lr=5e-4.
epochs = 6
drop_rate = 0  # Not good for CNN
model_p = f'./models/best_{N}_emphysema_chexpert_mixup.pth'  # unused here: save_path=None below

model = DenseNet121(
    out_size=1,
    pretrained='chexpert',
    freeze=True,
    drop_rate=drop_rate,
)
model = model.cuda()

# unfreeze(1) at 10% and unfreeze(0) at 20% of the total iterations.
train(
    n_epochs=epochs, train_dl=train_dl, valid_dl=valid_dl, model=model,
    max_lr=5e-4, save_path=None, unfreeze_during_loop=(.1, .2),
)
TTA_binary(model, test_dl)

### 13 diseases

In [None]:
# Mixup run under the "13 diseases" heading: pretrained='13diseases', 5 epochs.
epochs = 5
drop_rate = 0  # Not good for CNN
model_p = f'./models/best_{N}_emphysema_13diseases_mixup.pth'  # unused here: save_path=None below

model = DenseNet121(
    out_size=1,
    pretrained='13diseases',
    freeze=True,
    drop_rate=drop_rate,
)
model = model.cuda()

# unfreeze(1) at 10% and unfreeze(0) at 20% of the total iterations.
train(
    n_epochs=epochs, train_dl=train_dl, valid_dl=valid_dl, model=model,
    max_lr=1e-3, save_path=None, unfreeze_during_loop=(.1, .2),
)
TTA_binary(model, test_dl)

# Results


| Method | test AUC   | test Accuracy (tr = .5) |
|------|------|-------| 
|   ImageNet + Mixup  | 0.611 | 0.552|
|   ImageNet  | 0.625 | 0.624|
|   MURA + Mixup  | 0.659| 0.615|
|   MURA  | 0.674 | 0.606|
|   CheXpert + Mixup  | 0.748| 0.513|
|   CheXpert  | 0.769 | 0.594 |

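Mixup does not seem to help in the case of X-rays. Note that the figures in this table were recorded from separate runs, so they differ from the cell outputs shown earlier in the notebook.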