In [1]:
import warnings

# Detect whether the notebook runs on Google Colab and mount Google Drive there.
# A missing `google.colab` package is the normal local case and falls back
# silently; any other failure (e.g. the Drive mount itself) also falls back to
# the local paths, but is reported instead of being swallowed. Catching
# `Exception` rather than using a bare `except:` lets KeyboardInterrupt and
# SystemExit propagate.
try:
    from google.colab import drive
    drive.mount('/content/gdrive')
    IN_COLAB = True
except ImportError:
    IN_COLAB = False
except Exception as colab_error:
    warnings.warn(f'Google Colab setup failed ({colab_error!r}); falling back to local paths')
    IN_COLAB = False

# Root locations of the dataset for each environment.
if IN_COLAB:
    PATH_DRIVER = '/content/gdrive/My Drive/'
    DATA_FOLDER = 'Dataset/'
else:
    PATH_DRIVER = ''
    DATA_FOLDER = 'C:/Users/Lluis/Desktop/Machine Learning/radiology_ai/data/'

In [2]:
import os

import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

from fastai.basics import *
from fastai.callback.all import *
from fastai.data.block import *
from fastai.data.transforms import *
from fastai.vision import models
from fastai.vision.augment import *
from fastai.vision.core import PILImageBW
from fastai.vision.data import *
from fastai.vision.learner import create_cnn_model

In [3]:
from mixmatch.losses import MixMatchLoss
from mixmatch.callback import MixMatchCallback
from mixmatch.utils import TestColSplitter

In [4]:
# Dataset root. The trailing '' makes os.path.join end the result with a path
# separator, so sub-folder names can be appended directly.
PATH_PREFIX = os.path.join(PATH_DRIVER, DATA_FOLDER, '')

raw_folder = f'{PATH_PREFIX}DICOMS'
organize_folder = f'{PATH_PREFIX}pending_classification'
# Folder the dataloaders read the '<ID>.png' images from.
preprocess_folder = f'{PATH_PREFIX}preprocess'

In [5]:
# Hyperparameters

# Architecture handed to create_model.
MODEL = models.resnet18

# Optimisation: learning rate given to the Learner.
LR = 0.002

# Data pipeline: batch size of every dataloader, item-level resize and the
# output size of the GPU random-resized crop.
BATCH_SIZE = 32
RESIZE = 512
RANDOM_RESIZE_CROP = 256

# MixMatch settings: T goes to MixMatchCallback, LAMBDA_U is the `lambda_u`
# argument of MixMatchLoss and ALPHA is the argument of MixUp.
T = 0.5
LAMBDA_U = 75
ALPHA = 0.75
# EMA_DECAY = 0.999  (disabled)

In [6]:
# Transformations

class Multiply_255(Transform):
    """Transform whose `encodes` multiplies its input by 255."""

    def encodes(self, o):
        # Deliberately left without type annotations so the signature is the
        # same as before.
        return o * 255


# Batch transforms for the labelled dataloaders.
label_transform = [
    RandomResizedCropGPU(RANDOM_RESIZE_CROP),
    Flip(),
    Normalize(),
]

# Batch transforms for the augmented unlabelled dataloaders. Unlike the
# labelled pipeline, the batch is scaled by 255 and Normalize is disabled.
unlabel_transform = [
    RandomResizedCropGPU(RANDOM_RESIZE_CROP),
    Flip(),
    Rotate(180, p=1),
    Multiply_255(),
    # Normalize()
]

In [7]:
# Callbacks
from fastai.callback.tensorboard import TensorBoardCallback

# Callbacks passed to the Learner; the MixUp and MixMatch callbacks are added
# in the DataLoaders cell. The previous `cbs = None` line was dead code because
# it was overwritten immediately. To train without TensorBoard logging, set
# `cbs = None` or `cbs = []` here instead.
cbs = [TensorBoardCallback()]

In [8]:
# Data
# Fixed seed so the train/valid/test partition is the same on every
# Restart & Run All; without it the reported metrics are not comparable
# between runs.
SPLIT_SEED = 42
# Fraction held out at each of the two split stages (test first, then valid).
HOLDOUT_FRACTION = 0.15

df = pd.read_excel(os.path.join(PATH_PREFIX, 'all.xlsx'), dtype={'ID': 'string', 'Target': 'string'})

# Rows without a Target are the unlabelled pool; the rest are labelled.
has_target = df['Target'].notnull()
unlabel_df = df.loc[~has_target, ['ID', 'Target']].reset_index(drop=True)
label_df = df.loc[has_target, ['ID', 'Target']].reset_index(drop=True)

train_df, test_df = train_test_split(
    label_df, test_size=HOLDOUT_FRACTION, shuffle=True, random_state=SPLIT_SEED
)
train_df, val_df = train_test_split(
    train_df, test_size=HOLDOUT_FRACTION, shuffle=True, random_state=SPLIT_SEED
)

# Record the split of every labelled row in a 'Dataset' column, which the
# label DataBlock's splitter reads.
label_df.loc[train_df.index, 'Dataset'] = 'train'
label_df.loc[val_df.index, 'Dataset'] = 'valid'
label_df.loc[test_df.index, 'Dataset'] = 'test'

# The three index sets must partition the labelled frame.
assert len(train_df) + len(val_df) + len(test_df) == len(label_df)
assert label_df['Dataset'].notnull().all()

In [9]:
# DataLoaders
print('==> Preparing label dataloaders')

# Labelled pipeline: grayscale PNGs located through the 'ID' column, targets
# taken from 'Target', split according to the 'Dataset' column.
# NOTE(review): MultiCategoryBlock is fed the raw 'Target' string (no
# label_delim on the ColReader) — confirm this yields the intended vocab when
# labels are longer than one character.
label_datablock = DataBlock(
    blocks=(ImageBlock(cls=PILImageBW), MultiCategoryBlock),
    get_x=ColReader('ID', pref=preprocess_folder+'/', suff='.png'),
    get_y=ColReader('Target'),
    splitter=TestColSplitter(col='Dataset'),
    item_tfms=Resize(RESIZE),
    batch_tfms=label_transform,
)
label_dl = label_datablock.dataloaders(
    label_df,
    bs=BATCH_SIZE,
    num_workers=0,
    shuffle_train=True,
    drop_last=True,
)

print('==> Preparing unlabel dataloaders')

# DataBlock arguments shared by both unlabelled pipelines. `blocks` is a single
# ImageBlock (the original parentheses did not make it a tuple), and
# RandomSplitter(0) is constructed with a value of 0.
ds_params = dict(
    blocks=ImageBlock(cls=PILImageBW),
    get_x=ColReader('ID', pref=preprocess_folder+'/', suff='.png'),
    splitter=RandomSplitter(0),
    item_tfms=Resize(RESIZE),
)
# Arguments shared by both `.dataloaders(...)` calls.
dls_params = dict(
    source=unlabel_df,
    bs=BATCH_SIZE,
    num_workers=0,
    shuffle_train=True,
    drop_last=True,
)

# Unlabelled images without batch transforms.
unlabel_dl = DataBlock(**ds_params).dataloaders(**dls_params)

# Same unlabelled images with the augmentation batch transforms applied.
transform_dl = DataBlock(**ds_params, batch_tfms=unlabel_transform).dataloaders(**dls_params)

print('==> Preparing MixMatch callback')

# Rebuild the callback list instead of appending in place: re-running this cell
# used to stack a second MixUp / MixMatchCallback on top of the first. Any
# instance left over from an earlier run is dropped before the fresh ones are
# added, so the cell is idempotent. `cbs or []` covers both `None` and `[]`.
_mix_callback_types = (MixUp, MixMatchCallback)
cbs = [cb for cb in (cbs or []) if not isinstance(cb, _mix_callback_types)]

cbs.append(MixUp(ALPHA))
cbs.append(MixMatchCallback(unlabel_dl, transform_dl, T))

==> Preparing label dataloaders
==> Preparing unlabel dataloaders
==> Preparing MixMatch callback


In [10]:
# Model
print("==> creating model")

def create_model(model_arq, n_out, pretrained=True, n_in=1, ema=False):
    """Build a CNN with `create_cnn_model` and place it on the GPU when one exists.

    Args:
        model_arq: architecture passed to `create_cnn_model`.
        n_out: number of model outputs.
        pretrained: forwarded to `create_cnn_model`.
        n_in: number of input channels, forwarded to `create_cnn_model`.
        ema: when True, every parameter is detached in place.

    Returns:
        The created model.
    """
    model = create_cnn_model(model_arq, n_out=n_out, cut=None, pretrained=pretrained, n_in=n_in)

    # Only move to CUDA when it is available, matching the guard used for the
    # class weights in the loss cell; an unconditional .cuda() raises on
    # CPU-only machines.
    if torch.cuda.is_available():
        model = model.cuda()

    if ema:
        for param in model.parameters():
            param.detach_()

    return model

# Distinct label values of the labelled data, in order of first appearance;
# the model gets one output per value.
classes = label_df['Target'].unique()
n_out = len(classes)

model = create_model(model_arq=MODEL, n_out=n_out, pretrained=True, n_in=1)
# ema_model = create_model(MODEL, n_out, pretrained=True, n_in=1, ema=True)

==> creating model


In [11]:
# Loss
print("==> defining loss")

# compute_class_weight returns one weight per entry of `classes`, in that order.
# `classes` comes from unique(), i.e. order of first appearance, while fastai
# builds its category vocab sorted. Sorting here keeps weight i aligned with
# output i.
# NOTE(review): assumes MixMatchLoss applies `weight` per output index in vocab
# order — confirm against mixmatch.losses.
sorted_classes = pd.Series(classes).sort_values().unique()

class_weight = compute_class_weight(class_weight='balanced', classes=sorted_classes, y=train_df['Target'])
class_weight = torch.as_tensor(class_weight).float()
if torch.cuda.is_available():
    class_weight = class_weight.cuda()

train_criterion = MixMatchLoss(unlabel_dl=unlabel_dl, model=model, n_out=n_out, bs=BATCH_SIZE, lambda_u=LAMBDA_U, weight=class_weight)
# Labelled part of the loss, exposed on its own for use as a metric.
criterion = train_criterion.Lx_criterion

==> defining loss


In [13]:
# Learner
print("==> defining learner")

# Averaged versions of the two loss terms. They are created here but are
# currently not part of the metric list handed to the Learner.
Lx_metric = AvgMetric(func=criterion)
Lu_metric = AvgMetric(func=train_criterion.Lu_criterion)

# Macro-averaged multi-label metrics.
f1_score = F1ScoreMulti(average='macro')
precision = PrecisionMulti(average='macro')
recall = RecallMulti(average='macro')

fastai_metrics = [
    # Lx_metric, Lu_metric,
    f1_score,
    precision,
    recall,
]

# cbs.append(Recorder(train_metrics=True))

learn = Learner(
    dls=label_dl,
    model=model,
    loss_func=train_criterion,
    lr=LR,
    metrics=fastai_metrics,
    cbs=cbs,
)
learn.fit_one_cycle(n_epoch=1)

==> defining learner
epoch     train_loss  valid_loss  f1_score  precision_score  recall_score  time    
0         20.849142   0.008246    0.500000  0.500000         0.500000      00:32     
