In [None]:
# Extend the import search path with two attached input directories
# (presumably the offline copies of the EfficientNet and iterative-stratification
# packages imported further down — verify against the attached datasets).
import sys
sys.path.append('../input/pytorch-efficientnet')
sys.path.append('../input/multistartifiedkfold')

In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import matplotlib.image as immg
from pathlib import Path
import os,sys
import seaborn as sns
import gc
import torchvision
import cv2
from fastai.data.all import *
from fastai.vision.core import *
from fastai.vision.data import *
from tqdm.notebook import tqdm
import zipfile
import io
from sklearn.decomposition import PCA
from fastai.vision.all import *

In [None]:
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

In [None]:
# Load the competition's train.csv; the target columns listed below are read from this frame.
df = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/train.csv')

In [None]:
# Preview the first rows of the loaded table.
df.head()

In [None]:
# Label columns, in the order used for the model outputs and the per-column metrics.
target_cols = [
    'ETT - Abnormal',
    'ETT - Borderline',
    'ETT - Normal',
    'NGT - Abnormal',
    'NGT - Borderline',
    'NGT - Incompletely Imaged',
    'NGT - Normal',
    'CVC - Abnormal',
    'CVC - Borderline',
    'CVC - Normal',
    'Swan Ganz Catheter Present',
]

## Multilabel stratified K-fold split (MultilabelStratifiedKFold)

In [None]:
# Shuffle once, then reset to a clean 0..n-1 index. `kf.split` yields *positional*
# row numbers, while `.loc` below selects by index *label*; without the reset the
# shuffled labels no longer match positions and the fold ids land on the wrong rows.
trn_df = df.sample(frac=1., random_state=2020).reset_index(drop=True)
trn_df['kfold'] = -1  # -1 marks "not yet assigned"
y = trn_df[target_cols].values
kf = MultilabelStratifiedKFold(n_splits=5, random_state=2020, shuffle=True)
for fold, (trn_, val_) in enumerate(kf.split(X=trn_df, y=y)):
    # Each row is tagged with the fold in which it serves as validation data.
    trn_df.loc[val_, 'kfold'] = fold

# Every row must have been placed in exactly one validation fold.
assert (trn_df['kfold'] >= 0).all(), "some rows were not assigned to a fold"

In [None]:
# Persist the frame (including the 'kfold' column) so the same split can be reused.
trn_df.to_csv('train_df_kfold.csv',index=False)

In [None]:
# Preview the shuffled frame with its fold assignment.
trn_df.head()

## Target Distribution

In [None]:
# One count plot per target column, laid out on a grid.
columns = 4
fea_cols = target_cols
rows = -(-len(fea_cols) // columns)  # ceiling division: enough rows for every target

fig, axes = plt.subplots(rows, columns, figsize=(20, 12), squeeze=False)
flat_axes = axes.ravel()

for ax, col in zip(flat_axes, fea_cols):
    # Pass the series by keyword: seaborn's positional data argument is not
    # stable across versions.
    sns.countplot(x=df[col], ax=ax)
    ax.set_title(col)

# Hide the leftover grid cells instead of plotting the last target again.
for ax in flat_axes[len(fea_cols):]:
    ax.set_visible(False)

fig.subplots_adjust(wspace=0.3, hspace=0.3)
plt.show()

### Total number of positive samples for each target

In [None]:
# Column-wise sum over the target columns (one total per target).
df[target_cols].sum(axis=0)

In [None]:
# Fold id held out for validation; rows whose 'kfold' differs are used for training.
FOLD = 1

### Train and valid idxs

In [None]:
# Index labels of the rows inside / outside the held-out fold.
in_valid_fold = trn_df['kfold'] == FOLD
trn_idx = trn_df[~in_valid_fold].index
val_idx = trn_df[in_valid_fold].index

In [None]:
# First-stage input pipeline: batch size, per-item resize, then batch-level
# augmentation (warping disabled) and normalisation with the ImageNet statistics.
bs = 8
item_tfms = Resize(300)
batch_tfms = [
    *aug_transforms(size=300, max_warp=0),
    Normalize.from_stats(*imagenet_stats),
]

In [None]:
# Root directory of the image dataset; the data loaders read from a sub-folder of it.
img_path = '../input/ranzor-clip-resized-data-512-256'

In [None]:
# Build the data loaders with the chosen fold as the validation set.
# `from_df` selects validation rows through `valid_col` (a boolean column); it has
# no `val_idxs` parameter, so the split is expressed here as an `is_valid` column
# derived from the fold assignment. `assign` returns a copy, leaving trn_df untouched.
dls = ImageDataLoaders.from_df(
    trn_df.assign(is_valid=trn_df['kfold'] == FOLD),
    path=img_path,
    folder='trainXray_512',
    fn_col='StudyInstanceUID',
    suff='.jpg',
    label_col=target_cols,
    # Targets are already 0/1 columns, so they are used as an encoded multi-label vector.
    y_block=MultiCategoryBlock(encoded=True, vocab=target_cols),
    valid_col='is_valid',
    item_tfms=item_tfms,
    batch_tfms=batch_tfms,
    bs=bs,
)

In [None]:
# Visual sanity check of one batch on a 3x3 grid.
dls.show_batch(nrows=3, ncols=3,figsize=(20,12))

In [None]:
# (validation size, training size) — confirms how the rows were split.
len(dls.valid_ds),len(dls.train_ds)

## Metric: column-wise mean AUC-ROC

In [None]:
import sklearn.metrics as sklm

In [None]:
def col_mean_aucroc(preds, targs, labels=None):
    """Mean of the per-column ROC AUC scores.

    Args:
        preds: 2-D array-like of scores, indexed as ``preds[:, i]`` per target column.
        targs: 2-D array-like of ground-truth labels with the same column layout.
        labels: iterable of column indices to score. ``None`` (default) scores
            every column of ``preds``.

    Returns:
        The arithmetic mean of ``roc_auc_score`` over the selected columns.

    Note:
        Each column is scored independently by ``sklearn.metrics.roc_auc_score``;
        any error it raises for a column (e.g. presumably when a target column
        contains a single class — verify) propagates to the caller.
    """
    if labels is None:
        # Resolve at call time from the data instead of freezing the column
        # count of a global list when the function is defined.
        labels = range(preds.shape[1])
    return np.mean([sklm.roc_auc_score(targs[:, i], preds[:, i]) for i in labels])
def ETTAbnormal_auc(*args):
    """ROC AUC restricted to column 0 ('ETT - Abnormal' in ``target_cols``)."""
    cols = [0]
    return col_mean_aucroc(*args, labels=cols)


def ETTBorderline_auc(*args):
    """ROC AUC restricted to column 1 ('ETT - Borderline' in ``target_cols``)."""
    cols = [1]
    return col_mean_aucroc(*args, labels=cols)


def ETTNormal_auc(*args):
    """ROC AUC restricted to column 2 ('ETT - Normal' in ``target_cols``)."""
    cols = [2]
    return col_mean_aucroc(*args, labels=cols)


def NGTAbnormal_auc(*args):
    """ROC AUC restricted to column 3 ('NGT - Abnormal' in ``target_cols``)."""
    cols = [3]
    return col_mean_aucroc(*args, labels=cols)


def NGTBorderline_auc(*args):
    """ROC AUC restricted to column 4 ('NGT - Borderline' in ``target_cols``)."""
    cols = [4]
    return col_mean_aucroc(*args, labels=cols)


def NGTIncompletelyImaged_auc(*args):
    """ROC AUC restricted to column 5 ('NGT - Incompletely Imaged' in ``target_cols``)."""
    cols = [5]
    return col_mean_aucroc(*args, labels=cols)


def NGTNormal_auc(*args):
    """ROC AUC restricted to column 6 ('NGT - Normal' in ``target_cols``)."""
    cols = [6]
    return col_mean_aucroc(*args, labels=cols)


def CVCAbnormal_auc(*args):
    """ROC AUC restricted to column 7 ('CVC - Abnormal' in ``target_cols``)."""
    cols = [7]
    return col_mean_aucroc(*args, labels=cols)


def CVCBorderline_auc(*args):
    """ROC AUC restricted to column 8 ('CVC - Borderline' in ``target_cols``)."""
    cols = [8]
    return col_mean_aucroc(*args, labels=cols)


def CVCNormal_auc(*args):
    """ROC AUC restricted to column 9 ('CVC - Normal' in ``target_cols``)."""
    cols = [9]
    return col_mean_aucroc(*args, labels=cols)


def SwanGanzCatheterPresent_auc(*args):
    """ROC AUC restricted to column 10 ('Swan Ganz Catheter Present' in ``target_cols``)."""
    cols = [10]
    return col_mean_aucroc(*args, labels=cols)

## Model: EfficientNet-B5

In [None]:
from efficientnet_pytorch import EfficientNet

In [None]:
# Path to the EfficientNet-B5 checkpoint file passed to `from_pretrained` below.
wp = '../input/efficientnet-pytorch/efficientnet-b5-586e6cc6.pth'

In [None]:
class MyModel(Module):
    """EfficientNet-B5 feature extractor with a dropout + linear classification head.

    The backbone's convolutional features are globally average-pooled, passed
    through dropout and projected to ``num_classes`` raw logits (no activation).

    NOTE(review): there is no explicit ``super().__init__()`` call; this relies on
    the ``Module`` base class in scope doing it automatically (fastai's ``Module``
    is expected to) — confirm if the base class is ever swapped for ``nn.Module``.
    """

    def __init__(self, num_classes):
        """Build the backbone and a head producing ``num_classes`` outputs."""
        # Weights are loaded from the checkpoint path held in the notebook-level `wp`.
        self.effnet = EfficientNet.from_pretrained("efficientnet-b5", weights_path=wp)
        self.dropout = nn.Dropout(0.1)
        # Take the head width from the backbone's own classifier input instead of
        # hard-coding 2048, so the head stays valid if the backbone variant changes.
        self.out = nn.Linear(self.effnet._fc.in_features, num_classes)

    def forward(self, image):
        """Return logits of shape (batch, num_classes) for a 4-D image batch."""
        features = self.effnet.extract_features(image)
        # Global average pool to 1x1, then drop the spatial dimensions.
        pooled = F.adaptive_avg_pool2d(features, 1).flatten(1)
        return self.out(self.dropout(pooled))

In [None]:
# Instantiate the network with one output per class reported by the data loaders (dls.c).
effnet_B5 = MyModel(dls.c)

## Making The Model by adding a dropout layer and final output layer

## Metrics for each target column

In [None]:
# Mean AUC first, followed by one single-column AUC per target (same order as
# target_cols). flatten=False keeps the (samples, columns) layout the metric
# functions index into.
_metric_fns = (
    col_mean_aucroc,  # Avg AUCROC
    ETTAbnormal_auc,
    ETTBorderline_auc,
    ETTNormal_auc,
    NGTAbnormal_auc,
    NGTBorderline_auc,
    NGTIncompletelyImaged_auc,
    NGTNormal_auc,
    CVCAbnormal_auc,
    CVCBorderline_auc,
    CVCNormal_auc,
    SwanGanzCatheterPresent_auc,
)
metrics = [AccumMetric(fn, flatten=False) for fn in _metric_fns]

In [None]:
# Assemble the learner: multi-label BCE-with-logits loss, the AUC metrics defined
# above, and checkpoints written under /kaggle/working.
learn = Learner(
    dls,
    effnet_B5,
    loss_func=BCEWithLogitsLossFlat(),
    metrics=metrics,
    model_dir='/kaggle/working',
)
# Switch training to native mixed precision.
learn = learn.to_native_fp16()

In [None]:
# Keep the checkpoint with the highest validation mean AUC.
cb1 = SaveModelCallback(monitor='col_mean_aucroc', fname='best_model', comp=np.greater)
# fastai's ReduceLROnPlateau *divides* the learning rate by `factor`, so the
# intended 0.2x reduction is factor=5 (factor=0.2 would multiply the LR by 5).
# min_delta is on the AUC scale (0..1): 0.1 would treat almost every epoch as a
# plateau, so a small threshold is used instead.
# NOTE(review): fit_one_cycle also schedules the LR per batch — verify that the
# plateau reductions are not simply overwritten by the one-cycle schedule.
cb2 = ReduceLROnPlateau(monitor='col_mean_aucroc', comp=np.greater,
                        min_delta=1e-3, patience=2, factor=5.)
learn.fit_one_cycle(10, 1e-3, cbs=[cb1, cb2])

In [None]:
# Restore the checkpoint saved by SaveModelCallback under the name 'best_model'.
learn.load('best_model');

In [None]:
# Second-stage input pipeline at 380px: per-item resize, then batch-level
# augmentation (warping disabled) and normalisation with the ImageNet statistics.
item_tfms = Resize(380)
batch_tfms = [*aug_transforms(size=380, max_warp=0), Normalize.from_stats(*imagenet_stats)]
# The 380px loaders are built with a batch size of 4; keep this variable in
# agreement with that value rather than the stale 8 from the 300px stage.
bs = 4

In [None]:
# Rebuild the data loaders for the 380px stage with the same held-out fold.
# `from_df` selects validation rows through `valid_col` (a boolean column); it has
# no `val_idxs` parameter, so the split is expressed here as an `is_valid` column
# derived from the fold assignment. `assign` returns a copy, leaving trn_df untouched.
dlsB = ImageDataLoaders.from_df(
    trn_df.assign(is_valid=trn_df['kfold'] == FOLD),
    path=img_path,
    folder='trainXray_512',
    fn_col='StudyInstanceUID',
    suff='.jpg',
    label_col=target_cols,
    # Targets are already 0/1 columns, so they are used as an encoded multi-label vector.
    y_block=MultiCategoryBlock(encoded=True, vocab=target_cols),
    valid_col='is_valid',
    item_tfms=item_tfms,
    batch_tfms=batch_tfms,
    bs=4,  # smaller batch for the larger 380px inputs
)

In [None]:
# Point the existing learner (and its trained weights) at the 380px data loaders.
learn.dls = dlsB

In [None]:
# Run a garbage-collection pass before the next training stage.
gc.collect()

In [None]:
learn.unfreeze()
# Keep the checkpoint with the highest validation mean AUC for the 380px stage.
cb1 = SaveModelCallback(monitor='col_mean_aucroc', fname='best_model_380', comp=np.greater)
# fastai's ReduceLROnPlateau *divides* the learning rate by `factor`, so the
# intended 0.2x reduction is factor=5 (factor=0.2 would multiply the LR by 5).
# min_delta is on the AUC scale (0..1): 0.1 would treat almost every epoch as a
# plateau, so a small threshold is used instead.
# NOTE(review): fit_one_cycle also schedules the LR per batch — verify that the
# plateau reductions are not simply overwritten by the one-cycle schedule.
cb2 = ReduceLROnPlateau(monitor='col_mean_aucroc', comp=np.greater,
                        min_delta=1e-3, patience=2, factor=5.)
# Half the first-stage learning rate for the higher-resolution stage.
learn.fit_one_cycle(4, 1e-3/2, cbs=[cb1, cb2])

In [None]:
# Restore the checkpoint saved by SaveModelCallback under the name 'best_model_380'.
learn.load('best_model_380');

In [None]:
# Mixed precision was switched on with `to_native_fp16`, so undo it with the
# matching `to_native_fp32` where the installed fastai provides it; in those
# releases plain `to_fp32` is understood to remove only the non-native callbacks
# (NOTE(review): confirm against the installed fastai version). Fall back to
# `to_fp32` otherwise.
to_full_precision = getattr(learn, 'to_native_fp32', learn.to_fp32)
learn = to_full_precision()
# Save weights together with the optimizer state.
learn.save('best_model_fp32', with_opt=True);

## Work in progress — stay tuned
## Please don't forget to upvote