# Intro
Welcome to the [RANZCR CLiP - Catheter and Line Position Challenge](https://www.kaggle.com/c/ranzcr-clip-catheter-line-classification/data).
![](https://storage.googleapis.com/kaggle-competitions/kaggle/23870/logos/header.png)

In this competition, we will detect the presence and position of catheters and lines on chest x-rays.

There are 11 targets to predict:
* ETT - Abnormal - endotracheal tube placement abnormal
* ETT - Borderline - endotracheal tube placement borderline abnormal
* ETT - Normal - endotracheal tube placement normal
* NGT - Abnormal - nasogastric tube placement abnormal
* NGT - Borderline - nasogastric tube placement borderline abnormal
* NGT - Incompletely Imaged - nasogastric tube placement inconclusive due to imaging
* NGT - Normal - nasogastric tube placement normal
* CVC - Abnormal - central venous catheter placement abnormal
* CVC - Borderline - central venous catheter placement borderline abnormal
* CVC - Normal - central venous catheter placement normal
* Swan Ganz Catheter Present

<span style="color: royalblue;">Please vote the notebook up if it helps you. Thank you. </span>

# Libraries

In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import cv2

In [None]:
from keras.utils import to_categorical, Sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, Activation, BatchNormalization,GlobalAveragePooling2D
from keras.optimizers import RMSprop,Adam
from keras.applications import ResNet50, MobileNet
from tensorflow.keras.applications import EfficientNetB3
import tensorflow as tf

In [None]:
import warnings
warnings.filterwarnings("ignore")

# Path

In [None]:
path = '/kaggle/input/ranzcr-clip-catheter-line-classification/'
os.listdir(path)

# Load Data

In [None]:
# Read the three competition tables that live directly under `path`.
train = pd.read_csv(os.path.join(path, 'train.csv'))
train_anno = pd.read_csv(os.path.join(path, 'train_annotations.csv'))
samp_subm = pd.read_csv(os.path.join(path, 'sample_submission.csv'))

# Functions
We define some helper functions.

In [None]:
def plot_example(data, type_, num_examples=5):
    """Show the first training x-rays that carry a given label.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with a 'StudyInstanceUID' column and a 0/1 column named `type_`.
    type_ : str
        Name of the label column; rows where it equals 1 are plotted.
    num_examples : int, optional
        Number of subplot slots in the row (default 5). If fewer matching
        rows exist, the remaining slots are left blank instead of raising.

    Raises
    ------
    FileNotFoundError
        If an image file cannot be read from ``path + 'train/'``.
    """
    # squeeze=False keeps `axs` a 2-D array even for a single column.
    fig, axs = plt.subplots(1, num_examples, figsize=(25, 12), squeeze=False)
    fig.subplots_adjust(hspace=.2, wspace=.2)
    axs = axs.ravel()

    # head() never over-runs, unlike indexing a fixed range of positions.
    image_ids = data.loc[data[type_] == 1, 'StudyInstanceUID'].head(num_examples)

    for ax, image_id in zip(axs, image_ids):
        image_path = ''.join([path, 'train/', image_id, '.jpg'])
        image_file = cv2.imread(image_path)
        if image_file is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError('could not read image: ' + image_path)
        # OpenCV loads BGR; matplotlib expects RGB.
        ax.imshow(cv2.cvtColor(image_file, cv2.COLOR_BGR2RGB))
        ax.set_title(type_)
        ax.set_xticklabels([])
        ax.set_yticklabels([])

    # Blank out any slot that received no image.
    for ax in axs[len(image_ids):]:
        ax.axis('off')

# Overview

In [None]:
# Row counts come from the CSV tables, image counts from the directory listings.
print('number of train samples:', len(train.index))
print('number of test samples:', len(samp_subm.index))
print('number of train images:', len(os.listdir(path+'train/')))
print('number of test images:', len(os.listdir(path+'test/')))
print('number of unique patient ids:', len(train['PatientID'].unique()))

There are fewer patients than samples, so some patients have more than one image.

# Parameters

In [None]:
# Input geometry fed to the network: square images with `image_channel` channels.
image_size, image_channel = 222, 3
num_classes = 11
# Label names are every column of `train` except the first and the last one.
labels = train.columns[1:-1].tolist()

# EDA

The distribution of the labels is unbalanced.

In [None]:
train[train.columns[1:-1]].sum()

In [None]:
# Bar chart of how many training rows are positive for each label.
plt.bar(x=labels, height=train[labels].sum(axis=0))
plt.xticks(rotation=90)
plt.grid()
plt.show()

## ETT - Abnormal

In [None]:
plot_example(train, 'ETT - Abnormal')

## ETT - Borderline

In [None]:
plot_example(train, 'ETT - Borderline')

## ETT - Normal

In [None]:
plot_example(train, 'ETT - Normal')

## NGT - Abnormal

In [None]:
plot_example(train, 'NGT - Abnormal')

## NGT - Borderline

In [None]:
plot_example(train, 'NGT - Borderline')

## NGT - Incompletely Imaged

In [None]:
plot_example(train, 'NGT - Incompletely Imaged')

## NGT - Normal

In [None]:
plot_example(train, 'NGT - Normal')

## CVC - Abnormal

In [None]:
plot_example(train, 'CVC - Abnormal')

## CVC - Borderline

In [None]:
plot_example(train, 'CVC - Borderline')

## CVC - Normal

In [None]:
plot_example(train, 'CVC - Normal')

## Swan Ganz Catheter Present

In [None]:
plot_example(train, 'Swan Ganz Catheter Present')

# Class Weights

In [None]:
# Map class position -> fraction of training rows that are positive for that label.
# NOTE(review): this weight grows with class frequency; weighting against imbalance
# usually uses the inverse. The visible code never passes it to training - confirm intent.
positive_fraction = train[labels].sum().values / len(train.index)
class_weight = {
    class_idx: weight
    for class_idx, weight in zip(range(num_classes), positive_fraction)
}

# Data Generator
We define a data generator to load the data on demand.

In [None]:
class DataGenerator(Sequence):
    """Keras ``Sequence`` that reads, resizes and scales images one batch at a time.

    Parameters
    ----------
    path : str
        Directory prefix (including trailing separator) holding ``<ID>.jpg`` files.
    data : pandas.DataFrame
        Table with a 'StudyInstanceUID' column; the ``num_classes`` columns
        starting at position 1 are used as the label block.
    batch_size : int
        Maximum number of samples per batch; the final batch may be smaller.
    image_size : int
        Images are resized to ``image_size x image_size``.
    image_channel : int
        Size of the channel axis of the image batch. Images are read with
        ``cv2.imread`` defaults, so this is expected to be 3.
    num_classes : int
        Number of label columns per sample.
    """

    def __init__(self, path, data, batch_size,
                 image_size, image_channel, num_classes):
        super().__init__()
        self.path = path
        self.list_IDs = data['StudyInstanceUID']
        # Derive the label block from num_classes instead of a hard-coded 1:12.
        self.labels = data[data.columns[1:1 + num_classes]]
        self.batch_size = batch_size
        self.image_size = image_size
        self.image_channel = image_channel
        self.num_classes = num_classes
        # Positional row numbers; used with .iloc so any DataFrame index works.
        self.indexes = np.arange(len(self.list_IDs))

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        return (len(self.list_IDs) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, index):
        """Return the ``(X, y)`` pair for batch number ``index``."""
        start = index * self.batch_size
        batch_indexes = self.indexes[start:start + self.batch_size]
        return self.__data_generation(batch_indexes)

    def __data_generation(self, batch_indexes):
        """Load images and labels for the given positional row numbers.

        Returns ``X`` with shape ``(n, image_size, image_size, image_channel)``
        scaled to [0, 1] and integer ``y`` with shape ``(n, num_classes)``,
        where ``n == len(batch_indexes)``.

        Raises ``FileNotFoundError`` when an image cannot be read.
        """
        n_samples = len(batch_indexes)
        # Size the arrays to the real batch so the last batch carries no
        # zero-filled padding samples.
        X = np.zeros((n_samples, self.image_size, self.image_size, self.image_channel))
        # Select labels by the rows of THIS batch, not by position within it.
        y = self.labels.iloc[batch_indexes].to_numpy(dtype=int)

        batch_ids = self.list_IDs.iloc[batch_indexes]
        for row, image_id in enumerate(batch_ids):
            image_path = ''.join([self.path, image_id, '.jpg'])
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals failure by returning None rather than raising.
                raise FileNotFoundError('could not read image: ' + image_path)
            X[row] = cv2.resize(image, (self.image_size, self.image_size)) / 255.
        return X, y

# Define Model

## MobileNet

In [None]:
#weights='../input/models/mobilenet_1_0_224_tf_no_top.h5'

In [None]:
#conv_base = MobileNet(include_top=False,
#                     weights=weights,
#                     input_shape=(image_size, image_size, image_channel))
#conv_base.trainable = True

In [None]:
#model = Sequential()
#model.add(conv_base)
#model.add(GlobalAveragePooling2D())
#model.add(Dense(1024, activation='relu'))
#model.add(Dense(1024, activation='relu'))
#model.add(Dense(512, activation='relu'))
#model.add(Dense(num_classes, activation='sigmoid'))

## EfficientNet

In [None]:
weights = '../input/models/efficientnetb3_notop.h5'

In [None]:
# EfficientNetB3 backbone without its classification head, initialised from the
# local weights file and sized for (image_size, image_size, image_channel) inputs.
# NOTE(review): tf.keras EfficientNet models reportedly rescale their inputs
# internally and expect pixel values in [0, 255], while DataGenerator in this
# file divides images by 255 - confirm against the Keras EfficientNet docs.
conv_base = EfficientNetB3(include_top=False,
                          weights=weights,
                          input_shape=(image_size, image_size, image_channel))
# Every backbone layer stays trainable (full fine-tuning, nothing frozen).
conv_base.trainable = True

In [None]:
# Classifier: backbone features -> global average pooling -> one sigmoid per label.
model = Sequential([
    conv_base,
    GlobalAveragePooling2D(),
    Dense(num_classes, activation='sigmoid'),
])

In [None]:
# Track AUC during training; multi_label=True asks Keras to handle each output
# column as its own label (see the Keras AUC documentation for the averaging).
metrics = [tf.keras.metrics.AUC(name='auc', multi_label=True)]
# Binary cross-entropy pairs with the per-label sigmoid outputs of the model;
# Adam is used with its default settings.
model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=metrics)

In [None]:
model.summary()

# Train Model

In [None]:
epochs = 5
batch_size = 64

In [None]:
# Batch generator over the images in path+'train/' with labels taken from `train`.
train_gen = DataGenerator(path+'train/', train, batch_size, image_size, image_channel, num_classes)

In [None]:
# Model.fit accepts a Sequence directly; fit_generator is deprecated in TF 2.x
# and removed in later releases.
history = model.fit(train_gen,
                    epochs=epochs,
                    workers=4)

# Predict Test Data

In [None]:
# Batch generator over the images in path+'test/'. The generator takes its label
# block from the frame it is given, so here it comes from the sample submission.
# NOTE(review): presumably those values are placeholders that prediction ignores - confirm.
test_gen = DataGenerator(path+'test/', samp_subm, batch_size, image_size, image_channel, num_classes)

In [None]:
# Model.predict accepts a Sequence directly; predict_generator is deprecated in
# TF 2.x and removed in later releases.
predict = model.predict(test_gen, verbose=1)

# Write Output

In [None]:
# Keep exactly one prediction row per submission row. Slicing discards any
# surplus rows a padded final batch may have produced, without relying on
# dropna(), which would also silently remove test rows whose predictions are NaN.
num_test_rows = len(samp_subm.index)
output = pd.DataFrame(predict[:num_test_rows], columns=labels)
# Passing raw values (not the Series) makes a row-count mismatch raise instead
# of being filled with NaN through index alignment.
output.insert(0, 'StudyInstanceUID', samp_subm['StudyInstanceUID'].values)

In [None]:
output.to_csv('submission.csv', index=False)

In [None]:
# Bar chart of the summed predicted probability per label over the test set.
plt.bar(x=labels, height=output[labels].sum(axis=0))
plt.xticks(rotation=90)
plt.grid()
plt.show()