In [None]:
import numpy as np
import pandas as pd
import os

import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split

from keras.preprocessing.image import ImageDataGenerator

from keras.applications.resnet50 import ResNet50 
from keras.applications.resnet50 import preprocess_input

from keras.layers import Flatten, Dense, GlobalAveragePooling2D, BatchNormalization, Activation, Dropout
from keras.models import Model, Sequential



Main idea: identify the catheter type(s) present in an image with one ResNet50-based network, then assess whether each detected catheter is positioned normally using separate ResNet50-based networks, each trained for one catheter type.

In [None]:
# Root folder of the competition data. The trailing slash matters: the test
# directory is later built from BASE_DIR by plain string concatenation.
BASE_DIR = "../input/ranzcr-clip-catheter-line-classification/"

# Training labels; the first CSV column is used as the row index.
train_labels_path = os.path.join(BASE_DIR, "train.csv")
train_df = pd.read_csv(train_labels_path, index_col=0)


Splitting train dataset for different catheter types

In [None]:
# Group the label columns of train_df by catheter family, using the
# column-name prefix (ETT / NGT / CVC).
label_names = list(train_df.columns.values)
ETT_columns = [name for name in label_names if name.startswith('ETT')]
NGT_columns = [name for name in label_names if name.startswith('NGT')]
CVC_columns = [name for name in label_names if name.startswith('CVC')]

In [None]:
# For each catheter family keep only the images with at least one positive
# label in that family, restricted to that family's label columns.
# `.loc[mask, columns].copy()` yields an independent frame, so columns can be
# added to it afterwards without pandas' SettingWithCopyWarning (the chained
# `train_df[mask][columns]` selection produced a slice of `train_df`).
train_df_ETT = train_df.loc[train_df[ETT_columns].isin([1]).any(axis=1), ETT_columns].copy()
train_df_NGT = train_df.loc[train_df[NGT_columns].isin([1]).any(axis=1), NGT_columns].copy()
train_df_CVC = train_df.loc[train_df[CVC_columns].isin([1]).any(axis=1), CVC_columns].copy()

In [None]:
# Tag every row of each per-family frame with a constant "family present" flag.
for family_frame, flag_column in (
    (train_df_ETT, 'IsETT'),
    (train_df_NGT, 'IsNGT'),
    (train_df_CVC, 'IsCVC'),
):
    family_frame[flag_column] = 1

Helper functions

In [None]:


def create_image_generators(preprocess_input, target_image_size, train_dataframe,
                            validation_dataframe, y_columns, batch_size=32):
    """Build the training and validation image iterators for one model.

    Both iterators read file paths from the ``image`` column, return the
    ``y_columns`` values unchanged as targets (``class_mode='raw'``) and apply
    ``preprocess_input`` to every image. No augmentation is configured.

    Args:
        preprocess_input: Function applied to each image (passed to
            ``ImageDataGenerator`` as ``preprocessing_function``).
        target_image_size: Size the images are resized to (passed as
            ``target_size``).
        train_dataframe: Frame with an ``image`` column and the ``y_columns``.
        validation_dataframe: Same layout as ``train_dataframe``.
        y_columns: Names of the target columns.
        batch_size: Images per batch for both iterators. Defaults to 32, the
            value that used to be hard-coded.

    Returns:
        Tuple ``(train_generator, validation_generator)``. Only the training
        iterator shuffles its rows.
    """
    # Options shared by both iterators; only `shuffle` differs between them.
    shared_options = dict(
        x_col='image',
        y_col=y_columns,
        target_size=target_image_size,
        batch_size=batch_size,
        class_mode='raw')

    train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
    val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

    train_generator = train_datagen.flow_from_dataframe(
        train_dataframe, shuffle=True, **shared_options)
    # Validation keeps the frame order so results line up with the rows.
    validation_generator = val_datagen.flow_from_dataframe(
        validation_dataframe, shuffle=False, **shared_options)
    return train_generator, validation_generator

In [None]:
def create_rn50_based_model(class_number, activation_fn,
                            input_shape=(224, 224, 3),
                            weights='../input/base-model-weights-rn50-enb3/resnet50_notop.h5',
                            dropout_rate=0.3):
    """Create an uncompiled classifier: ResNet50 backbone plus a small dense head.

    Layer order: ResNet50 (without its top) -> global average pooling ->
    Dense(128) -> BatchNorm -> ReLU -> Dense(32) -> BatchNorm -> ReLU ->
    Dropout -> Dense(class_number, activation_fn).
    The backbone is not frozen by this function.

    Args:
        class_number: Number of units in the output layer.
        activation_fn: Activation of the output layer (e.g. "sigmoid").
        input_shape: Input shape passed to ResNet50. Defaults to the
            previously hard-coded (224, 224, 3).
        weights: ``weights`` argument passed to ResNet50. Defaults to the
            previously hard-coded weights file.
        dropout_rate: Rate of the dropout layer before the output layer.
            Defaults to the previously hard-coded 0.3.

    Returns:
        The assembled ``Sequential`` model; the caller compiles it.
    """
    base_model_rn50 = ResNet50(input_shape=input_shape,
                               include_top=False,
                               weights=weights)

    model_full = Sequential()
    model_full.add(base_model_rn50)
    model_full.add(GlobalAveragePooling2D())

    # Two identical Dense -> BatchNorm -> ReLU stages, 128 then 32 units.
    for units in (128, 32):
        model_full.add(Dense(units))
        model_full.add(BatchNormalization())
        model_full.add(Activation('relu'))
    model_full.add(Dropout(dropout_rate))

    model_full.add(Dense(class_number, activation=activation_fn))

    return model_full

In [None]:
# Training configuration shared by every model in this notebook.
# BATCH_SIZE is used to derive steps_per_epoch in the training cells.
BATCH_SIZE = 32
# Number of passes over the training data for each model.
EPOCHS = 4


Swan Ganz training

In [None]:
# Swan Ganz dataset: the single presence label for every training image,
# with the index turned back into a column so the id can be used below.
swanganz_labels = pd.DataFrame(train_df['Swan Ganz Catheter Present'])
train_swanganz = swanganz_labels.reset_index()
# Path of the image file that belongs to each row.
train_swanganz['image'] = BASE_DIR + '/train/' + train_swanganz['StudyInstanceUID'] + '.jpg'

In [None]:

# Hold out 20% of the images for validation. A fixed random_state makes the
# split (and therefore the reported validation metrics) repeatable across runs.
X_train, X_val, y_train, y_val = train_test_split(
    train_swanganz['image'],
    train_swanganz[['Swan Ganz Catheter Present']],
    test_size=0.2,
    random_state=42)

# Re-attach each image path to its label: one frame per split.
train_sg = pd.concat([X_train, y_train], axis=1)
validation_sg = pd.concat([X_val, y_val], axis=1)

In [None]:

# Batch iterators over the Swan Ganz train/validation frames.
sg_target_columns = ['Swan Ganz Catheter Present']
train_generator_sg, validation_generator_sg = create_image_generators(
    preprocess_input,
    (224, 224),
    train_sg,
    validation_sg,
    sg_target_columns,
)

In [None]:
# Swan Ganz presence model: one sigmoid output trained with binary
# cross-entropy; accuracy is reported during training.
model_sg = create_rn50_based_model(1, "sigmoid")
model_sg.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train the Swan Ganz model.
# `fit` accepts the generator directly (`fit_generator` is deprecated). The
# number of steps per epoch is taken from the generator's length, so the final
# partial batch is no longer dropped as it was with `len(train_sg)//BATCH_SIZE`.
model_sg.fit(train_generator_sg,
             validation_data=validation_generator_sg,
             epochs=EPOCHS)

New dataset to determine catheter type first

In [None]:
# One row per training image with three binary "family present" flags.
# The flags are derived from the full label table, so images that show none of
# the three catheter families are kept as all-zero rows. Outer-merging the
# per-family frames only produced the union of images with at least one such
# label, so the type classifier never saw an image without any catheter.
# This also removes the dependency on the IsETT/IsNGT/IsCVC helper columns.
train_df_general = pd.DataFrame({
    'IsETT': train_df[ETT_columns].isin([1]).any(axis=1),
    'IsNGT': train_df[NGT_columns].isin([1]).any(axis=1),
    'IsCVC': train_df[CVC_columns].isin([1]).any(axis=1),
}).astype(float)
# Bring the image id back as a column and derive the image file path from it.
train_df_general = train_df_general.reset_index()
train_df_general['image'] = BASE_DIR+'/train/'+train_df_general.StudyInstanceUID+'.jpg'

In [None]:

# Hold out 20% of the images for validation. A fixed random_state makes the
# split (and therefore the reported validation metrics) repeatable across runs.
X_train, X_val, y_train, y_val = train_test_split(
    train_df_general['image'],
    train_df_general[['IsETT', 'IsNGT', 'IsCVC']],
    test_size=0.2,
    random_state=42)

# Re-attach each image path to its flags: one frame per split.
train_general = pd.concat([X_train, y_train], axis=1)
validation_general = pd.concat([X_val, y_val], axis=1)

Catheter type classification training

In [None]:

# Batch iterators for the catheter-type classifier (three flag targets).
type_flag_targets = ['IsETT', 'IsNGT', 'IsCVC']
train_generator_general, validation_generator_general = create_image_generators(
    preprocess_input,
    (224, 224),
    train_general,
    validation_general,
    type_flag_targets,
)

In [None]:

# Catheter-type classifier: three independent sigmoid outputs (one per
# family flag), trained with binary cross-entropy and monitored with AUC.
model_general = create_rn50_based_model(3, "sigmoid")
model_general.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['AUC'],
)

In [None]:
# Train the catheter-type classifier.
# `fit` accepts the generator directly (`fit_generator` is deprecated). The
# number of steps per epoch is taken from the generator's length, so the final
# partial batch is no longer dropped as it was with `len(train_general)//BATCH_SIZE`.
model_general.fit(train_generator_general,
                  validation_data=validation_generator_general,
                  epochs=EPOCHS)

ETT training

In [None]:
# ETT position dataset: image path plus the ETT label columns.
# The result is stored under a new name instead of overwriting `train_df_ETT`,
# so re-running this cell does not fail on an already-dropped `IsETT` column.
ett_images = train_df_ETT.drop(columns='IsETT').reset_index()
ett_images['image'] = BASE_DIR+'/train/'+ett_images.StudyInstanceUID+'.jpg'

# Hold out 20% for validation; the fixed random_state keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    ett_images['image'],
    ett_images[ETT_columns],
    test_size=0.2,
    random_state=42)

# Re-attach each image path to its labels: one frame per split.
train_ETT = pd.concat([X_train, y_train], axis=1)
validation_ETT = pd.concat([X_val, y_val], axis=1)

train_generator_ETT, validation_generator_ETT = create_image_generators(preprocess_input, (224,224),
                                                                        train_ETT, validation_ETT,
                                                                        ETT_columns)

In [None]:
# ETT position model: one softmax output per ETT label column, trained with
# categorical cross-entropy and monitored with AUC.
# NOTE(review): softmax treats the ETT labels as mutually exclusive, while the
# training rows were only filtered for "at least one positive label" — confirm
# that no image carries several ETT labels.
ett_class_count = len(ETT_columns)
model_ETT = create_rn50_based_model(ett_class_count, "softmax")
model_ETT.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['AUC'],
)

In [None]:
# Train the ETT position model.
# `fit` accepts the generator directly (`fit_generator` is deprecated). The
# number of steps per epoch is taken from the generator's length, so the final
# partial batch is no longer dropped as it was with `len(train_ETT)//BATCH_SIZE`.
model_ETT.fit(train_generator_ETT,
              validation_data=validation_generator_ETT,
              epochs=EPOCHS)

NGT training

In [None]:
# NGT position dataset: image path plus the NGT label columns.
# The result is stored under a new name instead of overwriting `train_df_NGT`,
# so re-running this cell does not fail on an already-dropped `IsNGT` column.
ngt_images = train_df_NGT.drop(columns='IsNGT').reset_index()
ngt_images['image'] = BASE_DIR+'/train/'+ngt_images.StudyInstanceUID+'.jpg'

# Hold out 20% for validation; the fixed random_state keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    ngt_images['image'],
    ngt_images[NGT_columns],
    test_size=0.2,
    random_state=42)

# Re-attach each image path to its labels: one frame per split.
train_NGT = pd.concat([X_train, y_train], axis=1)
validation_NGT = pd.concat([X_val, y_val], axis=1)

train_generator_NGT, validation_generator_NGT = create_image_generators(preprocess_input, (224,224),
                                                                        train_NGT, validation_NGT,
                                                                        NGT_columns)

In [None]:
# NGT position model: one softmax output per NGT label column, trained with
# categorical cross-entropy and monitored with AUC.
# NOTE(review): softmax treats the NGT labels as mutually exclusive, while the
# training rows were only filtered for "at least one positive label" — confirm
# that no image carries several NGT labels.
ngt_class_count = len(NGT_columns)
model_NGT = create_rn50_based_model(ngt_class_count, "softmax")
model_NGT.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['AUC'],
)

In [None]:
# Train the NGT position model.
# `fit` accepts the generator directly (`fit_generator` is deprecated). The
# number of steps per epoch is taken from the generator's length, so the final
# partial batch is no longer dropped as it was with `len(train_NGT)//BATCH_SIZE`.
model_NGT.fit(train_generator_NGT,
              validation_data=validation_generator_NGT,
              epochs=EPOCHS)

CVC training

In [None]:
# CVC position dataset: image path plus the CVC label columns.
# The result is stored under a new name instead of overwriting `train_df_CVC`,
# so re-running this cell does not fail on an already-dropped `IsCVC` column.
cvc_images = train_df_CVC.drop(columns='IsCVC').reset_index()
cvc_images['image'] = BASE_DIR+'/train/'+cvc_images.StudyInstanceUID+'.jpg'

# Hold out 20% for validation; the fixed random_state keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    cvc_images['image'],
    cvc_images[CVC_columns],
    test_size=0.2,
    random_state=42)

# Re-attach each image path to its labels: one frame per split.
train_CVC = pd.concat([X_train, y_train], axis=1)
validation_CVC = pd.concat([X_val, y_val], axis=1)

In [None]:
# Batch iterators over the CVC train/validation frames.
train_generator_CVC, validation_generator_CVC = create_image_generators(
    preprocess_input,
    (224, 224),
    train_CVC,
    validation_CVC,
    CVC_columns,
)

In [None]:
# CVC position model, monitored with AUC.
# Independent sigmoid outputs with binary cross-entropy replace softmax with
# categorical cross-entropy: the training rows are only guaranteed to have at
# least one positive CVC label, not exactly one, and softmax cannot score two
# labels high for the same image. Sigmoid outputs stay valid even if the labels
# turn out to be mutually exclusive.
# NOTE(review): presumably several central lines can appear in one image —
# confirm against train.csv.
model_CVC = create_rn50_based_model (len(CVC_columns), "sigmoid")
model_CVC.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['AUC'])

In [None]:
# Train the CVC position model.
# `fit` accepts the generator directly (`fit_generator` is deprecated). The
# number of steps per epoch is taken from the generator's length, so the final
# partial batch is no longer dropped as it was with `len(train_CVC)//BATCH_SIZE`.
model_CVC.fit(train_generator_CVC,
              validation_data=validation_generator_CVC,
              epochs=EPOCHS)

Prediction

In [None]:
from keras.preprocessing import image

In [None]:
# Folder with the test images. The trailing slash is required because
# predict_into_list builds file paths as test_dir + image_name.
test_dir = f"{BASE_DIR}test/"

In [None]:
def predict_into_list(image_name, model_spec, list_spec):
    """Score one test image with one model and append the result to ``list_spec``.

    The file ``test_dir + image_name`` is loaded at 224x224, wrapped into a
    one-image batch, passed through ``preprocess_input`` and scored with
    ``model_spec.predict``. The appended row has the form
    ``[image_name without '.jpg', score_1, score_2, ...]``.

    Nothing is returned; ``list_spec`` is modified in place.
    """
    image_path = test_dir + image_name
    loaded = image.load_img(image_path, target_size=(224, 224))

    # Add a leading batch axis, then apply the same preprocessing as in training.
    single_image_batch = preprocess_input(np.expand_dims(loaded.copy(), axis=0))
    scores = model_spec.predict(single_image_batch)[0]

    study_uid = image_name.replace('.jpg', '')
    list_spec.append([study_uid] + list(scores))

Predict Swan Ganz catheter

In [None]:
# Score every test image with the Swan Ganz model.
# The directory listing is sorted because os.listdir returns entries in
# arbitrary order; sorting makes the row order of the results repeatable.
image_name_list = sorted(os.listdir(test_dir))
list_sg = []
for image_name in image_name_list:
    predict_into_list (image_name, model_sg, list_sg)

Predict catheter type

In [None]:

# Score every test image with the catheter-type classifier; each appended
# row is [image id, IsETT score, IsNGT score, IsCVC score].
list_general = []
for test_image_name in image_name_list:
    predict_into_list(test_image_name, model_general, list_general)
    

In [None]:
# Table of family-presence scores, one row per test image.
type_score_columns = ['StudyInstanceUID', 'IsETT', 'IsNGT', 'IsCVC']
test_classified = pd.DataFrame(list_general, columns=type_score_columns)


In [None]:
# Quick look at the first rows of the family-presence scores.
test_classified.head(n=5)

Predict the output values depending on the catheter type

In [None]:
# Result accumulators, one per catheter family; predict_specific appends
# one row per scored image to the matching list.
list_ETT, list_NGT, list_CVC = [], [], []

In [None]:
def predict_specific (row, threshold=0.8):
    """Run the family-specific position models that apply to one test image.

    For each catheter family whose presence score in ``row`` is strictly
    greater than ``threshold``, the matching model scores the image and the
    result is appended to that family's list (``list_ETT``, ``list_NGT``,
    ``list_CVC``) through ``predict_into_list``. Families at or below the
    threshold are skipped, so they get no row in their list.

    Args:
        row: Mapping with ``StudyInstanceUID`` and the ``IsETT``, ``IsNGT``
            and ``IsCVC`` scores (e.g. one row of ``test_classified``).
        threshold: Minimum presence score (exclusive) for a family model to
            run. Defaults to 0.8, the value that used to be hard-coded.

    Returns:
        None. Results are appended to the module-level lists.
    """
    image_file = row['StudyInstanceUID']+'.jpg'

    if row['IsETT'] > threshold:
        predict_into_list (image_file, model_ETT, list_ETT)

    if row['IsNGT'] > threshold:
        predict_into_list (image_file, model_NGT, list_NGT)

    if row['IsCVC'] > threshold:
        predict_into_list (image_file, model_CVC, list_CVC)
        

In [None]:
# Score each test image with the family-specific models it qualifies for.
# predict_specific works purely through side effects (it appends to
# list_ETT / list_NGT / list_CVC), so the rows are iterated explicitly rather
# than through DataFrame.apply: apply is meant for computing values, and older
# pandas releases (before 1.1) could call the function twice on the first row,
# which would append that image's predictions twice.
for _, test_row in test_classified.iterrows():
    predict_specific(test_row)

In [None]:
# Prepend the image-id column name to each family's column list so the lists
# match the layout of the prediction rows ([id, score_1, score_2, ...]).
# The check keeps the cell safe to re-run: without it every extra execution
# would insert 'StudyInstanceUID' again.
for result_columns in (ETT_columns, NGT_columns, CVC_columns):
    if result_columns[:1] != ['StudyInstanceUID']:
        result_columns.insert(0, 'StudyInstanceUID')


In [None]:
# Turn each list of prediction rows into a frame indexed by image id.
uid_column = 'StudyInstanceUID'
predict_ETT_df = pd.DataFrame(data=list_ETT, columns=ETT_columns).set_index(uid_column)
predict_NGT_df = pd.DataFrame(data=list_NGT, columns=NGT_columns).set_index(uid_column)
predict_CVC_df = pd.DataFrame(data=list_CVC, columns=CVC_columns).set_index(uid_column)

sg_result_columns = ['StudyInstanceUID', 'Swan Ganz Catheter Present']
predict_sg_df = pd.DataFrame(data=list_sg, columns=sg_result_columns).set_index(uid_column)


In [None]:
# Index the family-presence scores by image id so they can be joined with the
# per-family prediction frames. The check keeps the cell safe to re-run: once
# the id is the index, a second set_index would raise KeyError.
if test_classified.index.name != 'StudyInstanceUID':
    test_classified = test_classified.set_index('StudyInstanceUID')

In [None]:
# Assemble the submission: left-join every prediction frame onto the list of
# test images (joined on the image-id index).
joined_predictions = test_classified
for prediction_frame in (predict_ETT_df, predict_NGT_df, predict_CVC_df, predict_sg_df):
    joined_predictions = joined_predictions.join(prediction_frame)

# Images that a family model did not score have NaN there; they become 0.
# The intermediate family-presence flags are not part of the output.
type_flag_columns = ['IsETT', 'IsNGT', 'IsCVC']
submission_df = joined_predictions.fillna(0).drop(columns=type_flag_columns)
submission_df = submission_df.reset_index()


In [None]:
def decision_boundaries(x, upper=0.6, lower=0.3):
    """Snap a confident score to a hard 0/1 label; leave everything else as is.

    Args:
        x: A cell value. Strings (such as the image id) are returned unchanged.
        upper: Scores strictly above this value become 1. Defaults to 0.6.
        lower: Scores strictly below this value become 0. Defaults to 0.3.

    Returns:
        ``x`` itself for strings and for scores within ``[lower, upper]``,
        otherwise the integer 1 or 0.
    """
    # isinstance also covers str subclasses, which `type(x) is str` let
    # through to the numeric comparison below (TypeError).
    if isinstance(x, str):
        return x
    if x > upper:
        return 1
    if x < lower:
        return 0
    return x


In [None]:
#submission_df = submission_df.applymap(decision_boundaries)

In [None]:
# Write the predictions to the submission file; the positional row index is
# left out so the file starts with the image-id column.
submission_path = 'submission.csv'
submission_df.to_csv(submission_path, index=False)