In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
'''
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
'''
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import keras
import matplotlib.pyplot as plt
from PIL import Image
import tensorflow_addons as tfa
from sklearn import model_selection
from sklearn import metrics
import tensorflow_hub as hub
import pandas as pd
from tensorflow.keras.preprocessing import image
import glob
import random
# Disable Jedi: tab completion does not work in this Jupyter environment with it enabled (cause unknown)
%config Completer.use_jedi = False

#Please Upvote :) 

In [None]:
# Load the training table that ships with the competition data.
TRAIN_CSV_PATH = '../input/ranzcr-clip-catheter-line-classification/train.csv'
train_df = pd.read_csv(TRAIN_CSV_PATH)

In [None]:
# Index the rows by StudyInstanceUID so an image's row can be looked up by its id.
# The index is built from the Series itself, so the StudyInstanceUID column stays in the frame.
uid_index = train_df['StudyInstanceUID']
train_df = train_df.set_index(uid_index)

In [None]:
# Preview the first five rows of the training table.
train_df.head(n=5)

In [None]:
# Hold out 20% of the rows for validation.
# Column 0 supplies the image ids; columns 1 up to (but excluding) the last one are the targets.
# A fixed random_state keeps the split, and therefore the validation scores and
# the checkpoint chosen from them, reproducible across runs.
# NOTE(review): rows are split independently of each other; if several rows can
# belong to the same patient, a grouped split would avoid leakage -- confirm.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    train_df.iloc[:, 0].values,
    train_df.iloc[:, 1:-1].values,
    train_size=0.8,
    random_state=42,
)

In [None]:
# Generator that yields batches of (images, labels)
class RanzcrDataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that loads, resizes and batches images from disk.

    Images are returned as float32 arrays holding the values produced by
    ``image.img_to_array``; no rescaling or normalisation is applied here.

    Args:
        data_path: Directory that contains the ``<id>.jpg`` files.
        x: Sequence of image ids (file names without the ``.jpg`` suffix).
        y: Labels aligned with ``x``. It is sliced per batch and cast with
            ``.astype('int32')``, so it must support both (e.g. a NumPy array).
        target_shape: ``(height, width)`` every image is resized to.
        batch_size: Maximum number of samples per batch.

    Raises:
        ValueError: If ``batch_size`` is not positive, or if ``x`` and ``y``
            do not have the same length.
    """

    def __init__(self, data_path, x, y, target_shape, batch_size):
        if batch_size <= 0:
            raise ValueError('batch_size must be a positive integer')
        if len(x) != len(y):
            raise ValueError('x and y must contain the same number of samples')
        self.data_path = data_path
        self.batch_size = batch_size
        self.target_shape = target_shape
        self.x = x
        self.y = y
        self.images = [os.path.join(self.data_path, curr_img + '.jpg') for curr_img in x]
        self.dataset_length = len(x)

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        # Round up so the trailing samples that do not fill a whole batch are
        # still trained on / evaluated instead of being silently dropped.
        return int(np.ceil(self.dataset_length / self.batch_size))

    def __getitem__(self, index):
        """Return ``(images, labels)`` for batch number ``index``."""
        start = index * self.batch_size
        stop = start + self.batch_size
        imgs_batch = self.images[start:stop]
        labels = self.y[start:stop]
        return self.decode(imgs_batch), labels.astype('int32')

    def decode(self, batch):
        """Load every path in ``batch`` into one float32 array.

        The result has shape ``(len(batch), height, width, 3)``.
        """
        # Size the buffer from the actual batch so a short final batch does not
        # carry all-zero padding images that have no matching label.
        decoded_batch = np.zeros((len(batch),) + tuple(self.target_shape) + (3,), dtype='float32')
        for i, current_img in enumerate(batch):
            decoded_batch[i] = image.img_to_array(image.load_img(current_img, target_size=self.target_shape))
        return decoded_batch

In [None]:
# Batches of 64 training images, each resized to 200x200.
train_dataset = RanzcrDataGenerator(
    data_path='../input/ranzcr-clip-catheter-line-classification/train/',
    x=x_train,
    y=y_train,
    target_shape=(200, 200),
    batch_size=64,
)

In [None]:
# Batches of 64 held-out images, read from the same folder and resized to 200x200.
val_dataset = RanzcrDataGenerator(
    data_path='../input/ranzcr-clip-catheter-line-classification/train/',
    x=x_test,
    y=y_test,
    target_shape=(200, 200),
    batch_size=64,
)

In [None]:
# Big Transfer (BiT) backbone loaded from TensorFlow Hub.
# Background: https://blog.tensorflow.org/2020/05/bigtransfer-bit-state-of-art-transfer-learning-computer-vision.html
# No `trainable` argument is passed, so the layer keeps hub.KerasLayer's default.
module_handle = 'https://tfhub.dev/google/bit/m-r152x4/1'
module = hub.KerasLayer(handle=module_handle)

In [None]:
# Augmentation pipeline: random horizontal flip followed by a random rotation (factor 0.1).
_preprocessing = layers.experimental.preprocessing
data_augmentation = keras.Sequential(
    [
        _preprocessing.RandomFlip("horizontal"),
        _preprocessing.RandomRotation(0.1),
    ]
)

In [None]:
class BiTModel(keras.Model):
    """Classifier that puts a new dense head on top of a feature module.

    Args:
        module: Callable layer applied to the (optionally augmented) inputs;
            its output is fed to the dense head.
        num_classes: Number of units in the dense head.
        activation: Anything accepted by ``keras.activations.get``; applied to
            the head's output.
        augmentation: Optional callable applied to the inputs before
            ``module``. Skipped when it is ``None`` (or otherwise falsy).

    NOTE(review): inputs are passed to ``module`` without any rescaling.
    Confirm which value range the wrapped module expects (the TF Hub BiT
    modules are documented as taking [0, 1] floats -- verify) against what the
    callers in this notebook actually feed in.
    """

    def __init__(self, module, num_classes, activation, augmentation=None):
        super().__init__()
        self.num_classes = num_classes
        self.module = module
        # The head's kernel starts at all zeros.
        self.head = layers.Dense(units=num_classes, kernel_initializer='zeros')
        self.augmentation = augmentation
        self.activation = keras.activations.get(activation)

    def call(self, inputs):
        """Run augmentation (if any), the feature module, the head and the activation."""
        features = inputs
        if self.augmentation:
            features = self.augmentation(features)
        features = self.module(features)
        head_output = self.head(features)
        return self.activation(head_output)

In [None]:
# Reset Keras' global state to free up RAM before building the model.
keras.backend.clear_session()
# 11 sigmoid outputs on top of the BiT module, with augmentation applied to the inputs.
model = BiTModel(
    module=module,
    num_classes=11,
    activation='sigmoid',
    augmentation=data_augmentation,
)

In [None]:

# Define the optimiser and its learning-rate schedule.
# Decay learning rate by factor of 10 at SCHEDULE_BOUNDARIES.
# The boundaries are optimizer step counts (one step per batch), not epochs.
lr = 0.003
SCHEDULE_BOUNDARIES = [200, 300, 400]
# One value per interval between boundaries, each 10x smaller than the one
# before it. (The previous list jumped from lr*0.1 straight to lr*0.001,
# which contradicted the factor-of-10 rule stated above.)
lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
    boundaries=SCHEDULE_BOUNDARIES,
    values=[lr, lr * 0.1, lr * 0.01, lr * 0.001],
)
optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)


In [None]:
# Binary cross-entropy loss; AUC (default arguments) is reported as a metric.
# NOTE(review): with its defaults, AUC pools all outputs into one curve; if a
# per-label average is wanted, `multi_label=True` may be the better fit -- confirm.
loss_fn = tf.keras.losses.BinaryCrossentropy()
auc_metric = keras.metrics.AUC()
model.compile(
    loss=loss_fn,
    optimizer=optimizer,
    metrics=[auc_metric],
)


In [None]:
# Stop training once the monitored quantity (Keras default: val_loss) has not
# improved for 4 epochs, and keep the best weights seen so far on disk.
# save_weights_only=True is needed here: `model` is a subclassed keras.Model,
# and Keras cannot write a complete subclassed model to an HDF5 (.h5) file --
# only its weights -- so the default full-model save would fail at the first
# checkpoint.
callbacks = [
    keras.callbacks.EarlyStopping(patience=4),
    keras.callbacks.ModelCheckpoint(
        "chest_xray_classification.h5",
        save_best_only=True,
        save_weights_only=True,
    ),
]
model.fit(
    train_dataset,
    epochs=15,
    validation_data=val_dataset,
    callbacks=callbacks,
)

In [None]:
def get_test_images(data_path, model, threshold=0.5, target_size=(200, 200)):
    """Run ``model`` on every image matched by ``data_path``, one image at a time.

    Args:
        data_path: Glob pattern selecting the image files.
        model: Callable that takes a ``(1, height, width, 3)`` float32 batch
            and returns the predictions for it.
        threshold: Scores strictly above this value become 1 and the rest 0
            (int32). Pass ``None`` to keep the model's raw scores.
        target_size: ``(height, width)`` each image is resized to.

    Returns:
        dict mapping each file's base name (extension removed) to the
        prediction array returned for that image.
    """
    predictions = dict()
    # Sorted so the order of the returned dict is the same on every run.
    image_paths = sorted(glob.glob(data_path))
    for done, img_path in enumerate(image_paths, start=1):
        img_name = os.path.splitext(os.path.basename(img_path))[0]
        # Convert with img_to_array so the model receives the same float32
        # representation the training generator produces, not a raw PIL image.
        pixels = image.img_to_array(image.load_img(img_path, target_size=target_size))
        preds = model(np.expand_dims(pixels, 0))
        preds = preds.numpy() if hasattr(preds, 'numpy') else np.asarray(preds)
        if threshold is not None:
            preds = (preds > threshold).astype('int32')
        predictions[img_name] = preds
        if done % 50 == 0:
            print('Finished {} imgs'.format(done))
    return predictions

In [None]:
# Predict every file in the competition's test folder with the trained model.
TEST_GLOB = '../input/ranzcr-clip-catheter-line-classification/test/*'
preds = get_test_images(TEST_GLOB, model)

In [None]:
# Image ids, in the order the predictions were stored.
ids = list(preds)

In [None]:
# Stack the per-image prediction arrays, in the same order as the dict's keys.
predicted_data = np.array([preds[key] for key in preds])

In [None]:
# Collapse the stacked predictions to one row per image: (n, 1, k) -> (n, k).
# An unqualified np.squeeze would also drop the image axis when there is exactly
# one image, which breaks the column indexing that follows. Reshaping on the
# first and last axes keeps it, and is a no-op if this cell is run again.
predicted_data = predicted_data.reshape(predicted_data.shape[0], predicted_data.shape[-1])

In [None]:
# Build the submission table: the image id followed by one column per label,
# where label k is taken from column k of predicted_data.
# The id column uses `ids` (the keys of the prediction dict, in the same order
# as the rows of predicted_data); the previous `x` was never defined.
label_columns = [
    'ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal',
    'NGT - Abnormal', 'NGT - Borderline', 'NGT - Incompletely Imaged', 'NGT - Normal',
    'CVC - Abnormal', 'CVC - Borderline', 'CVC - Normal',
    'Swan Ganz Catheter Present',
]
submission = pd.DataFrame({
    'StudyInstanceUID': ids,
    **{column: predicted_data[:, k] for k, column in enumerate(label_columns)},
})

In [None]:
# Write the submission file to the working directory, without the row index.
submission.to_csv(path_or_buf='submission.csv', index=False)