# # # Dataset Analysis

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import to_categorical
from keras.preprocessing import image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
from plotly import tools


In [None]:
# Read the training annotation table from the competition input directory.
df_train = pd.read_csv(
    '../input/ranzcr-clip-catheter-line-classification/train.csv'
)

# Names of the 11 target columns, listed device by device:
# ETT (3), NGT (4), CVC (3) and the Swan-Ganz presence flag.
labels = [
    'ETT - Abnormal',
    'ETT - Borderline',
    'ETT - Normal',
    'NGT - Abnormal',
    'NGT - Borderline',
    'NGT - Incompletely Imaged',
    'NGT - Normal',
    'CVC - Abnormal',
    'CVC - Borderline',
    'CVC - Normal',
    'Swan Ganz Catheter Present',
]

# Preview the first rows of the table.
df_train.head()

In [None]:
# Dimensions (rows, columns) of the training annotation table.
df_train.shape

# Tube coexistence in the same image:

In [None]:

#sample.groupby(['StudyInstanceUID']).sum(1).value_counts(sort=True)

# Row-wise sum of the label columns = number of labels set on each row;
# value_counts then tallies how many rows share each total.
labels_per_image = df_train[labels].sum(axis=1)
labels_per_image.value_counts(sort=True)

In [None]:
# Bar chart of the distribution of per-row label totals.
# value_counts() puts the per-row label total in the index (drawn on the
# x axis) and the number of rows having that total in the values (bar
# heights, y axis), so the axis captions must be assigned in that order.
df_train[labels].sum(axis=1).value_counts(sort=True).plot.bar()
plt.xlabel("Simultaneous label number")
plt.ylabel("Occurrence number")



In [None]:
import seaborn as sns
# Total number of positive annotations per label.
# The columns are selected by name, as the other cells do, rather than by
# position: iloc[:, 1:-2] depends on the CSV column order and drops the last
# label column when a single non-label column (presumably PatientID - confirm
# against the CSV) follows the labels.
targ_cts = df_train[labels].sum(axis=0)

# Sort once and reuse the result for both axes of the horizontal bar plot.
sorted_cts = targ_cts.sort_values(ascending=False)
fig = plt.figure(figsize=(12, 6))
sns.barplot(y=sorted_cts.index, x=sorted_cts.values, palette='mako')
plt.show()

# DataSet Imbalance :

In [None]:
# Mean of every label column (the positive rate, assuming the columns are
# 0/1 indicators - TODO confirm).
df_train[labels].mean()

In [None]:
# Same per-label means as above, drawn as a bar chart.
df_train[labels].mean().plot.bar()

# High number of annotations for the same patient:

In [None]:
# Number of rows recorded for each PatientID value.
df_train['PatientID'].value_counts()

# ETT Tubes :

In [None]:
# labels[:3] are the three 'ETT - ...' columns; count every combination of
# their values and return the result as a table with a 'Counts' column.
df_train[labels[:3]].value_counts().rename('Counts').reset_index()

# NG Tubes :

In [None]:
# The four 'NGT - ...' columns sit at positions 3..6 of `labels`
# (Abnormal, Borderline, Incompletely Imaged, Normal). The slice must start
# at 3; starting at 4 leaves 'NGT - Abnormal' out of the table.
df_train[labels[3:7]].value_counts().rename('Counts').reset_index()

# CVC Tubes :

In [None]:
# labels[7:10] are the three 'CVC - ...' columns; count every combination of
# their values and return the result as a table with a 'Counts' column.
df_train[labels[7:10]].value_counts().rename('Counts').reset_index()

# Preprocessing :

In [None]:
from skimage import exposure
from skimage.util import random_noise
def randRange(a, b):
    """Return a random float obtained by mapping a draw from [0, 1) onto a..b.

    Args:
        a: value returned when the underlying draw is 0.
        b: upper end of the range the draw is scaled towards.

    Returns:
        ``np.random.rand() * (b - a) + a``.
    """
    span = b - a
    draw = np.random.rand()
    return draw * span + a


def AHE(img):
    """Equalize *img* with skimage's adaptive histogram equalization.

    Args:
        img: image array handed unchanged to
            ``exposure.equalize_adapthist``.

    Returns:
        The result of ``exposure.equalize_adapthist(img, clip_limit=0.03)``.
    """
    return exposure.equalize_adapthist(img, clip_limit=0.03)
#datagen = ImageDataGenerator(rotation_range=30, horizontal_flip=0.5, preprocessing_function=AHE)

In [None]:
# Annotation table read from the 'extract' input ('train-Copy1.csv'); the
# cell below selects the same label columns from it as the main train.csv.
train_df1 = pd.read_csv('../input/extract/train-Copy1.csv') 
def append_ext(fn):
    """Return *fn* with the ".jpg" extension appended."""
    extension = ".jpg"
    return fn + extension


# Turn every StudyInstanceUID into an image file name.
train_df1["StudyInstanceUID"] = train_df1["StudyInstanceUID"].apply(append_ext)

# Only the first (row count - 1500) images are loaded into memory; clamp at 0
# so a table with fewer than 1500 rows yields no images instead of a
# surprising negative slice.
n_images = max(train_df1.shape[0] - 1500, 0)
image_dir = "../input/ranzcr-clip-catheter-line-classification/train/"

train_image = []
for file_name in tqdm(train_df1["StudyInstanceUID"].iloc[:n_images]):
    # target_size is (height, width); the channel count is not part of it.
    img = image.load_img(image_dir + file_name, target_size=(640, 640))
    img = image.img_to_array(img)
    img = img / 255  # scale pixel values to [0, 1]
    train_image.append(img)

X = np.array(train_image)

df_lab = train_df1[['ETT - Abnormal', 'ETT - Borderline',
       'ETT - Normal', 'NGT - Abnormal', 'NGT - Borderline',
       'NGT - Incompletely Imaged', 'NGT - Normal', 'CVC - Abnormal',
       'CVC - Borderline', 'CVC - Normal', 'Swan Ganz Catheter Present']]
# NOTE: this rebinds `labels` from the list of column names to the label
# matrix; cells that index a DataFrame with `labels` must run before this one.
labels = np.array(df_lab)

# X covers only the first n_images rows, so keep just the matching label rows
# and split both arrays in ONE call: this guarantees that every image stays
# paired with its own label vector. Splitting X and the full label matrix
# separately shuffles arrays of different lengths and misaligns them.
X_train, X_test, y_train, y_test = train_test_split(
    X, labels[:n_images], test_size=0.2, random_state=42)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

print (X_train.shape)

print (y_train.shape)

print (X_test.shape)

print (y_test.shape)

In [None]:
#"""
#for i in range(40,90):
# Side-by-side comparison of two training images before and after adaptive
# histogram equalization, laid out as one row of four panels.
plt.figure(figsize=(30, 12))

panels = [
    (141, X_train[40], 'original image 1'),
    (142, AHE(X_train[40]), 'image 1 with histogram equalization'),
    (143, X_train[120], 'original image 2'),
    (144, AHE(X_train[120]), 'image 2 with histogram equalization'),
]
for position, picture, caption in panels:
    plt.subplot(position)
    plt.imshow(picture)
    # Hide the tick marks: pixel coordinates carry no information here.
    plt.xticks([])
    plt.yticks([])
    plt.title(caption)

plt.show()
#"""

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf
import tensorflow.keras.layers as tfl
from tensorflow.keras import losses
from tensorflow.keras.layers import Flatten ,Dense, Dropout
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.layers.experimental.preprocessing import RandomFlip, RandomRotation,RandomCrop,RandomContrast,Normalization

In [None]:
#load data
import pandas as pd
# Training annotation table used by the data generators below.
train_df = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/train.csv')
#sample_df.shape
def append_ext(fn):
    """Return *fn* with the ".jpg" extension appended."""
    extension = ".jpg"
    return fn + extension

# Turn every StudyInstanceUID into an image file name by appending ".jpg",
# so the column can be used directly as x_col by flow_from_dataframe.
train_df["StudyInstanceUID"]=train_df["StudyInstanceUID"].apply(append_ext)

# Dimensions (rows, columns) of the table.
train_df.shape

In [None]:
# Batch size and (height, width) the images are resized to when loaded.
BATCH_SIZE = 64
IMG_SIZE = (150, 150)
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Target columns handed to flow_from_dataframe as y_col.
label = [
    'ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal',
    'NGT - Abnormal', 'NGT - Borderline', 'NGT - Incompletely Imaged',
    'NGT - Normal',
    'CVC - Abnormal', 'CVC - Borderline', 'CVC - Normal',
    'Swan Ganz Catheter Present',
]

# Hold out 20% of the rows as the validation subset.
# No `rescale` here: the tf.keras EfficientNet used as backbone further down
# performs its own input rescaling/normalization and expects raw pixel values
# in [0, 255]. Dividing by 255 beforehand would feed it inputs in [0, 1] and
# squash them a second time, crippling the ImageNet-pretrained features.
datagen = ImageDataGenerator(validation_split=0.20)
                          

# Training batches: multi-label targets are returned as raw numeric arrays
# (class_mode='raw'), reshuffled every epoch with a fixed seed.
# `shuffle` is a boolean flag (not a shuffle-buffer size), and
# flow_from_dataframe has no `labels_mode` parameter - it was silently
# swallowed by **kwargs - so both are spelled out correctly here.
train_dataset = datagen.flow_from_dataframe(
    dataframe=train_df,
    directory="../input/ranzcr-clip-catheter-line-classification/train/",
    x_col="StudyInstanceUID",
    y_col=label,
    subset="training",
    batch_size=BATCH_SIZE,
    color_mode='rgb',
    class_mode='raw',
    target_size=IMG_SIZE,
    shuffle=True,
    seed=42,
    interpolation="bilinear")

# Validation batches: same settings, but in a fixed order so that metrics
# are computed over the same sequence of images every epoch.
validation_dataset = datagen.flow_from_dataframe(
    dataframe=train_df,
    directory="../input/ranzcr-clip-catheter-line-classification/train",
    x_col="StudyInstanceUID",
    y_col=label,
    subset="validation",
    batch_size=BATCH_SIZE,
    color_mode='rgb',
    class_mode='raw',
    target_size=IMG_SIZE,
    shuffle=False,
    seed=42,
    interpolation="bilinear")


In [None]:
# UNQ_C1
from tensorflow.keras.layers.experimental.preprocessing import RandomFlip, RandomRotation,RandomCrop,RandomContrast,Normalization
# GRADED FUNCTION: data_augmenter
def data_augmenter():
    """Build the augmentation pipeline applied in front of the backbone.

    The pipeline chains four preprocessing layers, in this order: a random
    horizontal flip, a random vertical flip, a random rotation (factor 0.10)
    and a random contrast change (factor 0.2).

    Returns:
        tf.keras.Sequential: a new model stacking the four layers.
    """
    augmentation_layers = [
        RandomFlip('horizontal'),
        RandomFlip('vertical'),
        RandomRotation(0.10),
        RandomContrast(0.2),
    ]
    return tf.keras.Sequential(augmentation_layers)

In [None]:
# Stand-alone instance of the augmentation pipeline. The model cell below
# calls data_augmenter() again and therefore gets its own, separate instance.
data_augmentation = data_augmenter()

In [None]:

# Input geometry: 150x150 RGB images.
image_shape = (150, 150)
input_shape = image_shape + (3,)
im_size = 150
from tensorflow.keras import Model, initializers, regularizers

from tensorflow.keras.applications.efficientnet import EfficientNetB0

# Feature extractor: ImageNet-pretrained EfficientNetB0 without its
# classification head, followed by global average pooling.
# NOTE: the variable is called `modelB7`, but the backbone is EfficientNetB0.
modelB7 = tf.keras.Sequential([
    EfficientNetB0(input_shape=(im_size, im_size, 3),
                   weights='imagenet',
                   include_top=False,
                   drop_connect_rate=0.7),
    tf.keras.layers.GlobalAveragePooling2D(),
])

# Full network: augmentation -> backbone -> dropout -> 11 sigmoid outputs
# (one independent probability per label).
inputs = tf.keras.Input(shape=input_shape)
x = data_augmenter()(inputs)
x = modelB7(x)
x = Dropout(0.5)(x)
outputs = tfl.Dense(11, activation='sigmoid')(x)

model = tf.keras.Model(inputs, outputs)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    # Pin the metric name. Keras auto-numbers unnamed metrics ('auc',
    # 'auc_1', ...) each time a new instance is created in the same session,
    # e.g. when this cell is re-run; the callbacks monitor 'val_auc' and
    # would silently stop tracking anything once the name changes.
    metrics=[tf.keras.metrics.AUC(multi_label=True, name='auc')])
model.summary()

In [None]:
# Number of full batches per epoch, derived from the training iterator
# instead of a hard-coded sample count, so it stays correct if the CSV or the
# validation split changes.
steps_per_epoch = train_dataset.samples // BATCH_SIZE

# Keep only the weights with the best validation AUC seen so far.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'model.h5', save_best_only=True, monitor='val_auc', mode='max')

# Lower the learning rate (never below 1e-6) when the validation AUC has not
# improved for 3 epochs.
lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_auc', patience=3, min_lr=1e-6, mode='max')

In [None]:
def auto_select_accelerator():
    """Return a tf.distribute strategy, using a TPU when one can be set up.

    A TPU cluster resolver is created, connected and initialized, and a TPU
    strategy is built on it. If any of these steps raises ``ValueError``,
    the current default strategy from ``tf.distribute.get_strategy()`` is
    used instead. The replica count of the chosen strategy is printed.

    Returns:
        The selected distribution strategy.
    """
    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        selected = tf.distribute.experimental.TPUStrategy(resolver)
        print("Running on TPU:", resolver.master())
    except ValueError:
        selected = tf.distribute.get_strategy()

    replica_count = selected.num_replicas_in_sync
    print(f"Running on {replica_count} replicas")
    return selected

In [None]:
strategy = auto_select_accelerator()

# Training configuration, gathered in one place.
fit_options = dict(
    epochs=20,
    verbose=True,
    workers=17,
    callbacks=[checkpoint, lr_reducer],
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_dataset,
)

# NOTE(review): `model` is built and compiled before this scope is entered.
# Distribution strategies generally expect the model to be created inside
# strategy.scope() - confirm this works on the TPU path.
with strategy.scope():
    history = model.fit(train_dataset, **fit_options)