In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import os
from functools import partial

import matplotlib.pyplot as plt
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import tensorflow as tf

In [None]:
# Pick the distribution strategy used later when the model is built:
# try to resolve and initialise a TPU first, otherwise fall back to MirroredStrategy.
try: # detect TPUs
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver() # TPU detection
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # NOTE(review): newer TensorFlow releases expose this class as tf.distribute.TPUStrategy;
    # confirm which spelling the pinned TensorFlow version expects.
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except ValueError: # no TPU found, detect GPUs (any ValueError raised in the try body lands here)
    strategy = tf.distribute.MirroredStrategy() # for GPU or multi-GPU machines
    #strategy = tf.distribute.get_strategy() # default strategy that works on CPU and single GPU
    #strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() # for clusters of multi-GPU machines

# Report how many replicas the chosen strategy keeps in sync.
print("Number of accelerators: ", strategy.num_replicas_in_sync)

In [None]:
# Sentinel that lets tf.data pick parallelism / buffer sizes at runtime.
AUTOTUNE = tf.data.experimental.AUTOTUNE
# Target size used when resizing decoded images and as the spatial part of the model input shape.
IMAGE_SIZE = [512,512]

In [None]:
from kaggle_datasets import KaggleDatasets
# Google Cloud Storage location of the competition dataset; used below to list the TFRecord shards.
GCS_PATH = KaggleDatasets().get_gcs_path('ranzcr-clip-catheter-line-classification')

In [None]:
# Load the training label table from the competition CSV and preview its first rows.
train_records = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/train.csv')
train_records.head()

In [None]:
# Take the columns at positions 1..11 of the CSV header as the prediction targets.
# NOTE(review): this depends on the column order of train.csv -- presumably position 0 is
# StudyInstanceUID and the trailing column is PatientID; confirm against train_records.head().
target_columns = train_records.columns.values[1:12]
target_columns

In [None]:
# Class balance of the 'Swan Ganz Catheter Present' label.
# Bar labels and bar heights are both taken from the same value_counts() Series, so each
# label is always paired with its own count (unique() returns order of appearance while
# value_counts() sorts by frequency, so mixing them could mislabel the bars).
# x/y are passed by keyword because newer seaborn releases reject positional data arguments.
swan_ganz_counts = train_records['Swan Ganz Catheter Present'].value_counts()
ax = sns.barplot(x=swan_ganz_counts.index, y=swan_ganz_counts.values)
ax.set(xlabel='Swan Ganz Catheter Present', ylabel='count');

In [None]:
# Raw number of rows per value of the 'Swan Ganz Catheter Present' label.
train_records['Swan Ganz Catheter Present'].value_counts()

In [None]:
# Per-class weights for the 'Swan Ganz Catheter Present' label, computed in a single
# idempotent cell (nothing is mutated in place, so re-running it is safe).
swan_ganz_class_counts = (
    train_records['Swan Ganz Catheter Present'].value_counts().sort_index()
)

# "Balanced" weighting: n_samples / (n_classes * class_count). The rarer class gets the
# larger weight; using the raw class frequency would instead favour the majority class.
class_weights = swan_ganz_class_counts.sum() / (
    len(swan_ganz_class_counts) * swan_ganz_class_counts
)

# Plain {class_label: scalar_weight} mapping, one float per class present in the data.
# NOTE(review): the only visible consumer is the commented-out `class_weight = cl_w`
# argument of fit(); confirm how Keras applies class_weight to 11-column targets
# before enabling it.
cl_w = {int(label): float(weight) for label, weight in class_weights.items()}

In [None]:
# Read one training image from disk and display it as a visual sanity check.
img = plt.imread('../input/ranzcr-clip-catheter-line-classification/train/1.2.826.0.1.3680043.8.498.10000428974990117276582711948006105617.jpg')
plt.imshow(img)

In [None]:
def submit_gen(model, dataset_path, threshold=0.6, target_size=(512, 512)):
    """Run ``model`` on every file in ``dataset_path`` and binarise its predictions.

    Args:
        model: object exposing ``predict(batch)``; called once per image with a
            batch of size one.
        dataset_path: directory containing the images. A trailing separator is
            optional because paths are built with ``os.path.join``.
        threshold: predictions strictly greater than this value become 1, the
            rest 0. Defaults to the 0.6 used originally.
        target_size: size passed to ``load_img`` when loading each image.
            Defaults to the original ``(512, 512)``.

    Returns:
        ``(pred_array, test_data)``: a list with one 0/1 NumPy array per image, and
        the list of file names in the same order (the order of ``os.listdir``).
    """
    test_data = os.listdir(dataset_path)
    pred_array = []
    for file_name in test_data:
        img = tf.keras.preprocessing.image.load_img(
            os.path.join(dataset_path, file_name), target_size=target_size)
        img_array = tf.keras.preprocessing.image.img_to_array(img)
        # Add a leading batch axis: predict() is fed one image at a time.
        img_array = tf.expand_dims(img_array, 0)
        prediction = np.squeeze(model.predict(img_array))
        pred_array.append(np.where(prediction > threshold, 1, 0))
    return pred_array, test_data

In [None]:
# List the training TFRecord shards stored under the dataset's GCS path.
gs_filenames = tf.io.gfile.glob(GCS_PATH + "/train_tfrecords/*.tfrec")
gs_filenames

In [None]:
# Split the shard list 90/10 by position: leading shards train, trailing shards validate.
split_ind = int(len(gs_filenames) * 0.9)
TRAINING_FILENAMES = gs_filenames[:split_ind]
VALID_FILENAMES = gs_filenames[split_ind:]

# Raw (still serialized) record streams for each split.
dataset = tf.data.TFRecordDataset(TRAINING_FILENAMES)
val_dataset = tf.data.TFRecordDataset(VALID_FILENAMES)

In [None]:
# Decode the first serialized training record into a tf.train.Example so its
# feature names can be inspected (uncomment the print to view it).
for i in dataset.take(1):
    example = tf.train.Example()
    example.ParseFromString(i.numpy())
    #print(example)

In [None]:
# Schema handed to tf.io.parse_single_example: the encoded image and the study id are
# scalar strings, every label is a scalar int64.
image_feature_description = {
    'image': tf.io.FixedLenFeature([], tf.string),
    'StudyInstanceUID': tf.io.FixedLenFeature([], tf.string),
}
image_feature_description.update(
    (label_name, tf.io.FixedLenFeature([], tf.int64))
    for label_name in (
        'CVC - Abnormal',
        'CVC - Borderline',
        'CVC - Normal',
        'ETT - Abnormal',
        'ETT - Borderline',
        'ETT - Normal',
        'NGT - Abnormal',
        'NGT - Borderline',
        'NGT - Incompletely Imaged',
        'NGT - Normal',
        'Swan Ganz Catheter Present',
    )
)
def return_target_features(example):
    """Gather the label values named in ``target_columns`` from a parsed example.

    Values are collected in ``target_columns`` order and returned as one tensor
    cast to ``tf.int32``.
    """
    return tf.cast([example[column] for column in target_columns], tf.int32)

def _parse_image_function(example_proto):
    """Turn one serialized ``tf.train.Example`` into an ``(image, labels)`` pair.

    The record is parsed with ``image_feature_description``; its JPEG bytes are
    decoded to 3 channels, cast to float32 (no rescaling) and resized to
    ``IMAGE_SIZE``. Labels come from ``return_target_features``.
    """
    parsed = tf.io.parse_single_example(example_proto, image_feature_description)
    decoded = tf.image.decode_jpeg(parsed['image'], channels=3)
    resized = tf.image.resize(tf.cast(decoded, tf.float32), IMAGE_SIZE)
    return resized, return_target_features(parsed)

In [None]:
# Decode every serialized training record into an (image, labels) pair, in parallel.
parsed_image_dataset = dataset.map(_parse_image_function, num_parallel_calls=AUTOTUNE)

In [None]:
# Same decoding for the validation records.
val_image_dataset = val_dataset.map(_parse_image_function, num_parallel_calls=AUTOTUNE)

In [None]:
# Augmentation pipeline built from Keras preprocessing layers.
# Every visible use of it is commented out (in create_dataset and in the model
# definition); the active augmentation path is the augment() function.
# NOTE(review): RandomHeight / RandomWidth presumably change the image size, which would
# not fit a fixed-size model input -- confirm before re-enabling this pipeline.
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.experimental.preprocessing.RandomFlip("horizontal"),
    tf.keras.layers.experimental.preprocessing.RandomRotation(0.1),
    tf.keras.layers.experimental.preprocessing.RandomHeight(factor=0.1,),
    tf.keras.layers.experimental.preprocessing.RandomWidth(factor=0.1,),
    #tf.keras.layers.experimental.preprocessing.Rescaling(scale=1./255)
])

In [None]:
def augment(x, max_delta=0.1, value_range=255.0):
    """Randomly flip an image (or batch) left-right and jitter its brightness.

    Args:
        x: float image tensor as produced by the parsing step, i.e. pixel
            values on a 0..``value_range`` scale.
        max_delta: brightness jitter as a fraction of ``value_range``.
        value_range: scale of the pixel values. The parsing step casts decoded
            JPEG bytes to float32 without rescaling, hence the 255.0 default.

    Returns:
        The augmented tensor.
    """
    image = tf.image.random_flip_left_right(x)
    # random_brightness adds the sampled delta directly to float pixels, so the
    # fraction must be scaled to the pixel range; a raw 0.1 on 0..255 data is
    # visually a no-op.
    image = tf.image.random_brightness(image, max_delta * value_range, seed=None)
    return image

In [None]:
def create_dataset(dataset, aug=True, batch_size=64, shuffle_buffer=4096):
    """Shuffle, batch and optionally augment a dataset of ``(image, labels)`` pairs.

    Args:
        dataset: ``tf.data.Dataset`` yielding ``(image, labels)`` elements.
        aug: when truthy, ``augment`` is applied to the image batches.
        batch_size: number of elements per batch (64, as before, by default).
        shuffle_buffer: size of the shuffle buffer (4096, as before, by default).

    Returns:
        The batched dataset, with prefetching as the final stage.
    """
    dataset = dataset.shuffle(shuffle_buffer)
    dataset = dataset.batch(batch_size)
    if aug:
        # Augmentation runs on whole batches; let tf.data parallelise it.
        dataset = dataset.map(lambda x, y: (augment(x), y),
                              num_parallel_calls=AUTOTUNE)
    # Prefetch last so producing the next (batched, augmented) element overlaps with
    # the training step that consumes the current one.
    return dataset.prefetch(buffer_size=AUTOTUNE)

In [None]:
# Training batches are augmented; validation batches are left untouched.
train_dataset = create_dataset(parsed_image_dataset, aug=True)
validation_dataset = create_dataset(val_image_dataset, aug=False)

In [None]:
# Build the classifier inside the distribution-strategy scope: an ImageNet-initialised
# EfficientNetB4 backbone (without its top) followed by a dense head with 11 sigmoid outputs.
with strategy.scope():
    base = tf.keras.applications.EfficientNetB4(include_top=False,input_shape=(*IMAGE_SIZE,3),
                                                weights='imagenet'
                                            )
    # Freeze the backbone so only the layers added below are trainable.
    base.trainable = False
    inputs = tf.keras.layers.Input([*IMAGE_SIZE, 3])
    #input_layer = data_augmentation(inputs)
    pre_layer = tf.keras.applications.efficientnet.preprocess_input(inputs)
    base_model = base(pre_layer)
    #base_model = tf.keras.layers.GlobalAveragePooling2D()(base_model)
    # NOTE(review): Flatten keeps every spatial position of the backbone output, which
    # presumably makes the first Dense layer very large; the commented-out
    # GlobalAveragePooling2D above is the lighter alternative -- check model.summary().
    base_model = tf.keras.layers.Flatten()(base_model)
    base_model = tf.keras.layers.Dropout(0.1)(base_model)
    base_model = tf.keras.layers.Dense(512,activation='relu')(base_model)
    base_model = tf.keras.layers.Dropout(0.1)(base_model)
    base_model = tf.keras.layers.BatchNormalization()(base_model)
    base_model = tf.keras.layers.Dense(128,activation='relu')(base_model)
    base_model = tf.keras.layers.Dense(64,activation='relu')(base_model)
    # One independent sigmoid per label: 11 outputs, matching the 11 entries sliced
    # into target_columns.
    base_model = tf.keras.layers.Dense(11,activation='sigmoid')(base_model)
    
    model_efficientnet_net = tf.keras.Model(inputs=inputs,outputs=base_model)

In [None]:
# Print the layer-by-layer summary of the assembled model.
model_efficientnet_net.summary()

In [None]:
# Exponential learning-rate schedule.
# NOTE(review): no visible cell passes `lr_schedule` to an optimizer (compile uses a
# fixed learning rate); confirm whether it is still needed.
initial_learning_rate_e = 0.005
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate_e,
    decay_steps=100,
    decay_rate=0.96,
    staircase=True,
)

# Keep only the best model seen so far on disk.
checkpoint_cb_e = tf.keras.callbacks.ModelCheckpoint(
    "efficientnetb4_best_model.h5",
    save_best_only=True,
)

# Stop once val_loss has not improved by at least 0.001 for 5 epochs, then roll the
# model back to its best weights.
early_stopping_cb_e = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0.001,
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# Multiply the learning rate by 0.3 after 2 epochs without a 0.001 val_loss improvement.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.3,
    patience=2,
    min_delta=0.001,
    verbose=1,
)

In [None]:
# Re-assert that the backbone is frozen; the same flag is already set when the model is
# built, so this only matters if it was changed in between.
base.trainable = False

In [None]:
# Compile inside the strategy scope so that the optimizer and the AUC metric create
# their variables under the same distribution strategy as the model itself.
# NOTE(review): 'AUC' pools all 11 outputs into a single binary problem; if a per-label
# average is wanted, tf.keras.metrics.AUC(multi_label=True) may be the better fit -- confirm.
with strategy.scope():
    model_efficientnet_net.compile(
        optimizer=tf.keras.optimizers.Adamax(learning_rate=5e-3),
        loss='binary_crossentropy',  # one independent binary problem per sigmoid output
        metrics=['AUC'],
    )

In [None]:
# Turn soft device placement off.
# NOTE(review): presumably this makes ops fail rather than silently fall back to another
# device -- confirm this is intended, in particular when running on a TPU.
tf.config.set_soft_device_placement(False)

In [None]:
# Train for at most `epochs` epochs; checkpointing, early stopping and LR reduction are
# all driven by the callbacks defined earlier. `cl_w` is intentionally not passed.
epochs = 25
history = model_efficientnet_net.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=epochs,
    callbacks=[checkpoint_cb_e, early_stopping_cb_e, reduce_lr],
)

In [None]:
# Save the model as left by fit() to its own HDF5 file (separate from the best-model
# checkpoint file written by ModelCheckpoint during training).
model_efficientnet_net.save('efficientb4.h5')