In [1]:
import tensorflow as tf 
import os 
import PIL
import pandas as pd 
import numpy as np 
import pathlib
import matplotlib.pyplot as plt 
import cv2
import warnings

import re

In [3]:
# Root folder of the dataset; every later path in the notebook is built from it.
data_dir = pathlib.Path('../input/chexpert-dataset/CheXpert-v1.0-small') 
# List the root's contents as a quick check that the dataset is mounted.
os.listdir(data_dir)

['valid.csv', 'valid', 'train.csv', 'train']

In [4]:
# Label tables for the two splits shipped with the dataset.
train_df = pd.read_csv(data_dir / 'train.csv')
valid_df = pd.read_csv(data_dir / 'valid.csv')

In [5]:
# Every column from position 5 onwards is a label column.
label_columns = train_df.columns[5:].tolist()

# A missing label is treated as 0.0 in both the training and validation frames.
train_df.loc[:, label_columns] = train_df.loc[:, label_columns].fillna(0.0)
valid_df.loc[:, label_columns] = valid_df.loc[:, label_columns].fillna(0.0)

In [6]:
def uncertain_to_pos(df):
    """Map uncertain labels to positive: every -1.0 in ``df`` becomes 1.0.

    The replacement scans all columns of ``df`` (not only the label columns),
    modifies the frame in place and returns nothing.
    """
    df.replace(-1.0, 1.0, inplace=True)

# Apply the uncertain -> positive policy to both label tables (in place).
for _frame in (train_df, valid_df):
    uncertain_to_pos(_frame)

In [7]:
# Index both tables by image path so labels can be looked up by path.
train_df = train_df.set_index('Path')
valid_df = valid_df.set_index('Path')

In [9]:
# First file found three levels below train/, wrapped in a one-element list.
first_train_file = next((data_dir / 'train').glob('*/*/*'))
sample_path = [str(first_train_file)]
sample_path

['../input/chexpert-dataset/CheXpert-v1.0-small/train/patient00734/study3/view2_lateral.jpg']

In [10]:

# Collect every file three levels below each split folder as Path objects.
train_image_paths = list((data_dir / 'train').glob('*/*/*'))
valid_image_paths = list((data_dir / 'valid').glob('*/*/*'))

In [16]:
# train_image_paths = [str(path) for path in train_image_paths]
# valid_image_paths = [str(path) for path in valid_image_paths]

In [15]:
# with open('train_image_paths.txt', 'w') as file:
#     file.write(str(train_image_paths))
# with open('valid_image_paths.txt', 'w') as file:
#     file.write(str(valid_image_paths))

In [53]:
# The cache files hold, on a single line, the repr of a list of path strings.
# Nothing else in the notebook writes them (the writer cell is commented out),
# so rebuild a missing cache from the dataset folders; otherwise this cell
# fails with FileNotFoundError on a fresh kernel.
if not os.path.exists('train_image_paths.txt'):
    with open('train_image_paths.txt', 'w') as file:
        file.write(str([str(path) for path in (data_dir / 'train').glob('*/*/*')]))
if not os.path.exists('valid_image_paths.txt'):
    with open('valid_image_paths.txt', 'w') as file:
        file.write(str([str(path) for path in (data_dir / 'valid').glob('*/*/*')]))

# Read the raw text back; it is still a string here and gets parsed into a
# list in the next cell.
with open('train_image_paths.txt', 'r') as file:
    train_image_paths = file.readline()
with open('valid_image_paths.txt', 'r') as file:
    valid_image_paths = file.readline()

In [54]:
import ast
# Turn the cached text (a Python list literal) back into real lists of paths.
train_image_paths, valid_image_paths = map(
    ast.literal_eval, (train_image_paths, valid_image_paths)
)

In [49]:
import random

In [55]:
# The train/validation split further down is positional (take/skip), so the
# shuffle decides which images land in which split. Sort first and use a
# fixed seed so the split is identical on every run instead of changing
# silently between kernel restarts.
train_image_paths.sort()
random.Random(42).shuffle(train_image_paths)

In [56]:
# Peek at the first ten paths after shuffling.
train_image_paths[:10]

['../input/chexpert-dataset/CheXpert-v1.0-small/train/patient18217/study3/view1_frontal.jpg',
 '../input/chexpert-dataset/CheXpert-v1.0-small/train/patient05274/study3/view2_lateral.jpg',
 '../input/chexpert-dataset/CheXpert-v1.0-small/train/patient38795/study10/view1_frontal.jpg',
 '../input/chexpert-dataset/CheXpert-v1.0-small/train/patient38752/study5/view1_frontal.jpg',
 '../input/chexpert-dataset/CheXpert-v1.0-small/train/patient49971/study5/view1_frontal.jpg',
 '../input/chexpert-dataset/CheXpert-v1.0-small/train/patient47278/study1/view1_frontal.jpg',
 '../input/chexpert-dataset/CheXpert-v1.0-small/train/patient49886/study2/view1_frontal.jpg',
 '../input/chexpert-dataset/CheXpert-v1.0-small/train/patient52043/study1/view1_frontal.jpg',
 '../input/chexpert-dataset/CheXpert-v1.0-small/train/patient06999/study1/view1_frontal.jpg',
 '../input/chexpert-dataset/CheXpert-v1.0-small/train/patient34975/study11/view1_frontal.jpg']

In [68]:
# Build the path datasets from the in-memory lists so the shuffled order is
# kept. The official validation folder serves as the test set.
train_ds, test_ds = (
    tf.data.Dataset.from_tensor_slices(paths)
    for paths in (train_image_paths, valid_image_paths)
)

In [69]:
# Number of training image paths, as reported by the dataset itself.
image_count = train_ds.cardinality().numpy()

In [70]:
# Positional split: the first half of the paths becomes the validation set
# and the remainder is kept for training.
val_size = int(image_count * 0.5)
val_ds = train_ds.take(val_size)
train_final_ds = train_ds.skip(val_size)

In [27]:
# Single lookup table holding the label rows of both splits, stacked row-wise.
main_df = pd.concat((train_df, valid_df))

In [28]:
def process_path(file_path):
    """Load one image and its label vector for the EfficientNet model.

    Must be called through ``tf.py_function``: it calls ``.numpy()`` on its
    argument, which only works on an eager tensor.

    Args:
        file_path: scalar string tensor with the path of a JPEG file.

    Returns:
        ``(img, label)``. ``img`` is the image decoded with one channel and
        resized to 320x320, with pixel values left in the 0-255 range.
        ``label`` is an int16 tensor with the ``label_columns`` values of the
        matching ``main_df`` row.

    Raises:
        KeyError: if the derived key is not in ``main_df``'s index.
    """
    # The lookup key is the last five path components joined with '/'
    # (presumably the form used by the CSV 'Path' column - verify if the
    # dataset layout changes).
    parts = file_path.numpy().decode().split(os.path.sep)[-5:]
    min_path = '/'.join(parts)
    label = tf.cast(list(main_df.loc[min_path, label_columns]), dtype=tf.int16)

    # Load, decode as single-channel and resize the image.
    img = tf.io.read_file(file_path)
    img = tf.io.decode_jpeg(img, channels=1)
    img = tf.image.resize(img, [320, 320])

    # Keras EfficientNet models rescale their input internally and expect
    # 0-255 pixels; the matching preprocess_input is a pass-through. The
    # previous MobileNet preprocessing squeezed pixels into [-1, 1] before
    # the model's own rescaling, leaving the network a near-constant input.
    img = tf.keras.applications.efficientnet.preprocess_input(img)
    return img, label

In [71]:
def _load_example(path):
    """Run ``process_path`` via ``tf.py_function`` and restore static shapes.

    ``tf.py_function`` outputs carry no static shape, which Keras layers and
    metrics may reject ("rank is undefined"). The shapes are known from
    ``process_path``: a 320x320 single-channel image and one value per entry
    of ``label_columns``.
    """
    img, label = tf.py_function(func=process_path, inp=[path], Tout=(tf.float32, tf.int16))
    img.set_shape([320, 320, 1])
    label.set_shape([len(label_columns)])
    return img, label


# Same loader for all three splits; AUTOTUNE lets tf.data pick the parallelism.
train_final_ds = train_final_ds.map(_load_example, num_parallel_calls=tf.data.AUTOTUNE)
valid_ds = val_ds.map(_load_example, num_parallel_calls=tf.data.AUTOTUNE)
test_ds = test_ds.map(_load_example, num_parallel_calls=tf.data.AUTOTUNE)

In [72]:
batch_size = 16

# Batch each split and prefetch so input loading overlaps with training.
train, valid, test = (
    split.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    for split in (train_final_ds, valid_ds, test_ds)
)

In [31]:
# Exponential decay: start at 1e-4 and multiply the rate by 0.95 every
# 1000 optimizer steps.
learning_rate_scheduler = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.0001,
    decay_rate=0.95,
    decay_steps=1000,
)

In [32]:
# Spatial size of the model input (square images).
input_height = 320
input_width = 320

In [62]:
# EfficientNetB5 backbone trained from scratch (no pretrained weights) on
# single-channel input, without its classification head.
baseline = tf.keras.applications.EfficientNetB5(
    input_shape=(input_height, input_width, 1),
    include_top=False,
    weights=None,
)

In [64]:
# Backbone followed by a small head: pool, flatten, dropout, then one sigmoid
# unit per label (14 outputs).
efficientNetB5 = tf.keras.models.Sequential()
efficientNetB5.add(baseline)
efficientNetB5.add(tf.keras.layers.MaxPool2D())
efficientNetB5.add(tf.keras.layers.Flatten())
efficientNetB5.add(tf.keras.layers.Dropout(0.5))
efficientNetB5.add(tf.keras.layers.Dense(14, activation='sigmoid'))
efficientNetB5.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb5 (Functional)  (None, 10, 10, 2048)      28512659  
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 5, 5, 2048)        0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 51200)             0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 51200)             0         
_________________________________________________________________
dense_3 (Dense)              (None, 14)                716814    
Total params: 29,229,473
Trainable params: 29,056,734
Non-trainable params: 172,739
_________________________________________________________________


In [65]:
# Adam driven by the exponential-decay schedule defined above.
optimizer = tf.keras.optimizers.Adam(learning_rate_scheduler)

# The head has 14 independent sigmoid outputs (multi-label: several findings
# can be present at once), so each output is a separate binary problem.
# CategoricalCrossentropy assumes one-hot targets over mutually exclusive
# classes and is the wrong loss here; use BinaryCrossentropy.
efficientNetB5.compile(
    optimizer=optimizer,
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=[tf.keras.metrics.AUC()],
)

In [66]:
# Location handed to the ModelCheckpoint callback for saving the model.
checkpoint_path = './checkpoints/train/efficientNetB5/320px'

In [67]:
# Stop once the monitored value has not improved by at least 0.001 for
# 3 epochs, and roll back to the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(min_delta = 0.001, patience = 3, restore_best_weights=True)
# reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(factor = 0.1, patience = 1)
# The keyword is `save_best_only`; the previous `store_best_only` is not a
# ModelCheckpoint argument (silently ignored by older Keras, rejected by
# newer), so the checkpoint was overwritten every epoch instead of keeping
# the best model.
checkpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint_path, save_best_only = True)
callbacks = [early_stopping, checkpoint]

In [None]:
# Train for at most 20 epochs, validating on `valid`; the callbacks list
# carries early stopping and checkpointing.
efficientNetB5.fit(train,validation_data=valid, epochs = 20, callbacks=callbacks, verbose = 1)

Epoch 1/20