In [None]:
import os
import numpy as np
import pandas as pd
from glob import glob
from itertools import chain
from tensorflow.keras import layers
from sklearn.metrics import roc_curve, auc, roc_auc_score, accuracy_score, average_precision_score
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import tensorflow as tf

In [None]:
!pip install gdown

In [None]:
!gdown --id 1qRIdvUWiNxdAXXeEoyOMya-NuES6D-Px

**Fine-tune on CheXpert**

In [None]:
# Prefix prepended to the relative image paths stored in the CSV "Path" column.
path = "../input/chexpert-dataset/"

train_df = pd.read_csv('../input/modified-chexpert/modifiedv2_train.csv')
valid_df = pd.read_csv('../input/modified-chexpert/modifiedv2_valid.csv')
# Lower-case "path" holds the prefixed location that the generators read later.
train_df["path"] = path + train_df["Path"]
valid_df["path"] = path + valid_df["Path"]

dfs = [train_df, valid_df]
# ignore_index=True rebuilds a unique 0..n-1 index. Without it the two source
# frames' indices are stacked as-is, so the combined frame carries duplicate
# index labels, which makes label-based selection and alignment ambiguous.
all_xray_df = pd.concat(dfs, ignore_index=True)
all_xray_df.sample(3)

In [None]:
# Disabled step: dropping a "No Finding" column. It is left commented out, so
# the frame's columns are unchanged by this cell.
# all_xray_df.drop("No Finding", axis=1, inplace=True)
# Display the column index of the combined frame.
all_xray_df.columns

In [None]:
# Replace missing 'Finding Labels' entries with an empty string so the string
# operations in later cells can be applied to every row.
all_xray_df = all_xray_df.fillna({'Finding Labels': ''})

In [None]:
# Remove the literal text 'No Finding' from every label string
# (regex=False keeps this a plain substring replacement).
findings_without_negative = all_xray_df['Finding Labels'].str.replace('No Finding', '', regex=False)
all_xray_df['Finding Labels'] = findings_without_negative

In [None]:
# Class names used for the indicator columns and as the `classes` argument of
# the data generators; the order here fixes the order of the output units.
all_labels = [
    'Atelectasis',
    'Consolidation',
    'Infiltration',
    'Pneumothorax',
    'Edema',
    'Emphysema',
    'Fibrosis',
    'Pleural Effusion',
    'Mass',
    'Pneumonia',
    'Pleural_thickening',
    'Cardiomegaly',
    'Nodule Mass',
    'Hernia',
    'Enlarged Cardiom',
    'Lung Lesion',
    'Lung Opacity',
    'Pleural Other',
    'Fracture',
]

print(f'All Labels ({len(all_labels)}): {all_labels}')

In [None]:
# Add one 0/1 indicator column per class name.
# Each 'Finding Labels' string is split on '|' once and classes are matched
# against the resulting tokens. A plain substring test (`label in string`)
# would mark 'Mass' as present for every 'Nodule Mass' row; exact token
# matching also agrees with how the '|'-split `labels` lists are matched
# against `classes` by the data generators.
finding_tokens = all_xray_df['Finding Labels'].map(lambda finding: set(finding.split('|')))
for c_label in all_labels:
    if len(c_label) > 1:  # leave out empty labels
        # `label=c_label` binds the current class name into the lambda.
        all_xray_df[c_label] = finding_tokens.map(
            lambda tokens, label=c_label: 1.0 if label in tokens else 0.0
        )
all_xray_df.sample(3)

In [None]:
# Preview the first five rows, now including the per-class indicator columns.
all_xray_df.head(n=5)

In [None]:
# 80/20 split of the combined frame, stratified on the first four characters
# of each row's label string; fixed seed for a repeatable split.
stratify_key = all_xray_df['Finding Labels'].str[:4]
train_df, valid_df = train_test_split(
    all_xray_df,
    test_size=0.20,
    random_state=2018,
    stratify=stratify_key,
)

In [None]:
def _split_findings(finding):
    """Split a '|'-delimited label string into a list of label names."""
    return finding.split('|')


# `assign` returns a new frame, so nothing is written into the objects handed
# back by train_test_split (in-place column assignment on those can raise
# SettingWithCopyWarning). Mapping over the single column also avoids the
# row-by-row overhead of DataFrame.apply(axis=1).
train_df = train_df.assign(labels=train_df['Finding Labels'].map(_split_findings))
valid_df = valid_df.assign(labels=valid_df['Finding Labels'].map(_split_findings))

In [None]:
# Location of the CheXpert training images.
DATA_DIR = '../input/chexpert-dataset/CheXpert-v1.0-small/train'
# Side length (pixels) images are resized to, and the mini-batch size.
image_size, batch_size = 256, 32

In [None]:
def _frame_flow(idg, frame, batch):
    """Return an iterator of (images, labels) batches for `frame`.

    Args:
        idg: the ImageDataGenerator that loads and preprocesses the images.
        frame: DataFrame with a 'path' column (image file locations) and a
            'labels' column (list of class names per row).
        batch: number of images per yielded batch.

    Returns:
        The iterator produced by `idg.flow_from_dataframe`, yielding images
        resized to (image_size, image_size) and labels encoded over
        `all_labels` in that order.
    """
    # NOTE(review): with `classes` given, rows whose label list contains none
    # of `all_labels` (e.g. rows with no finding) appear to be filtered out by
    # flow_from_dataframe — confirm against its "Found N validated image
    # filenames" message.
    return idg.flow_from_dataframe(dataframe=frame,
                                   directory=None,
                                   x_col='path',
                                   y_col='labels',
                                   class_mode='categorical',
                                   batch_size=batch,
                                   classes=all_labels,
                                   target_size=(image_size, image_size))


# Training pipeline: per-image normalisation plus light random augmentation.
core_idg = ImageDataGenerator(rescale=1 / 255,
                                  samplewise_center=True,
                                  samplewise_std_normalization=True,
                                  horizontal_flip=True,
                                  vertical_flip=False,
                                  height_shift_range=0.05,
                                  width_shift_range=0.1,
                                  rotation_range=5,
                                  shear_range=0.1,
                                  fill_mode='reflect',
                                  zoom_range=0.15)

# Evaluation pipeline: the same normalisation but no random transforms, so
# validation metrics are measured on unaugmented images and are comparable
# from epoch to epoch.
eval_idg = ImageDataGenerator(rescale=1 / 255,
                              samplewise_center=True,
                              samplewise_std_normalization=True)

train_gen = _frame_flow(core_idg, train_df, batch_size)

valid_gen = _frame_flow(eval_idg, valid_df, batch_size)

# One large batch (up to 1024 images) of validation data held in memory.
test_X, test_Y = next(_frame_flow(eval_idg, valid_df, 1024))

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau

# File the best weights are written to (prefix and stem are concatenated
# without a separator).
weight_prefix = 'xray_class'
weight_path = "{}sigmoid_activation_inceptionresnetv2_chexpert.h5".format(weight_prefix)

# Save the model only when the validation loss reaches a new minimum.
checkpoint = ModelCheckpoint(
    filepath=weight_path,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)

# Stop training once the validation loss has not improved for 3 epochs.
early = EarlyStopping(monitor="val_loss", mode="min", patience=3)

callbacks_list = [checkpoint, early]

In [None]:
# Pull a single batch from the training generator and show the image tensor shape.
x, y = next(iter(train_gen))
print(x.shape)

In [None]:
# Load the previously trained model from the .h5 file in the working directory.
nih_model_file = './sigmoid_activation_inceptionresnetv2.h5'
nihmodel = tf.keras.models.load_model(nih_model_file)

In [None]:
# del newnihmodel

In [None]:
# Reuse the loaded network from its input up to the output of its
# second-to-last layer, i.e. everything except the final layer.
base_inputs = nihmodel.layers[0].input
base_outputs = nihmodel.layers[-2].output

# New head: a 500-unit ReLU layer followed by one sigmoid unit per class.
dense_1 = layers.Dense(500, activation="relu")(base_outputs)
# The output width is derived from all_labels so it always matches the label
# vectors produced by the generators (which are built with classes=all_labels).
output_layer = layers.Dense(len(all_labels), activation="sigmoid")(dense_1)

newnihmodel = tf.keras.Model(inputs = base_inputs, outputs = output_layer)
# Independent sigmoid outputs trained with binary cross-entropy.
newnihmodel.compile(
    optimizer = tf.keras.optimizers.Adam(),
    loss = tf.keras.losses.BinaryCrossentropy(),
    metrics = ["binary_accuracy", "mae"],
)

In [None]:
# Print the layer-by-layer summary of the fine-tuning model built above.
newnihmodel.summary()

In [None]:
# Train on the augmented generator: 100 batches per epoch for at most 30
# epochs, validating on the in-memory (test_X, test_Y) batch; the callbacks
# handle checkpointing and early stopping.
fit_options = dict(
    steps_per_epoch=100,
    validation_data=(test_X, test_Y),
    epochs=30,
    callbacks=callbacks_list,
)
newnihmodel.fit(train_gen, **fit_options)