# I. Import Libraries:

In [1]:
import os
import warnings

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides pandas / TensorFlow warnings
# that may point at real problems - consider narrowing it.
warnings.filterwarnings('ignore')

# Use seaborn's "whitegrid" style for all subsequent plots.
sns.set_style("whitegrid")

In [2]:
import tensorflow as tf
# Print the installed TensorFlow version so the run environment is recorded
# alongside the results.
print(tf.__version__)

2.2.0-dlenv


# II. Edit data:

In [3]:
# Project layout: every location is derived from the single project root.
PROJECT_FOLDER = '/home/jupyter/lung_x_ray'
# Dataset root (the trailing slash is kept on purpose).
DATA_ROOT = PROJECT_FOLDER + '/data/'
# Folder that receives checkpoints and saved models for this experiment.
MODEL_FOLDER = PROJECT_FOLDER + '/trained_model/model1'

In [65]:
# Load the prepared train / validation image lists.
# The location is built from DATA_ROOT instead of repeating the absolute path,
# so moving the dataset only requires changing the configuration cell.
FINAL_LIST_DIR = os.path.join(DATA_ROOT, 'CheXpert-v1.0-small', 'Final_list')
train = pd.read_csv(os.path.join(FINAL_LIST_DIR, 'train_final.csv'))
valid = pd.read_csv(os.path.join(FINAL_LIST_DIR, 'valid_final.csv'))

# Preview five random training rows as a sanity check.
train.sample(5)

Unnamed: 0,Path,Sex,Age,Frontal/Lateral,AP/PA,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices,img_paths
185641,CheXpert-v1.0-small/train/patient59889/study1/...,Male,87,Frontal,AP,,,,,,,1.0,-1.0,,,,,,,/home/jupyter/lung_x_ray/data/CheXpert-v1.0-sm...
95384,CheXpert-v1.0-small/train/patient28817/study3/...,Female,79,Frontal,PA,,0.0,,,,,0.0,,,0.0,0.0,,1.0,,/home/jupyter/lung_x_ray/data/CheXpert-v1.0-sm...
145700,CheXpert-v1.0-small/train/patient41268/study1/...,Female,65,Frontal,AP,,,,,,,,,1.0,,1.0,,,1.0,/home/jupyter/lung_x_ray/data/CheXpert-v1.0-sm...
151476,CheXpert-v1.0-small/train/patient42968/study4/...,Female,23,Frontal,AP,,,,1.0,,,,,,,1.0,,,1.0,/home/jupyter/lung_x_ray/data/CheXpert-v1.0-sm...
202999,CheXpert-v1.0-small/train/patient13898/study4/...,Male,53,Lateral,,,,,,,,,,,0.0,,,1.0,,/home/jupyter/lung_x_ray/data/CheXpert-v1.0-sm...


In [68]:
# Keep only the rows whose 'Frontal/Lateral' value is 'Frontal'.
train = train.loc[train['Frontal/Lateral'].eq('Frontal')]
valid = valid.loc[valid['Frontal/Lateral'].eq('Frontal')]

In [70]:
# Rename the path column from "img_paths" to "image_paths" in both frames.
path_column_rename = {"img_paths": "image_paths"}
train, valid = (
    frame.rename(columns=path_column_rename) for frame in (train, valid)
)

In [72]:
# Reduce the training frame to the image path and the 'No Finding' label.
train = train.loc[:, ['image_paths', 'No Finding']]

In [73]:
# Reduce the validation frame to the image path and the 'No Finding' label.
valid = valid.loc[:, ['image_paths', 'No Finding']]

In [75]:
# Preview five random rows of the training frame after it was reduced to the
# path and label columns.
train.sample(5)

Unnamed: 0,image_paths,No Finding
203357,/home/jupyter/lung_x_ray/data/CheXpert-v1.0-sm...,
210637,/home/jupyter/lung_x_ray/data/CheXpert-v1.0-sm...,
199132,/home/jupyter/lung_x_ray/data/CheXpert-v1.0-sm...,
195869,/home/jupyter/lung_x_ray/data/CheXpert-v1.0-sm...,
222185,/home/jupyter/lung_x_ray/data/CheXpert-v1.0-sm...,


- Replace every NaN (blank) value with 0, so a blank `No Finding` entry is treated as the negative class (0) of the `No Finding` label.



In [76]:
# Replace every missing value with 0 in both frames (re-binding the names
# rather than mutating the frames in place).
train = train.fillna(value=0)
valid = valid.fillna(value=0)

In [77]:
# Preview five random rows to confirm the blank labels were filled with 0.
train.sample(5)

Unnamed: 0,image_paths,No Finding
211451,/home/jupyter/lung_x_ray/data/CheXpert-v1.0-sm...,0.0
211779,/home/jupyter/lung_x_ray/data/CheXpert-v1.0-sm...,0.0
204838,/home/jupyter/lung_x_ray/data/CheXpert-v1.0-sm...,0.0
217798,/home/jupyter/lung_x_ray/data/CheXpert-v1.0-sm...,0.0
193690,/home/jupyter/lung_x_ray/data/CheXpert-v1.0-sm...,0.0


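- Re-split the data: keep the first 25,935 training rows for training and move the remaining training rows into the validation set: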

In [80]:
# Number of leading rows (by position) that stay in the training set; every
# row after this cut-off is moved to the validation set.
N_TRAIN_ROWS = 25935

# Split purely by position. The previous label-based `drop` would also remove
# rows *before* the cut-off whenever an index label was repeated; positional
# slicing keeps exactly the first N_TRAIN_ROWS rows.
data_set_change = train.iloc[N_TRAIN_ROWS:, :]
# `.copy()` makes the training frame an independent object, so later column
# assignments do not act on a slice of the original frame.
train = train.iloc[:N_TRAIN_ROWS, :].copy()
valid = pd.concat([valid, data_set_change])

In [None]:
# Pull out the image-path column of each split (used later for sample counts).
train_img, val_img = train['image_paths'], valid['image_paths']

# III. Create Dataset:

In [89]:
# Cast the label column to strings in both frames before building the
# binary-class generators.
train = train.astype({'No Finding': str})
valid = valid.astype({'No Finding': str})

In [90]:
# Shared loading settings for both splits.
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32

# Training generator: rescale pixel values to [0, 1] and apply light random
# augmentation (rotation, zoom, shifts, shear). Areas exposed by a transform
# are filled with the constant value `cval`.
image_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=15,
    zoom_range=[0.95, 1.05],
    width_shift_range=0.05,
    height_shift_range=0.05,
    shear_range=0.15,
    fill_mode="constant",
    cval=128
)

# Validation generator: rescale only. Validation images must NOT be randomly
# augmented, otherwise the validation metric changes from epoch to epoch for
# reasons unrelated to the model and cannot be used to pick the best epoch.
val_image_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1. / 255
)

train_ds = image_generator.flow_from_dataframe(train,
                                               color_mode='rgb',
                                               x_col='image_paths',
                                               y_col='No Finding',
                                               class_mode='binary',
                                               target_size=IMAGE_SIZE,
                                               shuffle=True,
                                               batch_size=BATCH_SIZE)

# shuffle=False keeps the validation order fixed, so every epoch is evaluated
# on the same images in the same order.
val_ds = val_image_generator.flow_from_dataframe(valid,
                                                 color_mode='rgb',
                                                 x_col='image_paths',
                                                 y_col='No Finding',
                                                 class_mode='binary',
                                                 target_size=IMAGE_SIZE,
                                                 shuffle=False,
                                                 batch_size=BATCH_SIZE)


Found 25935 validated image filenames belonging to 2 classes.
Found 6484 validated image filenames belonging to 2 classes.


# IV. Build model

In [95]:
# Load DenseNet121 with ImageNet weights and without its top classification
# layers, for 224x224 RGB input.
densenet = tf.keras.applications.DenseNet121(
    include_top=False,
    weights="imagenet",
    input_shape=(224, 224, 3),
)

In [None]:
# Mark the whole backbone as trainable. (The statement was pasted twice on one
# line, which made Python look up the undefined name `Truedensenet`.)
densenet.trainable = True

In [98]:
# Index into `densenet.layers` used as the freeze boundary: layers before this
# position are frozen in the next cell, layers from it onward stay trainable.
fine_tune_at = 141

In [99]:
# Freeze every backbone layer located before index `fine_tune_at`, so that
# only the later layers are updated during training.
for frozen_layer in densenet.layers[:fine_tune_at]:
    frozen_layer.trainable = False

In [100]:
# Classification head placed on top of the DenseNet backbone:
# global average pooling followed by a single sigmoid unit.
pooling_layer = tf.keras.layers.GlobalAveragePooling2D()
output_layer = tf.keras.layers.Dense(1, activation='sigmoid')

model2 = tf.keras.Sequential([densenet, pooling_layer, output_layer])

In [102]:
# Compile for binary classification with Adam and AUC as the tracked metric.
# The metric is named explicitly: without a name Keras auto-numbers it
# ("auc", "auc_1", ...) each time this cell is re-run, which changes the
# "val_auc" key that callbacks and the training history rely on.
model2.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999),
    loss='binary_crossentropy',
    metrics=[tf.keras.metrics.AUC(name='auc')],
)

In [103]:
# Print the layer-by-layer summary and the trainable / non-trainable parameter
# counts of the assembled model.
model2.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
densenet121 (Model)          (None, 7, 7, 1024)        7037504   
_________________________________________________________________
global_average_pooling2d_1 ( (None, 1024)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 1025      
Total params: 7,038,529
Trainable params: 5,525,249
Non-trainable params: 1,513,280
_________________________________________________________________


In [93]:
# Set up checkpointing: keep the weights of the best epoch seen so far.
checkpoint_path = os.path.join(MODEL_FOLDER, "frontal_model1_ckpt.h5")
checkpoint_dir = os.path.dirname(checkpoint_path)
# Create the target folder up front so the first save cannot fail on a
# missing directory.
os.makedirs(checkpoint_dir, exist_ok=True)

# Callback that saves the model weights whenever the monitored metric improves.
# - `monitor` was misspelled as "mornitor"; the unknown keyword was ignored and
#   the callback silently fell back to its default, 'val_loss'.
# - `mode='max'` is required because a higher AUC is better; the automatic
#   mode would treat 'val_auc' as a quantity to minimise.
# NOTE(review): 'val_auc' assumes the AUC metric passed to compile() is named
# 'auc' - confirm against the keys in `history.history`.
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 monitor='val_auc',
                                                 mode='max',
                                                 save_best_only=True,
                                                 save_weights_only=True,
                                                 verbose=1)

# V. Training:

In [104]:
# Count the image paths available in each split and report both numbers.
TRAIN_SAMPLES, VAL_SAMPLES = len(train_img), len(val_img)

print(TRAIN_SAMPLES, VAL_SAMPLES)

25935 6484


In [None]:
# Train the model on the training generator and validate after every epoch.
# The step counts come from the iterators themselves: len(iterator) is the
# number of batches needed to cover every image once, including the final
# partial batch. The earlier `len(...) // 32` dropped that last batch and
# repeated the batch size as a magic number.
steps_per_epoch = len(train_ds)
validation_steps = len(val_ds)

history = model2.fit(train_ds,
                     epochs=10,
                     steps_per_epoch=steps_per_epoch,
                     validation_data=val_ds,
                     validation_steps=validation_steps,
                     callbacks=[cp_callback])

In [108]:
# Save the full model into MODEL_FOLDER, the same folder used for the
# checkpoint, instead of a path relative to the current working directory.
# NOTE(review): this stores the model as it is after the last epoch, which is
# not necessarily the best-scoring checkpoint - confirm that is intended.
model2.save(os.path.join(MODEL_FOLDER, 'frontal_model1.h5'))