In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.metrics import AUC
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.layers.experimental.preprocessing import Rescaling, RandomZoom
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
from tensorflow.keras.regularizers import l1, l2
from tensorflow.keras import layers
import warnings
warnings.filterwarnings('ignore')
from tensorflow.keras.optimizers import SGD
import time
import random
random.seed(42)
import numpy as np
np.random.seed(42)
tf.random.set_seed(42)



def visualize_training_results(history, num_epochs=None):
    """Plot training/validation curves for the first tracked metric and for the loss.

    Parameters
    ----------
    history : object
        Anything exposing a ``history`` dict that maps series names to
        per-epoch value lists and contains at least ``'loss'``
        (this notebook passes the return value of ``model.fit``).
    num_epochs : int, optional
        Number of epochs to draw. Defaults to, and is capped at, the number
        of epochs recorded under ``'loss'``.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    records = history.history
    recorded_epochs = len(records['loss'])
    if num_epochs is None:
        num_epochs = recorded_epochs
    # Cap the x-range at what was actually recorded so x and y always have the
    # same length (training may have ended before the requested epoch count).
    num_epochs = max(0, min(num_epochs, recorded_epochs))
    epochs_range = range(num_epochs)

    # First series that is neither the loss nor a validation series. Selecting
    # by name instead of by a fixed position avoids picking 'val_loss' when no
    # extra metric was tracked.
    metric = next((key for key in records
                   if key != 'loss' and not key.startswith('val_')), None)

    # Each panel: (history key, label used in legend/title, legend location).
    panels = []
    if metric is not None:
        panels.append((metric, metric, 'lower right'))
    panels.append(('loss', 'Loss', 'upper right'))

    plt.figure(figsize=(12, 6))
    for position, (key, label, legend_loc) in enumerate(panels, start=1):
        plt.subplot(1, len(panels), position)
        plt.plot(epochs_range, records[key][:num_epochs], label='Training ' + label)
        # Validation series only exist when validation data was supplied.
        if 'val_' + key in records:
            plt.plot(epochs_range, records['val_' + key][:num_epochs],
                     label='Validation ' + label)
        plt.legend(loc=legend_loc)
        plt.title('Training and Validation ' + label)
        plt.xlabel('Epoch')
    plt.show()
    
def baseline_modeler(model, metrics='accuracy', optimizer='adam', num_epochs=100,
                     early_stopping=None, train_data=None, val_data=None):
    """Compile, train and evaluate ``model``, then plot curves and show a score table.

    Parameters
    ----------
    model : Keras model
        Compiled here with binary cross-entropy loss.
    metrics : str, metric object, or list/tuple of those
        Metric(s) handed to ``model.compile``. A single value is wrapped in a
        list; a list/tuple is passed through flat.
    optimizer : str or optimizer object
        Forwarded to ``model.compile``.
    num_epochs : int
        Maximum number of epochs passed to ``model.fit``.
    early_stopping : callback, optional
        When given, it is the only callback passed to ``model.fit``.
    train_data, val_data : optional
        Datasets used for fitting and evaluation. When omitted, the
        notebook-level ``train_ds`` / ``val_ds`` are used.

    Returns
    -------
    None
        Results are printed, plotted and displayed as a DataFrame.
    """
    # Fall back to the notebook-level datasets so existing calls keep working.
    if train_data is None:
        train_data = train_ds
    if val_data is None:
        val_data = val_ds

    # Avoid nesting an already-list argument as [[...]].
    if isinstance(metrics, (list, tuple)):
        metric_list = list(metrics)
    else:
        metric_list = [metrics]

    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=metric_list)

    callbacks = [early_stopping] if early_stopping is not None else None

    start_time = time.time()
    history = model.fit(train_data,
                        epochs=num_epochs,
                        validation_data=val_data,
                        verbose=0,
                        callbacks=callbacks)
    duration = time.time() - start_time
    print(f"Training time: {duration} seconds")
    print('')
    # Plot only the epochs that actually ran (fewer than num_epochs when
    # training stopped early).
    visualize_training_results(history, num_epochs=len(history.history['loss']))
    print('')

    # return_dict=True keys every score by its metric name, so the table's
    # columns no longer rely on the ordering of the history keys.
    train_scores = model.evaluate(train_data, return_dict=True)
    val_scores = model.evaluate(val_data, return_dict=True)
    scores = pd.DataFrame([train_scores, val_scores], index=['Train', 'Val'])
    scores.loc['Diff'] = scores.loc['Val'] - scores.loc['Train']
    display(scores)
    print('------------------------------')
    print('')

2024-02-24 19:57:14.899910: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Dataset locations and image geometry shared by the cells below.
train_dir = "../data/chest_xray/new_train"
test_dir = "../data/chest_xray/new_test"
val_dir = "../data/chest_xray/new_val"
batch_size = 64
image_size = (64, 64)
input_shape = image_size + (1,)  # one channel: images are loaded as grayscale


def load_split(directory, shuffle):
    """Load one image directory as a batched, binary-labelled grayscale dataset.

    Parameters
    ----------
    directory : str
        Folder handed to ``image_dataset_from_directory``.
    shuffle : bool
        Forwarded to ``image_dataset_from_directory``.

    Returns
    -------
    The dataset produced by ``image_dataset_from_directory`` using the
    notebook-level ``batch_size`` and ``image_size``.
    """
    return image_dataset_from_directory(directory,
                                        label_mode='binary',
                                        batch_size=batch_size,
                                        image_size=image_size,
                                        color_mode="grayscale",
                                        shuffle=shuffle)


# Only the training split is shuffled. The evaluation splits are read in a
# fixed order: the scores computed on them do not depend on sample order, so
# shuffling them only adds work.
train_ds = load_split(train_dir, shuffle=True)
test_ds = load_split(test_dir, shuffle=False)
val_ds = load_split(val_dir, shuffle=False)

# Cache decoded images after the first pass and overlap input with training.
# NOTE(review): the datasets are already batched at this point, so the shuffle
# below appears to reorder whole batches rather than individual images - confirm
# this is the intended granularity.
train_ds = train_ds.cache().shuffle(buffer_size=1000).prefetch(tf.data.AUTOTUNE)
test_ds = test_ds.cache().prefetch(tf.data.AUTOTUNE)
val_ds = val_ds.cache().prefetch(tf.data.AUTOTUNE)

Found 4684 files belonging to 2 classes.
Found 587 files belonging to 2 classes.
Found 585 files belonging to 2 classes.


In [None]:
# Metrics tracked during training and reported in the final score table.
metrics = [
    AUC(curve='PR', name='auc_pr'),
    AUC(name='auc_'),
    'accuracy',
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]

# Stop when val_loss has not improved by at least 0.001 for 20 epochs,
# restoring the weights of the best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0.001,
    patience=20,
    restore_best_weights=True,
    verbose=1,
)

# Three he_normal Conv2D/MaxPooling2D blocks behind a random-zoom layer,
# followed by a 128-unit dense layer with 50% dropout.
model = Sequential([
    Rescaling(scale=1./255, input_shape=input_shape),
    RandomZoom(height_factor=0.2),
    *[layer
      for n_filters in (32, 64, 128)
      for layer in (Conv2D(n_filters, kernel_size=(3, 3), activation='relu',
                           kernel_initializer='he_normal'),
                    MaxPooling2D(pool_size=(2, 2)))],
    Flatten(),
    Dense(units=128, activation='relu', kernel_initializer='he_normal'),
    Dropout(rate=0.5),
    Dense(units=1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='SGD', metrics=metrics)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=100,
    callbacks=[early_stopping],
    verbose=1,
)

# Score table: one row each for train, validation and their difference.
train_scores = model.evaluate(train_ds)
val_scores = model.evaluate(val_ds)
# The first half of the history keys is used as the column names.
num_metrics = len(history.history) // 2
metrics_names = list(history.history)[:num_metrics]
diff_scores = [val_value - train_value
               for train_value, val_value in zip(train_scores, val_scores)]
display(pd.DataFrame(data=[train_scores, val_scores, diff_scores],
                     index=['Train', 'Val', 'Diff'],
                     columns=metrics_names))
print('------------------------------')
print('')

In [None]:
# Metrics shared by the experiment cells that follow.
metrics = [
    AUC(curve='PR', name='auc_pr'),
    AUC(name='auc_'),
    'accuracy',
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]

# Stop when val_loss has not improved by at least 0.0001 for 20 epochs,
# restoring the weights of the best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0.0001,
    patience=20,
    restore_best_weights=True,
    verbose=1,
)

In [None]:
# Baseline CNN: three Conv2D/MaxPooling2D blocks with no explicit
# kernel_initializer, trained with adam for 50 epochs.
model = Sequential([
    Rescaling(scale=1./255, input_shape=input_shape),
    *[layer
      for n_filters in (32, 64, 128)
      for layer in (Conv2D(n_filters, kernel_size=(3, 3), activation='relu'),
                    MaxPooling2D(pool_size=(2, 2)))],
    Flatten(),
    Dense(units=128, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])
baseline_modeler(model,
                 metrics=metrics,
                 optimizer='adam',
                 num_epochs=50,
                 early_stopping=None)

In [None]:
# Baseline architecture with the he_normal initializer on every hidden layer.
# Author's finding: best initializer by val_loss, although every variant overfits.
model = Sequential([
    Rescaling(scale=1./255, input_shape=input_shape),
    *[layer
      for n_filters in (32, 64, 128)
      for layer in (Conv2D(n_filters, kernel_size=(3, 3), activation='relu',
                           kernel_initializer='he_normal'),
                    MaxPooling2D(pool_size=(2, 2)))],
    Flatten(),
    Dense(units=128, activation='relu', kernel_initializer='he_normal'),
    Dense(units=1, activation='sigmoid'),
])
baseline_modeler(model,
                 metrics=metrics,
                 optimizer='adam',
                 num_epochs=50,
                 early_stopping=None)

In [None]:
# Baseline architecture with the lecun_normal initializer on every hidden layer.
model = Sequential([
    Rescaling(scale=1./255, input_shape=input_shape),
    *[layer
      for n_filters in (32, 64, 128)
      for layer in (Conv2D(n_filters, kernel_size=(3, 3), activation='relu',
                           kernel_initializer='lecun_normal'),
                    MaxPooling2D(pool_size=(2, 2)))],
    Flatten(),
    Dense(units=128, activation='relu', kernel_initializer='lecun_normal'),
    Dense(units=1, activation='sigmoid'),
])
baseline_modeler(model,
                 metrics=metrics,
                 optimizer='adam',
                 num_epochs=50,
                 early_stopping=None)

In [None]:
# he_normal architecture trained with the SGD optimizer.
# Author's finding: clearly better than adam and far better than rmsprop.
model = Sequential([
    Rescaling(scale=1./255, input_shape=input_shape),
    *[layer
      for n_filters in (32, 64, 128)
      for layer in (Conv2D(n_filters, kernel_size=(3, 3), activation='relu',
                           kernel_initializer='he_normal'),
                    MaxPooling2D(pool_size=(2, 2)))],
    Flatten(),
    Dense(units=128, activation='relu', kernel_initializer='he_normal'),
    Dense(units=1, activation='sigmoid'),
])
baseline_modeler(model,
                 metrics=metrics,
                 optimizer='SGD',
                 num_epochs=50,
                 early_stopping=None)

In [None]:
# he_normal architecture trained with the RMSprop optimizer.
model = Sequential([
    Rescaling(scale=1./255, input_shape=input_shape),
    *[layer
      for n_filters in (32, 64, 128)
      for layer in (Conv2D(n_filters, kernel_size=(3, 3), activation='relu',
                           kernel_initializer='he_normal'),
                    MaxPooling2D(pool_size=(2, 2)))],
    Flatten(),
    Dense(units=128, activation='relu', kernel_initializer='he_normal'),
    Dense(units=1, activation='sigmoid'),
])
baseline_modeler(model,
                 metrics=metrics,
                 optimizer='rmsprop',
                 num_epochs=50,
                 early_stopping=None)

In [None]:
# L1 kernel regularization (0.01) on every hidden layer; SGD optimizer, he_normal initializer.
model = Sequential([
    Rescaling(scale=1./255, input_shape=input_shape),
    *[layer
      for n_filters in (32, 64, 128)
      for layer in (Conv2D(n_filters, kernel_size=(3, 3), activation='relu',
                           kernel_initializer='he_normal',
                           kernel_regularizer=l1(0.01)),
                    MaxPooling2D(pool_size=(2, 2)))],
    Flatten(),
    Dense(units=128, activation='relu', kernel_initializer='he_normal',
          kernel_regularizer=l1(0.01)),
    Dense(units=1, activation='sigmoid'),
])
baseline_modeler(model,
                 metrics=metrics,
                 optimizer='SGD',
                 num_epochs=50,
                 early_stopping=None)

In [None]:
# L2 kernel regularization (0.01) on every hidden layer; SGD optimizer, he_normal initializer.
model = Sequential([
    Rescaling(scale=1./255, input_shape=input_shape),
    *[layer
      for n_filters in (32, 64, 128)
      for layer in (Conv2D(n_filters, kernel_size=(3, 3), activation='relu',
                           kernel_initializer='he_normal',
                           kernel_regularizer=l2(0.01)),
                    MaxPooling2D(pool_size=(2, 2)))],
    Flatten(),
    Dense(units=128, activation='relu', kernel_initializer='he_normal',
          kernel_regularizer=l2(0.01)),
    Dense(units=1, activation='sigmoid'),
])
baseline_modeler(model,
                 metrics=metrics,
                 optimizer='SGD',
                 num_epochs=50,
                 early_stopping=None)

In [None]:
# Dropout between each convolution and its pooling layer, with the rate
# rising by block (0.2 -> 0.3 -> 0.4); SGD optimizer, he_normal initializer.
model = Sequential([
    Rescaling(scale=1./255, input_shape=input_shape),
    *[layer
      for n_filters, drop_rate in ((32, 0.2), (64, 0.3), (128, 0.4))
      for layer in (Conv2D(n_filters, kernel_size=(3, 3), activation='relu',
                           kernel_initializer='he_normal'),
                    Dropout(rate=drop_rate),
                    MaxPooling2D(pool_size=(2, 2)))],
    Flatten(),
    Dense(units=128, activation='relu', kernel_initializer='he_normal'),
    Dense(units=1, activation='sigmoid'),
])
baseline_modeler(model,
                 metrics=metrics,
                 optimizer='SGD',
                 num_epochs=300,
                 early_stopping=early_stopping)

In [None]:
# Dropout (0.5) after the dense layer; SGD optimizer, he_normal initializer.
# This cell also rebinds `early_stopping`: min_delta 0.001, patience 10.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0.001,
    patience=10,
    restore_best_weights=True,
    verbose=1,
)
model = Sequential([
    Rescaling(scale=1./255, input_shape=input_shape),
    *[layer
      for n_filters in (32, 64, 128)
      for layer in (Conv2D(n_filters, kernel_size=(3, 3), activation='relu',
                           kernel_initializer='he_normal'),
                    MaxPooling2D(pool_size=(2, 2)))],
    Flatten(),
    Dense(units=128, activation='relu', kernel_initializer='he_normal'),
    Dropout(rate=0.5),
    Dense(units=1, activation='sigmoid'),
])
baseline_modeler(model,
                 metrics=metrics,
                 optimizer='SGD',
                 num_epochs=300,
                 early_stopping=early_stopping)

In [None]:
# Dropout (0.25) after every pooling layer; SGD optimizer, he_normal initializer.
model = Sequential([
    Rescaling(scale=1./255, input_shape=input_shape),
    *[layer
      for n_filters in (32, 64, 128)
      for layer in (Conv2D(n_filters, kernel_size=(3, 3), activation='relu',
                           kernel_initializer='he_normal'),
                    MaxPooling2D(pool_size=(2, 2)),
                    Dropout(rate=0.25))],
    Flatten(),
    Dense(units=128, activation='relu', kernel_initializer='he_normal'),
    Dense(units=1, activation='sigmoid'),
])
baseline_modeler(model,
                 metrics=metrics,
                 optimizer='SGD',
                 num_epochs=300,
                 early_stopping=early_stopping)

In [None]:
# Three he_normal conv blocks and a 128-unit dense layer followed by 50% dropout; SGD optimizer.
model = Sequential([
    Rescaling(scale=1./255, input_shape=input_shape),
    *[layer
      for n_filters in (32, 64, 128)
      for layer in (Conv2D(n_filters, kernel_size=(3, 3), activation='relu',
                           kernel_initializer='he_normal'),
                    MaxPooling2D(pool_size=(2, 2)))],
    Flatten(),
    Dense(units=128, activation='relu', kernel_initializer='he_normal'),
    Dropout(rate=0.5),
    Dense(units=1, activation='sigmoid'),
])
baseline_modeler(model,
                 metrics=metrics,
                 optimizer='SGD',
                 num_epochs=300,
                 early_stopping=early_stopping)

In [None]:
# Deeper network: a fourth convolution block with 256 filters, 10% dropout in
# every block and 50% dropout after the dense layer.
model = Sequential([
    Rescaling(scale=1./255, input_shape=input_shape),
    # NOTE(review): in this first block dropout comes before pooling, whereas
    # the later blocks pool first - confirm the difference is intended.
    Conv2D(32, kernel_size=(3, 3), activation='relu', kernel_initializer='he_normal'),
    Dropout(rate=0.1),
    MaxPooling2D(pool_size=(2, 2)),
    *[layer
      for n_filters in (64, 128, 256)
      for layer in (Conv2D(n_filters, kernel_size=(3, 3), activation='relu',
                           kernel_initializer='he_normal'),
                    MaxPooling2D(pool_size=(2, 2)),
                    Dropout(rate=0.1))],
    Flatten(),
    Dense(units=128, activation='relu', kernel_initializer='he_normal'),
    Dropout(rate=0.5),
    Dense(units=1, activation='sigmoid'),
])
baseline_modeler(model,
                 metrics=metrics,
                 optimizer='SGD',
                 num_epochs=100,
                 early_stopping=early_stopping)

In [None]:
# Kernel-size experiment: 5x5 kernels in every convolution, 50% dropout
# before the output layer.
model = Sequential([
    Rescaling(scale=1./255, input_shape=input_shape),
    *[layer
      for n_filters in (32, 64, 128)
      for layer in (Conv2D(n_filters, kernel_size=(5, 5), activation='relu',
                           kernel_initializer='he_normal'),
                    MaxPooling2D(pool_size=(2, 2)))],
    Flatten(),
    Dense(units=128, activation='relu', kernel_initializer='he_normal'),
    Dropout(rate=0.5),
    Dense(units=1, activation='sigmoid'),
])
baseline_modeler(model,
                 metrics=metrics,
                 optimizer='SGD',
                 num_epochs=200,
                 early_stopping=early_stopping)

In [None]:
# Dense-width experiment: 256 units in the hidden dense layer, followed by 50% dropout.
model = Sequential([
    Rescaling(scale=1./255, input_shape=input_shape),
    *[layer
      for n_filters in (32, 64, 128)
      for layer in (Conv2D(n_filters, kernel_size=(3, 3), activation='relu',
                           kernel_initializer='he_normal'),
                    MaxPooling2D(pool_size=(2, 2)))],
    Flatten(),
    Dense(units=256, activation='relu', kernel_initializer='he_normal'),
    Dropout(rate=0.5),
    Dense(units=1, activation='sigmoid'),
])
baseline_modeler(model,
                 metrics=metrics,
                 optimizer='SGD',
                 num_epochs=200,
                 early_stopping=early_stopping)

In [None]:
# Dropout-rate experiment: 30% dropout after the 128-unit dense layer.
model = Sequential([
    Rescaling(scale=1./255, input_shape=input_shape),
    *[layer
      for n_filters in (32, 64, 128)
      for layer in (Conv2D(n_filters, kernel_size=(3, 3), activation='relu',
                           kernel_initializer='he_normal'),
                    MaxPooling2D(pool_size=(2, 2)))],
    Flatten(),
    Dense(units=128, activation='relu', kernel_initializer='he_normal'),
    Dropout(rate=0.3),
    Dense(units=1, activation='sigmoid'),
])
baseline_modeler(model,
                 metrics=metrics,
                 optimizer='SGD',
                 num_epochs=200,
                 early_stopping=early_stopping)

In [None]:
# Depth experiment: a fourth Conv2D/MaxPooling2D block (second 128-filter block),
# with 50% dropout before the output layer.
model = Sequential([
    Rescaling(scale=1./255, input_shape=input_shape),
    *[layer
      for n_filters in (32, 64, 128, 128)
      for layer in (Conv2D(n_filters, kernel_size=(3, 3), activation='relu',
                           kernel_initializer='he_normal'),
                    MaxPooling2D(pool_size=(2, 2)))],
    Flatten(),
    Dense(units=128, activation='relu', kernel_initializer='he_normal'),
    Dropout(rate=0.5),
    Dense(units=1, activation='sigmoid'),
])
baseline_modeler(model,
                 metrics=metrics,
                 optimizer='SGD',
                 num_epochs=200,
                 early_stopping=early_stopping)

In [None]:
# Depth experiment with extra regularization: a fourth Conv2D/MaxPooling2D
# block followed by 40% dropout, plus 50% dropout before the output layer.
model = Sequential([
    Rescaling(scale=1./255, input_shape=input_shape),
    *[layer
      for n_filters in (32, 64, 128, 128)
      for layer in (Conv2D(n_filters, kernel_size=(3, 3), activation='relu',
                           kernel_initializer='he_normal'),
                    MaxPooling2D(pool_size=(2, 2)))],
    Dropout(rate=0.4),  # applied once, after the final pooling layer
    Flatten(),
    Dense(units=128, activation='relu', kernel_initializer='he_normal'),
    Dropout(rate=0.5),
    Dense(units=1, activation='sigmoid'),
])
baseline_modeler(model,
                 metrics=metrics,
                 optimizer='SGD',
                 num_epochs=200,
                 early_stopping=early_stopping)

In [None]:
# Classifier-head experiment: a second hidden dense layer (64 units), with 50%
# dropout after each hidden dense layer.
model = Sequential([
    Rescaling(scale=1./255, input_shape=input_shape),
    *[layer
      for n_filters in (32, 64, 128)
      for layer in (Conv2D(n_filters, kernel_size=(3, 3), activation='relu',
                           kernel_initializer='he_normal'),
                    MaxPooling2D(pool_size=(2, 2)))],
    Flatten(),
    *[layer
      for n_units in (128, 64)
      for layer in (Dense(units=n_units, activation='relu',
                          kernel_initializer='he_normal'),
                    Dropout(rate=0.5))],
    Dense(units=1, activation='sigmoid'),
])
baseline_modeler(model,
                 metrics=metrics,
                 optimizer='SGD',
                 num_epochs=200,
                 early_stopping=early_stopping)