In [1]:
# --- CELL 1: IMPORTS & CONFIG ---
import tensorflow as tf
from tensorflow.keras import layers, models, applications, callbacks, optimizers
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import os
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report

# --- CONFIG ---
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS = 30
PATIENCE = 3
LEARNING_RATE = 1e-4
FIG_DIR = "figures"
os.makedirs(FIG_DIR, exist_ok=True)


In [2]:
# --- CELL 2: AUGMENTATION & MODEL REGISTRY ---

# Fixed augmentation for training
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomZoom(0.1),
    layers.RandomRotation(0.05),
    layers.RandomBrightness(0.1),
])

# Models to compare
MODEL_REGISTRY = {
    "VGG16": applications.VGG16,
    "ResNet50": applications.ResNet50,
    "InceptionV3": applications.InceptionV3,
    "Xception": applications.Xception,
    "InceptionResNetV2": applications.InceptionResNetV2,
    "NASNetMobile": applications.NASNetMobile,
}


In [3]:
# --- CELL 3: LOAD DATASETS ---

def prepare_dataset(dir_path, augment=False, shuffle=True):
    ds = tf.keras.preprocessing.image_dataset_from_directory(
        dir_path,
        labels="inferred",
        label_mode="binary",
        batch_size=BATCH_SIZE,
        image_size=IMG_SIZE,
        shuffle=shuffle
    )
    ds = ds.map(lambda x, y: (tf.cast(x, tf.float32) / 255.0, y))
    return ds

train_ds = prepare_dataset("train", augment=True, shuffle=True)
val_ds = prepare_dataset("val", augment=False, shuffle=False)
test_ds = prepare_dataset("test", augment=False, shuffle=False)

# Optional: speed up pipeline
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)

print("Datasets loaded.")


Found 1120 files belonging to 2 classes.
Found 229 files belonging to 2 classes.
Found 237 files belonging to 2 classes.
Datasets loaded.


In [4]:
# --- CELL 4: BUILD MODEL ---

def build_model(model_name):
    base_model = MODEL_REGISTRY[model_name](
        include_top=False,
        weights='imagenet',
        input_shape=(*IMG_SIZE, 3),
        pooling='avg'
    )
    base_model.trainable = False

    inputs = layers.Input(shape=(*IMG_SIZE, 3))
    x = data_augmentation(inputs)
    x = base_model(x, training=False)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.4)(x)
    outputs = layers.Dense(1, activation='sigmoid')(x)

    model = models.Model(inputs, outputs)
    model.compile(
        optimizer=optimizers.Adam(learning_rate=LEARNING_RATE),
        loss='binary_crossentropy',
        metrics=[
            'accuracy',
            tf.keras.metrics.Precision(name="precision"),
            tf.keras.metrics.Recall(name="recall"),
            tf.keras.metrics.AUC(name="auc")
        ]
    )
    return model


In [5]:
# --- CELL 5: CALLBACKS & PLOTS ---

def get_callbacks(model_name):
    return [
        callbacks.EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True),
        callbacks.ModelCheckpoint(f"models/{model_name}_best.keras", save_best_only=True),
        callbacks.CSVLogger(f"logs/{model_name}.csv", append=False)
    ]

def plot_metrics(history, model_name):
    df = pd.DataFrame(history.history)
    fig, ax1 = plt.subplots()
    ax2 = ax1.twinx()
    df[['accuracy', 'val_accuracy']].plot(ax=ax1, style=['-','--'])
    df[['loss', 'val_loss']].plot(ax=ax2, style=['-','--'], color=['tab:red','tab:orange'])
    ax1.set_ylabel('Accuracy')
    ax2.set_ylabel('Loss')
    plt.title(f"{model_name} Accuracy & Loss")
    plt.savefig(f"{FIG_DIR}/{model_name}_acc_loss.png")
    plt.close()

def save_confusion_matrix(y_true, y_pred, model_name):
    cm = confusion_matrix(y_true, y_pred)
    disp = ConfusionMatrixDisplay(cm)
    disp.plot(cmap='Blues')
    plt.title(f"{model_name} Confusion Matrix")
    plt.savefig(f"{FIG_DIR}/{model_name}_cm.png")
    plt.close()
    print(classification_report(y_true, y_pred))


In [6]:
# --- CELL 6: RUN MODEL SELECTION ---

def run_model_selection(train_ds, val_ds, test_ds):
    results = []
    for name in MODEL_REGISTRY:
        print(f"\nTraining {name}...")
        model = build_model(name)
        hist = model.fit(
            train_ds,
            validation_data=val_ds,
            epochs=EPOCHS,
            callbacks=get_callbacks(name),
            verbose=2
        )
        plot_metrics(hist, name)

        print("\nEvaluating on test set...")
        y_true = np.concatenate([y for x, y in test_ds], axis=0)
        y_pred_probs = model.predict(test_ds)
        y_pred = (y_pred_probs > 0.5).astype(int).flatten()
        save_confusion_matrix(y_true, y_pred, name)

        scores = model.evaluate(test_ds, verbose=0)
        results.append((name, *scores))

    df = pd.DataFrame(results, columns=["Model", "Loss", "Accuracy", "Precision", "Recall", "AUC"])
    df.to_csv("model_selection_results.csv", index=False)
    print("\nAll model evaluations complete. Summary:")
    print(df.sort_values("Accuracy", ascending=False))


In [7]:
# --- CELL 7: LAUNCH COMPARISON ---

run_model_selection(train_ds, val_ds, test_ds)



Training VGG16...


FileNotFoundError: [Errno 2] No such file or directory: 'logs/VGG16.csv'