In [1]:
# vgg19_transfer_classifier.py
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import os
from pathlib import Path

In [2]:
# --- Config ---

# Root folder holding the transformed train/validation/test splits.
# Set the XRAY_DATA_ROOT environment variable to run on another machine;
# the default is the original local artifact folder.
DATA_ROOT = os.environ.get(
    "XRAY_DATA_ROOT",
    r"E:\AI and ML\AI and ML projects\COVID_19_Chest_X_ray_Classifier\Artifacts\10_13_2025_14_38\data_transformation\transformed",
)
TRAIN_DIR = os.path.join(DATA_ROOT, "train")
VAL_DIR   = os.path.join(DATA_ROOT, "validation")
TEST_DIR  = os.path.join(DATA_ROOT, "test")

IMG_SIZE = (224, 224)   # (height, width) every image is resized to
BATCH_SIZE = 16
SEED = 42
CLASS_NAMES = ["COVID", "Normal", "Viral Pneumonia"]
NUM_CLASSES = len(CLASS_NAMES)   # derived so the two can never drift apart
EPOCHS = 12
LEARNING_RATE = 1e-4
MODEL_SAVE_PATH = "vgg19_finetuned_classifier.h5"

AUTOTUNE = tf.data.AUTOTUNE

In [17]:
# --- Data loading (tf.data) ---
def create_datasets(train_dir, val_dir, test_dir, img_size=IMG_SIZE, batch_size=BATCH_SIZE, seed=SEED):
    """Load the train/validation/test folders as batched, prefetched tf.data datasets.

    Each directory must contain one sub-folder per class, e.g.::

        train/
            COVID/
            Normal/
            Viral Pneumonia/

    Labels are inferred from the sub-folder names (sorted, so here
    'COVID' -> 0, 'Normal' -> 1, 'Viral Pneumonia' -> 2) and one-hot encoded
    (``label_mode="categorical"``), which is the format expected by a softmax
    output trained with categorical cross-entropy. Every image is resized to
    ``img_size``.

    Args:
        train_dir: Folder with the training images (shuffled on load).
        val_dir: Folder with the validation images (kept in directory order).
        test_dir: Folder with the test images (kept in directory order).
        img_size: Target ``(height, width)`` for every image.
        batch_size: Number of images per batch.
        seed: Seed forwarded to the loader for reproducible shuffling.

    Returns:
        ``(train_ds, val_ds, test_ds)``. Each dataset carries a ``class_names``
        attribute listing the class folder names in label-index order.
    """

    def _load_split(directory, shuffle):
        # One place for the loader arguments so the three splits cannot drift apart.
        return tf.keras.preprocessing.image_dataset_from_directory(
            directory,
            labels="inferred",
            label_mode="categorical",
            batch_size=batch_size,
            image_size=img_size,
            shuffle=shuffle,
            seed=seed,
        )

    def _with_prefetch(raw_ds):
        # Prefetch prepares the next batch while the current one is being consumed.
        # prefetch() returns a new dataset object that does not have the
        # `class_names` attribute set by the loader, so copy it across.
        prefetched = raw_ds.prefetch(buffer_size=AUTOTUNE)
        prefetched.class_names = raw_ds.class_names
        return prefetched

    train_raw = _load_split(train_dir, shuffle=True)
    print("Dataset class names (from directory):", train_raw.class_names)

    val_raw = _load_split(val_dir, shuffle=False)
    test_raw = _load_split(test_dir, shuffle=False)

    return _with_prefetch(train_raw), _with_prefetch(val_raw), _with_prefetch(test_raw)

train_ds, val_ds, test_ds = create_datasets(TRAIN_DIR, VAL_DIR, TEST_DIR)

Found 10564 files belonging to 3 classes.
Dataset class names (from directory): ['COVID', 'Normal', 'Viral Pneumonia']
Found 2262 files belonging to 3 classes.
Found 2267 files belonging to 3 classes.


In [None]:
# vgg19_transfer_classifier.py
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import os
from pathlib import Path

# --- Config ---
# NOTE(review): running this cell replaces the TRAIN_DIR / VAL_DIR / TEST_DIR
# values set by the earlier config cell with the placeholder paths below.
DATA_DIR = Path("/path/to/data")   # <-- change to your data root
TRAIN_DIR = DATA_DIR.joinpath("train")
VAL_DIR = DATA_DIR.joinpath("val")
TEST_DIR = DATA_DIR.joinpath("test")

# Input pipeline settings
IMG_SIZE = (224, 224)
BATCH_SIZE = 16
SEED = 42

# Label space (order must match the sorted class folder names)
CLASS_NAMES = ["COVID", "Normal", "Viral Pneumonia"]
NUM_CLASSES = len(CLASS_NAMES)

# Training settings
EPOCHS = 12
LEARNING_RATE = 1e-4
MODEL_SAVE_PATH = "vgg19_finetuned_classifier.h5"

AUTOTUNE = tf.data.AUTOTUNE

# --- Data loading (tf.data) ---
def create_datasets(train_dir, val_dir, test_dir,
                    img_size=IMG_SIZE, batch_size=BATCH_SIZE, seed=SEED):
    """Load the train/validation/test folders as batched, prefetched tf.data datasets.

    Each directory must contain one sub-folder per class; labels are inferred
    from the sub-folder names and one-hot encoded (``label_mode="categorical"``).
    Only the training split is shuffled.

    Args:
        train_dir: Folder with the training images.
        val_dir: Folder with the validation images.
        test_dir: Folder with the test images.
        img_size: Target ``(height, width)`` every image is resized to.
        batch_size: Number of images per batch.
        seed: Seed forwarded to the loader for reproducible shuffling.

    Returns:
        ``(train_ds, val_ds, test_ds)``. Each dataset carries a ``class_names``
        attribute listing the class folder names in label-index order.
    """

    def _load_split(directory, shuffle):
        # Shared loader arguments for all three splits.
        return tf.keras.preprocessing.image_dataset_from_directory(
            directory,
            labels="inferred",
            label_mode="categorical",
            batch_size=batch_size,
            image_size=img_size,
            shuffle=shuffle,
            seed=seed,
        )

    def _with_prefetch(raw_ds):
        # prefetch() returns a new dataset object without the loader's
        # `class_names` attribute; copy it so callers can still read it.
        prefetched = raw_ds.prefetch(buffer_size=AUTOTUNE)
        prefetched.class_names = raw_ds.class_names
        return prefetched

    train_raw = _load_split(train_dir, shuffle=True)
    val_raw = _load_split(val_dir, shuffle=False)
    test_raw = _load_split(test_dir, shuffle=False)

    return _with_prefetch(train_raw), _with_prefetch(val_raw), _with_prefetch(test_raw)

train_ds, val_ds, test_ds = create_datasets(TRAIN_DIR, VAL_DIR, TEST_DIR)

# Check class names (make sure they match CLASS_NAMES, in the same order)
print("Dataset class names (from directory):", train_ds.class_names)

# --- Data augmentation (on-the-fly) ---
# Random, label-preserving perturbations; these layers are used as the first
# stage of the model built in build_model().
_augmentation_layers = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.06),
    layers.RandomZoom(0.08),
    layers.RandomContrast(0.08),
]
data_augmentation = keras.Sequential(_augmentation_layers, name="data_augmentation")

# --- Preprocessing function for VGG19 ---
# Alias for the VGG19 input preprocessing that ships with Keras.
preprocess_input = tf.keras.applications.vgg19.preprocess_input

# --- Build model: VGG19 backbone (transfer learning) ---
def build_model(img_size=IMG_SIZE, num_classes=NUM_CLASSES, train_backbone=False, dropout_rate=0.4):
    """Build the VGG19 transfer-learning classifier.

    Pipeline: input -> data_augmentation -> VGG19 preprocessing -> VGG19
    convolutional base (ImageNet weights, no top) -> global average pooling ->
    Dense(512, relu) -> BatchNormalization -> Dropout -> softmax over
    ``num_classes``.

    Because augmentation and preprocessing are part of the model, callers feed
    it raw RGB pixel arrays and must NOT apply ``preprocess_input`` themselves.

    Args:
        img_size: ``(height, width)`` of the input images.
        num_classes: Number of output classes.
        train_backbone: Value assigned to the VGG19 base's ``trainable`` flag.
        dropout_rate: Dropout rate applied before the output layer.

    Returns:
        An uncompiled ``keras.Model`` named ``"vgg19_transfer"``.
    """
    inputs = layers.Input(shape=(*img_size, 3))
    x = data_augmentation(inputs)                     # augmentation only during training
    # Pass the function itself rather than wrapping it in a Python lambda:
    # the layer is then saved by function reference instead of as marshalled
    # lambda bytecode.
    x = layers.Lambda(preprocess_input, name="vgg19_preprocess")(x)

    # Load pretrained VGG19 without top classifier.
    # NOTE(review): with `input_tensor=x` the VGG19 layers (block1_conv1 ...
    # block5_pool) appear to be attached directly to this graph rather than as
    # one nested "vgg19" layer -- confirm with model.summary().
    base_model = tf.keras.applications.VGG19(
        include_top=False,
        weights="imagenet",
        input_tensor=x
    )

    # Freeze or unfreeze backbone
    base_model.trainable = train_backbone

    # Classification head
    x = base_model.output
    x = layers.GlobalAveragePooling2D(name="gap")(x)
    x = layers.Dense(512, activation="relu")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout_rate)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)

    model = keras.Model(inputs=inputs, outputs=outputs, name="vgg19_transfer")
    return model

# Create model — freeze backbone initially
model = build_model(train_backbone=False)
model.summary()

# --- Compile ---
# Stage 1 trains only the new head; one-hot labels pair with categorical cross-entropy.
optimizer = keras.optimizers.Adam(learning_rate=LEARNING_RATE)
model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"])

# --- Handle class imbalance (optional): compute class weights from train directory counts ---
def compute_class_weights_from_directory(
    train_dir,
    class_names,
    image_extensions=(".bmp", ".gif", ".jpeg", ".jpg", ".png"),
):
    """Compute inverse-frequency class weights from per-class image counts.

    Only files whose name ends with one of ``image_extensions``
    (case-insensitive) are counted, and class folders are searched
    recursively. This mirrors what ``image_dataset_from_directory`` loads, so
    stray files such as ``Thumbs.db`` do not skew the weights.

    Args:
        train_dir: Training root containing one sub-folder per class.
        class_names: Class folder names, in label-index order.
        image_extensions: File suffixes that count as images.

    Returns:
        Dict mapping class index -> ``total / (n_classes * count)``. A class
        with zero images is treated as having one, to avoid division by zero.
    """
    suffixes = tuple(ext.lower() for ext in image_extensions)
    counts = []
    for cname in class_names:
        class_dir = Path(train_dir) / cname
        n_images = sum(
            1
            for entry in class_dir.rglob("*")
            if entry.is_file() and entry.name.lower().endswith(suffixes)
        )
        counts.append(n_images)

    total = sum(counts)
    n_classes = len(counts)
    # inverse frequency weighting (Keras expects dictionary mapping class_index -> weight)
    class_weights = {
        idx: total / (n_classes * max(1, count))
        for idx, count in enumerate(counts)
    }
    print("Class counts:", dict(zip(class_names, counts)))
    print("Class weights:", class_weights)
    return class_weights

class_weights = compute_class_weights_from_directory(TRAIN_DIR, CLASS_NAMES)

# --- Callbacks ---
# Keep only the checkpoint with the highest validation accuracy.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath="vgg19_best.h5",
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
)
# Stop after 4 epochs without a validation-accuracy improvement and roll back to the best weights.
earlystop_cb = keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    patience=4,
    restore_best_weights=True,
)
# Halve the learning rate after 3 epochs without a validation-loss improvement.
reduce_lr_cb = keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,
    patience=3,
    min_lr=1e-7,
)

# --- Train (stage 1: train top layers) ---
stage1_callbacks = [checkpoint_cb, reduce_lr_cb, earlystop_cb]
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    callbacks=stage1_callbacks,
    class_weight=class_weights,
)

# --- Optional: fine-tune some of the VGG layers (stage 2) ---
# Unfreeze last convolutional blocks and fine-tune with a lower LR
# The model is built as Input -> data_augmentation -> preprocessing Lambda -> VGG19,
# so `model.get_layer(index=2)` is the preprocessing layer, not the backbone.
# The VGG19 layers are therefore located by their "blockN_..." names instead,
# whether they sit directly in `model.layers` or inside a nested sub-model.
def _set_vgg_block_trainability(container, trainable_prefixes=("block4", "block5")):
    """Unfreeze VGG blocks whose name starts with `trainable_prefixes`, freeze the other blocks.

    Only layers named ``block*`` are touched, so the classification head keeps
    its current trainable state. Returns the number of layers left trainable.
    """
    n_unfrozen = 0
    for layer in container.layers:
        sublayers = getattr(layer, "layers", None)
        if sublayers:
            if any(sub.name.startswith("block") for sub in sublayers):
                # Enable the container first: assigning its flag propagates to
                # the children, so per-layer flags must be set afterwards.
                layer.trainable = True
                n_unfrozen += _set_vgg_block_trainability(layer, trainable_prefixes)
        elif layer.name.startswith("block"):
            layer.trainable = layer.name.startswith(trainable_prefixes)
            n_unfrozen += int(layer.trainable)
    return n_unfrozen

fine_tune_epochs = 6

if _set_vgg_block_trainability(model) == 0:
    # Best-effort: leave the stage-1 model untouched when no backbone layers are found.
    print("Could not fine-tune backbone automatically:",
          "no VGG19 block4/block5 layers found in the model")
else:
    # Re-compile with lower LR (required for the trainable changes to take effect)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=1e-5),
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )

    # `history.epoch` holds 0-based epoch indices, so its length is the number
    # of epochs actually run (early stopping may have ended stage 1 early).
    initial_epoch = len(history.epoch)
    # `epochs` is the index of the final epoch, not a count of additional epochs.
    total_epochs = initial_epoch + fine_tune_epochs

    history_fine = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=total_epochs,
        initial_epoch=initial_epoch,
        callbacks=[checkpoint_cb, reduce_lr_cb, earlystop_cb],
        class_weight=class_weights
    )

# --- Evaluate on test set ---
# evaluate() returns the loss followed by the compiled metrics (here: accuracy).
eval_results = model.evaluate(test_ds)
test_loss, test_acc = eval_results
print(f"Test accuracy: {test_acc:.4f}")

# --- Save final model ---
# Persist the full model (architecture + weights) to MODEL_SAVE_PATH.
model.save(MODEL_SAVE_PATH)
print("Saved model to:", MODEL_SAVE_PATH)

# --- Inference helper: classify a single image path ---
from tensorflow.keras.preprocessing import image

def predict_image(model, img_path, img_size=IMG_SIZE, class_names=CLASS_NAMES,
                  apply_preprocessing=False):
    """Classify a single image file and return the prediction with per-class probabilities.

    The model produced by ``build_model`` already contains the VGG19
    preprocessing layer, so raw RGB pixel values are fed by default; applying
    ``preprocess_input`` here as well would preprocess the image twice.

    Args:
        model: Trained Keras model.
        img_path: Path to the image file.
        img_size: ``(height, width)`` the image is resized to on load.
        class_names: Class labels in the model's output-index order.
        apply_preprocessing: Set to True only for a model that does NOT
            include VGG19 preprocessing in its own graph.

    Returns:
        Dict with ``predicted_class``, ``predicted_index`` and ``probabilities``
        (class name -> probability).
    """
    img = image.load_img(img_path, target_size=img_size)
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)   # add the batch dimension
    if apply_preprocessing:
        x = preprocess_input(x)
    preds = model.predict(x)[0]
    top_idx = int(np.argmax(preds))
    return {
        "predicted_class": class_names[top_idx],
        "predicted_index": top_idx,
        "probabilities": {class_names[i]: float(preds[i]) for i in range(len(class_names))}
    }

# Example usage:
# result = predict_image(model, "/path/to/new_image.jpg")
# print(result)


In [18]:
# Sample input: the integers 1 through 5.
l = list(range(1, 6))

In [19]:
def squar(x):
    """Return ``x`` raised to the power of two."""
    return pow(x, 2)

    

In [22]:
# Square every element of `l`, keeping the original order.
l_s = [squar(value) for value in l]

In [23]:
# Display the squared values (last expression of the cell is rendered as output).
l_s

[1, 4, 9, 16, 25]

In [24]:
def grater(x):
    """Return True when ``x`` is strictly greater than 10."""
    return x > 10

In [25]:
# filter() returns a lazy, single-use iterator; nothing is evaluated until it is consumed.
a_b_t = filter(grater, l_s)

In [27]:
# Materialize the filtered values. This exhausts `a_b_t`, so re-running this
# cell without re-creating the iterator yields an empty list.
list(a_b_t)

[16, 25]

In [31]:
# Lists have no `.filter` method; use the built-in filter(function, iterable).
list(filter(grater, l_s))

[16, 25]