In [2]:
import os
import tensorflow as tf
import numpy as np
import cv2
import glob
from tqdm import tqdm
from sklearn.metrics import classification_report, confusion_matrix
from google.colab import drive
import matplotlib.pyplot as plt
import seaborn as sns

# --- 1. SETUP AND MOUNT GOOGLE DRIVE ---
drive.mount('/content/drive')
print("Google Drive mounted successfully. ✅")

BASE_PATH = '/content/drive/My Drive/Spoof_data'
ORIGINAL_TRAIN_DIR = os.path.join(BASE_PATH, 'train')
ORIGINAL_TEST_DIR = os.path.join(BASE_PATH, 'test')

IMG_WIDTH, IMG_HEIGHT = 128, 128
CHANNELS = 3
BATCH_SIZE = 32
EPOCHS = 5 # REDUCED EPOCHS from 10 to 5 for faster execution.
CLASS_NAMES = ['live', 'spoof']

if not os.path.exists(ORIGINAL_TRAIN_DIR) or not os.path.exists(ORIGINAL_TEST_DIR):
    raise FileNotFoundError(f"Check your Google Drive path. Train or Test folder missing in {BASE_PATH}")

# --- 2. IMAGE PREPROCESSING FUNCTIONS ---
HE_SUFFIX = '_he'
BLUR_SUFFIX = '_blur'
SHARPEN_SUFFIX = '_sharpen'
DATASETS = {}

def ensure_directory_exists(path):
    if not os.path.exists(path):
        os.makedirs(path)

def process_image(input_path, output_path, technique_name):
    img = cv2.imread(input_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        return
    processed_img = img.copy()

    if technique_name == "HE":
        processed_img = cv2.equalizeHist(img)
    elif technique_name == "BLUR":
        processed_img = cv2.GaussianBlur(img, (5, 5), 0)
    elif technique_name == "SHARPEN":
        kernel = np.array([[-1, -1, -1],
                           [-1,  9, -1],
                           [-1, -1, -1]])
        processed_img = cv2.filter2D(img, -1, kernel)

    processed_img_3channel = cv2.cvtColor(processed_img, cv2.COLOR_GRAY2BGR)
    cv2.imwrite(output_path, processed_img_3channel)

def run_preprocessing_pipeline(base_input_dir, base_output_dir, technique, technique_name):
    print(f"\n--- Starting {technique_name} Processing for {os.path.basename(base_input_dir)} data ---")
    live_input = os.path.join(base_input_dir, "live")
    spoof_input = os.path.join(base_input_dir, "spoof")
    live_output = os.path.join(base_output_dir, "live")
    spoof_output = os.path.join(base_output_dir, "spoof")
    ensure_directory_exists(live_output)
    ensure_directory_exists(spoof_output)

    image_paths = []
    for ext in ['jpg','jpeg','png','bmp']:
        image_paths.extend(glob.glob(os.path.join(live_input, f"*.{ext}")))
        image_paths.extend(glob.glob(os.path.join(spoof_input, f"*.{ext}")))

    for input_path in tqdm(image_paths, desc=f"Applying {technique_name}"):
        class_folder = os.path.basename(os.path.dirname(input_path))
        filename = os.path.basename(input_path)
        output_folder = live_output if class_folder=="live" else spoof_output
        output_path = os.path.join(output_folder, filename)
        process_image(input_path, output_path, technique)
    print(f"Successfully processed and saved files to {base_output_dir}")

# --- 3. CNN MODEL ---
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def create_spoof_detection_cnn(input_shape, final_dropout=0.5):
    model = Sequential([
        Conv2D(32,(3,3),activation='relu',input_shape=input_shape,padding='same'),
        Conv2D(32,(3,3),activation='relu',padding='same'),
        BatchNormalization(),
        MaxPooling2D((2,2)),
        Dropout(0.25),

        Conv2D(64,(3,3),activation='relu',padding='same'),
        Conv2D(64,(3,3),activation='relu',padding='same'),
        BatchNormalization(),
        MaxPooling2D((2,2)),
        Dropout(0.25),

        Flatten(),
        Dense(512,activation='relu'),
        BatchNormalization(),
        Dropout(final_dropout),
        Dense(1,activation='sigmoid')
    ])
    return model



# --- 5. TRAIN AND EVALUATE ---
def train_and_evaluate_model(run_name, train_dir, test_dir, learning_rate, dropout_rate):
    """Trains and evaluates the model, returning all necessary data for later visualization."""
    tf.keras.backend.clear_session()
    print(f"\n=== RUN: {run_name} | LR={learning_rate:.1e}, Dropout={dropout_rate:.2f} ===")

    train_datagen = ImageDataGenerator(
        rescale=1./255, rotation_range=20, width_shift_range=0.1,
        height_shift_range=0.1, horizontal_flip=True
    )
    test_datagen = ImageDataGenerator(rescale=1./255)

    train_generator = train_datagen.flow_from_directory(
        train_dir, target_size=(IMG_WIDTH,IMG_HEIGHT), batch_size=BATCH_SIZE,
        class_mode='binary', color_mode='rgb', classes=CLASS_NAMES, shuffle=True
    )
    test_generator = test_datagen.flow_from_directory(
        test_dir, target_size=(IMG_WIDTH,IMG_HEIGHT), batch_size=BATCH_SIZE,
        class_mode='binary', color_mode='rgb', classes=CLASS_NAMES, shuffle=False
    )

    input_shape = (IMG_WIDTH, IMG_HEIGHT, CHANNELS)
    model = create_spoof_detection_cnn(input_shape, final_dropout=dropout_rate)
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy', metrics=['accuracy'])

    STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
    STEP_SIZE_TEST = test_generator.n // test_generator.batch_size

    history = model.fit(
        train_generator,
        steps_per_epoch=STEP_SIZE_TRAIN,
        epochs=EPOCHS,
        validation_data=test_generator,
        validation_steps=STEP_SIZE_TEST+1,
        verbose=1
    )

    loss, accuracy = model.evaluate(test_generator, steps=STEP_SIZE_TEST+1, verbose=0)
    predictions = model.predict(test_generator, verbose=0)
    y_pred_classes = (predictions>0.5).astype(int).flatten()
    y_true = test_generator.classes
    if len(y_true)>len(y_pred_classes): y_true = y_true[:len(y_pred_classes)]

    print(f"\nTest Loss: {loss:.4f} | Test Accuracy: {accuracy*100:.2f}%")

    # NOTE: Visualization is now called only for the best model in the main block (section 6)
    return accuracy, history, y_true, y_pred_classes

# --- 6. MAIN EXECUTION ---
if __name__ == "__main__":
    preprocessing_configs = [("HE", HE_SUFFIX), ("BLUR", BLUR_SUFFIX), ("SHARPEN", SHARPEN_SUFFIX)]

    # Generate processed datasets
    for technique, suffix in preprocessing_configs:
        train_out = os.path.join(BASE_PATH,f'train{suffix}')
        run_preprocessing_pipeline(ORIGINAL_TRAIN_DIR, train_out, technique, technique)
        DATASETS[f'{technique}_train'] = train_out

        test_out = os.path.join(BASE_PATH,f'test{suffix}')
        run_preprocessing_pipeline(ORIGINAL_TEST_DIR, test_out, technique, technique)
        DATASETS[f'{technique}_test'] = test_out

    DATASETS['Original'] = (ORIGINAL_TRAIN_DIR, ORIGINAL_TEST_DIR)

    # REDUCED GRID: 4 combinations (4 datasets * 4 combinations = 16 total runs)
    tuning_grid = {
        'learning_rates':[1e-3, 1e-4], # Reduced from 3 to 2
        'dropout_rates':[0.4, 0.6]     # Reduced from 3 to 2
    }
    results = []

    for dataset_name in ['Original','HE','Blur','Sharpen']:
        if dataset_name=='Original':
            TRAIN_DIR, TEST_DIR = ORIGINAL_TRAIN_DIR, ORIGINAL_TEST_DIR
        else:
            suffix = {'HE':HE_SUFFIX,'Blur':BLUR_SUFFIX,'Sharpen':SHARPEN_SUFFIX}[dataset_name]
            TRAIN_DIR = os.path.join(BASE_PATH,f'train{suffix}')
            TEST_DIR = os.path.join(BASE_PATH,f'test{suffix}')

        for lr in tuning_grid['learning_rates']:
            for dr in tuning_grid['dropout_rates']:
                run_name = f"{dataset_name}_LR={lr:.1e}_DO={dr:.2f}"

                # Call train function and capture all data
                acc, history, y_true, y_pred_classes = train_and_evaluate_model(
                    run_name, TRAIN_DIR, TEST_DIR, lr, dr
                )

                results.append({
                    'dataset': dataset_name,
                    'lr': lr,
                    'dropout': dr,
                    'accuracy': acc,
                    'run_name': run_name,
                    'history': history,
                    'y_true': y_true,
                    'y_pred_classes': y_pred_classes
                })

    # --- Summary ---
    print("\n\n=== FINAL COMBINED TUNING & PREPROCESSING SUMMARY ===")
    sorted_results = sorted(results, key=lambda x:x['accuracy'], reverse=True)
    print("Rank | Dataset   | LR     | Dropout | Test Accuracy")
    print("-"*65)

    for i,r in enumerate(sorted_results):
        print(f"{i+1:<4} | {r['dataset']:<9} | {r['lr']:.1e} | {r['dropout']:.2f} | {r['accuracy']*100:.2f}%")

    # --- VISUALIZE THE BEST MODEL (Rank 1) ---
    best_run = sorted_results[0]
    print(f"\n\n--- VISUALIZING BEST MODEL: {best_run['run_name']} ---")
    visualize_results(
        best_run['history'],
        best_run['y_true'],
        best_run['y_pred_classes'],
        best_run['run_name']
    )
    print("\nTraining and analysis complete. The visualization above shows the detailed performance of the best combination of preprocessing technique and hyperparameters.")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Google Drive mounted successfully. ✅

--- Starting HE Processing for train data ---


Applying HE: 100%|██████████| 407/407 [00:11<00:00, 34.51it/s]


Successfully processed and saved files to /content/drive/My Drive/Spoof_data/train_he

--- Starting HE Processing for test data ---


Applying HE: 100%|██████████| 400/400 [00:11<00:00, 34.05it/s]


Successfully processed and saved files to /content/drive/My Drive/Spoof_data/test_he

--- Starting BLUR Processing for train data ---


Applying BLUR: 100%|██████████| 407/407 [00:10<00:00, 40.17it/s]


Successfully processed and saved files to /content/drive/My Drive/Spoof_data/train_blur

--- Starting BLUR Processing for test data ---


Applying BLUR: 100%|██████████| 400/400 [00:10<00:00, 38.01it/s]


Successfully processed and saved files to /content/drive/My Drive/Spoof_data/test_blur

--- Starting SHARPEN Processing for train data ---


Applying SHARPEN: 100%|██████████| 407/407 [00:11<00:00, 34.01it/s]


Successfully processed and saved files to /content/drive/My Drive/Spoof_data/train_sharpen

--- Starting SHARPEN Processing for test data ---


Applying SHARPEN: 100%|██████████| 400/400 [00:11<00:00, 33.50it/s]


Successfully processed and saved files to /content/drive/My Drive/Spoof_data/test_sharpen

=== RUN: Original_LR=1.0e-03_DO=0.40 | LR=1.0e-03, Dropout=0.40 ===
Found 407 images belonging to 2 classes.
Found 400 images belonging to 2 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/5
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 1s/step - accuracy: 0.6906 - loss: 0.8760 - val_accuracy: 0.5000 - val_loss: 1.6294
Epoch 2/5
[1m 1/12[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 63ms/step - accuracy: 0.7500 - loss: 0.4500



[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 248ms/step - accuracy: 0.7500 - loss: 0.4500 - val_accuracy: 0.5000 - val_loss: 1.5991
Epoch 3/5
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 640ms/step - accuracy: 0.8476 - loss: 0.3436 - val_accuracy: 0.5000 - val_loss: 1.7536
Epoch 4/5
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 327ms/step - accuracy: 0.9062 - loss: 0.2911 - val_accuracy: 0.5000 - val_loss: 1.7869
Epoch 5/5
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 616ms/step - accuracy: 0.9048 - loss: 0.2350 - val_accuracy: 0.5000 - val_loss: 1.7041

Test Loss: 1.7041 | Test Accuracy: 50.00%

=== RUN: Original_LR=1.0e-03_DO=0.60 | LR=1.0e-03, Dropout=0.60 ===
Found 407 images belonging to 2 classes.
Found 400 images belonging to 2 classes.
Epoch 1/5
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 1s/step - accuracy: 0.6

NameError: name 'visualize_results' is not defined