In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
import cv2
import pywt
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc

In [2]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc

def load_images_from_folder(folder, target_size=(224, 224), max_images=None):
    """
    Load images from a folder with memory-efficient approach
    
    Args:
        folder (str): Path to image folder
        target_size (tuple): Resize images to this size
        max_images (int, optional): Limit number of images loaded
    
    Returns:
        tuple: Images and corresponding labels
    """
    images = []
    labels = []
    
    # List all image files
    image_files = [f for f in os.listdir(folder) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tif'))]
    
    # Limit images if specified
    if max_images:
        image_files = image_files[:max_images]
    
    for filename in image_files:
        img_path = os.path.join(folder, filename)
        try:
            # Read image
            img = cv2.imread(img_path)
            
            # Check if image was read successfully
            if img is not None:
                # Resize image
                img = cv2.resize(img, target_size)
                
                # Convert BGR to RGB
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                
                # Normalize
                img = img / 255.0
                
                images.append(img)
        except Exception as e:
            print(f"Error processing {img_path}: {e}")
    
    return np.array(images)

def load_casia2_dataset(dataset_path, max_images_per_class=None):
    """
    Load CASIA2 dataset with memory efficiency
    
    Args:
        dataset_path (str): Path to CASIA2 dataset
        max_images_per_class (int, optional): Limit images per class
    
    Returns:
        tuple: Images and labels
    """
    # Load authentic and tampered images
    authentic_images = load_images_from_folder(
        os.path.join(dataset_path, 'Au'), 
        max_images=max_images_per_class
    )
    tampered_images = load_images_from_folder(
        os.path.join(dataset_path, 'Tp'), 
        max_images=max_images_per_class
    )
    
    # Create labels
    authentic_labels = np.zeros(len(authentic_images))
    tampered_labels = np.ones(len(tampered_images))
    
    # Combine images and labels
    images = np.concatenate([authentic_images, tampered_images])
    labels = np.concatenate([authentic_labels, tampered_labels])
    
    return images, labels

def create_hybrid_model(input_shape, learning_rate=0.0001):
    """
    Create hybrid MobileNetV2 model for binary forgery detection
    
    Args:
        input_shape (tuple): Input image shape
        learning_rate (float): Initial learning rate
    
    Returns:
        tensorflow.keras.Model: Compiled hybrid model
    """
    # Base MobileNetV2 model
    base_model = MobileNetV2(
        input_shape=input_shape, 
        include_top=False, 
        weights='imagenet'
    )
    
    # Freeze base model layers
    base_model.trainable = False
    
    # Add custom classification layers
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.3)(x)
    output = Dense(1, activation='sigmoid')(x)
    
    # Create model
    model = Model(inputs=base_model.input, outputs=output)
    
    # Compile model
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy', tf.keras.metrics.AUC()]
    )
    
    return model

def train_model(X_train, X_test, y_train, y_test, input_shape):
    """
    Train and evaluate the model
    
    Args:
        X_train (numpy.ndarray): Training images
        X_test (numpy.ndarray): Test images
        y_train (numpy.ndarray): Training labels
        y_test (numpy.ndarray): Test labels
        input_shape (tuple): Input image shape
    
    Returns:
        dict: Training results and metrics
    """
    # Create model
    model = create_hybrid_model(input_shape)
    
    # Data augmentation
    train_datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True
    )
    
    # Callbacks
    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss', 
        factor=0.2,
        patience=3, 
        min_lr=0.00001
    )
    
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True
    )
    
    # Model checkpoint
    model_checkpoint = ModelCheckpoint(
        'best_casia2_model.keras',
        save_best_only=True, 
        monitor='val_accuracy',
        mode='max'
    )
    
    # Train model
    history = model.fit(
        train_datagen.flow(X_train, y_train, batch_size=16),
        validation_data=(X_test, y_test),
        epochs=20,
        callbacks=[reduce_lr, early_stopping, model_checkpoint]
    )
    
    # Evaluate model
    test_loss, test_accuracy, test_auc = model.evaluate(X_test, y_test)
    
    # Predictions
    y_pred_proba = model.predict(X_test).flatten()
    y_pred = (y_pred_proba > 0.5).astype(int)
    
    # Print classification report
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=['Authentic', 'Tampered']))
    
    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
                xticklabels=['Authentic', 'Tampered'], 
                yticklabels=['Authentic', 'Tampered'])
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.tight_layout()
    plt.savefig('confusion_matrix.png')
    plt.close()
    
    return {
        'test_loss': test_loss,
        'test_accuracy': test_accuracy,
        'test_auc': test_auc
    }

def main():
    # Set random seed
    np.random.seed(42)
    tf.random.set_seed(42)
    
    # Dataset path (MODIFY THIS)
    dataset_path = '/path/to/CASIA2/dataset'
    
    # Load dataset (limit images to prevent memory issues)
    images, labels = load_casia2_dataset(dataset_path, max_images_per_class=500)
    
    # Print dataset info
    print(f"Total images: {len(images)}")
    print(f"Authentic images: {np.sum(labels == 0)}")
    print(f"Tampered images: {np.sum(labels == 1)}")
    
    # Split dataset
    X_train, X_test, y_train, y_test = train_test_split(
        images, labels, test_size=0.2, random_state=42, stratify=labels
    )
    
    # Input shape
    input_shape = (224, 224, 3)
    
    # Train and evaluate
    results = train_model(X_train, X_test, y_train, y_test, input_shape)
    
    # Print results
    print("\nFinal Test Metrics:")
    print(f"Test Loss: {results['test_loss']}")
    print(f"Test Accuracy: {results['test_accuracy']}")
    print(f"Test AUC: {results['test_auc']}")

if __name__ == '__main__':
    main()

In [7]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

def load_images_from_folder(folder, target_size=(224, 224), max_images=None):
    """ Load images from a folder efficiently """
    images = []
    image_files = [f for f in os.listdir(folder) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
    if max_images:
        image_files = image_files[:max_images]
    
    for filename in image_files:
        img_path = os.path.join(folder, filename)
        try:
            img = cv2.imread(img_path)
            if img is not None:
                img = cv2.resize(img, target_size)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img = img / 255.0  # Normalize
                images.append(img)
        except Exception as e:
            print(f"Error processing {img_path}: {e}")
    
    return np.array(images)

def load_casia2_dataset(dataset_path, max_images_per_class=None):
    """ Load CASIA2 dataset """
    authentic_images = load_images_from_folder(os.path.join(dataset_path, 'Au'), max_images=max_images_per_class)
    tampered_images = load_images_from_folder(os.path.join(dataset_path, 'Tp'), max_images=max_images_per_class)
    authentic_labels = np.zeros(len(authentic_images))
    tampered_labels = np.ones(len(tampered_images))
    images = np.concatenate([authentic_images, tampered_images])
    labels = np.concatenate([authentic_labels, tampered_labels])
    return images, labels

def create_optimized_model(input_shape, learning_rate=0.0001):
    """ Create an optimized MobileNetV2 model """
    base_model = MobileNetV2(input_shape=input_shape, include_top=False, weights='imagenet')
    base_model.trainable = False  # Freeze base layers initially
    
    # Custom top layers
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)  # Add Batch Normalization
    x = Dropout(0.5)(x)           # Increase dropout for regularization
    output = Dense(1, activation='sigmoid')(x)
    
    model = Model(inputs=base_model.input, outputs=output)
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy', tf.keras.metrics.AUC()])
    return model

def train_model(X_train, X_test, y_train, y_test, input_shape):
    """ Train the optimized model """
    model = create_optimized_model(input_shape)
    
    # Data Augmentation with more variations
    train_datagen = ImageDataGenerator(
        rotation_range=30,       # Larger rotation
        width_shift_range=0.3,   
        height_shift_range=0.3,
        shear_range=0.3,         # Increased transformations
        zoom_range=0.3,
        horizontal_flip=True,
        vertical_flip=True,      # Include vertical flip
        fill_mode='nearest'
    )
    
    # Callbacks
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6)
    early_stopping = EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True)
    model_checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_accuracy', mode='max')
    
    # Train model
    history = model.fit(
        train_datagen.flow(X_train, y_train, batch_size=32),  # Increase batch size
        validation_data=(X_test, y_test),
        epochs=30,  # Train for longer
        callbacks=[reduce_lr, early_stopping, model_checkpoint]
    )
    
    # Fine-tuning: Unfreeze base model and train
    model.trainable = True
    model.compile(optimizer=Adam(learning_rate=1e-5),  # Lower learning rate
                  loss='binary_crossentropy',
                  metrics=['accuracy', tf.keras.metrics.AUC()])
    history_fine = model.fit(
        train_datagen.flow(X_train, y_train, batch_size=32),
        validation_data=(X_test, y_test),
        epochs=10,
        callbacks=[reduce_lr, early_stopping]
    )
    
    return model

def main():
    # Set random seeds for reproducibility
    np.random.seed(42)
    tf.random.set_seed(42)
    
    # Dataset path (MODIFY THIS)
    dataset_path = './CASIA2/'
    
    # Load dataset
    images, labels = load_casia2_dataset(dataset_path, max_images_per_class=1000)
    X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42, stratify=labels)
    
    # Train and fine-tune the model
    input_shape = (224, 224, 3)
    model = train_model(X_train, X_test, y_train, y_test, input_shape)
    
    # Evaluate model
    y_pred_proba = model.predict(X_test).flatten()
    y_pred = (y_pred_proba > 0.5).astype(int)
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=['Authentic', 'Tampered']))

if __name__ == '__main__':
    main()


Epoch 1/30


  self._warn_if_super_not_called()


[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 691ms/step - accuracy: 0.4789 - auc_3: 0.4961 - loss: 1.0406 - val_accuracy: 0.5925 - val_auc_3: 0.6262 - val_loss: 0.6751 - learning_rate: 1.0000e-04
Epoch 2/30
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 764ms/step - accuracy: 0.5658 - auc_3: 0.5923 - loss: 0.8671 - val_accuracy: 0.6300 - val_auc_3: 0.6849 - val_loss: 0.6447 - learning_rate: 1.0000e-04
Epoch 3/30
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 654ms/step - accuracy: 0.6306 - auc_3: 0.6810 - loss: 0.7494 - val_accuracy: 0.6500 - val_auc_3: 0.7173 - val_loss: 0.6227 - learning_rate: 1.0000e-04
Epoch 4/30
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 631ms/step - accuracy: 0.6439 - auc_3: 0.6936 - loss: 0.7544 - val_accuracy: 0.6575 - val_auc_3: 0.7163 - val_loss: 0.6447 - learning_rate: 1.0000e-04
Epoch 5/30
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 751ms/step - accuracy: 0.66

In [13]:
import os
import numpy as np
import cv2
from PIL import Image, ImageChops, ImageEnhance
import pywt
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input, Conv2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from sklearn.metrics import classification_report, f1_score

def generate_ela_image(image_path, quality=90):
    """
    Generate ELA image to highlight tampering artifacts.
    
    Args:
        image_path (str): Path to the original image.
        quality (int): Quality level for compression (JPEG).

    Returns:
        np.ndarray: ELA image as uint8.
    """
    original = Image.open(image_path).convert('RGB')
    # Save as JPEG with specific quality
    temp_path = 'temp_compressed.jpg'
    original.save(temp_path, 'JPEG', quality=quality)
    
    # Reload compressed image
    compressed = Image.open(temp_path)
    
    # Calculate the difference
    ela_image = ImageChops.difference(original, compressed)
    
    # Enhance the difference for better visibility
    extrema = ela_image.getextrema()
    max_diff = max([ex[1] for ex in extrema]) or 1
    scale = 255.0 / max_diff
    ela_image = ImageEnhance.Brightness(ela_image).enhance(scale)
    
    # Convert to numpy and ensure uint8
    return np.array(ela_image).astype(np.uint8)


def extract_wavelet_noise(image_array):
    """
    Extract noise using Discrete Wavelet Transform (DWT).
    
    Args:
        image_array (np.ndarray): Input image as a numpy array.

    Returns:
        np.ndarray: Reconstructed noise image as uint8.
    """
    # Convert to grayscale
    gray = cv2.cvtColor(image_array, cv2.COLOR_RGB2GRAY)
    gray = gray / 255.0  # Normalize to [0, 1]

    # Perform 2D wavelet transform
    coeffs = pywt.wavedec2(gray, 'haar', level=2)
    
    # Zero out approximation coefficients to keep only noise
    coeffs_H = list(coeffs)
    coeffs_H[0] = np.zeros_like(coeffs_H[0])
    
    # Reconstruct image from modified coefficients
    noise = pywt.waverec2(coeffs_H, 'haar')
    
    # Clip and rescale to uint8
    noise = np.clip(noise * 255.0, 0, 255).astype(np.uint8)
    return noise


def preprocess_image(image_path, target_size=(224, 224)):
    """
    Preprocess image by combining ELA and wavelet noise.
    
    Args:
        image_path (str): Path to the image.
        target_size (tuple): Resize target dimensions.
    
    Returns:
        np.ndarray: Combined image with ELA and Wavelet Noise as 3 channels.
    """
    # Read and resize image
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, target_size).astype(np.uint8)
    
    # Generate ELA image
    ela_image = generate_ela_image(image_path, quality=90)
    ela_image_resized = cv2.resize(ela_image, target_size)
    
    # Extract wavelet noise
    wavelet_noise = extract_wavelet_noise(img)
    wavelet_noise_resized = cv2.resize(wavelet_noise, target_size)
    
    # Combine original, ELA, and wavelet noise into 3 channels
    combined = np.stack([img[:, :, 0], ela_image_resized[:, :, 0], wavelet_noise_resized], axis=-1)
    return combined.astype(np.float32) / 255.0  # Normalize to [0, 1]


def load_casia2_dataset_with_preprocessing(dataset_path, target_size=(224, 224), max_images=None):
    """
    Load CASIA2 dataset with ELA + Wavelet Noise preprocessing.

    Args:
        dataset_path (str): Path to the CASIA2 dataset.
        target_size (tuple): Resize dimensions.
        max_images (int): Maximum images per class.

    Returns:
        tuple: Preprocessed images and labels.
    """
    images, labels = [], []
    for label, folder in enumerate(['Au', 'Tp']):  # Authentic = 0, Tampered = 1
        folder_path = os.path.join(dataset_path, folder)
        files = os.listdir(folder_path)[:max_images]
        
        for file in files:
            image_path = os.path.join(folder_path, file)
            try:
                preprocessed_img = preprocess_image(image_path, target_size)
                images.append(preprocessed_img)
                labels.append(label)
            except Exception as e:
                print(f"Error processing {image_path}: {e}")
    
    return np.array(images), np.array(labels)


def create_mobilenetv2_model(input_shape, learning_rate=0.0001):
    """ MobileNetV2 model with custom input shape (3 channels). """
    base_model = MobileNetV2(input_shape=input_shape, include_top=False, weights='imagenet')
    base_model.trainable = False
    
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(1, activation='sigmoid')(x)
    
    model = Model(inputs=base_model.input, outputs=output)
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy', tf.keras.metrics.AUC()])
    return model



def main():
    dataset_path = './CASIA22'  # Update path
    X, y = load_casia2_dataset_with_preprocessing(dataset_path, max_images=1000)
    
    # Split dataset
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
    input_shape = (224, 224, 3)  # Updated to 9 channels
    
    # Compute class weights
    class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
    class_weights_dict = {i: class_weights[i] for i in range(len(class_weights))}
    
    # Data augmentation
    train_datagen = ImageDataGenerator(
        rotation_range=15,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True,
        brightness_range=[0.8, 1.2]
    )
    
    train_generator = train_datagen.flow(X_train, y_train, batch_size=16)
    
    # Create model
    model = create_mobilenetv2_model(input_shape)
    
    # Initial Training with Frozen Layers
    model.fit(train_generator, epochs=10, validation_data=(X_test, y_test), class_weight=class_weights_dict)
    
    # Unfreeze the base model for fine-tuning
    model.layers[1].trainable = True  # Unfreeze MobileNetV2 base model
    model.compile(optimizer=Adam(learning_rate=1e-5), loss='binary_crossentropy', metrics=['accuracy', tf.keras.metrics.AUC()])
    
    # Fine-tune
    model.fit(train_generator, epochs=20, validation_data=(X_test, y_test), class_weight=class_weights_dict)

    # Evaluate model
    y_pred = (model.predict(X_test) > 0.5).astype(int)
    print(classification_report(y_test, y_pred, target_names=['Authentic', 'Tampered']))
    print(f"F1-Score: {f1_score(y_test, y_pred)}")


if __name__ == '__main__':
    main()


KeyboardInterrupt: 

In [1]:
import os
import numpy as np
import cv2
from PIL import Image, ImageChops, ImageEnhance
from multiprocessing import Pool, cpu_count
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.optimizers import Adam


# --- ELA Image Generation ---
def generate_ela_image(image_path, quality=90):
    """
    Generate an ELA image to highlight tampering artifacts.
    """
    original = Image.open(image_path).convert("RGB")
    temp_path = "temp_compressed.jpg"
    original.save(temp_path, "JPEG", quality=quality)
    compressed = Image.open(temp_path)
    ela_image = ImageChops.difference(original, compressed)
    extrema = ela_image.getextrema()
    max_diff = max([ex[1] for ex in extrema]) or 1
    scale = 255.0 / max_diff
    ela_image = ImageEnhance.Brightness(ela_image).enhance(scale)
    return np.array(ela_image).astype(np.uint8)


# --- Wavelet Noise Extraction ---
import pywt

def extract_wavelet_noise(image_array):
    """
    Extract noise using Discrete Wavelet Transform (DWT).
    """
    gray = cv2.cvtColor(image_array, cv2.COLOR_RGB2GRAY) / 255.0
    coeffs = pywt.wavedec2(gray, "haar", level=2)
    coeffs_H = list(coeffs)
    coeffs_H[0] = np.zeros_like(coeffs_H[0])  # Zero out approximation coefficients
    noise = pywt.waverec2(coeffs_H, "haar")
    noise = np.clip(noise * 255.0, 0, 255).astype(np.uint8)
    return noise


# --- Preprocessing ---
def preprocess_image(image_path, target_size=(224, 224)):
    """
    Preprocess image by combining ELA and Wavelet Noise.
    """
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_resized = cv2.resize(img, target_size)
    
    ela_image = generate_ela_image(image_path)
    ela_resized = cv2.resize(ela_image, target_size)
    
    wavelet_noise = extract_wavelet_noise(img)
    wavelet_resized = cv2.resize(wavelet_noise, target_size)
    
    combined = np.stack([img_resized[:, :, 0], ela_resized[:, :, 0], wavelet_resized], axis=-1)
    return combined.astype(np.float32) / 255.0  # Normalize


def preprocess_image_wrapper(args):
    return preprocess_image(*args)


def load_casia2_dataset(dataset_path, target_size=(224, 224), max_images=None):
    """
    Load CASIA2 dataset with parallel preprocessing.
    """
    images, labels = [], []
    for label, folder in enumerate(["Au", "Tp"]):
        folder_path = os.path.join(dataset_path, folder)
        files = os.listdir(folder_path)[:max_images]
        file_paths = [(os.path.join(folder_path, file), target_size) for file in files]
        
        with Pool(cpu_count()) as p:
            preprocessed_images = p.map(preprocess_image_wrapper, file_paths)
        
        images.extend(preprocessed_images)
        labels.extend([label] * len(preprocessed_images))
    
    return np.array(images), np.array(labels)


# --- Model Definition ---
def create_mobilenetv2_model(input_shape, learning_rate=0.0001):
    """
    Create MobileNetV2 model for binary classification.
    """
    base_model = MobileNetV2(input_shape=input_shape, include_top=False, weights="imagenet")
    base_model.trainable = False  # Freeze base model layers
    
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation="relu")(x)
    x = Dropout(0.5)(x)
    output = Dense(1, activation="sigmoid")(x)
    
    model = Model(inputs=base_model.input, outputs=output)
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss="binary_crossentropy",
                  metrics=["accuracy", tf.keras.metrics.AUC()])
    return model


# --- Main Training Pipeline ---
def main():
    dataset_path = "./CASIA22"  # Path to the CASIA2 dataset
    target_size = (224, 224)
    max_images = 1000  # Limit the number of images per class
    
    # Load and preprocess dataset
    print("Loading and preprocessing dataset...")
    X, y = load_casia2_dataset(dataset_path, target_size, max_images)
    
    # Train/test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
    
    # Create model
    input_shape = (224, 224, 3)
    print("Creating model...")
    model = create_mobilenetv2_model(input_shape)
    
    # Train model
    print("Training model...")
    history = model.fit(X_train, y_train, batch_size=16, epochs=10, validation_data=(X_test, y_test))
    
    # Evaluate model
    print("Evaluating model...")
    y_pred = (model.predict(X_test) > 0.5).astype(int)
    print(classification_report(y_test, y_pred, target_names=["Authentic", "Tampered"]))
    
    # Save model
    model.save("mobilenetv2_casia2.h5")
    print("Model saved as mobilenetv2_casia2.h5")


if __name__ == "__main__":
    main()


Loading and preprocessing dataset...


Process SpawnPoolWorker-1:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/pool.py", line 114, in worker
    task = get()
           ^^^^^
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/queues.py", line 369, in get
    return _ForkingPickler.loads(res)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: Can't get attribute 'preprocess_image_wrapper' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>
Process SpawnPoolWorker-3:
Traceback (most recent call last):
Process SpawnPoolWorker-4:
Process SpawnPoolWorker-5:
Traceback (most recent call last)

KeyboardInterrupt: 