In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and echo the full path of every file it contains.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in files):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Concatenate
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

# Directory paths: the three dataset splits sit side by side under one root.
_DATASET_ROOT = "/kaggle/input/deepfake-and-real-images/Dataset"
train_path, validation_path, test_path = (
    f"{_DATASET_ROOT}/{split}" for split in ("Train", "Validation", "Test")
)

# Parameters
# Images are loaded at a reduced 64x64 resolution to make the model cheaper
# to train (shorter training time).
# With a batch size of 16, the number of iterations per epoch is roughly
# (total images / batch size).
IMAGE_SIZE = (64, 64)
BATCH_SIZE = 16
# Used as the parallelism setting of the tf.data pipeline so that TensorFlow
# tunes it automatically.
AUTOTUNE = tf.data.AUTOTUNE

# Load datasets
def load_datasets(path, image_size=IMAGE_SIZE, batch_size=BATCH_SIZE, *,
                  shuffle=True, seed=42):
    """Build a batched, labelled image dataset from a class-per-folder directory.

    Labels are inferred from the sub-directory names. Keras assigns the class
    indices in alphanumeric order of those names (not in the order the folders
    happen to be listed on disk), so a layout with ``Fake`` and ``Real``
    sub-folders yields 0 = fake and 1 = real.

    Args:
        path: Directory containing one sub-directory per class.
        image_size: ``(height, width)`` the images are resized to.
        batch_size: Number of images per batch.
        shuffle: Whether to shuffle the data. Defaults to ``True``, which is
            what this function always did before the parameter existed; pass
            ``False`` when a deterministic file order is needed.
        seed: Random seed used for shuffling.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)`` batches of RGB
        images with binary (0/1) labels.
    """
    return image_dataset_from_directory(
        path,
        labels='inferred',
        label_mode='binary',
        color_mode='rgb',
        batch_size=batch_size,
        image_size=image_size,
        shuffle=shuffle,
        seed=seed,
    )

# FFT Feature Extraction
def extract_fft(images):
    """Return the 2-D FFT magnitude spectrum of the grayscale version of *images*.

    ``tf.signal.fft2d`` transforms the two inner-most axes of its input. On a
    ``[..., height, width, 1]`` grayscale tensor those axes are the width and
    the size-1 channel axis, which degenerates into a 1-D FFT along each image
    row. The channel axis is therefore dropped before the transform, so that
    it runs over (height, width), and added back afterwards.

    Args:
        images: RGB image tensor of shape ``[batch, height, width, 3]``
            (an unbatched ``[height, width, 3]`` tensor works as well).

    Returns:
        A real-valued tensor with the same leading dimensions and a single
        trailing channel (e.g. ``[batch, height, width, 1]``) holding the
        magnitude of the 2-D spectrum.
    """
    # [..., H, W, 3] -> [..., H, W, 1] -> [..., H, W]
    gray = tf.squeeze(tf.image.rgb_to_grayscale(images), axis=-1)
    spectrum = tf.signal.fft2d(tf.cast(gray, tf.complex64))
    # Keep the magnitude only and restore the channel axis for Conv2D.
    return tf.expand_dims(tf.abs(spectrum), axis=-1)

# Preprocess dataset and apply FFT
def preprocess_dataset(dataset):
    """Attach FFT features to every element of an ``(images, labels)`` dataset.

    Args:
        dataset: A ``tf.data.Dataset`` yielding ``(images, labels)``.

    Returns:
        A dataset yielding ``((images, fft_features), labels)``, i.e. the
        two-input structure a model with an image input and an FFT input
        expects. The result is prefetched so that preparing the next element
        overlaps with consuming the current one.
    """
    def _with_fft(image, label):
        return (image, extract_fft(image)), label

    return dataset.map(_with_fft, num_parallel_calls=AUTOTUNE).prefetch(AUTOTUNE)

# Load and preprocess datasets: every split goes through the same
# load -> FFT pipeline, in train / validation / test order.
train_dataset, val_dataset, test_dataset = [
    preprocess_dataset(load_datasets(split_dir))
    for split_dir in (train_path, validation_path, test_path)
]

# Build a custom CNN model for images and FFT features
def _conv_branch(inputs, filters=(32, 64, 128)):
    """Apply one Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stage per entry of *filters*, then flatten."""
    features = inputs
    for n_filters in filters:
        features = Conv2D(n_filters, (3, 3), activation='relu')(features)
        features = MaxPooling2D(pool_size=(2, 2))(features)
    return Flatten()(features)


def create_cnn_model(image_shape, fft_shape):
    """Build a two-branch CNN that classifies from an image and its FFT features.

    Both inputs go through the same convolutional architecture (with separate
    weights); the flattened outputs are concatenated and fed to a small dense
    head ending in a single sigmoid unit.

    NOTE: no rescaling or normalisation layer is applied here; both inputs
    are consumed exactly as provided by the caller.

    Args:
        image_shape: Shape of one image, without the batch dimension.
        fft_shape: Shape of one FFT feature map, without the batch dimension.

    Returns:
        An uncompiled Keras ``Model`` taking ``[image, fft]`` as inputs and
        producing one sigmoid output per sample.
    """
    # Image branch
    image_input = Input(shape=image_shape)
    image_features = _conv_branch(image_input)

    # FFT branch
    fft_input = Input(shape=fft_shape)
    fft_features = _conv_branch(fft_input)

    # Fuse both branches and classify
    merged = Concatenate()([image_features, fft_features])
    hidden = Dense(128, activation='relu')(merged)
    hidden = Dropout(0.5)(hidden)
    output = Dense(1, activation='sigmoid')(hidden)

    return Model(inputs=[image_input, fft_input], outputs=output)

# Define input shapes
# Derived from IMAGE_SIZE so the model inputs always match the size the
# datasets are loaded at.
image_shape = (*IMAGE_SIZE, 3)  # RGB image
fft_shape = (*IMAGE_SIZE, 1)    # single-channel FFT magnitude

# Build the model
cnn_model = create_cnn_model(image_shape, fft_shape)

# Compile the model: binary cross-entropy pairs with the single sigmoid output.
cnn_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Callbacks for learning rate reduction and early stopping
# Halve the learning rate after 2 epochs without val_loss improvement
# (never going below 1e-6).
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6, verbose=1)
# Stop after 3 epochs without val_loss improvement and roll back to the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)

# Train the model
# The datasets already yield ((image, fft), label), so they are passed
# directly; no extra identity map is needed.
history = cnn_model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=5,
    callbacks=[lr_scheduler, early_stopping],
)

# Evaluate the model on the test set
test_loss, test_accuracy = cnn_model.evaluate(test_dataset)
print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')
