Install Dependencies

In [1]:
# Install Dependencies
!pip install tensorflow
!pip install matplotlib
!pip install tqdm
!pip install flask
!pip install opencv-python

Collecting matplotlib
  Downloading matplotlib-3.8.3-cp312-cp312-win_amd64.whl.metadata (5.9 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.2.0-cp312-cp312-win_amd64.whl.metadata (5.8 kB)
Collecting cycler>=0.10 (from matplotlib)
  Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.49.0-cp312-cp312-win_amd64.whl.metadata (162 kB)
     ---------------------------------------- 0.0/162.3 kB ? eta -:--:--
     -- ------------------------------------- 10.2/162.3 kB ? eta -:--:--
     -- ------------------------------------- 10.2/162.3 kB ? eta -:--:--
     -- ------------------------------------- 10.2/162.3 kB ? eta -:--:--
     ------- ----------------------------- 30.7/162.3 kB 262.6 kB/s eta 0:00:01
     -------------- ---------------------- 61.4/162.3 kB 363.1 kB/s eta 0:00:01
     --------------------- --------------- 92.2/162.3 kB 435.7 kB/s eta 0:00:01
     ----------------

Extract Dataset

In [3]:
# Local path of the zipped dataset and the folder it is unpacked into
DATASET = "C:\\Users\\rupes\\Downloads\\archive.zip"
EXTRACT_DIR = "C:\\Users\\rupes\\Downloads"

# Extract Dataset with Progress Bar
import zipfile
from tqdm import tqdm  # Import tqdm for the progress bar

with zipfile.ZipFile(DATASET, 'r') as zip_ref:
    # Read the member list once; it provides both the progress-bar total
    # and the sequence of entries to extract.
    members = zip_ref.namelist()

    # Extract entry by entry so the bar advances once per archive member
    for member in tqdm(members, desc='Extracting', unit=' files'):
        zip_ref.extract(member, EXTRACT_DIR)


Extracting: 100%|██████████| 190335/190335 [05:30<00:00, 575.09 files/s]


Verify Dataset

In [5]:
import os

# Local Dataset Path: root folder of the extracted dataset; the "Train",
# "Test" and "Validation" folders are joined onto it further down
data_dir = "C:\\Users\\rupes\\Downloads\\Dataset"

# Local subfolders: folder names whose files are counted inside every split
subfolders = ["Fake", "Real"]

def setup_directory_and_count_files(directory, subfolders):
    """Count the regular files located directly inside the given subfolders.

    Despite its name, this function creates and modifies nothing; it only
    lists the directories.

    Args:
        directory: Path of the parent folder.
        subfolders: Names of the child folders of ``directory`` to inspect.

    Returns:
        The combined number of files across all listed subfolders. Nested
        directories are neither counted nor descended into.
    """
    file_count = 0
    for name in subfolders:
        folder = os.path.join(directory, name)
        for entry in os.listdir(folder):
            # Only plain files count; sub-directories are skipped
            if os.path.isfile(os.path.join(folder, entry)):
                file_count += 1
    return file_count

# Folder of each dataset split
train_dir = os.path.join(data_dir, "Train")
test_dir = os.path.join(data_dir, "Test")
validation_dir = os.path.join(data_dir, "Validation")

# Number of files per split, summed over the listed subfolders
total_train_files = setup_directory_and_count_files(train_dir, subfolders)
total_test_files = setup_directory_and_count_files(test_dir, subfolders)
total_validation_files = setup_directory_and_count_files(validation_dir, subfolders)

total = total_train_files + total_test_files + total_validation_files
if total == 0:
    # Without this guard the percentage computation fails with an opaque
    # ZeroDivisionError when the folders exist but contain no files.
    raise ValueError(f"No files found under {data_dir}; check that the dataset was extracted there.")

# Share of the whole dataset held by each split, in percent
train_perc = (total_train_files / total) * 100
test_perc = (total_test_files / total) * 100
valid_perc = (total_validation_files / total) * 100

print("Total Train Files:", total_train_files)
print("Total Test Files:", total_test_files)
print("Total Validation Files:", total_validation_files)
print(f"Train Data Percentage: {train_perc:.2f}%")
print(f"Test Data Percentage: {test_perc:.2f}%")
print(f"Validation Data Percentage: {valid_perc:.2f}%")

Total Train Files: 140002
Total Test Files: 10905
Total Validation Files: 39428
Train Data Percentage: 73.56%
Test Data Percentage: 5.73%
Validation Data Percentage: 20.72%


Model Training

In [7]:
import matplotlib.pyplot as plt
import numpy as np
import os
import cv2
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Activation, Conv2D, Dense, Flatten, MaxPooling2D
from tensorflow.keras.models import Sequential

# Expose only device index 0 through CUDA_VISIBLE_DEVICES.
# NOTE(review): this is assigned after `import tensorflow` above; confirm it
# still takes effect, or move it ahead of the import.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Dataset root and the folder of each split
data_dir = "C:\\Users\\rupes\\Downloads\\Dataset"
train_dir = os.path.join(data_dir, "Train")
test_dir = os.path.join(data_dir, "Test")
validation_dir = os.path.join(data_dir, "Validation")
# Subfolder names expected inside each split (not referenced again in this cell)
subfolders = ["Fake", "Real"]

def normalize_image(image, labels):
    """Cast an image tensor to float32 and divide it by 255.

    Args:
        image: Image tensor to rescale.
        labels: Labels accompanying the image; returned unchanged.

    Returns:
        A ``(scaled_image, labels)`` tuple.
    """
    as_float = tf.cast(image, tf.float32)
    scaled = as_float / 255.0
    return scaled, labels

BATCH_SIZE = 32
IMG_SIZE = (256, 256)


def _load_split(directory, shuffle=True):
    """Build the input pipeline for one dataset split.

    Images are read from ``directory`` with one-hot ('categorical') labels,
    resized to ``IMG_SIZE``, batched by ``BATCH_SIZE``, rescaled through
    ``normalize_image`` and prefetched.

    Args:
        directory: Folder of the split to load.
        shuffle: Whether the files are shuffled when read.

    Returns:
        The mapped and prefetched ``tf.data`` dataset.
    """
    dataset = tf.keras.preprocessing.image_dataset_from_directory(
        directory,
        label_mode='categorical',
        batch_size=BATCH_SIZE,
        image_size=IMG_SIZE,
        shuffle=shuffle
    )
    return dataset.map(normalize_image).prefetch(buffer_size=tf.data.AUTOTUNE)


train_data = _load_split(train_dir)
validation_data = _load_split(validation_dir)
# The test split is read without shuffling, so its order is stable between runs
test_data = _load_split(test_dir, shuffle=False)

# CNN classifier: five convolution stages that halve the spatial size each
# time, followed by a small dense head ending in a 2-way softmax.
model_CNN = Sequential([
    # Explicit input layer instead of `input_shape=` on the first Conv2D, which
    # recent Keras versions warn about; the shape is tied to IMG_SIZE (RGB).
    tf.keras.Input(shape=IMG_SIZE + (3,)),

    # Stage 1: the activation is applied after pooling and batch normalization
    Conv2D(filters=8, kernel_size=3, padding='same'),
    MaxPooling2D(),
    layers.BatchNormalization(),
    Activation('relu'),

    # Stage 2
    Conv2D(filters=16, kernel_size=4, padding='same', activation='relu'),
    MaxPooling2D(),
    layers.BatchNormalization(),
    layers.Dropout(0.2),

    # Stage 3
    Conv2D(filters=32, kernel_size=3, padding='same', activation='relu'),
    MaxPooling2D(),
    layers.BatchNormalization(),

    # Stage 4
    Conv2D(filters=64, kernel_size=2, padding='same', activation='relu'),
    MaxPooling2D(),
    layers.BatchNormalization(),

    # Stage 5: 1x1 convolution
    Conv2D(filters=128, kernel_size=1, padding='same', activation='relu'),
    MaxPooling2D(),
    layers.BatchNormalization(),
    layers.Dropout(0.2),

    # Classification head
    Flatten(),
    layers.Dropout(0.3),
    Dense(units=64, activation='relu'),
    Dense(units=20, activation='relu'),
    Dense(units=2, activation='softmax')
])

# Adam with its default settings; categorical cross-entropy matches the
# 'categorical' labels and the softmax output.
adam_optimizer = tf.keras.optimizers.Adam()
model_CNN.compile(
    optimizer=adam_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

epoch_num = 50  # Define the number of epochs

# Validation runs over half of the validation batches in each epoch
half_validation_steps = int(0.5 * len(validation_data))

hist = model_CNN.fit(
    train_data,
    epochs=epoch_num,
    validation_data=validation_data,
    validation_steps=half_validation_steps,
)

Found 140002 files belonging to 2 classes.
Found 39428 files belonging to 2 classes.
Found 10905 files belonging to 2 classes.


  super().__init__(


Epoch 1/50
[1m 692/4376[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m24:54[0m 406ms/step - accuracy: 0.6840 - loss: 0.6170

KeyboardInterrupt: 

Save the Trained Model

In [None]:
# @title Save the Trained Model
import os

# Folder the trained model is written to.
# NOTE(review): this path is under a different user profile ("tejas") than the
# dataset paths used earlier ("rupes"); confirm it is valid on this machine.
svpth = "C:\\Users\\tejas\\Documents\\COLLEGE\\SEMESTER_5\\EDI_DeepFake_ImageDetection\\Model"
# @markdown Save Model As:
model_name = "dfimgdet50ep"  # @param {type:"string"}
# ".h5" selects the HDF5 format; the previous ".h" suffix is not an extension
# Keras recognises when saving.
modelwext = model_name + ".h5"
# Make sure the target folder exists, then join folder and file name with a
# path separator (plain concatenation produced "...\\Modeldfimgdet50ep.h").
os.makedirs(svpth, exist_ok=True)
save_path = os.path.join(svpth, modelwext)
model_CNN.save(save_path)