# Imports

In [None]:
import os
import zipfile
from PIL import Image
import numpy as np
import csv

In [None]:
import cv2
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Input, TimeDistributed, Flatten, Bidirectional, Reshape
from tensorflow.keras.layers import LSTM, BatchNormalization, Dropout, Dense, GlobalAveragePooling1D
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Kaggle Dataset Download

In [None]:
# Fetch the dataset archive with the Kaggle CLI; the zip lands in the current working directory.
# NOTE(review): presumably needs Kaggle API credentials configured beforehand — confirm.
!kaggle datasets download -d manjilkarki/deepfake-and-real-images

Dataset URL: https://www.kaggle.com/datasets/manjilkarki/deepfake-and-real-images
License(s): unknown
Downloading deepfake-and-real-images.zip to /content
 99% 1.66G/1.68G [00:07<00:00, 215MB/s]
100% 1.68G/1.68G [00:07<00:00, 250MB/s]


In [None]:
def image_to_numpy(image_file, size=(128, 128)):
    """Load one image and return it as a fixed-size RGB NumPy array.

    Args:
        image_file: Filesystem path or binary file-like object accepted by
            ``PIL.Image.open``.
        size: Target ``(width, height)`` handed to ``cv2.resize``.
            Defaults to ``(128, 128)``.

    Returns:
        ``np.ndarray`` of shape ``(height, width, 3)`` in RGB channel order.
    """
    # The context manager releases the file handle PIL opens when a path is
    # passed; a caller-supplied file object is left for the caller to close.
    with Image.open(image_file) as image:
        # Force three channels so grayscale, palette, RGBA and CMYK files all
        # yield the same shape and can be stacked into a single batch.
        pixels = np.array(image.convert("RGB"))
    return cv2.resize(pixels, tuple(size))

def convert_images_in_zip(zip_path, folder_name, verbose=False, extensions=(".jpg",)):
    """Decode every matching image under one folder of a zip archive.

    Args:
        zip_path: Path (or file-like object) of the zip archive.
        folder_name: Archive-internal prefix; only members whose name starts
            with it are read.
        verbose: When True, print one line per converted image. Off by
            default because large folders otherwise flood the notebook output.
        extensions: Tuple of file suffixes to accept. Matching is
            case-insensitive, so ``.JPG`` members are picked up as well.

    Returns:
        ``np.ndarray`` built from the per-image arrays returned by
        ``image_to_numpy``, in archive order. An empty array is returned when
        no member matches.
    """
    suffixes = tuple(ext.lower() for ext in extensions)
    image_arrays = []

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        for filename in zip_ref.namelist():
            if not (filename.startswith(folder_name) and filename.lower().endswith(suffixes)):
                continue
            with zip_ref.open(filename) as image_file:
                image_array = image_to_numpy(image_file)
            image_arrays.append(image_array)
            if verbose:
                print(f"Converted {filename} to NumPy array with shape {image_array.shape}")

    # One summary line replaces the per-file log as the default feedback.
    print(f"Converted {len(image_arrays)} images from {folder_name}")
    return np.array(image_arrays)

In [None]:
# Archive produced by the Kaggle download, resolved relative to the working directory.
zip_path = "deepfake-and-real-images.zip"

# Dataset Load

In [None]:
# Real images of the training split.
train_real = convert_images_in_zip(
    zip_path=zip_path,
    folder_name='Dataset/Train/Real/',
)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Converted Dataset/Train/Real/real_685.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Train/Real/real_6850.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Train/Real/real_68500.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Train/Real/real_68501.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Train/Real/real_68502.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Train/Real/real_68503.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Train/Real/real_68504.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Train/Real/real_68505.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Train/Real/real_68506.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Train/Real/real_68507.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Train/Real/real_68508.jpg to NumPy array with shape (128, 128, 3)
Converted Data

In [None]:
# Fake images of the training split.
train_fake = convert_images_in_zip(
    zip_path=zip_path,
    folder_name='Dataset/Train/Fake/',
)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Converted Dataset/Train/Fake/fake_685.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Train/Fake/fake_6850.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Train/Fake/fake_68500.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Train/Fake/fake_68501.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Train/Fake/fake_68502.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Train/Fake/fake_68503.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Train/Fake/fake_68504.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Train/Fake/fake_68505.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Train/Fake/fake_68506.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Train/Fake/fake_68507.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Train/Fake/fake_68508.jpg to NumPy array with shape (128, 128, 3)
Converted Data

In [None]:
# Real images of the test split.
test_real = convert_images_in_zip(
    zip_path=zip_path,
    folder_name='Dataset/Test/Real/',
)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Converted Dataset/Test/Real/real_137.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Real/real_1370.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Real/real_1371.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Real/real_1372.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Real/real_1373.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Real/real_1374.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Real/real_1375.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Real/real_1376.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Real/real_1377.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Real/real_1378.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Real/real_1379.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Real/real_1

In [None]:
# Fake images of the test split.
test_fake = convert_images_in_zip(
    zip_path=zip_path,
    folder_name='Dataset/Test/Fake/',
)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Converted Dataset/Test/Fake/fake_1440.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Fake/fake_1441.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Fake/fake_1442.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Fake/fake_1443.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Fake/fake_1444.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Fake/fake_1445.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Fake/fake_1446.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Fake/fake_1447.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Fake/fake_1448.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Fake/fake_1449.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Fake/fake_145.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Test/Fake/fake_1

In [None]:
# Real images of the validation split.
val_real = convert_images_in_zip(
    zip_path=zip_path,
    folder_name='Dataset/Validation/Real/',
)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Converted Dataset/Validation/Real/real_5499.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Validation/Real/real_55.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Validation/Real/real_550.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Validation/Real/real_5500.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Validation/Real/real_5501.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Validation/Real/real_5502.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Validation/Real/real_5503.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Validation/Real/real_5504.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Validation/Real/real_5505.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Validation/Real/real_5506.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Validation/Real/real_5507.jpg to NumPy a

In [None]:
# Fake images of the validation split.
val_fake = convert_images_in_zip(
    zip_path=zip_path,
    folder_name='Dataset/Validation/Fake/',
)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Converted Dataset/Validation/Fake/fake_5499.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Validation/Fake/fake_55.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Validation/Fake/fake_550.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Validation/Fake/fake_5500.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Validation/Fake/fake_5501.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Validation/Fake/fake_5502.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Validation/Fake/fake_5503.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Validation/Fake/fake_5504.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Validation/Fake/fake_5505.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Validation/Fake/fake_5506.jpg to NumPy array with shape (128, 128, 3)
Converted Dataset/Validation/Fake/fake_5507.jpg to NumPy a

In [None]:
# Report the per-class image array shapes for each split in a single call;
# sep="\n" reproduces the one-line-per-print layout, blank lines included.
print(
    f"Training data(real image) array shape: {train_real.shape}",
    f"Training data(fake image) array shape: {train_fake.shape}\n",
    f"\nValidation data(real image) array shape: {val_real.shape}",
    f"Validation data(fake image) array shape: {val_fake.shape}\n",
    f"\nTest data(real image) array shape: {test_real.shape}",
    f"Test data(fake image) array shape: {test_fake.shape}",
    sep="\n",
)

Training data(real image) array shape: (70001, 128, 128, 3)
Training data(fake image) array shape: (70001, 128, 128, 3)


Validation data(real image) array shape: (19787, 128, 128, 3)
Validation data(fake image) array shape: (19641, 128, 128, 3)


Test data(real image) array shape: (5413, 128, 128, 3)
Test data(fake image) array shape: (5492, 128, 128, 3)


# Train, Validation, and Test Sets

In [None]:
# Label convention: 1.0 marks a real image, 0.0 marks a fake one.
# Each label vector has one entry per image in the matching array.

# Real images
y_train_real = np.ones(len(train_real))
y_val_real = np.ones(len(val_real))
y_test_real = np.ones(len(test_real))

# Fake images
y_train_fake = np.zeros(len(train_fake))
y_val_fake = np.zeros(len(val_fake))
y_test_fake = np.zeros(len(test_fake))

In [None]:
# Report the per-class label vector shapes for each split in a single call;
# sep="\n" reproduces the one-line-per-print layout, blank lines included.
print(
    f"Training data(real image) label shape: {y_train_real.shape}",
    f"Training data(fake image) label shape: {y_train_fake.shape}\n",
    f"\nValidation data(real image) label shape: {y_val_real.shape}",
    f"Validation data(fake image) label shape: {y_val_fake.shape}\n",
    f"\nTest data(real image) label shape: {y_test_real.shape}",
    f"Test data(fake image) label shape: {y_test_fake.shape}",
    sep="\n",
)

Training data(real image) label shape: (70001,)
Training data(fake image) label shape: (70001,)


Validation data(real image) label shape: (19787,)
Validation data(fake image) label shape: (19641,)


Test data(real image) label shape: (5413,)
Test data(fake image) label shape: (5492,)


In [None]:
# Images: real samples first, fake samples appended after them.
X_train = np.vstack([train_real, train_fake])
X_val = np.vstack([val_real, val_fake])
X_test = np.vstack([test_real, test_fake])

# Labels: joined in the same real-then-fake order so they line up with the images.
y_train = np.hstack([y_train_real, y_train_fake])
y_val = np.hstack([y_val_real, y_val_fake])
y_test = np.hstack([y_test_real, y_test_fake])

In [None]:
# Collect the report lines first, then emit them with one print; joining on
# "\n" yields the same text as six consecutive print calls.
shape_report = [
    f"Training data array shape: {X_train.shape}",
    f"Training data label shape: {y_train.shape}\n",
    f"\nValidation data array shape: {X_val.shape}",
    f"Validation data label shape: {y_val.shape}\n",
    f"\nTest data array shape: {X_test.shape}",
    f"Test data label shape: {y_test.shape}",
]
print("\n".join(shape_report))

Training data array shape: (140002, 128, 128, 3)
Training data label shape: (140002,)


Validation data array shape: (39428, 128, 128, 3)
Validation data label shape: (39428,)


Test data array shape: (10905, 128, 128, 3)
Test data label shape: (10905,)


# Model Architecture

In [None]:
# ImageNet-pretrained MobileNetV2 used as the convolutional backbone;
# include_top=False drops its classification head.
base_model = MobileNetV2(
    input_shape=(128, 128, 3),
    weights='imagenet',
    include_top=False,
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_128_no_top.h5


In [None]:
input_tensor = Input(shape=(128, 128, 3))

# MobileNetV2's ImageNet weights expect pixels scaled to [-1, 1], while the
# image loader in this notebook yields raw 0-255 values. Rescaling inside the
# model gives training, validation and single-image prediction the same
# preprocessing without any extra step at the call sites.
x = tf.keras.layers.Rescaling(scale=1.0 / 127.5, offset=-1.0)(input_tensor)

# Backbone feature map; the recorded summary shows (None, 4, 4, 1280).
x = base_model(x)
# Flatten each row of the feature map so the rows form a length-4 sequence
# of 5120-dimensional vectors for the recurrent layer.
x = TimeDistributed(Flatten())(x)

# Bidirectional LSTM over the row sequence; keeps one output per step.
x = Bidirectional(LSTM(128, return_sequences=True))(x)
x = BatchNormalization()(x)
x = Dropout(0.4)(x)

# Two identical L2-regularised dense stages applied at every sequence step.
x = Dense(128, activation='relu', kernel_regularizer=l2(0.001))(x)
x = BatchNormalization()(x)
x = Dropout(0.4)(x)

x = Dense(128, activation='relu', kernel_regularizer=l2(0.001))(x)
x = BatchNormalization()(x)
x = Dropout(0.4)(x)

# Average across the sequence axis, then emit a single sigmoid probability.
x = GlobalAveragePooling1D()(x)
output = Dense(1, activation='sigmoid', kernel_regularizer=l2(0.001))(x)

model = Model(inputs=input_tensor, outputs=output)

In [None]:
# Print the layer-by-layer table (layer type, output shape, parameter count) for the assembled model.
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 128, 128, 3)]     0         
                                                                 
 mobilenetv2_1.00_128 (Func  (None, 4, 4, 1280)        2257984   
 tional)                                                         
                                                                 
 time_distributed_1 (TimeDi  (None, 4, 5120)           0         
 stributed)                                                      
                                                                 
 bidirectional_1 (Bidirecti  (None, 4, 256)            5374976   
 onal)                                                           
                                                                 
 batch_normalization_3 (Bat  (None, 4, 256)            1024      
 chNormalization)                                          

In [None]:
# Adam with every hyperparameter spelled out explicitly.
optimizer = Adam(
    learning_rate=0.0005,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
)

# Single sigmoid output, so binary cross-entropy is the loss; accuracy is tracked as the metric.
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Model Training

In [None]:
# Train for 4 epochs on the full training arrays, evaluating on the
# validation arrays after every epoch. batch_size is not passed, so Keras
# applies its default.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=4,
    verbose=1,
    validation_data=(X_val, y_val),
    shuffle=True,
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
   8/4376 [..............................] - ETA: 22:19 - loss: 0.0540 - accuracy: 0.9688

KeyboardInterrupt: 

# Save Model

In [None]:
# Persist only the weights, under a path relative to the working directory.
model.save_weights(
    filepath='weights_full_data/model_weights_full_data',
)

In [None]:
# Copy the saved weights directory to Google Drive.
# NOTE(review): assumes Drive is already mounted at /content/drive — no mount cell is visible here; confirm.
!cp -r /content/weights_full_data/ /content/drive/MyDrive/

In [None]:
# Write the whole model to Drive as a single HDF5 file.
model.save(
    filepath='/content/drive/MyDrive/deepfake.h5',
    save_format='h5',
)

  saving_api.save_model(


# Example Prediction

In [None]:
# Path of one sample image on Drive (a single file, despite the variable name).
image_dir = "/content/drive/MyDrive/fake_107.jpg"

# Decode it with the same loader used for the dataset images.
array = image_to_numpy(image_file=image_dir)

In [None]:
def predict_deepfake_model(model, image_array, threshold=0.5):
    """Score a single image and threshold the resulting probability.

    Args:
        model: Object exposing ``predict(batch)`` whose result is indexable
            as ``[0][0]``.
        image_array: One image; a leading batch axis is added before the
            model is called.
        threshold: Smallest probability at which the image counts as real.

    Returns:
        ``(is_real, probability)`` where ``probability`` is the first value
        of the first prediction and ``is_real`` is ``probability >= threshold``.
    """
    batch = np.expand_dims(image_array, axis=0)
    probability = model.predict(batch)[0][0]
    return probability >= threshold, probability

In [None]:
# Classify the sample image at the 0.5 decision threshold.
is_real, probability = predict_deepfake_model(
    model=model,
    image_array=array,
    threshold=0.5,
)



In [None]:
# Thresholded verdict: True when the predicted probability reached the threshold.
is_real

False

In [None]:
# Raw sigmoid output for the sample image; values at or above the threshold are treated as real.
probability

0.00014057044