**Loading the DataSet**

In [None]:
#As our custom dataset is present in the drive, we first need to mount to drive
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
#Dataset is present in zip file, so extracting the dataset into "Extractedset" Folder
import zipfile
import os

zf = zipfile.ZipFile('/content/drive/MyDrive/Custom_dataset.zip','r')
zf.extractall('./Extractedset/')
zf.close()

**Image Processing**

In [None]:
#Extracting Frames from videos
import cv2
import os

def extract_frames_from_videos(vf, of, interval=20):
    if not os.path.exists(of):
        os.makedirs(of)
    for v in os.listdir(vf):
        if v.endswith('.mp4'):
            v_path = os.path.join(vf, v)
            cap = cv2.VideoCapture(v_path)
            index = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                if index % interval == 0:
                    frame_filename = f"{os.path.splitext(v)[0]}_frame_{index}.jpg"
                    frame_path = os.path.join(of, frame_filename)
                    cv2.imwrite(frame_path, frame)
                index += 1
            cap.release()

extract_frames_from_videos('/content/Extractedset/Custom_dataset/Real', '/content/Extractedset/Custom_dataset/Real_Frames')
extract_frames_from_videos('/content/Extractedset/Custom_dataset/Fake', '/content/Extractedset/Custom_dataset/Fake_Frames')

In [None]:
#Sizes of sub-folders Real, Fake, Real_Frames, Fake_Frames
import os

def count_items_in_folder(folder_path):
  num_items = len(os.listdir(folder_path))
  return num_items
print("Real : ",count_items_in_folder('/content/Extractedset/Custom_dataset/Real'))
print("Fake: ",count_items_in_folder('/content/Extractedset/Custom_dataset/Fake'))
print("Real Frames : ",count_items_in_folder('/content/Extractedset/Custom_dataset/Real_Frames'))
print("Fake Frames: ",count_items_in_folder('/content/Extractedset/Custom_dataset/Fake_Frames'))

Real :  17
Fake:  17
Real Frames :  318
Fake Frames:  307


**Face Detection and Extraction**

*MTCNN (Multi-task Cascaded Convolutional Networks)*              
*MTCNN is a popular deep learning-based method for face detection, which not only identifies the location of the face in an image but also detects facial landmarks like the eyes, nose, and mouth corners.*

In [None]:
!pip install mtcnn



*Face Detection: MTCNN detects faces in the images.          
Face Extraction: If a face is detected, it crops the face using the bounding box coordinates.*

In [None]:
from mtcnn import MTCNN
import cv2
import os

detector = MTCNN()

def extract_faces(inputf, outputf):
    if not os.path.exists(outputf):
        os.makedirs(inputf)

    for fname in os.listdir(inputf):
      image_path = os.path.join(inputf, fname)
        image = cv2.imread(image_path)

        result = detector.detect_faces(image)

        if result:
            x, y, width, height = result[0]['box']
            face = image[y:y+height, x:x+width]

            cv2.imwrite(os.path.join(outputf, fname), face)

extract_faces('/content/Extractedset/Custom_dataset/Real_Frames','/content/Extractedset/Custom_dataset/Faces_Real')

extract_faces('/content/Extractedset/Custom_dataset/Fake_Frames','/content/Extractedset/Custom_dataset/Faces_Fake')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m


In [None]:
print("Faces_Real : ",count_items_in_folder('/content/Extractedset/Custom_dataset/Faces_Real'))
print("Faces_Fake: ",count_items_in_folder('/content/Extractedset/Custom_dataset/Faces_Fake'))


Faces_Real :  309
Faces_Fake:  299


**Resizing the Images**                                                          

In [None]:
#standardizing the input data before feeding it into the model.
from PIL import Image
import os

def resize_images(IF, OF, size=(224, 224)):
    if not os.path.exists(OF):
        os.makedirs(OF)

    for f in os.listdir(IF):
              if f.endswith('.jpg'):
                img_path = os.path.join(IF, f)
                img = Image.open(img_path)
                img_resized = img.resize(size)
                img_resized.save(os.path.join(OF, f))

resize_images('/content/Extractedset/Custom_dataset/Faces_Real/', '/content/Extractedset/Custom_dataset/Resized_Real', (224, 224))
resize_images('/content/Extractedset/Custom_dataset/Faces_Fake', '/content/Extractedset/Custom_dataset/Resized_Fake', (224, 224))


In [None]:
print("Resized_Real : ",count_items_in_folder('/content/Extractedset/Custom_dataset/Resized_Real'))
print("Resized_Fake: ",count_items_in_folder('/content/Extractedset/Custom_dataset/Resized_Fake'))

Resized_Real :  308
Resized_Fake:  299


**Data Augmentation**

*Data augmentation is a technique used in machine learning and deep learning to artificially expand the size and diversity of a training dataset by applying various transformations to the existing data. This is especially useful in tasks involving images, where augmentation helps create more varied training examples, allowing the model to generalize better and reduce overfitting.*

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(
    rescale=1.0/255.0,#Normalization
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.1
)
train_data = datagen.flow_from_directory(
    '/content/Extractedset/Custom_dataset/Resized',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    subset='training'
)
val_data = datagen.flow_from_directory(
    '/content/Extractedset/Custom_dataset/Resized',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    subset='validation'
)


Found 548 images belonging to 2 classes.
Found 59 images belonging to 2 classes.


**Model Definition and training**

In [None]:
!pip install tensorflow tensorflow-addons


Collecting tensorflow-addons
  Downloading tensorflow_addons-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)
Collecting typeguard<3.0.0,>=2.7 (from tensorflow-addons)
  Downloading typeguard-2.13.3-py3-none-any.whl.metadata (3.6 kB)
Downloading tensorflow_addons-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (611 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m611.8/611.8 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, tensorflow-addons
  Attempting uninstall: typeguard
    Found existing installation: typeguard 4.3.0
    Uninstalling typeguard-4.3.0:
      Successfully uninstalled typeguard-4.3.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
inflect 7.4.0 requires typeguard>=4.0.1, b

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import Xception
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [None]:
base_model = Xception(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)

DFM = Model(inputs=base_model.input, outputs=predictions)
for layer in base_model.layers:
    layer.trainable = False
DFM.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['accuracy'])


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5




In [None]:
# Training the model
history = DFM.fit(
    train_data,
    steps_per_epoch=train_data.samples // 32 ,#batch size
    validation_data=val_data,
    validation_steps=val_data.samples // 32,
    epochs=25
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [None]:
# Evaluating the model
loss, accuracy = DFM.evaluate(val_data)
print(f"Validation Accuracy: {accuracy * 100:.2f}%")



Validation Accuracy: 61.02%


In [None]:
DFM.save('DFM.h5')

In [6]:
!pip install tensorflow==2.12.0 tensorflow-addons==0.19.0



In [7]:
!pip install tensorflow -addons
import tensorflow as tf
from tensorflow.keras.models import load_model
model = tf.keras.models.load_model('/content/DFM.h5')


Usage:   
  pip3 install [options] <requirement specifier> [package-index-options] ...
  pip3 install [options] -r <requirements file> [package-index-options] ...
  pip3 install [options] [-e] <vcs project url> ...
  pip3 install [options] [-e] <local project path> ...
  pip3 install [options] <archive url/path> ...

no such option: -a


In [17]:
import cv2
import numpy as np

def preprocess_frame(f):
    f = cv2.resize(f, (224, 224))
    f = f.astype('float32') / 255.0
    f = np.expand_dims(f, axis=0)
    return f

cap = cv2.VideoCapture('/content/RandomTestData/id6_id3_0003.mp4')

if not cap.isOpened():
    print("Error: Could not open video.")
    exit()

frame_count = 0
predicted_labels = []
true_labels = []
while True:
    ret, frame = cap.read()
    if not ret:
        break
    if frame_count % 20 == 0:
      true_labels.append(0)
      processed_frame = preprocess_frame(frame)
      prediction = model.predict(processed_frame)
      predicted_class = np.argmax(prediction, axis=1)
      predicted_labels.append(predicted_class)
      cv2.putText(frame, f'Predicted: {predicted_class[0]}', (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    frame_count += 1
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()




In [15]:
if(predicted_class[0]==0):
  x = "Fake"
else :
  x = "Real"
print(f'Predicted: ', x)

Predicted:  Fake


In [23]:
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(true_labels[:len(predicted_labels)], predicted_labels))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19

    accuracy                           1.00        19
   macro avg       1.00      1.00      1.00        19
weighted avg       1.00      1.00      1.00        19

