In [1]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!ls /content/drive/MyDrive

 ArtiFact_240K.zip
 best_model_mobilenet.h5
'Colab Notebooks'
'Deepfake Duel: Truth vs. Trickery.ipynb'
 deepfake_mobile_model.h5
'Getting started.pdf'
'Ravi kumar & Uva lakshmi Wedding.mp4'
 Untitled
 Untitled0.ipynb
'Untitled (1)'
 Untitled1.ipynb
'www.TamilRockers.lu - O Kadhal Kanmani (2015) Tamil Itunes Untouched 1080p HD AVC x264 DD 5.1 & 2.0 - 4.9GB.m4v'


In [3]:
!unzip "/content/drive/MyDrive/ArtiFact_240K.zip" -d /content/ArtiFact_240K

Archive:  /content/drive/MyDrive/ArtiFact_240K.zip
replace /content/ArtiFact_240K/ArtiFact_240K/metadata.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [4]:
!pip install tensorflow pandas matplotlib --quiet

In [5]:
import os
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
from tensorflow.keras.utils import Sequence
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications import Xception
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [6]:
class DeepfakeDataGenerator(Sequence):
    def __init__(self, base_dir, batch_size=32, img_size=(299, 299), shuffle=True):
        self.img_size = img_size
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.filepaths, self.labels, self.classes = self._load_data(base_dir)
        self.indexes = np.arange(len(self.filepaths))
        self.class_to_index = {'human_faces': 0, 'animals': 1, 'vehicles': 2}

    def _load_data(self, base_dir):
        filepaths, labels, classes = [], [], []
        for label_dir in ['real', 'fake']:
            label_val = 1 if label_dir == 'real' else 0
            for class_dir in ['human_faces', 'animals', 'vehicles']:
                path = os.path.join(base_dir, label_dir, class_dir)
                if os.path.exists(path):
                    for fname in os.listdir(path):
                        if fname.lower().endswith(('.jpg', '.jpeg', '.png')):
                            filepaths.append(os.path.join(path, fname))
                            labels.append(label_val)
                            classes.append(class_dir)
        return filepaths, labels, classes

    def __len__(self):
        return len(self.filepaths) // self.batch_size

    def on_epoch_end(self):
        if self.shuffle:
            temp = list(zip(self.filepaths, self.labels, self.classes))
            random.shuffle(temp)
            self.filepaths, self.labels, self.classes = zip(*temp)

    def __getitem__(self, idx):
        batch_files = self.filepaths[idx*self.batch_size:(idx+1)*self.batch_size]
        batch_labels = self.labels[idx*self.batch_size:(idx+1)*self.batch_size]
        batch_classes = self.classes[idx*self.batch_size:(idx+1)*self.batch_size]

        X = np.zeros((self.batch_size, *self.img_size, 3), dtype='float32')
        y_label = np.zeros((self.batch_size, 1), dtype='float32')
        y_class = np.zeros((self.batch_size, 3), dtype='float32')

        for i, filepath in enumerate(batch_files):
            img = load_img(filepath, target_size=self.img_size)
            img = img_to_array(img) / 255.0
            X[i] = img
            y_label[i] = batch_labels[i]
            y_class[i, self.class_to_index[batch_classes[i]]] = 1

        return X, {'label_output': y_label, 'class_output': y_class}


In [7]:
input_tensor = Input(shape=(299, 299, 3))
base_model = Xception(include_top=False, weights='imagenet', input_tensor=input_tensor)

x = GlobalAveragePooling2D()(base_model.output)
x = Dense(128, activation='relu')(x)

label_output = Dense(1, activation='sigmoid', name='label_output')(x)
class_output = Dense(3, activation='softmax', name='class_output')(x)

model = Model(inputs=input_tensor, outputs=[label_output, class_output])

model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss={'label_output': 'binary_crossentropy', 'class_output': 'categorical_crossentropy'},
    metrics={'label_output': 'accuracy', 'class_output': 'accuracy'}
)


In [8]:
model.save("deepfake_detector_model.h5")



In [9]:
train_dir = "/content/ArtiFact_240K/ArtiFact_240K/train"
val_dir = "/content/ArtiFact_240K/ArtiFact_240K/validation"

# Use a smaller subset for fast testing
train_gen = DeepfakeDataGenerator(train_dir, batch_size=32)
val_gen = DeepfakeDataGenerator(val_dir, batch_size=32)

# Only use part of the dataset
train_gen.filepaths = train_gen.filepaths[:3000]
val_gen.filepaths = val_gen.filepaths[:600]


In [10]:
history = model.fit(
    train_gen,
    validation_data=val_gen,
    steps_per_epoch=100,
    validation_steps=20,
    epochs=5
)


Epoch 1/5


  self._warn_if_super_not_called()


[1m 93/100[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m6s[0m 908ms/step - class_output_accuracy: 0.9948 - class_output_loss: 0.1711 - label_output_accuracy: 0.9948 - label_output_loss: 0.1508 - loss: 0.3219



[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m173s[0m 928ms/step - class_output_accuracy: 0.9951 - class_output_loss: 0.1626 - label_output_accuracy: 0.9951 - label_output_loss: 0.1437 - loss: 0.3063 - val_class_output_accuracy: 1.0000 - val_class_output_loss: 2.1178e-04 - val_label_output_accuracy: 1.0000 - val_label_output_loss: 2.9569e-04 - val_loss: 5.0747e-04
Epoch 2/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 905ms/step - class_output_accuracy: 1.0000 - class_output_loss: 6.1000e-04 - label_output_accuracy: 1.0000 - label_output_loss: 7.3607e-04 - loss: 0.0013 - val_class_output_accuracy: 1.0000 - val_class_output_loss: 1.6060e-04 - val_label_output_accuracy: 1.0000 - val_label_output_loss: 2.2465e-04 - val_loss: 3.8526e-04
Epoch 3/5
[1m100/100[0m [32m━━━━━━━━━━━

In [11]:
model.save("deepfake_model.h5")




In [12]:
import os
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Load model (make sure you point to the correct model path if needed)
model = load_model("deepfake_model.h5")

# Set test folder path
test_dir = "/content/ArtiFact_240K/ArtiFact_240K/test"
class_names = ['human_faces', 'animals', 'vehicles']

# Create result list
results = []
image_count = 0

# Loop through test images
for fname in sorted(os.listdir(test_dir)):
    if fname.lower().endswith(('.png', '.jpg', '.jpeg')):
        image_count += 1
        img_path = os.path.join(test_dir, fname)

        # Load and preprocess image
        img = load_img(img_path, target_size=(299, 299))  # Adjust if using MobileNetV2 to (224, 224)
        img_arr = img_to_array(img) / 255.0
        img_arr = np.expand_dims(img_arr, axis=0)

        # Predict
        label_pred, class_pred = model.predict(img_arr, verbose=0)
        label = int(label_pred[0][0] > 0.5)
        class_idx = np.argmax(class_pred[0])
        class_name = class_names[class_idx]

        results.append([fname, label, class_name])
        print(f"[{image_count}] {fname} → Label: {'Real' if label else 'Fake'}, Class: {class_name}")

# Save results
df = pd.DataFrame(results, columns=["image", "label", "class"])
df.to_csv("test.csv", index=False)

# Summary output
print(f"\n✅ Done! Total images tested: {image_count}")
print("📄 test.csv saved.")





[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[7006] test_07006.jpg → Label: Real, Class: human_faces
[7007] test_07007.jpg → Label: Real, Class: human_faces
[7008] test_07008.jpg → Label: Real, Class: human_faces
[7009] test_07009.jpg → Label: Real, Class: human_faces
[7010] test_07010.jpg → Label: Real, Class: human_faces
[7011] test_07011.jpg → Label: Real, Class: human_faces
[7012] test_07012.jpg → Label: Real, Class: human_faces
[7013] test_07013.jpg → Label: Real, Class: human_faces
[7014] test_07014.jpg → Label: Real, Class: human_faces
[7015] test_07015.jpg → Label: Real, Class: human_faces
[7016] test_07016.jpg → Label: Real, Class: human_faces
[7017] test_07017.jpg → Label: Real, Class: human_faces
[7018] test_07018.jpg → Label: Real, Class: human_faces
[7019] test_07019.jpg → Label: Real, Class: human_faces
[7020] test_07020.jpg → Label: Real, Class: human_faces
[7021] test_07021.jpg → Label: Real, Class: human_faces
[7022] test_07022.jpg → Label: Real, Cl

In [13]:
model = load_model("deepfake_model.h5")  # Confirm this is trained from Xception




In [14]:
print(f"[{image_count}] {fname}")
print(f"  → label_pred: {label_pred[0][0]:.4f}")
print(f"  → class_pred: {class_pred[0]}, class = {class_names[np.argmax(class_pred[0])]}")


[12002] test_12002.jpg
  → label_pred: 0.9424
  → class_pred: [0.89327633 0.04826102 0.05846265], class = human_faces


In [15]:
from collections import Counter

train_gen = DeepfakeDataGenerator(train_dir, batch_size=32, img_size=(299, 299))
class_distribution = Counter(train_gen.classes)
label_distribution = Counter(train_gen.labels)

print("Class Distribution:", class_distribution)
print("Label Distribution:", label_distribution)


Class Distribution: Counter({'human_faces': 84000, 'vehicles': 58798, 'animals': 25200})
Label Distribution: Counter({1: 83999, 0: 83999})


In [16]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

classes = ['human_faces', 'animals', 'vehicles']
class_counts = [84000, 25200, 58798]
total = sum(class_counts)
class_weights_array = [total / c for c in class_counts]

# Normalize weights
class_weights_array = class_weights_array / np.sum(class_weights_array)
class_weights_dict = {i: w for i, w in enumerate(class_weights_array)}

print("Class Weights for class_output:", class_weights_dict)


Class Weights for class_output: {0: np.float64(0.1735522553705624), 1: np.float64(0.5785075179018747), 2: np.float64(0.24794022672756288)}


In [17]:
import tensorflow.keras.backend as K
import tensorflow as tf

def weighted_categorical_crossentropy(weights):
    weights = tf.constant(weights, dtype=tf.float32)

    def loss(y_true, y_pred):
        y_pred = K.clip(y_pred, 1e-7, 1 - 1e-7)
        loss = -K.sum(y_true * K.log(y_pred) * weights, axis=-1)
        return loss

    return loss


In [18]:
loss_fn = weighted_categorical_crossentropy(list(class_weights_array))

model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss={
        'label_output': 'binary_crossentropy',
        'class_output': loss_fn
    },
    metrics={
        'label_output': 'accuracy',
        'class_output': 'accuracy'
    }
)


In [20]:
from tensorflow.keras.callbacks import ModelCheckpoint

checkpoint = ModelCheckpoint(
    "/content/drive/MyDrive/deepfake_model.h5",  # you can change the name
    monitor='val_loss',
    save_best_only=True,
    verbose=1
)


In [21]:
model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=3,
    steps_per_epoch=200,
    validation_steps=50,
    callbacks=[checkpoint]
)


Epoch 1/3
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 843ms/step - class_output_accuracy: 0.5159 - class_output_loss: 0.4767 - label_output_accuracy: 0.4872 - label_output_loss: 2.7784 - loss: 3.2552
Epoch 1: val_loss improved from inf to 0.32605, saving model to /content/drive/MyDrive/deepfake_model.h5




[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m222s[0m 894ms/step - class_output_accuracy: 0.5161 - class_output_loss: 0.4760 - label_output_accuracy: 0.4873 - label_output_loss: 2.7730 - loss: 3.2490 - val_class_output_accuracy: 1.0000 - val_class_output_loss: 0.0135 - val_label_output_accuracy: 1.0000 - val_label_output_loss: 0.3125 - val_loss: 0.3260
Epoch 2/3
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 892ms/step - class_output_accuracy: 0.7921 - class_output_loss: 0.1985 - label_output_accuracy: 0.5682 - label_output_loss: 0.6738 - loss: 0.8722
Epoch 2: val_loss did not improve from 0.32605
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 922ms/step - class_output_accuracy: 0.7923 - class_output_loss: 0.1984 - label_output_accuracy: 0.5683 - label_outpu

<keras.src.callbacks.history.History at 0x7a3060964490>

In [26]:
import os
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import random

# Load your latest model
model = load_model("/content/drive/MyDrive/deepfake_model.h5")

# Update this if using Xception: (299, 299)
IMG_SIZE = (299, 299)  # Use (299, 299) if trained with Xception
test_dir = "/content/ArtiFact_240K/ArtiFact_240K/test"
class_names = ['human_faces', 'animals', 'vehicles']
results = []

# Get 5 random test images
test_images = [f for f in os.listdir(test_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
sample_images = random.sample(test_images, 200)  # Change number if needed

print(f"Testing {len(sample_images)} images...\n")

for idx, fname in enumerate(sample_images, 1):
    img_path = os.path.join(test_dir, fname)

    # Load and preprocess image
    img = load_img(img_path, target_size=IMG_SIZE)
    img_arr = img_to_array(img) / 255.0
    img_arr = np.expand_dims(img_arr, axis=0)

    # Predict
    label_pred, class_pred = model.predict(img_arr, verbose=0)
    label = int(label_pred[0][0] > 0.5)
    label_conf = float(label_pred[0][0])
    class_idx = np.argmax(class_pred[0])
    class_conf = float(class_pred[0][class_idx])
    class_name = class_names[class_idx]

    # Print prediction
    print(f"[{idx}] {fname} → Label: {'Real' if label else 'Fake'} ({label_conf:.2f}), "
          f"Class: {class_name} ({class_conf:.2f})")

    # Store result
    results.append([fname, label, class_name])

# Save to CSV
df = pd.DataFrame(results, columns=["image", "label", "class"])
df.to_csv("test_small.csv", index=False)

print("\n✅ test_small.csv saved.")




Testing 200 images...

[1] test_07310.jpg → Label: Real (0.52), Class: human_faces (0.77)
[2] test_05246.jpg → Label: Real (0.57), Class: human_faces (0.40)
[3] test_03605.jpg → Label: Real (0.66), Class: human_faces (0.88)
[4] test_05512.jpg → Label: Real (0.51), Class: vehicles (0.35)
[5] test_08454.jpg → Label: Real (0.88), Class: human_faces (0.98)
[6] test_08881.jpg → Label: Real (0.76), Class: human_faces (0.95)
[7] test_10653.jpg → Label: Real (0.51), Class: vehicles (0.36)
[8] test_00933.jpg → Label: Real (0.90), Class: human_faces (0.98)
[9] test_10333.jpg → Label: Real (0.54), Class: human_faces (0.37)
[10] test_06487.jpg → Label: Real (0.60), Class: human_faces (0.44)
[11] test_01330.jpg → Label: Real (0.66), Class: human_faces (0.89)
[12] test_01681.jpg → Label: Real (0.71), Class: human_faces (0.89)
[13] test_07182.jpg → Label: Real (0.58), Class: human_faces (0.80)
[14] test_08363.jpg → Label: Real (0.76), Class: human_faces (0.95)
[15] test_03416.jpg → Label: Real (0.66)