In [3]:
# Data Processing
# 1. 10% of total data is separated to use as test-set

# import pandas as pd
# import os


# df = pd.read_csv('Dataset')


#Model and dataset by @Saksham Negi

import os
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Root of the dataset. Set the DEEPFAKE_DATASET_DIR environment variable to run
# the notebook on another machine; the original local path stays the default.
dataset_dir = os.environ.get(
    'DEEPFAKE_DATASET_DIR',
    r'C:\Users\HP\OneDrive\Desktop\MachineLearningProjects\deepfake-detection\Dataset',
)
train_dir = os.path.join(dataset_dir, 'train')
val_dir = os.path.join(dataset_dir, 'validation')
test_dir = os.path.join(dataset_dir, 'test')

# Class sub-folders expected under each split directory
subdirs = {
    'train': ['fake', 'real'],
    'validation': ['fake', 'real'],
    'test': ['fake', 'real']
}

# Column order of the index DataFrame; fixed so the columns exist even when no
# image was found (otherwise df['set_type'] below raises KeyError).
RECORD_COLUMNS = ['Data', 'Label', 'set_type']


def collect_image_records(root_dir, layout):
    """Index every image file found under ``root_dir/<split>/<class>``.

    Args:
        root_dir: directory that contains one sub-directory per split.
        layout: mapping of split name -> list of class folder names.

    Returns:
        A list of dicts with keys 'Data' (file path), 'Label' (class folder
        name) and 'set_type' (split name). Missing class folders are skipped
        and nested directories are ignored. Files are listed in sorted order
        so the resulting row order is reproducible across runs.
    """
    records = []
    for set_type, classes in layout.items():
        for cls in classes:
            class_path = os.path.join(root_dir, set_type, cls)
            if not os.path.isdir(class_path):
                continue
            # os.listdir returns entries in arbitrary order; sort for determinism
            for img_name in sorted(os.listdir(class_path)):
                img_path = os.path.join(class_path, img_name)
                if os.path.isfile(img_path):
                    records.append({'Data': img_path, 'Label': cls, 'set_type': set_type})
    return records


# Collect image paths and labels, then build the index DataFrame
data = collect_image_records(dataset_dir, subdirs)
df = pd.DataFrame(data, columns=RECORD_COLUMNS)

# Split the index into training, validation, and test sets by source folder
train_df = df[df['set_type'] == 'train']
val_df = df[df['set_type'] == 'validation']
test_df = df[df['set_type'] == 'test']

# Display the shapes of the datasets
print(f"Training set shape: {train_df.shape}")
print(f"Validation set shape: {val_df.shape}")
print(f"Test set shape: {test_df.shape}")

Training set shape: (140002, 3)
Validation set shape: (39428, 3)
Test set shape: (10905, 3)


In [6]:
# 2. validation set split
# 3. rescale to normalize 
# 4. set batch size
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Fixed seed so the train/validation partition is identical on every run
SPLIT_SEED = 42

# flow_from_dataframe takes `subset` as a contiguous slice of the dataframe
# rows, and train_df lists every 'fake' image before any 'real' one. Shuffle
# the rows first so both the training and validation subsets mix the classes.
train_rows = train_df.sample(frac=1.0, random_state=SPLIT_SEED)

# NOTE(review): val_df (the on-disk 'validation' folder) is not used in this
# cell; validation data is carved out of the training rows instead.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
test_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by all three iterators
flow_options = dict(x_col='Data', y_col='Label', target_size=(256, 256),
                    class_mode='binary', batch_size=64)

train_gen = datagen.flow_from_dataframe(train_rows, shuffle=True, subset='training', **flow_options)
val_gen = datagen.flow_from_dataframe(train_rows, shuffle=False, subset='validation', **flow_options)
# The test set goes through the dedicated test generator (no validation split)
test_gen = test_datagen.flow_from_dataframe(test_df, shuffle=False, **flow_options)

Found 112002 validated image filenames belonging to 2 classes.
Found 28000 validated image filenames belonging to 2 classes.
Found 10905 validated image filenames belonging to 2 classes.


In [10]:
#convolutional layers
''' 
a) conv2D
b) maxPool2D
c) Dropout
d) Flatten
e) Dense
'''

#model compilation
'''
Optimizer = Adam optimizer
Loss = Binary Crossentropy
'''

import tensorflow as tf
# Baseline CNN: a single conv/pool stage, dropout, then a small dense head
# ending in one sigmoid unit for the binary fake/real decision.
cnn = tf.keras.models.Sequential([
    # Declare the input as its own layer rather than passing input_shape to
    # Conv2D; the recorded output of this cell appears to be the Keras warning
    # raised for the input_shape-on-a-layer form.
    tf.keras.Input(shape=(256, 256, 3)),
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])
cnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
cnn.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [17]:
#now I'm using transfer learning to improve the performance of my model

#some pre trained models I used
'''
a) DenseNet121
b) MobileNetV2
c) InceptionV3
d) ResNet50
e) VGG16
f) Xception
'''

'''
Model Compilation 
Optimizer: Adam
Loss: Binary CrossEntropy
'''

# Registry of candidate backbones: display name -> constructor plus a "perf"
# slot initialised to 0.
models = {
    label: {"model": constructor, "perf": 0}
    for label, constructor in (
        ("DenseNet121", tf.keras.applications.DenseNet121),
        ("MobileNetV2", tf.keras.applications.MobileNetV2),
        ("Inception V3", tf.keras.applications.InceptionV3),
        ("ResNet50", tf.keras.applications.ResNet50),
        ("VGG16", tf.keras.applications.VGG16),
        ("Xception", tf.keras.applications.Xception),
    )
}

def get_model(model, input_shape=(224, 224, 3), learning_rate=0.001):
    """Build a frozen pretrained backbone with a small binary classifier head.

    Args:
        model: a backbone constructor such as ``tf.keras.applications.MobileNetV2``;
            it is called with ``input_shape``, ``include_top=False`` and
            ``pooling='avg'``.
        input_shape: image shape fed to the backbone. Must match the
            ``target_size`` used by the data generators.
        learning_rate: learning rate for the Adam optimizer.

    Returns:
        A compiled ``tf.keras.Model`` (binary cross-entropy loss, accuracy
        metric) whose backbone is marked non-trainable.
    """
    backbone = model(input_shape=input_shape, include_top=False, pooling='avg')
    # Freeze the backbone so only the dense head below is updated
    backbone.trainable = False

    x = tf.keras.layers.Dense(units=64, activation='relu')(backbone.output)
    x = tf.keras.layers.Dense(units=64, activation='relu')(x)
    outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

    # Separate name so the `model` argument (the constructor) is not rebound
    classifier = tf.keras.Model(inputs=backbone.input, outputs=outputs)
    classifier.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

In [21]:
#now I'm performing data augmentation
#the main objectives that I cover are as follows
'''

a) Resize data to utilize  'mobile-net'
b) Vertical flipping
c) Image Rotation
d) Horizontal and Vertical movement
e) fill_mode = 'nearest'
'''

def create_gen(preprocessing_function=None):
    """Create the augmented training iterator and clean validation/test iterators.

    Args:
        preprocessing_function: per-image function applied by every generator.
            Defaults to ``tf.keras.applications.inception_v3.preprocess_input``
            (the original behaviour). Pass the ``preprocess_input`` that matches
            the backbone when training a different pretrained model.

    Returns:
        Tuple ``(train_generator, test_generator, train_image, val_image,
        test_image)``: the training and test ``ImageDataGenerator`` objects
        followed by the training, validation and test iterators.
    """
    if preprocessing_function is None:
        preprocessing_function = tf.keras.applications.inception_v3.preprocess_input

    make_generator = tf.keras.preprocessing.image.ImageDataGenerator
    validation_fraction = 0.2
    split_seed = 42  # fixed so the train/validation partition is reproducible

    # Augmentation options must be given to the ImageDataGenerator constructor.
    # Passed to flow_from_dataframe they are absorbed by its **kwargs and never
    # applied. `rescale` is deliberately not set: ImageDataGenerator applies it
    # after preprocessing_function, and the default preprocess_input already
    # scales the pixels.
    train_generator = make_generator(
        preprocessing_function=preprocessing_function,
        validation_split=validation_fraction,
        vertical_flip=True,
        rotation_range=45,
        width_shift_range=0.2,
        height_shift_range=0.2,
        fill_mode="nearest",
    )
    # Same split fraction but no augmentation, so validation images are
    # evaluated unmodified.
    val_generator = make_generator(
        preprocessing_function=preprocessing_function,
        validation_split=validation_fraction,
    )
    test_generator = make_generator(preprocessing_function=preprocessing_function)

    # flow_from_dataframe takes `subset` as a contiguous slice of the rows and
    # train_df lists every 'fake' image before any 'real' one, so shuffle once
    # and feed the SAME frame to both subsets to keep them mixed and disjoint.
    train_rows = train_df.sample(frac=1.0, random_state=split_seed)

    # Options shared by all three iterators
    flow_options = dict(x_col="Data", y_col="Label", target_size=(224, 224),
                        class_mode='binary', batch_size=32)

    train_image = train_generator.flow_from_dataframe(
        dataframe=train_rows, subset='training', shuffle=True, **flow_options)

    val_image = val_generator.flow_from_dataframe(
        dataframe=train_rows, subset='validation', shuffle=False, **flow_options)

    test_image = test_generator.flow_from_dataframe(
        dataframe=test_df, shuffle=False, **flow_options)

    return train_generator, test_generator, train_image, val_image, test_image
train_generator, test_generator, train_images, val_images, test_images = create_gen()


Found 112002 validated image filenames belonging to 2 classes.
Found 28000 validated image filenames belonging to 2 classes.
Found 10905 validated image filenames belonging to 2 classes.


In [29]:
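# NOTE: disabled PyTorch GAN generator/discriminator sketch. The recorded
# NameError comes from `nn` not being imported (e.g. `import torch.nn as nn`);
# nz, ngf, ndf and nc are also not defined in the cells shown, and neither
# class defines forward() yet.
# class Generator(nn.Module):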
#     def __init__(self, ngpu):
#         super(Generator, self).__init__()
#         self.ngpu=ngpu
#         self.main=nn.Sequential(
        
#             nn.ConvTranspose2d(nz, ngf*8, 4, 1, 8, bias=False),  
#             nn.BatchNorm2d(ngf*8), 
#             nn.ReLU(True),
            
#             nn.ConvTranspose2d(ngf*8, ngf *4, 4, 2, 1, bias =False), 
#             nn.BatchNorm2d(ngf*4),
#             nn.ReLU(True), 
            
#             nn.ConvTranspose2d(ngf* 4, ngf* 2, 4, 2, 1, bias= False), #Transposed Conv
#             nn.BatchNorm2d(ngf*2), 
#             nn.ReLU(True), 
        
#             nn.ConvTranspose2d(ngf *2, ngf, 4, 2, 1, bias= False), #Transposed Conv
#             nn.BatchNorm2d(ngf),
#             nn.ReLU(True),
            
#             nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias =False), 
#             nn.Tanh() 
        
#     )
# class Discriminator (nn.Module):
#     def __init__(self, ngpu):
#         super(Discriminator, self).__init__()
#         self.ngpu=ngpu
#         self.main= nn.Sequential(
        
#             nn.Conv2d(nc, ndf, 4, 2, 1, bias =False),
#             nn.LeakyReLU(0.2, inplace=True),
            
#             nn.Conv2d(ndf, ndf*2, 4, 2, 1, bias =False), 
#             nn.BatchNorm2d(ndf*2),
#             nn.LeakyReLU(0.2, inplace=True), 
            
#             nn.Conv2d(ndf*2, ndf* 4, 4, 2, 1, bias=False),
#             nn.BatchNorm2d(ndf*4), 
#             nn.LeakyReLU(0.2, inplace=True),
            
#             nn.Conv2d(ndf*4, ndf*8, 4, 2, 1, bias=False), 
#             nn.BatchNorm2d(ndf* 8), 
#             nn.LeakyReLU(0.2, inplace=True),
            
#             nn.Conv2d(ndf*8, 1, 4, 1, 0, bias= False), 
#             nn.Sigmoid() 
#     )

NameError: name 'nn' is not defined