In [1]:
import os
import cv2
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import LSTM, Dense, TimeDistributed, Flatten, Dropout, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.utils import resample
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications import MobileNetV2 
from tensorflow.keras.applications import EfficientNetB0


In [2]:


# Define the folder containing the fake videos
fake_folder = r'C:\Users\Suyash Tambe\Desktop\Deepfakevideo\train_samples\FAKE'


# Get a list of all fake video files
fake_videos = os.listdir(fake_folder)

# Number of real videos you want to match
num_real_videos = 77

# If fake videos are more than the real videos, delete excess
if len(fake_videos) > num_real_videos:
    # Randomly select videos to keep
    videos_to_keep = random.sample(fake_videos, num_real_videos)
    
    # Delete excess videos
    for video in fake_videos:
        if video not in videos_to_keep:
            video_path = os.path.join(fake_folder, video)
            os.remove(video_path)
            print(f"Deleted: {video_path}")

print("Fake videos balanced to match the number of real videos.")


Fake videos balanced to match the number of real videos.


In [3]:
# Function to load videos from a folder, resize frames, and limit frames
def load_videos_from_folder(folder, frame_limit=100, resize=(199, 199)):
    videos = []
    for filename in os.listdir(folder):
        video_path = os.path.join(folder, filename)
        video = cv2.VideoCapture(video_path)
        frames = []
        count = 0
        while video.isOpened() and count < frame_limit:
            ret, frame = video.read()
            if not ret:
                break
            frame = cv2.resize(frame, resize)
            frame = frame / 255.0  
            frames.append(frame)
            count += 1
        video.release()
        videos.append(np.array(frames, dtype=np.float32)) 
    return videos

In [None]:
# Define deeper CNN model with adjusted pooling layers
def build_deep_cnn(input_shape):
    base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=input_shape)
    base_model.trainable = False  # Freeze base model layers

    cnn_model = Sequential()
    
    
    cnn_model.add(base_model)

    # Add more Conv2D layers for depth
    cnn_model.add(Conv2D(256, (3, 3), activation='relu', padding='same'))
    cnn_model.add(BatchNormalization())
    cnn_model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))  # Use 'same' padding to preserve dimensions
    cnn_model.add(Dropout(0.3))
    
    cnn_model.add(Conv2D(512, (3, 3), activation='relu', padding='same'))
    cnn_model.add(BatchNormalization())
    cnn_model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
    cnn_model.add(Dropout(0.3))

    # Adjust pooling size to avoid reducing the dimensions too much
    cnn_model.add(Conv2D(512, (3, 3), activation='relu', padding='same'))
    cnn_model.add(BatchNormalization())
    cnn_model.add(MaxPooling2D(pool_size=(1, 1), padding='same'))  # Smaller pool size
    cnn_model.add(Dropout(0.4))

    # Add a Flatten layer to prepare for Dense layers
    cnn_model.add(Flatten())
    
    return cnn_model

# Build the CNN model for frame-level classification
def build_cnn_model(input_shape, sequence_length):
    cnn = build_deep_cnn(input_shape)
    
    model = Sequential()
    
    # Apply CNN to each frame using TimeDistributed
    model.add(TimeDistributed(cnn, input_shape=(sequence_length, *input_shape)))
    
    # Flatten all frames for final classification
    model.add(Flatten())
    
    # Fully connected layers with increased size and BatchNormalization
    model.add(Dense(512, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))

    model.add(Dense(256, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    
    # Output layer
    model.add(Dense(1, activation='softmax'))
    
    # Compile the model with sgd optimizer and lower learning rate
    model.compile(optimizer=Adam(learning_rate=0.00001), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Load real and fake videos
real_videos = load_videos_from_folder(r'C:\Users\Suyash Tambe\Desktop\Deepfakevideo\train_samples\REAL', frame_limit=100, resize=(199, 199))
fake_videos = load_videos_from_folder(r'C:\Users\Suyash Tambe\Desktop\Deepfakevideo\train_samples\FAKE', frame_limit=100, resize=(199, 199))

# Define parameters
sequence_length = 100  # Number of frames in each video
frame_shape = (199, 199, 3)  # Frame size
batch_size = 32

# Build the model (CNN only)
cnn_model = build_cnn_model(frame_shape, sequence_length)

# Print model summary
cnn_model.summary()


  super().__init__(**kwargs)


In [None]:
def video_generator(real_videos, fake_videos, batch_size):
    while True:
        X_batch = []
        y_batch = []
        for _ in range(batch_size):
            # Randomly pick a video from real or fake
            if np.random.rand() < 0.5:
                idx = np.random.randint(len(real_videos))
                X_batch.append(real_videos[idx])
                y_batch.append(1)  # Real label
            else:
                idx = np.random.randint(len(fake_videos))
                X_batch.append(fake_videos[idx])
                y_batch.append(0)  # Fake label
            
        X_batch = np.array(X_batch, dtype=np.float32)
        y_batch = np.array(y_batch, dtype=np.float32)
        yield X_batch, y_batch




# Create the data generator
train_gen = video_generator(real_videos, fake_videos, batch_size)

# Calculate steps per epoch 
steps_per_epoch = len(real_videos) // batch_size

# Define callback for saving the model
model_checkpoint = ModelCheckpoint('cnn_deepfake_best.keras', save_best_only=True, monitor='accuracy', mode='max', verbose=1)

# Try-except block to save the model if crash occurs
try:
    # Train the model using the generator with 6 epochs
    cnn_model.fit(train_gen, steps_per_epoch=steps_per_epoch, epochs=6, callbacks=[model_checkpoint])
except Exception as e:
    print(f"Training interrupted due to error: {e}")
    # Save model in case of crash
    cnn_model.save('cnn_deepfake_interrupted.keras')
    print("Model saved after interruption.")
else:
    # Save the final trained model
    cnn_model.save('2_cnn_deepfake_final.keras')
    print("Training completed and model saved.")


Epoch 1/6
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 442s/step - accuracy: 0.5703 - loss: 0.9376    
Epoch 1: accuracy improved from -inf to 0.54688, saving model to cnn_deepfake_best.keras
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1930s[0m 452s/step - accuracy: 0.5625 - loss: 0.9463
Epoch 2/6
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 436s/step - accuracy: 0.7188 - loss: 0.6626  
Epoch 2: accuracy improved from 0.54688 to 0.71875, saving model to cnn_deepfake_best.keras
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m875s[0m 442s/step - accuracy: 0.7188 - loss: 0.6711
Epoch 3/6
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 441s/step - accuracy: 0.5625 - loss: 0.9907  
Epoch 3: accuracy did not improve from 0.71875
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m864s[0m 442s/step - accuracy: 0.5625 - loss: 0.9818
Epoch 4/6
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 441s/step - accurac