Importing Libraries

In [None]:
import cv2
import sys
import random
import os
import glob
import numpy as np
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
import mediapipe as mp
import torch
from torch import nn
from torchvision import models
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from tqdm.autonotebook import tqdm
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

Dataset Class for Loading Video Frames and Labels, and a Frame Plotting Helper

In [None]:
# Define a custom dataset class for loading video frames and labels
class video_dataset(Dataset):
    """Dataset yielding a fixed-length clip of frames plus a binary label per video.

    Args:
        video_names: Sequence of video file paths.
        labels: DataFrame with a ``file`` column (video base names) and a
            ``label`` column containing ``'FAKE'`` or ``'REAL'``.
        sequence_length: Maximum number of frames returned for each video.
        transform: Optional callable applied to every decoded frame. When it
            is ``None`` the raw frame array is converted to a tensor as-is.
    """

    # Numeric encoding of the textual labels found in the CSV.
    LABEL_TO_INDEX = {'FAKE': 0, 'REAL': 1}

    def __init__(self, video_names, labels, sequence_length=60, transform=None):
        self.video_names = video_names  # List of video file paths
        self.labels = labels  # Label table (columns: file, label)
        self.transform = transform  # Per-frame transformation
        self.count = sequence_length  # Number of frames to extract from each video

    def __len__(self):
        """Return the number of videos in the dataset."""
        return len(self.video_names)

    def __getitem__(self, idx):
        """Return ``(frames, label)`` for the video at position ``idx``.

        Returns:
            frames: Tensor stacking at most ``sequence_length`` transformed
                frames, taken from the start of the video.
            label: ``0`` for ``'FAKE'`` and ``1`` for ``'REAL'``.

        Raises:
            KeyError: The video's base name is missing from the label table.
            ValueError: The label is neither ``'FAKE'`` nor ``'REAL'``.
            RuntimeError: No frame could be decoded from the video.
        """
        video_path = self.video_names[idx]
        video_file = os.path.basename(video_path)

        # Use the table handed to the constructor rather than a notebook
        # global, so the dataset also works inside DataLoader worker processes.
        matches = self.labels.loc[self.labels["file"] == video_file, "label"].values
        if len(matches) == 0:
            raise KeyError(f"No label found for {video_file}")
        raw_label = matches[0]
        if raw_label not in self.LABEL_TO_INDEX:
            raise ValueError(f"Unexpected label {raw_label!r} for {video_file}")
        label = self.LABEL_TO_INDEX[raw_label]

        frames = []
        for frame in self.frame_extract(video_path):
            if self.transform is not None:
                frame = self.transform(frame)
            else:
                frame = torch.from_numpy(np.ascontiguousarray(frame))
            frames.append(frame)
            if len(frames) == self.count:  # Stop once enough frames were collected
                break

        if not frames:
            raise RuntimeError(f"No frames could be read from {video_path}")

        return torch.stack(frames), label

    def frame_extract(self, path):
        """Yield the frames of the video at ``path`` one by one.

        The capture handle is released when the generator is exhausted or
        closed early by the consumer.
        """
        vidObj = cv2.VideoCapture(path)
        try:
            while True:
                success, image = vidObj.read()
                if not success:
                    break
                yield image
        finally:
            vidObj.release()

# Function to plot an image tensor
def im_plot(tensor):
    """Display one normalised frame tensor with matplotlib.

    Args:
        tensor: Image tensor laid out as (channels, height, width). It is
            expected to have been normalised with the mean/std used in
            ``get_data_transforms`` and to carry its channels in OpenCV's
            B, G, R order (frames are decoded with ``cv2.VideoCapture``).
    """
    # Must mirror the statistics passed to transforms.Normalize.
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    image = tensor.detach().cpu().numpy().transpose(1, 2, 0)  # (C, H, W) -> (H, W, C)
    # Undo the normalisation while the channels are still in the order in
    # which the statistics were applied, and only then reorder for display.
    image = image * std + mean
    image = image[:, :, ::-1]  # B, G, R -> R, G, B for matplotlib
    # Clip before casting so rounding overshoot cannot wrap around.
    image = np.clip(image * 255.0, 0, 255).astype(np.uint8)
    plt.imshow(image)
    plt.show()


Function to Count Real and Fake Videos

In [None]:
def number_of_real_and_fake_videos(data_list, labels_csv='/content/drive/MyDrive/file_names.csv'):
    """Count how many of the given videos are labelled REAL and FAKE.

    Args:
        data_list: Iterable of video file paths and/or glob patterns.
        labels_csv: Header-less CSV whose two columns are the video base name
            and its label (``'FAKE'`` or ``'REAL'``).

    Returns:
        Tuple ``(real, fake)`` with the number of videos of each class.
        Videos without a label entry are reported on stdout and not counted.
    """
    header_list = ["file", "label"]
    lab = pd.read_csv(labels_csv, names=header_list)
    # One dictionary lookup per video instead of a full-table scan; the first
    # row wins when a file name is listed more than once.
    label_by_file = lab.drop_duplicates("file").set_index("file")["label"].to_dict()

    fake = 0
    real = 0

    for entry in data_list:
        # An existing file is taken literally so that names containing glob
        # metacharacters such as '[' or ']' are not silently dropped.
        file_paths = [entry] if os.path.isfile(entry) else glob.glob(entry)
        for file_path in file_paths:
            temp_video = os.path.basename(file_path)
            if temp_video not in label_by_file:
                print(f"No label found for {temp_video}")
                continue

            label = label_by_file[temp_video]
            if label == 'FAKE':
                fake += 1
            elif label == 'REAL':
                real += 1

    return real, fake


Function to Define and Load Data Transforms

In [None]:

def get_data_transforms():
    """Build the frame preprocessing pipelines for training and testing.

    Both pipelines are identical: convert to a PIL image, resize to
    112x112, convert to a tensor and normalise per channel.

    Returns:
        Tuple ``(train_transforms, test_transforms)`` of two separate
        ``transforms.Compose`` objects.
    """
    side = 112  # Target width and height in pixels
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    def build_pipeline():
        # A fresh Compose per call so the two pipelines share no state.
        return transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((side, side)),
            transforms.ToTensor(),
            transforms.Normalize(channel_mean, channel_std),
        ])

    train_transforms = build_pipeline()
    test_transforms = build_pipeline()
    return train_transforms, test_transforms


Model Definition with Feature Visualization

In [None]:
# Define a custom neural network model with feature visualization
class Model(nn.Module):
    """ResNeXt-50 frame encoder followed by an LSTM and a linear classifier.

    Args:
        num_classes: Number of output classes.
        latent_dim: Channel count of the backbone feature map, which is also
            the LSTM input size.
        lstm_layers: Number of stacked LSTM layers.
        hidden_dim: LSTM hidden size.
        bidirectional: Whether the LSTM runs in both temporal directions.

    Notes:
        * The LSTM options are passed by keyword. The fourth positional
          argument of ``nn.LSTM`` is ``bias``, so passing ``bidirectional``
          positionally silently disabled the bias and ignored the flag.
          ``bias=False`` is kept explicitly so the ``state_dict`` layout stays
          identical to checkpoints saved by earlier versions of this class.
        * ``batch_first=True`` makes the LSTM iterate over the frames of each
          clip. With the default layout it iterated over the batch dimension,
          mixing different videos. Checkpoints trained before this fix still
          load, but should be re-validated or fine-tuned.
    """

    def __init__(self, num_classes, latent_dim=2048, lstm_layers=1, hidden_dim=1024, bidirectional=False):
        super(Model, self).__init__()
        # Pre-trained ResNeXt-50 without its last two children (pooling and
        # classification head), so it outputs spatial feature maps.
        backbone = models.resnext50_32x4d(pretrained=True)
        self.model = nn.Sequential(*list(backbone.children())[:-2])
        self.latent_dim = latent_dim
        self.lstm = nn.LSTM(
            input_size=latent_dim,
            hidden_size=hidden_dim,
            num_layers=lstm_layers,
            bias=False,
            batch_first=True,
            bidirectional=bidirectional,
        )
        self.relu = nn.LeakyReLU()
        self.dp = nn.Dropout(0.4)
        # A bidirectional LSTM concatenates both directions, doubling the width.
        lstm_out_dim = hidden_dim * (2 if bidirectional else 1)
        self.linear1 = nn.Linear(lstm_out_dim, num_classes)
        self.avgpool = nn.AdaptiveAvgPool2d(1)

    def forward(self, x):
        """Run the model on a batch of clips.

        Args:
            x: Tensor of shape (batch_size, seq_length, channels, height, width).

        Returns:
            Tuple ``(fmap, scores)``: the backbone feature maps for all
            ``batch_size * seq_length`` frames, and the class scores of shape
            (batch_size, num_classes) after the linear layer and dropout.
        """
        batch_size, seq_length, c, h, w = x.shape
        # Fold time into the batch so the CNN sees individual frames.
        x = x.view(batch_size * seq_length, c, h, w)
        fmap = self.model(x)
        x = self.avgpool(fmap)
        # Back to one feature vector per frame: (batch, seq, latent_dim).
        x = x.view(batch_size, seq_length, self.latent_dim)
        x_lstm, _ = self.lstm(x, None)
        # Average the LSTM outputs over time before classifying.
        return fmap, self.dp(self.linear1(torch.mean(x_lstm, dim=1)))


In [None]:
# Create an instance of the Model class with 2 output classes and move it to the GPU
model = Model(2).cuda()

# Smoke-test the forward pass with a dummy clip of shape
# (batch_size, seq_length, channels, height, width).
# The input is zero-filled (np.empty returns uninitialised memory that may hold
# NaN/inf) and the pass runs in eval mode without gradients, so the BatchNorm
# running statistics of the model that is trained later are left untouched.
model.eval()
with torch.no_grad():
    a, b = model(torch.zeros((1, 20, 3, 112, 112), dtype=torch.float32, device='cuda'))
model.train()


Functions For Training, Testing and Metrics

In [None]:
def train_epoch(epoch, num_epochs, data_loader, model, criterion, optimizer):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        epoch: Current epoch number (used only in the progress line).
        num_epochs: Total number of epochs (used only in the progress line).
        data_loader: Iterable yielding ``(inputs, targets)`` batches.
        model: Network whose forward pass returns ``(feature_maps, outputs)``.
        criterion: Loss taking ``(outputs, targets)``.
        optimizer: Optimizer updating the model parameters.

    Returns:
        Tuple ``(average_loss, average_accuracy)`` over the epoch, weighted by
        batch size; the accuracy is a percentage.
    """
    model.train()
    # Follow the model's device instead of hard-coding CUDA tensor types, so
    # the loop also runs on CPU-only machines.
    device = next(model.parameters()).device
    losses = AverageMeter()
    accuracies = AverageMeter()

    for i, (inputs, targets) in enumerate(data_loader):
        inputs = inputs.to(device)
        targets = targets.to(device).long()  # class indices must be int64

        _, outputs = model(inputs)
        loss = criterion(outputs, targets)
        acc = calculate_accuracy(outputs, targets)

        losses.update(loss.item(), inputs.size(0))
        accuracies.update(acc, inputs.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # '\r' rewrites the same console line on every batch.
        sys.stdout.write(
            "\r[Epoch %d/%d] [Batch %d / %d] [Loss: %f, Acc: %.2f%%]"
            % (
                epoch,
                num_epochs,
                i + 1,
                len(data_loader),
                losses.avg,
                accuracies.avg))

    return losses.avg, accuracies.avg

def test(epoch, model, data_loader, criterion):
    """Evaluate ``model`` on ``data_loader`` without updating it.

    Args:
        epoch: Current epoch number (accepted for call-site symmetry; unused).
        model: Network whose forward pass returns ``(feature_maps, outputs)``.
        data_loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss taking ``(outputs, targets)``.

    Returns:
        Tuple ``(true, pred, average_loss, average_accuracy)`` where ``true``
        and ``pred`` are flat lists of class indices for every sample seen.
    """
    print('Testing')
    model.eval()
    # Follow the model's device instead of hard-coding CUDA tensor types.
    device = next(model.parameters()).device
    losses = AverageMeter()
    accuracies = AverageMeter()
    pred = []
    true = []

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(data_loader):
            inputs = inputs.to(device)
            targets = targets.to(device).long()  # class indices must be int64

            _, outputs = model(inputs)
            # torch.mean keeps this working with an unreduced criterion too.
            loss = torch.mean(criterion(outputs, targets))
            acc = calculate_accuracy(outputs, targets)

            _, p = torch.max(outputs, 1)
            true += targets.detach().cpu().reshape(-1).tolist()
            pred += p.detach().cpu().reshape(-1).tolist()

            losses.update(loss.item(), inputs.size(0))
            accuracies.update(acc, inputs.size(0))

            # '\r' rewrites the same console line on every batch.
            sys.stdout.write(
                "\r[Batch %d / %d] [Loss: %f, Acc: %.2f%%]"
                % (
                    i + 1,
                    len(data_loader),
                    losses.avg,
                    accuracies.avg
                )
            )
        print('\nAccuracy {}'.format(accuracies.avg))

    return true, pred, losses.avg, accuracies.avg

class AverageMeter(object):
    """Track the latest value and the running weighted mean of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set the latest value, mean, weighted sum and sample count to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

def calculate_accuracy(outputs, targets):
    """Return the top-1 accuracy of ``outputs`` against ``targets`` in percent.

    Args:
        outputs: Score tensor of shape (batch, classes).
        targets: Tensor of class indices, one per sample.
    """
    n_samples = targets.size(0)

    # Index of the highest score in each row, as a (batch, 1) tensor.
    top1 = outputs.topk(1, 1, True)[1]
    # Line predictions and targets up as row vectors and compare element-wise.
    hits = top1.t().eq(targets.view(1, -1))
    n_hits = hits.float().sum().item()
    return 100 * n_hits / n_samples


Functions to Plot Confusion Matrix, Loss and Accuracy

In [None]:
# Function to output and plot the confusion matrix
def print_confusion_matrix(y_true, y_pred):
    """Print the confusion-matrix cells, plot the matrix and print the accuracy.

    Args:
        y_true: Ground-truth class indices (0 = Fake, 1 = Real).
        y_pred: Predicted class indices (0 = Fake, 1 = Real).
    """
    # Pin the label set so the matrix is always 2x2, even when only one class
    # occurs in the data. Rows are actual labels, columns are predictions.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

    # Class 0 (Fake) is the positive class, so cm[0][0] is a true positive.
    # Actual-Real-predicted-Fake (cm[1][0]) is then the false positive and
    # actual-Fake-predicted-Real (cm[0][1]) the false negative.
    print('True positive = ', cm[0][0])
    print('False positive = ', cm[1][0])
    print('False negative = ', cm[0][1])
    print('True negative = ', cm[1][1])
    print('\n')

    df_cm = pd.DataFrame(cm, range(2), range(2))

    sn.set(font_scale=1.4)  # for label size

    sn.heatmap(df_cm, annot=True, annot_kws={"size": 16})  # font size
    plt.ylabel('Actual label', size=20)
    plt.xlabel('Predicted label', size=20)
    # Heatmap cells are centred on 0.5 and 1.5, so the ticks go there too.
    plt.xticks(np.arange(2) + 0.5, ['Fake', 'Real'], size=16)
    plt.yticks(np.arange(2) + 0.5, ['Fake', 'Real'], size=16)
    plt.ylim([2, 0])  # Keep the first class on the top row
    plt.show()

    total = cm.sum()
    # Guard against empty inputs, which would otherwise divide by zero.
    calculated_acc = (cm[0][0] + cm[1][1]) / total if total else 0.0
    print("Calculated Accuracy", calculated_acc * 100)

# Function to plot training and validation loss
def plot_loss(train_loss_avg, test_loss_avg, num_epochs):
    """Plot per-epoch training and validation loss on one figure.

    Args:
        train_loss_avg: Average training loss for each epoch.
        test_loss_avg: Average validation loss for each epoch.
        num_epochs: Number of epochs; the x axis runs from 1 to this value.
    """
    epoch_axis = range(1, num_epochs + 1)
    curves = (
        (train_loss_avg, 'g', 'Training loss'),
        (test_loss_avg, 'b', 'Validation loss'),
    )
    for series, line_format, legend_name in curves:
        plt.plot(epoch_axis, series, line_format, label=legend_name)

    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

# Function to plot training and validation accuracy
def plot_accuracy(train_accuracy, test_accuracy, num_epochs):
    """Plot per-epoch training and validation accuracy on one figure.

    Args:
        train_accuracy: Average training accuracy for each epoch.
        test_accuracy: Average validation accuracy for each epoch.
        num_epochs: Number of epochs; the x axis runs from 1 to this value.
    """
    epoch_axis = range(1, num_epochs + 1)
    curves = (
        (train_accuracy, 'g', 'Training accuracy'),
        (test_accuracy, 'b', 'Validation accuracy'),
    )
    for series, line_format, legend_name in curves:
        plt.plot(epoch_axis, series, line_format, label=legend_name)

    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()


Functions for Image Conversion and Prediction

In [None]:
# Frame size and normalisation statistics; these must match get_data_transforms.
im_size = 112
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Softmax over the class dimension of (batch, classes) scores. The dimension
# is explicit because the implicit choice is deprecated and emits a warning.
sm = nn.Softmax(dim=1)
# Normalize computes (x - m) / s, so m = -mean/std and s = 1/std undo the
# forward normalisation: x * std + mean.
inv_normalize = transforms.Normalize(mean=-1*np.divide(mean, std), std=np.divide([1, 1, 1], std))

def im_convert(tensor):
    """Turn a normalised image tensor into an (H, W, C) array in [0, 1].

    As a side effect the image, scaled by 255, is written to ``./2.png``.

    Args:
        tensor: Image tensor; singleton dimensions are squeezed away before
            the inverse normalisation is applied.

    Returns:
        The de-normalised image as a NumPy array clipped to [0, 1].
    """
    # Work on a detached CPU copy so the caller's tensor is left untouched.
    restored = inv_normalize(tensor.to("cpu").clone().detach().squeeze())
    # (C, H, W) -> (H, W, C), then clamp into the displayable range.
    pixels = restored.numpy().transpose(1, 2, 0).clip(0, 1)
    cv2.imwrite('./2.png', pixels*255)
    return pixels

#  Predict the class of an image and visualize the result. 
def predict(model, img, path='./', confidence_threshold=95.9):
    """Classify one clip and visualise an activation heatmap on its last frame.

    Args:
        model: Network whose forward pass returns ``(feature_maps, logits)``
            and which exposes its final linear layer as ``linear1``.
        img: Clip tensor of shape (1, seq_length, channels, height, width).
        path: Directory into which the overlay image ``1.png`` is written.
        confidence_threshold: Minimum confidence (percent) required to trust
            the predicted class.

    Returns:
        ``[predicted_class, confidence]`` when the confidence reaches the
        threshold, otherwise ``[1, confidence]`` (class 1 is the fallback).
    """
    # Follow the model's device rather than assuming CUDA; no gradients are
    # needed for inference.
    device = next(model.parameters()).device
    with torch.no_grad():
        fmap, logits = model(img.to(device))
    weight_softmax = model.linear1.weight.detach().cpu().numpy()

    probs = torch.softmax(logits, dim=1)
    _, prediction = torch.max(probs, 1)
    confidence = probs[:, int(prediction.item())].item() * 100

    print('Confidence of prediction:', confidence)

    # Project the last frame's feature map onto the classifier weights.
    # NOTE(review): the reshape only works while linear1.weight holds a
    # multiple of `nc` values (2 x 1024 == 2048 with the default settings).
    bz, nc, h, w = fmap.shape
    out = np.dot(fmap[-1].detach().cpu().numpy().reshape((nc, h * w)).T,
                 weight_softmax[:, :].reshape((nc, -1)))
    cam = out.reshape(h, w)
    cam = cam - np.min(cam)
    peak = np.max(cam)
    # A constant map has peak 0; avoid dividing by zero in that case.
    cam = cam / peak if peak > 0 else np.zeros_like(cam)
    cam_u8 = np.uint8(255 * cam)
    # Resize to the frame's own spatial size so the overlay always lines up.
    frame_h, frame_w = img.shape[-2:]
    heatmap = cv2.applyColorMap(cv2.resize(cam_u8, (frame_w, frame_h)), cv2.COLORMAP_JET)

    # Blend the heatmap with the last frame and save the result.
    frame = im_convert(img[:, -1, :, :, :])
    result = heatmap * 0.5 + frame * 0.8 * 255
    # The blend can exceed 255; clip and cast so the saved file is well defined.
    cv2.imwrite(os.path.join(path, '1.png'), np.clip(result, 0, 255).astype(np.uint8))

    # The heatmap and the frame are in OpenCV's B, G, R order, while
    # matplotlib expects R, G, B values within [0, 1].
    result1 = np.clip(heatmap * 0.5 / 255 + frame * 0.8, 0, 1)[:, :, ::-1]
    plt.imshow(result1)
    plt.show()

    if confidence >= confidence_threshold:
        return [int(prediction.item()), confidence]
    else:
        return [1, confidence]


Class Definition for Validation Dataset Creation

In [None]:
class ValidationDataset(Dataset):
    """Dataset turning each video into one face-cropped clip for inference.

    Args:
        video_names: List of paths to video files.
        sequence_length: Maximum number of frames taken from each video.
        transform: Optional callable applied to every (cropped) frame. When
            it is ``None`` the frame array is converted to a tensor as-is.
    """

    def __init__(self, video_names, sequence_length=60, transform=None):
        self.video_names = video_names  # List of paths to video files
        self.transform = transform  # Per-frame transformation
        self.count = sequence_length  # Number of frames to sample from each video

    def __len__(self):
        """Return the number of videos in the dataset."""
        return len(self.video_names)

    def __getitem__(self, idx):
        """Return the clip for the video at ``idx`` with a leading batch dimension.

        Each frame is cropped to the first face MediaPipe detects; frames
        without a detection are used uncropped.

        Raises:
            RuntimeError: No frame could be decoded from the video.
        """
        video_path = self.video_names[idx]
        frames = []

        mp_face_detection = mp.solutions.face_detection
        # The context manager releases the detector's resources on exit.
        with mp_face_detection.FaceDetection(min_detection_confidence=0.5) as detector:
            for frame in self.frame_extract(video_path):
                # MediaPipe expects RGB, OpenCV decodes to BGR.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = detector.process(rgb_frame)

                if results.detections:
                    bbox = results.detections[0].location_data.relative_bounding_box
                    frame = self._crop_to_bbox(frame, bbox)

                if self.transform is not None:
                    frame = self.transform(frame)
                else:
                    frame = torch.from_numpy(np.ascontiguousarray(frame))

                frames.append(frame)

                # Stop after collecting the required number of frames
                if len(frames) == self.count:
                    break

        if not frames:
            raise RuntimeError(f"No frames could be read from {video_path}")

        return torch.stack(frames).unsqueeze(0)  # Add a batch dimension

    @staticmethod
    def _crop_to_bbox(frame, bbox):
        """Crop ``frame`` to a relative bounding box, clamped to the image.

        ``bbox`` provides ``xmin``, ``ymin``, ``width`` and ``height`` as
        fractions of the frame size. Coordinates outside the image are
        clamped, because a negative start index would otherwise be read as
        "count from the end". A box with no area inside the image returns
        the frame unchanged.
        """
        ih, iw = frame.shape[:2]
        x0 = int(bbox.xmin * iw)
        y0 = int(bbox.ymin * ih)
        x1 = x0 + int(bbox.width * iw)
        y1 = y0 + int(bbox.height * ih)
        x0, y0 = max(x0, 0), max(y0, 0)
        x1, y1 = min(x1, iw), min(y1, ih)
        if x1 <= x0 or y1 <= y0:
            return frame
        return frame[y0:y1, x0:x1, :]

    def frame_extract(self, path):
        """Yield the frames of the video at ``path`` one by one.

        The capture handle is released when the generator is exhausted or
        closed early by the consumer.
        """
        vidObj = cv2.VideoCapture(path)  # Open video file
        try:
            while True:
                success, image = vidObj.read()  # Read a frame from the video
                if not success:
                    break
                yield image
        finally:
            vidObj.release()


Preparing Video Data and Creating Data Loaders

In [None]:


# Fix the random seed so the shuffle and the train/validation split are
# reproducible across kernel restarts.
SEED = 42
random.seed(SEED)

# Retrieve a list of face-only video file paths. glob returns files in
# arbitrary order, so sort first to make the seeded shuffle deterministic.
video_files = sorted(glob.glob('/content/drive/MyDrive/Face_only_Videos/*.mp4'))
random.shuffle(video_files)


# Load labels from CSV file into a DataFrame
header_list = ["file", "label"]
labels = pd.read_csv('/content/drive/MyDrive/file_names.csv', names=header_list)
lab = labels  # Module-level alias kept for code that looks labels up through `lab`

train_videos, valid_videos = train_test_split(video_files, test_size=0.3, random_state=SEED)

# Print the number of real and fake videos in both the training and validation sets.
# Each call re-reads the CSV, so count once per split.
train_real, train_fake = number_of_real_and_fake_videos(train_videos)
valid_real, valid_fake = number_of_real_and_fake_videos(valid_videos)
print("TRAIN: ", "Real:", train_real, " Fake:", train_fake)
print("TEST: ", "Real:", valid_real, " Fake:", valid_fake)

# Get data transformations for training and testing
train_transforms, test_transforms = get_data_transforms()

# Create dataset objects for training and validation data
train_data = video_dataset(train_videos, labels, sequence_length=10, transform=train_transforms)
val_data = video_dataset(valid_videos, labels, sequence_length=10, transform=test_transforms)

# Create DataLoader objects for training and validation datasets
train_loader = DataLoader(train_data, batch_size=4, shuffle=True, num_workers=4)
valid_loader = DataLoader(val_data, batch_size=4, shuffle=False, num_workers=4)

# Retrieve an example clip and label from the training dataset
image, label = train_data[0]

# Plot the first frame of the retrieved example clip
im_plot(image[0, :, :, :])


Training and Evaluation of the Model

In [None]:

lr = 1e-7  # learning rate
num_epochs = 30
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)

# Define the loss function and move it to the GPU
criterion = nn.CrossEntropyLoss().cuda()

# Per-epoch average training and validation losses and accuracies
train_loss_avg = []
train_accuracy = []
test_loss_avg = []
test_accuracy = []
# Defined up front so the reporting below still works if training stops
# before the first evaluation has finished.
true, pred = [], []

try:
    # Training loop for the specified number of epochs
    for epoch in range(1, num_epochs + 1):
        l, acc = train_epoch(epoch, num_epochs, train_loader, model, criterion, optimizer)
        train_loss_avg.append(l)
        train_accuracy.append(acc)

        # Evaluate model performance on validation set
        true, pred, tl, t_acc = test(epoch, model, valid_loader, criterion)
        test_loss_avg.append(tl)
        test_accuracy.append(t_acc)

except RuntimeError as e:
    if "NaN" in str(e):
        print("NaN detected during forward pass. Investigate further.")
    else:
        raise  # bare raise keeps the original traceback

# Save the trained model's weights
torch.save(model.state_dict(), '/content/checkpoint.pt')

# An interrupted run can leave one more training entry than validation
# entries; plot only the epochs for which both are available.
completed_epochs = min(len(train_loss_avg), len(test_loss_avg))
if completed_epochs:
    plot_loss(train_loss_avg[:completed_epochs], test_loss_avg[:completed_epochs], completed_epochs)
    plot_accuracy(train_accuracy[:completed_epochs], test_accuracy[:completed_epochs], completed_epochs)
    print(confusion_matrix(true, pred))
else:
    print("No epoch completed; nothing to plot.")

Classify the Videos

In [None]:

train_transforms, test_transforms = get_data_transforms()

# Define the path to the video file(s) to be processed
path_to_videos = ["/content/video.mp4"]

# Create a ValidationDataset instance. It gets its own name so that the
# `video_dataset` class used for training is not overwritten (which would
# break re-running the data preparation cell), and it uses the evaluation
# transform because this is inference.
inference_dataset = ValidationDataset(path_to_videos, sequence_length=20, transform=test_transforms)
model = Model(2).cuda()

# Load the pre-trained model's weights
path_to_model = '/content/drive/MyDrive/checkpoint.pt'
model.load_state_dict(torch.load(path_to_model))
model.eval()


for i, video_path in enumerate(path_to_videos):
    print(video_path)
    # Make a prediction using the model and the video data
    prediction = predict(model, inference_dataset[i], './')

    # Print the result based on the prediction (class 1 = REAL, otherwise FAKE)
    if prediction[0] == 1:
        print("REAL")
    else:
        print("FAKE")
