In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

'''import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))'''

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import numpy as np
from torch.optim.lr_scheduler import ReduceLROnPlateau
import sklearn.metrics
from tqdm import tqdm
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
import os
from sklearn.decomposition import PCA

# Dataset class for loading video deepfake detection data
class NumpyVideoDeepfakeDataset(Dataset):
    """Dataset backed by ``.npy`` files of precomputed features and optional labels.

    Args:
        features_file: Path passed to ``np.load`` for the feature array; one
            sample is one entry along the first axis.
        labels_file: Optional path passed to ``np.load`` for the labels. When
            omitted (or falsy), the dataset yields features only.
    """

    def __init__(self, features_file, labels_file=None):
        self.features = np.load(features_file)
        if labels_file:
            self.labels = np.load(labels_file)
        else:
            self.labels = None

    def __len__(self):
        """Return the number of samples (length of the feature array)."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return ``feature`` or ``(feature, label)`` as float32 tensors."""
        sample = torch.tensor(self.features[idx], dtype=torch.float32)

        # Unlabelled datasets (e.g. inference) hand back the feature alone.
        if self.labels is None:
            return sample

        target = torch.tensor(self.labels[idx], dtype=torch.float32)
        return sample, target

# Simple fully connected model for deepfake detection
class VideoDeepfakeModel(nn.Module):
    """Three-layer fully connected network emitting one logit per input row.

    Architecture: ``input_size -> hidden_size -> 256 -> 1`` with LeakyReLU
    (slope 0.01) and dropout after each of the first two linear layers.

    Args:
        input_size: Width of each input feature vector.
        hidden_size: Width of the first hidden layer.
        dropout_prob: Dropout probability used after both hidden activations.
    """

    def __init__(self, input_size=1000, hidden_size=512, dropout_prob=0.2):
        super().__init__()
        # The attribute name `fc` and the layer order determine the
        # state_dict keys (fc.0.*, fc.3.*, fc.6.*), so both must stay as-is
        # for saved checkpoints to keep loading.
        layers = [
            nn.Linear(input_size, hidden_size),
            nn.LeakyReLU(negative_slope=0.01),
            nn.Dropout(dropout_prob),
            nn.Linear(hidden_size, 256),
            nn.LeakyReLU(negative_slope=0.01),
            nn.Dropout(dropout_prob),
            nn.Linear(256, 1),  # single raw logit for binary classification
        ]
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Return raw logits (no sigmoid applied) with a trailing dim of 1."""
        logits = self.fc(x)
        return logits
    
def calculate_accuracy(outputs, labels, threshold=0.5):
    """Return the fraction of thresholded predictions that equal ``labels``.

    Args:
        outputs: Tensor of raw logits; a sigmoid is applied here.
        labels: Tensor of target values compared element-wise to the
            binarised predictions.
        threshold: Probability at or above which a prediction counts as 1.

    Returns:
        Accuracy as a Python float: matching elements divided by
        ``len(labels)``.
    """
    # Logits -> probabilities -> hard 0/1 predictions.
    predicted = torch.sigmoid(outputs).ge(threshold).float()

    # Count element-wise agreements with the targets.
    hits = predicted.eq(labels).float().sum()

    return (hits / len(labels)).item()

# Compute Equal Error Rate (EER) for the given labels and predictions
def compute_eer(label, pred, positive_label=1):
    """Estimate the Equal Error Rate from scores and ground-truth labels.

    The ROC curve is computed with ``sklearn.metrics.roc_curve``; the EER is
    taken at the ROC point where the false-negative and false-positive rates
    are closest, and reported as the mean of the two rates at that point.

    Args:
        label: Ground-truth labels, forwarded to ``roc_curve``.
        pred: Prediction scores, forwarded to ``roc_curve``.
        positive_label: Label value treated as the positive class.

    Returns:
        The averaged FPR/FNR at the closest-crossing ROC point.
    """
    false_pos, true_pos, _ = sklearn.metrics.roc_curve(label, pred, pos_label=positive_label)
    false_neg = 1 - true_pos

    # Index of the ROC point where FNR and FPR are nearest to each other.
    crossing = np.nanargmin(np.absolute(false_neg - false_pos))

    return (false_pos[crossing] + false_neg[crossing]) / 2

# Preprocessing for ResNet input
def preprocess_frame(frame):
    """Turn one image frame into a normalised tensor.

    The frame is resized to 224x224, converted with ``ToTensor`` and then
    normalised per channel with the mean/std constants below (the values
    conventionally used with torchvision's ImageNet-pretrained models).

    Args:
        frame: Image accepted by the torchvision transforms (the caller in
            this file passes an RGB PIL image).

    Returns:
        The transformed tensor.
    """
    steps = [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    pipeline = transforms.Compose(steps)
    return pipeline(frame)

# Use a pre-trained ResNet18 model for feature extraction
def extract_features_from_video_frames(frames_dir, device):
    """Extract one ResNet18 feature vector per frame image in a directory.

    Every entry of ``frames_dir`` (in sorted name order) is opened as an
    image, preprocessed with ``preprocess_frame`` and passed through a
    pre-trained torchvision ResNet18; the flattened network output is used as
    the frame's feature vector.

    Args:
        frames_dir: Directory whose entries are the frame image files.
        device: Torch device (or device string) on which ResNet18 runs.

    Returns:
        A numpy array with one row per frame and 1000 columns. An empty
        directory yields an array of shape ``(0, 1000)`` so callers can test
        ``len(result) == 0``.

    Raises:
        ValueError: If the extracted vectors are not 1000-dimensional and
            there are too few frames/dimensions for the PCA projection to
            1000 components.
    """
    expected_dim = 1000  # width expected by the downstream classifier

    frame_names = sorted(os.listdir(frames_dir))
    if not frame_names:
        # Return a well-shaped empty array instead of failing later on
        # `shape[1]` of a 1-D array; also avoids loading the network for nothing.
        return np.empty((0, expected_dim), dtype=np.float32)

    # Load pre-trained ResNet model and switch it to inference mode.
    resnet = models.resnet18(pretrained=True)
    resnet = resnet.to(device)
    resnet.eval()

    features_list = []
    for frame_name in frame_names:
        frame_path = os.path.join(frames_dir, frame_name)

        # The context manager closes the underlying file handle; `convert`
        # returns an independent in-memory copy that outlives it.
        with Image.open(frame_path) as image:
            frame = image.convert('RGB')

        # Add a batch dimension and move the frame to the target device.
        processed_frame = preprocess_frame(frame).unsqueeze(0).to(device)

        with torch.no_grad():
            features = torch.flatten(resnet(processed_frame)).cpu().numpy()

        features_list.append(features)

    # Stack the per-frame vectors into a (num_frames, num_features) array.
    video_features = np.array(features_list)

    num_frames, num_dims = video_features.shape
    if num_dims != expected_dim:
        # torchvision's stock ResNet18 head already emits 1000 values, so this
        # branch is a fallback for a modified backbone.
        # NOTE(review): this PCA is fitted on the current video only, so its
        # axes presumably do not match the training feature space - confirm
        # before relying on it.
        if min(num_frames, num_dims) < expected_dim:
            raise ValueError(
                f"Cannot project features of shape {video_features.shape} to "
                f"{expected_dim} PCA components: need at least {expected_dim} "
                "frames and feature dimensions."
            )
        pca = PCA(n_components=expected_dim)
        video_features = pca.fit_transform(video_features)

    return video_features

# Function to evaluate video for real/fake detection
def evaluate_video(frames_dir, model, device, threshold=0.8):
    """Classify a video (given as a directory of frames) as real or fake.

    Features are extracted per frame, scored by ``model``, and the per-frame
    sigmoid probabilities are averaged into one video-level score. The verdict
    and score are printed; nothing is returned.

    Args:
        frames_dir: Directory containing the video's frame images.
        model: Trained classifier producing one logit per feature row.
        device: Torch device (or device string) used for extraction and scoring.
        threshold: Mean probability at or above which the video is reported
            as "fake". Defaults to 0.8, the cut-off previously hard-coded here.
    """
    # Step 1: Extract features from the video frames
    print("Extracting features from video frames...")
    video_features = extract_features_from_video_frames(frames_dir, device)

    if len(video_features) == 0:
        print("No features extracted from the video. Cannot make a prediction.")
        return

    # Step 2: Convert features to a torch tensor on the target device
    video_features_tensor = torch.tensor(video_features, dtype=torch.float32).to(device)

    # Step 3: Score every frame without tracking gradients
    model.eval()  # disable dropout for inference
    with torch.no_grad():
        # Drop only the trailing logit dimension, as the training loop does.
        outputs = model(video_features_tensor).squeeze(dim=-1)
        # Average the per-frame probabilities into a single video score.
        probabilities = torch.sigmoid(outputs).mean().item()

    prediction = "fake" if probabilities >= threshold else "real"

    print(f"Predicted: {prediction}, Probability: {probabilities:.4f}")

# Main training loop for model training and evaluation
def main(args):
    """Train the classifier on precomputed features and checkpoint the best epoch.

    Args:
        args: Dict of settings. Keys read here: 'train_features_file',
            'train_labels_file', 'batch_size', 'num_workers', 'input_size',
            'hidden_size', 'dropout_prob', 'device', 'lr', 'weight_decay'
            and 'num_epochs'.

    Side effects:
        Prints per-epoch loss, accuracy and training EER, and writes the model
        weights to 'best_model.pth' whenever the training EER improves.
        Note that the EER used for checkpointing is computed on the training
        data itself; no held-out set is used in this function.
    """
    # Training data
    dataset = NumpyVideoDeepfakeDataset(args['train_features_file'], args['train_labels_file'])
    loader = DataLoader(
        dataset,
        batch_size=args['batch_size'],
        shuffle=True,
        num_workers=args['num_workers'],
    )

    # Model, optimiser, loss and LR schedule
    device = args['device']
    model = VideoDeepfakeModel(
        input_size=args['input_size'],
        hidden_size=args['hidden_size'],
        dropout_prob=args['dropout_prob'],
    ).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])
    criterion = nn.BCEWithLogitsLoss()

    lr_scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)

    total_epochs = args['num_epochs']
    lowest_eer = float('inf')

    for epoch_idx in range(total_epochs):
        epoch_no = epoch_idx + 1
        model.train()
        loss_sum = 0.0
        accuracy_sum = 0.0
        label_chunks = []
        logit_chunks = []

        for features, labels in tqdm(loader, desc=f"Epoch [{epoch_no}/{total_epochs}]"):
            features = features.to(device)
            # BCEWithLogitsLoss needs float targets.
            labels = labels.to(device).float()

            optimizer.zero_grad()

            # Drop only the trailing logit dimension so a batch of one keeps its batch axis.
            logits = model(features).squeeze(dim=-1)

            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()

            # Running totals; both are averaged per batch after the epoch.
            loss_sum += loss.item()
            accuracy_sum += calculate_accuracy(logits, labels)

            label_chunks.append(labels.cpu().numpy())
            logit_chunks.append(logits.detach().cpu().numpy())

        num_batches = len(loader)
        mean_loss = loss_sum / num_batches
        mean_accuracy = accuracy_sum / num_batches
        print(f"Epoch [{epoch_no}/{total_epochs}], Loss: {mean_loss:.4f}, Accuracy: {mean_accuracy:.4f}")

        # EER over every training sample seen this epoch (raw logits as scores).
        epoch_labels = np.concatenate(label_chunks)
        epoch_logits = np.concatenate(logit_chunks)
        train_eer = compute_eer(epoch_labels, epoch_logits)
        print(f"Epoch [{epoch_no}/{total_epochs}], Training EER: {train_eer:.4f}")

        # Keep the weights of the epoch with the lowest training EER so far.
        if train_eer < lowest_eer:
            lowest_eer = train_eer
            torch.save(model.state_dict(), 'best_model.pth')
            print("Model saved as 'best_model.pth'")

        # The plateau scheduler watches the mean training loss.
        lr_scheduler.step(mean_loss)

if __name__ == "__main__":
    # Training / model settings consumed by main() and reused below to rebuild
    # the same architecture for inference.
    args = {
        'train_features_file': '/kaggle/input/embeddings-for-sih/small data feature extraction/features.npy',
        'train_labels_file': '/kaggle/input/embeddings-for-sih/small data feature extraction/labels.npy',
        'batch_size': 16,
        'num_epochs': 25,
        'lr': 1e-5,
        'num_workers': 4,
        'device': 'cuda' if torch.cuda.is_available() else 'cpu',
        'input_size': 1000,  # Input size for the video model
        'hidden_size': 512,
        'dropout_prob': 0.2,
        'weight_decay': 1e-5
    }

    main(args)

    # Load the best model and evaluate a video.
    # main() saves its checkpoint as 'best_model.pth' relative to the current
    # working directory, so reload it from that same relative path rather than
    # assuming the working directory is /kaggle/working.
    checkpoint_path = 'best_model.pth'
    model = VideoDeepfakeModel(input_size=args['input_size'], hidden_size=args['hidden_size'], dropout_prob=args['dropout_prob']).to(args['device'])
    model.load_state_dict(torch.load(checkpoint_path, map_location=args['device'], weights_only=True))
    evaluate_video('/kaggle/input/test-sih/test', model, args['device'])


Epoch [1/25]: 100%|██████████| 235/235 [00:01<00:00, 171.77it/s]


Epoch [1/25], Loss: 0.5156, Accuracy: 0.7886
Epoch [1/25], Training EER: 0.4771
Model saved as 'best_model.pth'


Epoch [2/25]: 100%|██████████| 235/235 [00:00<00:00, 246.21it/s]


Epoch [2/25], Loss: 0.4802, Accuracy: 0.7984
Epoch [2/25], Training EER: 0.3816
Model saved as 'best_model.pth'


Epoch [3/25]: 100%|██████████| 235/235 [00:01<00:00, 215.91it/s]


Epoch [3/25], Loss: 0.4679, Accuracy: 0.7939
Epoch [3/25], Training EER: 0.3406
Model saved as 'best_model.pth'


Epoch [4/25]: 100%|██████████| 235/235 [00:00<00:00, 247.80it/s]


Epoch [4/25], Loss: 0.4465, Accuracy: 0.8024
Epoch [4/25], Training EER: 0.3182
Model saved as 'best_model.pth'


Epoch [5/25]: 100%|██████████| 235/235 [00:00<00:00, 240.53it/s]


Epoch [5/25], Loss: 0.4313, Accuracy: 0.8048
Epoch [5/25], Training EER: 0.2996
Model saved as 'best_model.pth'


Epoch [6/25]: 100%|██████████| 235/235 [00:00<00:00, 257.78it/s]


Epoch [6/25], Loss: 0.4198, Accuracy: 0.8112
Epoch [6/25], Training EER: 0.2911
Model saved as 'best_model.pth'


Epoch [7/25]: 100%|██████████| 235/235 [00:00<00:00, 262.65it/s]


Epoch [7/25], Loss: 0.4112, Accuracy: 0.8186
Epoch [7/25], Training EER: 0.2806
Model saved as 'best_model.pth'


Epoch [8/25]: 100%|██████████| 235/235 [00:00<00:00, 248.85it/s]


Epoch [8/25], Loss: 0.3970, Accuracy: 0.8258
Epoch [8/25], Training EER: 0.2581
Model saved as 'best_model.pth'


Epoch [9/25]: 100%|██████████| 235/235 [00:01<00:00, 219.10it/s]


Epoch [9/25], Loss: 0.3906, Accuracy: 0.8269
Epoch [9/25], Training EER: 0.2541
Model saved as 'best_model.pth'


Epoch [10/25]: 100%|██████████| 235/235 [00:00<00:00, 241.94it/s]


Epoch [10/25], Loss: 0.3794, Accuracy: 0.8343
Epoch [10/25], Training EER: 0.2356
Model saved as 'best_model.pth'


Epoch [11/25]: 100%|██████████| 235/235 [00:00<00:00, 256.67it/s]


Epoch [11/25], Loss: 0.3673, Accuracy: 0.8431
Epoch [11/25], Training EER: 0.2318
Model saved as 'best_model.pth'


Epoch [12/25]: 100%|██████████| 235/235 [00:00<00:00, 255.12it/s]


Epoch [12/25], Loss: 0.3554, Accuracy: 0.8487
Epoch [12/25], Training EER: 0.2164
Model saved as 'best_model.pth'


Epoch [13/25]: 100%|██████████| 235/235 [00:00<00:00, 256.13it/s]


Epoch [13/25], Loss: 0.3471, Accuracy: 0.8535
Epoch [13/25], Training EER: 0.2217


Epoch [14/25]: 100%|██████████| 235/235 [00:00<00:00, 242.42it/s]


Epoch [14/25], Loss: 0.3522, Accuracy: 0.8532
Epoch [14/25], Training EER: 0.2067
Model saved as 'best_model.pth'


Epoch [15/25]: 100%|██████████| 235/235 [00:00<00:00, 244.62it/s]


Epoch [15/25], Loss: 0.3320, Accuracy: 0.8598
Epoch [15/25], Training EER: 0.2001
Model saved as 'best_model.pth'


Epoch [16/25]: 100%|██████████| 235/235 [00:00<00:00, 260.15it/s]


Epoch [16/25], Loss: 0.3220, Accuracy: 0.8670
Epoch [16/25], Training EER: 0.1818
Model saved as 'best_model.pth'


Epoch [17/25]: 100%|██████████| 235/235 [00:00<00:00, 266.02it/s]


Epoch [17/25], Loss: 0.3098, Accuracy: 0.8729
Epoch [17/25], Training EER: 0.1724
Model saved as 'best_model.pth'


Epoch [18/25]: 100%|██████████| 235/235 [00:00<00:00, 264.31it/s]


Epoch [18/25], Loss: 0.3084, Accuracy: 0.8769
Epoch [18/25], Training EER: 0.1715
Model saved as 'best_model.pth'


Epoch [19/25]: 100%|██████████| 235/235 [00:00<00:00, 256.60it/s]


Epoch [19/25], Loss: 0.2995, Accuracy: 0.8790
Epoch [19/25], Training EER: 0.1739


Epoch [20/25]: 100%|██████████| 235/235 [00:00<00:00, 260.84it/s]


Epoch [20/25], Loss: 0.2918, Accuracy: 0.8787
Epoch [20/25], Training EER: 0.1634
Model saved as 'best_model.pth'


Epoch [21/25]: 100%|██████████| 235/235 [00:00<00:00, 259.03it/s]


Epoch [21/25], Loss: 0.2872, Accuracy: 0.8803
Epoch [21/25], Training EER: 0.1556
Model saved as 'best_model.pth'


Epoch [22/25]: 100%|██████████| 235/235 [00:00<00:00, 259.04it/s]


Epoch [22/25], Loss: 0.2802, Accuracy: 0.8896
Epoch [22/25], Training EER: 0.1566


Epoch [23/25]: 100%|██████████| 235/235 [00:00<00:00, 249.26it/s]


Epoch [23/25], Loss: 0.2745, Accuracy: 0.8939
Epoch [23/25], Training EER: 0.1446
Model saved as 'best_model.pth'


Epoch [24/25]: 100%|██████████| 235/235 [00:00<00:00, 254.37it/s]


Epoch [24/25], Loss: 0.2645, Accuracy: 0.8939
Epoch [24/25], Training EER: 0.1431
Model saved as 'best_model.pth'


Epoch [25/25]: 100%|██████████| 235/235 [00:00<00:00, 255.62it/s]


Epoch [25/25], Loss: 0.2626, Accuracy: 0.8960
Epoch [25/25], Training EER: 0.1431
Extracting features from video frames...


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 207MB/s]


Predicted: fake, Probability: 0.9476
