# Deepfake Detection Model Training (PyTorch Version)

This notebook trains YOLO, CNN, and XGBoost models for deepfake detection with GPU acceleration using PyTorch.

In [None]:
# Import required libraries
import os
import numpy as np
import pandas as pd
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import (accuracy_score, precision_score, 
                           recall_score, f1_score, roc_auc_score)
from ultralytics import YOLO
import matplotlib.pyplot as plt
import json
from datetime import datetime

In [None]:
# Report whether PyTorch can see a CUDA GPU and pick the compute device to match
print("CUDA Available:", torch.cuda.is_available())
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("Using device:", device)

# Output folders for saved models and metric files (no error if they already exist)
os.makedirs('models', exist_ok=True)
os.makedirs('metrics', exist_ok=True)

# Shared results store, filled in by the training/evaluation cells and later
# written to metrics/metrics.json. The list entries under 'cnn' start empty;
# scalar entries start at 0.
metrics = dict(
    cnn=dict(
        train_loss=[],
        val_loss=[],
        train_acc=[],
        val_acc=[],
        precision=0,
        recall=0,
        f1=0,
        auc=0,
    ),
    xgboost=dict(accuracy=0, precision=0, recall=0, f1=0, auc=0),
    yolo=dict(map=0),
    # Snapshot of the runtime environment taken when this cell executes
    system=dict(
        gpu_available=torch.cuda.is_available(),
        device=str(device),
        last_trained=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    ),
)

## Data Preparation

In [None]:
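# TODO: data loading code was omitted from this notebook ("previous data loading code remains the same").
# Restore it before running: the evaluation cell below uses test_loader, X_test_features and y_test.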

## CNN Model Training (PyTorch)

In [None]:
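# TODO: CNN training code was omitted from this notebook ("previous CNN training code remains the same").
# Restore it before running: later cells use cnn_model and plot the per-epoch lists in metrics['cnn'].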

In [None]:
# Enhanced CNN Evaluation
def evaluate_model(model, test_loader):
    """Evaluate a binary classifier on ``test_loader`` and record the scores.

    The model is switched to eval mode and run without gradient tracking.
    Its outputs are passed through a sigmoid and thresholded at 0.5 to get
    hard predictions. Accuracy, precision, recall, F1 and ROC AUC are then
    computed with scikit-learn, stored in the notebook-level ``metrics['cnn']``
    dict and printed.

    Args:
        model: Callable PyTorch module; ``model(images)`` is treated as raw
            logits (assumes one logit per sample -- TODO confirm against the
            CNN definition).
        test_loader: Iterable yielding ``(images, labels)`` tensor batches.

    Side effects:
        Updates ``metrics['cnn']`` with 'accuracy', 'precision', 'recall',
        'f1' and 'auc'; uses the notebook-level ``device``.
    """
    model.eval()
    all_labels = []
    all_preds = []
    all_probs = []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # Flatten instead of squeeze(): squeeze() collapses a single-sample
            # batch to a 0-d tensor, whose NumPy view cannot be iterated by
            # list.extend and would crash on a final batch of size 1.
            probs = torch.sigmoid(outputs.reshape(-1))
            preds = (probs > 0.5).float()

            # Labels are flattened as well so (N, 1)-shaped targets contribute
            # plain scalars rather than 1-element arrays.
            all_labels.extend(labels.reshape(-1).cpu().numpy())
            all_preds.extend(preds.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())

    # Calculate metrics
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    # AUC is computed from the probabilities, not the thresholded predictions
    auc = roc_auc_score(all_labels, all_probs)

    # Update metrics dictionary (accuracy is stored too, matching the
    # fields recorded for XGBoost)
    metrics['cnn'].update({
        'accuracy': float(accuracy),
        'precision': float(precision),
        'recall': float(recall),
        'f1': float(f1),
        'auc': float(auc)
    })

    print(f'\nCNN Evaluation:')
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print(f'AUC: {auc:.4f}')

In [None]:
# Enhanced XGBoost Evaluation
def evaluate_xgboost(model, X_test, y_test):
    """Score a fitted classifier on test data, then record and print the results.

    Hard predictions come from ``model.predict`` and the AUC scores from
    column 1 of ``model.predict_proba``. The five scores are written into the
    notebook-level ``metrics['xgboost']`` dict as plain floats and printed
    with four decimals.

    Args:
        model: Fitted classifier exposing ``predict`` and ``predict_proba``.
        X_test: Feature matrix passed to the model.
        y_test: True labels matching ``X_test``.
    """
    predicted_labels = model.predict(X_test)
    positive_scores = model.predict_proba(X_test)[:, 1]

    # Computed in a fixed order; every score uses the hard predictions except
    # AUC, which is based on the probability column.
    results = {
        'accuracy': float(accuracy_score(y_test, predicted_labels)),
        'precision': float(precision_score(y_test, predicted_labels)),
        'recall': float(recall_score(y_test, predicted_labels)),
        'f1': float(f1_score(y_test, predicted_labels)),
        'auc': float(roc_auc_score(y_test, positive_scores)),
    }
    metrics['xgboost'].update(results)

    report_rows = (
        ('Accuracy', 'accuracy'),
        ('Precision', 'precision'),
        ('Recall', 'recall'),
        ('F1 Score', 'f1'),
        ('AUC', 'auc'),
    )
    print(f'\nXGBoost Evaluation:')
    for heading, key in report_rows:
        print(f'{heading}: {results[key]:.4f}')

In [None]:
# Save models and metrics
def save_artifacts():
    """Persist the trained models and the collected metrics to disk.

    Reads the notebook-level names ``cnn_model``, ``xgb_model``, ``metrics``
    and, when it exists, ``yolov8_model``.

    Files written:
        models/cnn_model.pt       -- CNN ``state_dict`` via ``torch.save``
        models/xgboost_model.pkl  -- XGBoost model via ``joblib.dump``
        models/yolov8_model.pt    -- YOLO checkpoint (only if ``yolov8_model``
                                     is defined)
        metrics/metrics.json      -- the ``metrics`` dict as indented JSON
    """
    # Recreate the output folders here so this function does not rely on the
    # setup cell having run (or on the folders not being removed since).
    os.makedirs('models', exist_ok=True)
    os.makedirs('metrics', exist_ok=True)

    # Save CNN model (weights only; the architecture must be rebuilt to load)
    torch.save(cnn_model.state_dict(), 'models/cnn_model.pt')

    # Save XGBoost model
    import joblib
    joblib.dump(xgb_model, 'models/xgboost_model.pkl')

    # Save YOLO model (if trained)
    if 'yolov8_model' in globals():
        # export() converts to deployment formats (ONNX, TorchScript, ...) and
        # does not accept format='pt'; save() writes the native checkpoint.
        yolov8_model.save('models/yolov8_model.pt')

    # Save metrics
    with open('metrics/metrics.json', 'w') as f:
        json.dump(metrics, f, indent=4)

    print('\nAll models and metrics saved successfully!')

In [None]:
# Plot training curves
def plot_training_curves():
    """Plot the CNN loss and accuracy histories side by side, save and show them.

    Reads the per-epoch lists 'train_loss', 'val_loss', 'train_acc' and
    'val_acc' from the notebook-level ``metrics['cnn']`` dict. The figure is
    written to ``metrics/training_curves.png`` before being displayed.
    """
    history = metrics['cnn']
    # One entry per panel: (quantity shown in labels, train key, validation key)
    panels = (
        ('Loss', 'train_loss', 'val_loss'),
        ('Accuracy', 'train_acc', 'val_acc'),
    )

    plt.figure(figsize=(12, 4))
    for position, (quantity, train_key, val_key) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history[train_key], label=f'Train {quantity}')
        plt.plot(history[val_key], label=f'Val {quantity}')
        plt.title(f'Training and Validation {quantity}')
        plt.xlabel('Epoch')
        plt.ylabel(quantity)
        plt.legend()

    plt.tight_layout()
    # Save before show() so the file is written from the populated figure
    plt.savefig('metrics/training_curves.png')
    plt.show()

In [None]:
# Evaluate models: each call prints its scores and writes them into the
# shared `metrics` dict (under 'cnn' and 'xgboost' respectively).
# NOTE(review): cnn_model, test_loader, xgb_model, X_test_features and y_test
# are not defined in any cell visible in this notebook -- confirm the omitted
# data-preparation/training cells create them, otherwise this cell fails on a
# fresh kernel.
evaluate_model(cnn_model, test_loader)
evaluate_xgboost(xgb_model, X_test_features, y_test)

# Save artifacts: model files under models/ and metrics/metrics.json.
# Runs after evaluation so the saved metrics include the scores above.
save_artifacts()

# Plot training curves from the per-epoch lists in metrics['cnn'] and
# save them to metrics/training_curves.png
plot_training_curves()