# Deepfake Detection Model Training
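This notebook trains three models for a deepfake detection pipeline: a YOLO face detector, a CNN image classifier, and an XGBoost classifier on hand-crafted image features. Training uses GPU acceleration where available.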

In [None]:
# Import required libraries
import os
import numpy as np
import pandas as pd
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from ultralytics import YOLO

# Report CUDA availability and select the matching torch device
print("CUDA Available:", torch.cuda.is_available())
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Using device:", device)

## Data Preparation

In [None]:
# Load and preprocess dataset
def load_images_from_folder(folder, label, max_samples=1000):
    """Load up to ``max_samples`` images from ``folder`` and label them.

    Every directory entry is read with ``cv2.imread`` and resized to
    256x256. Entries that cannot be decoded (``imread`` returns ``None``)
    are skipped with a message instead of being passed on to ``cv2.resize``.

    Args:
        folder: Directory whose entries are read as images.
        label: Label attached to every image loaded from ``folder``.
        max_samples: Maximum number of images to load.

    Returns:
        A ``(images, labels)`` tuple of two parallel lists.
    """
    images = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting makes the subset
    # selected under max_samples reproducible across runs and machines.
    for filename in sorted(os.listdir(folder)):
        if len(images) >= max_samples:
            break
        img_path = os.path.join(folder, filename)
        try:
            img = cv2.imread(img_path)
            # The None check must come before resize, which cannot handle None.
            if img is None:
                print(f"Skipping unreadable image: {img_path}")
                continue
            images.append(cv2.resize(img, (256, 256)))
            labels.append(label)
        except Exception as e:
            print(f"Error loading {img_path}: {e}")
    return images, labels

# Load real and fake images (label 0 = real, label 1 = fake)
real_images, real_labels = load_images_from_folder('data/real', 0)
fake_images, fake_labels = load_images_from_folder('data/fake', 1)

# Combine both classes; shuffling is done by train_test_split below
X = np.array(real_images + fake_images)
y = np.array(real_labels + fake_labels)

# Split data; stratify so train and test keep the same real/fake ratio
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Normalize to [0, 1] as float32. Dividing the uint8 arrays directly would
# produce float64, which doubles memory and is a depth cv2.cvtColor rejects
# when these arrays are later used for feature extraction.
X_train = X_train.astype(np.float32) / 255.0
X_test = X_test.astype(np.float32) / 255.0

## YOLO Model Training (Face Detection)

In [None]:
# Train YOLO model for face detection
yolo_model = YOLO('yolov8n.pt')  # Load pretrained YOLO

# Train on custom face dataset (assuming you have labeled face data)
results = yolo_model.train(
    data='data/faces.yaml',
    epochs=50,
    imgsz=640,
    batch=16,
    # Use GPU 0 when CUDA is present, otherwise fall back to CPU so the
    # cell still runs on machines without a GPU.
    device='0' if torch.cuda.is_available() else 'cpu'
)

# Save trained model; create the output directory first because torch.save
# does not create missing parent directories.
os.makedirs('models', exist_ok=True)
yolo_model.export(format='onnx')
torch.save(yolo_model.state_dict(), 'models/yolo_model.pt')

## CNN Model Training (Deepfake Classification)

In [None]:
# Custom Dataset class
class DeepfakeDataset(Dataset):
    """Dataset that pairs image arrays with their labels.

    ``__getitem__`` converts the indexed image from a NumPy array to a
    float tensor with its last axis moved to the front, and the indexed
    label to a float32 tensor.
    """

    def __init__(self, images, labels):
        self.images, self.labels = images, labels

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        # Move the last (channel) axis first: (H, W, C) -> (C, H, W)
        as_tensor = torch.from_numpy(self.images[idx])
        channels_first = as_tensor.permute(2, 0, 1).float()
        target = torch.tensor(self.labels[idx], dtype=torch.float32)
        return channels_first, target

# CNN Model
class CNN(nn.Module):
    """Three-stage convolutional binary classifier.

    Each stage is conv -> ReLU -> 2x2 max-pool, followed by two fully
    connected layers and a sigmoid, so the output is one probability per
    sample with shape ``(batch, 1)``.

    Args:
        input_size: Side length of the square input images. The default of
            256 reproduces the original ``128 * 32 * 32`` flattened size.
    """

    def __init__(self, input_size=256):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        # Three 2x2 poolings shrink each spatial side by a factor of 8.
        self.flat_features = 128 * (input_size // 8) ** 2
        self.fc1 = nn.Linear(self.flat_features, 128)
        self.fc2 = nn.Linear(128, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return per-sample probabilities of shape ``(batch, 1)``."""
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        # Flatten per sample and keep the batch axis. A view(-1, N) could
        # silently merge several wrongly sized samples into one row; with
        # flatten(1) a size mismatch makes fc1 raise instead.
        x = x.flatten(1)
        x = self.relu(self.fc1(x))
        x = self.sigmoid(self.fc2(x))
        return x

# Initialize model, loss, and optimizer
cnn_model = CNN().to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(cnn_model.parameters())

# Create data loaders
train_dataset = DeepfakeDataset(X_train, y_train)
test_dataset = DeepfakeDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Training loop
for epoch in range(20):
    cnn_model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = cnn_model(images)
        # squeeze(1) removes only the trailing size-1 dimension. A bare
        # squeeze() would also collapse a final batch of one sample to a
        # 0-d tensor, which no longer matches the (1,) label tensor.
        loss = criterion(outputs.squeeze(1), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}')

# Save model; create the output directory first because torch.save does not
# create missing parent directories.
os.makedirs('models', exist_ok=True)
torch.save(cnn_model.state_dict(), 'models/cnn_model.pt')

## XGBoost Model Training (Feature-based Classification)

In [None]:
# Extract features for XGBoost
def extract_features(images):
    """Compute five intensity and edge statistics per image.

    For each image the grayscale mean and standard deviation are computed,
    followed by the variance of the Laplacian and of the horizontal and
    vertical Sobel responses (kernel size 5).

    Args:
        images: Iterable of 3-channel images as NumPy arrays. float64 images
            are cast to float32 first because ``cv2.cvtColor`` does not
            accept 64-bit float input.

    Returns:
        NumPy array with one row of five features per input image.
    """
    features = []
    for img in images:
        img = np.asarray(img)
        # Arrays normalized with `/ 255.0` are float64, which cvtColor
        # rejects; float32 keeps the same value range and is supported.
        if img.dtype == np.float64:
            img = img.astype(np.float32)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        features.append([
            np.mean(gray), np.std(gray),
            cv2.Laplacian(gray, cv2.CV_64F).var(),
            cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=5).var(),
            cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=5).var()
        ])
    return np.array(features)

# Extract features
X_train_features = extract_features(X_train)
X_test_features = extract_features(X_test)

# Train XGBoost, on the GPU when CUDA is available and on the CPU otherwise
xgb_params = dict(n_estimators=100, max_depth=5, learning_rate=0.1)
if torch.cuda.is_available():
    # NOTE(review): XGBoost >= 2.0 deprecates tree_method='gpu_hist' and
    # gpu_id in favor of tree_method='hist' with device='cuda'. Switch once
    # the installed XGBoost version is confirmed.
    xgb_params.update(tree_method='gpu_hist', gpu_id=0)
else:
    xgb_params.update(tree_method='hist')
xgb_model = XGBClassifier(**xgb_params)

xgb_model.fit(X_train_features, y_train)

# Evaluate
y_pred = xgb_model.predict(X_test_features)
print("XGBoost Accuracy:", accuracy_score(y_test, y_pred))

# Save model; create the output directory first so the write cannot fail on
# a missing folder.
os.makedirs('models', exist_ok=True)
xgb_model.save_model('models/xgboost_model.json')

## Model Evaluation

In [None]:
# Evaluate CNN model with a PyTorch loop (nn.Module has no Keras-style
# evaluate() method).
cnn_model.eval()
test_loss = 0.0
correct = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        probs = cnn_model(images).squeeze(1)
        # criterion averages over the batch; weight by batch size so the
        # final value is a per-sample mean even if the last batch is smaller.
        test_loss += criterion(probs, labels).item() * labels.size(0)
        correct += ((probs >= 0.5).float() == labels).sum().item()
n_test = max(len(test_loader.dataset), 1)  # avoid dividing by zero on an empty test set
test_loss /= n_test
test_acc = correct / n_test
print(f'\nCNN Test Accuracy: {test_acc:.4f}')

# Evaluate XGBoost model
y_pred = xgb_model.predict(X_test_features)
print(f'XGBoost Test Accuracy: {accuracy_score(y_test, y_pred):.4f}')