In [1]:
!pip install pywavelets
!pip install opencv-python
!pip install numpy
!pip install Xgboost



In [2]:
import cv2
import pywt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
import os

# Extract frames from video
def extract_frames(video_path, max_frames=50):
    cap = cv2.VideoCapture(video_path)
    frames = []
    count = 0
    while cap.isOpened() and count < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
        resized_frame = cv2.resize(gray_frame, (128, 128))  # Resize for consistency
        frames.append(resized_frame)
        count += 1
    cap.release()
    return frames

# Apply wavelet transform
def apply_wavelet_transform(image):
    coeffs = pywt.wavedec2(image, 'haar', level=2)  # Haar wavelet with 2 levels
    features = []
    for level in coeffs:
        for sub_band in level:
            features.append(np.mean(sub_band))
            features.append(np.std(sub_band))
    return np.array(features)

# Load dataset and extract features from videos
def load_dataset_from_videos(video_paths, labels, max_frames=50):
    features = []
    new_labels = []
    for i, video_path in enumerate(video_paths):
        if not os.path.exists(video_path):
            print(f"Error: {video_path} does not exist.")
            continue
        
        frames = extract_frames(video_path, max_frames)
        if len(frames) == 0:  # Skip if no frames extracted
            print(f"Warning: No frames extracted from {video_path}")
            continue
        
        for frame in frames:
            wavelet_features = apply_wavelet_transform(frame)
            features.append(wavelet_features)
            new_labels.append(labels[i])  # Repeat label for each frame
    
    if len(features) == 0:
        raise ValueError("No data available: check your video paths and frame extraction.")
    
    return np.array(features), np.array(new_labels)

# Train and evaluate classifier
def train_xgboost(features, labels):
    # Stratified splitting ensures balanced class representation in training and test sets
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, stratify=labels, random_state=42)
    xgb_clf = XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=6, random_state=42)
    xgb_clf.fit(X_train, y_train)
    y_pred = xgb_clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy: {accuracy}')
    return xgb_clf

# Generate paths and labels from FF++ dataset
def prepare_ffpp_dataset(base_path):
    video_paths = []
    labels = []
    for label, folder in enumerate(['real', 'fake']):  # 'real' = 0, 'fake' = 1
        folder_path = os.path.join(base_path, folder)
        if not os.path.exists(folder_path):
            print(f"Warning: {folder_path} does not exist.")
            continue
        for video_file in os.listdir(folder_path):
            if video_file.endswith('.mp4'):  # Adjust for your video format
                video_paths.append(os.path.join(folder_path, video_file))
                labels.append(label)
    return video_paths, labels

# Example usage for FaceForensics++ dataset
base_path = "C:\\Users\\Pavan\\Downloads\\FF++"  # Replace with actual FF++ dataset path
video_paths, labels = prepare_ffpp_dataset(base_path)

try:
    features, labels = load_dataset_from_videos(video_paths, labels)
    if len(features) > 0:
        xgboost_classifier = train_xgboost(features, labels)
except ValueError as e:
    print(e)


Accuracy: 0.8605757196495619
