In [None]:
# =====================
# 1. IMPORTS
# =====================
import os
import cv2
import glob
import torch
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from transformers import ViTFeatureExtractor, ViTModel

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"


In [None]:
# =====================
# 2. LOAD THE PRE-TRAINED ViT
# =====================
# Checkpoint identifier shared by the preprocessor and the backbone.
model_name = "google/vit-base-patch16-224-in21k"

# Preprocessor that converts raw frames into the tensors fed to the model.
# NOTE(review): ViTFeatureExtractor is reported as deprecated in recent
# transformers releases in favour of ViTImageProcessor — confirm against the
# installed version before migrating.
feature_extractor = ViTFeatureExtractor.from_pretrained(model_name)

# Backbone: load the weights, move them to the selected device, then switch
# the module to evaluation mode.
vit_model = ViTModel.from_pretrained(model_name)
vit_model = vit_model.to(device)
vit_model.eval()


In [None]:
# =====================
# 3. FRAME EXTRACTION FROM A VIDEO
# =====================
def extract_frames(video_path, every_n=10):
    """Sample every ``every_n``-th frame of a video and return them as RGB images.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        every_n: Sampling stride. Frames whose index is a multiple of
            ``every_n`` are kept, so the first frame is always included.
            Must be at least 1.

    Returns:
        A list of frames converted from BGR to RGB, in reading order. The list
        is empty when no frame could be read from ``video_path``.

    Raises:
        ValueError: If ``every_n`` is smaller than 1.
    """
    # Validate up front: a stride of 0 would otherwise fail with a
    # ZeroDivisionError in the middle of the read loop.
    if every_n < 1:
        raise ValueError(f"every_n must be a positive integer, got {every_n!r}")

    cap = cv2.VideoCapture(video_path)
    frames = []
    idx = 0
    try:
        # grab() only advances the stream; retrieve(), which materialises the
        # image, is called just for the frames that are actually kept.
        while cap.grab():
            if idx % every_n == 0:
                ret, frame = cap.retrieve()
                if not ret:
                    break
                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            idx += 1
    finally:
        # Always free the capture handle, even if a conversion step raises.
        cap.release()
    return frames


In [None]:
# =====================
# 4. ViT EMBEDDINGS FOR A LIST OF IMAGES
# =====================
def get_vit_embeddings(frames, batch_size=32):
    """Compute one ViT [CLS] embedding per image.

    The images are pushed through the model in chunks of ``batch_size`` so that
    memory use does not grow with the number of frames in a video.

    Args:
        frames: Sequence of images accepted by ``feature_extractor``.
        batch_size: Maximum number of images per forward pass. Must be at
            least 1.

    Returns:
        A NumPy array with one row per input image, holding the hidden state
        of the first token of each sequence. For empty input the array has
        shape ``(0, vit_model.config.hidden_size)``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")
    if len(frames) == 0:
        # Nothing to embed: return a well-shaped empty matrix instead of
        # letting the preprocessor fail on an empty list.
        return np.empty((0, vit_model.config.hidden_size), dtype=np.float32)

    chunks = []
    with torch.no_grad():
        for start in range(0, len(frames), batch_size):
            batch = frames[start:start + batch_size]
            inputs = feature_extractor(images=batch, return_tensors="pt").to(device)
            outputs = vit_model(**inputs)
            # Keep only the [CLS] token (position 0); the original note gives
            # its size as 768 for this checkpoint.
            chunks.append(outputs.last_hidden_state[:, 0, :].cpu().numpy())
    return np.concatenate(chunks, axis=0)


In [None]:
# =====================
# 5. MEAN OF THE EMBEDDINGS PER VIDEO
# =====================
def process_video(video_path, every_n=10):
    """Summarise a video as the mean of its sampled-frame ViT embeddings.

    Args:
        video_path: Path of the video to process.
        every_n: Frame sampling stride forwarded to ``extract_frames``
            (default 10, i.e. one frame out of ten).

    Returns:
        The element-wise mean of the per-frame embeddings, or ``None`` when no
        frame could be extracted from the video.
    """
    frames = extract_frames(video_path, every_n=every_n)
    if not frames:
        return None
    embeddings = get_vit_embeddings(frames)
    # Average over the frame axis to get a single vector per video.
    return np.mean(embeddings, axis=0)


In [None]:
# =====================
# 6. DATASET CREATION
# =====================
def create_dataset(real_dir, fake_dir):
    """Build the feature matrix and label vector from two video folders.

    Every ``*.mp4`` file directly inside ``real_dir`` is labelled 0 and every
    ``*.mp4`` file directly inside ``fake_dir`` is labelled 1. Videos for which
    ``process_video`` returns ``None`` are skipped.

    Args:
        real_dir: Folder containing the real videos.
        fake_dir: Folder containing the fake videos.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: one embedding per kept video and
        the matching 0/1 labels, real videos first.
    """
    X, y = [], []

    # One pass per class instead of two copy-pasted loops.
    for directory, label in ((real_dir, 0), (fake_dir, 1)):
        # glob order depends on the filesystem; sorting keeps the dataset
        # order (and any seeded split made from it) stable across runs.
        video_paths = sorted(glob.glob(os.path.join(directory, "*.mp4")))
        for video_path in tqdm(video_paths):
            emb = process_video(video_path)
            if emb is not None:
                X.append(emb)
                y.append(label)

    return np.array(X), np.array(y)

# Folders holding the real and the fake videos (absolute, machine-specific
# paths — adjust them before running on another machine).
real_videos_path = r"C:\Users\EliteLaptop\Desktop\kawtar\GAN_inversion\real_videos"
fake_videos_path = r"C:\Users\EliteLaptop\Desktop\kawtar\GAN_inversion\fake_videos"

# Embed every video once; X holds the per-video embeddings, y the 0/1 labels.
X, y = create_dataset(real_videos_path, fake_videos_path)
print("Shape X:", X.shape, "Shape y:", y.shape)


In [None]:
# =====================
# 7. CLASSIFIER TRAINING
# =====================
# Stratify on y so the train and test splits keep the same real/fake ratio.
# Note: stratification needs at least two videos per class and a test split
# large enough to hold one of each.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# probability=True adds an internally cross-validated calibration step that is
# randomised; fixing random_state keeps the fitted model reproducible.
clf = SVC(kernel="linear", probability=True, random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
# Pin labels to both classes so target_names always lines up, even if one
# class happens to be missing from y_test / y_pred.
print(
    classification_report(
        y_test, y_pred, labels=[0, 1], target_names=["Real", "Fake"]
    )
)


In [None]:
# =====================
# 8. PREDICTION ON A NEW VIDEO
# =====================
def predict_video(video_path):
    """Classify a single video with the trained classifier.

    Args:
        video_path: Path of the video to classify.

    Returns:
        ``"Fake"`` when the classifier predicts label 1, ``"Real"`` otherwise,
        or ``None`` (after printing a message) when no embedding could be
        computed for the video.
    """
    video_embedding = process_video(video_path)
    if video_embedding is not None:
        # The classifier expects a batch, so wrap the single embedding.
        label = clf.predict([video_embedding])[0]
        return "Real" if label != 1 else "Fake"

    print("Pas de frames extraites.")
    return None

# Video to classify — this looks like a placeholder path; point it at a real
# file before running. predict_video returns None when no frames are read.
test_video = r"C:\path\to\test_video.mp4"
print("Résultat :", predict_video(test_video))
