In [6]:
import os

def rename_files(folder_path):
    try:
        for filename in os.listdir(folder_path):
            old_name = os.path.join(folder_path, filename)
            if os.path.isfile(old_name):
                new_name = os.path.join(folder_path, filename.replace("_cropped", ""))
                os.rename(old_name,new_name)

    except Exception as e:
        print(f'An error occured : {e}')


In [2]:
train_cropped_videos_path = "/home/onyxia/work/Deepfake_Recognition/data/train_cropped_videos"
test_cropped_videos_path = "/home/onyxia/work/Deepfake_Recognition/data/train_cropped_videos"

#rename_files(train_cropped_videos_path)
#rename_files(test_cropped_videos_path)

In [4]:
import json
import pandas as pd
import os

# Chemin vers les fichiers et le metadata.json
metadata_path = "/home/onyxia/work/Deepfake_Recognition/data/train_sample_videos/metadata.json"
videos = os.listdir("/home/onyxia/work/Deepfake_Recognition/data/train_cropped_videos")

# Charger les métadonnées
with open(metadata_path, "r") as file:
    metadata = json.load(file)

# Créer une liste de dictionnaires avec fichier et label
metadata_labels = {key: value['label'] for key, value in metadata.items()}
data_list_type = [{"file": f, "label": l} for f, l in metadata_labels.items() if f in videos]

# Conversion en DataFrame
labels_df = pd.DataFrame(data_list_type)

# Aperçu du DataFrame
print(labels_df.head())


             file label
0  aagfhgtpmv.mp4  FAKE
1  aapnvogymq.mp4  FAKE
2  abarnvbtwb.mp4  REAL
3  abofeumbvv.mp4  FAKE
4  abqwwspghj.mp4  FAKE


In [5]:
labels_df['file']

0      aagfhgtpmv.mp4
1      aapnvogymq.mp4
2      abarnvbtwb.mp4
3      abofeumbvv.mp4
4      abqwwspghj.mp4
            ...      
330    etdcqxabww.mp4
331    etmcruaihe.mp4
332    etohcvnzbj.mp4
333    eudeqjhdfd.mp4
334    eukvucdetx.mp4
Name: file, Length: 335, dtype: object

In [7]:
!sudo apt-get update && sudo apt-get install -y libgl1-mesa-glx

Get:1 http://archive.ubuntu.com/ubuntu jammy InRelease [270 kB]
Get:2 https://ppa.launchpadcontent.net/git-core/ppa/ubuntu jammy InRelease [24.6 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]        
Get:4 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]      
Get:5 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]      
Get:6 https://ppa.launchpadcontent.net/git-core/ppa/ubuntu jammy/main amd64 Packages [2,962 B]
Get:7 http://archive.ubuntu.com/ubuntu jammy/universe amd64 Packages [17.5 MB] 
Get:8 http://archive.ubuntu.com/ubuntu jammy/main amd64 Packages [1,792 kB]
Get:9 http://archive.ubuntu.com/ubuntu jammy/restricted amd64 Packages [164 kB]
Get:10 http://archive.ubuntu.com/ubuntu jammy/multiverse amd64 Packages [266 kB]
Get:11 http://archive.ubuntu.com/ubuntu jammy-updates/restricted amd64 Packages [3,633 kB]
Get:12 https://apt.postgresql.org/pub/repos/apt jammy-pgdg InRelease [129 kB]  
Get:13 http://archive.ubuntu

In [48]:
!pip install torch
!pip install opencv-python



In [8]:
from torch.utils.data import Dataset
import cv2

In [49]:
import os
import cv2
import torch
from torch.utils.data import Dataset

class VideoDataset(Dataset):
    def __init__(self, video_names: list, labels, sequence_length: int, transform=None):
        """
        Args:
            video_names (list): Liste des noms des vidéos.
            labels (pd.DataFrame): DataFrame contenant les fichiers et leurs labels.
            sequence_length (int): Nombre maximum de frames à extraire par vidéo.
            transform (callable, optional): Transformation à appliquer aux frames.
        """
        self.video_names = video_names
        self.labels = labels
        self.transform = transform
        self.sequence_length = sequence_length

    def __len__(self):
        """Retourne la longueur du dataset."""
        return len(self.video_names)

    def __getitem__(self, idx):
        """
        Récupère une vidéo et ses labels.
        
        Args:
            idx (int): Index de la vidéo.

        Returns:
            torch.Tensor: Tensor des frames de la vidéo.
            torch.Tensor: Label associé à la vidéo.
        """
        video_name = self.video_names[idx]
        video_path = os.path.join(train_cropped_videos_path, video_name)

        # Vérifie si la vidéo existe
        if not os.path.exists(video_path):
            raise RuntimeError(f"Video file not found: {video_path}")

        # Récupère le label associé
        try:
            label = self.labels.loc[self.labels["file"] == video_name, "label"].values[0]
            label_mapping = {'FAKE': 0, 'REAL': 1}
            label = label_mapping[label]
        except KeyError:
            raise ValueError(f"Label not found or invalid for video: {video_name}")

        # Extraction des frames
        frames = []
        try:
            for i, frame in enumerate(self.frame_extract(video_path)):
                if self.transform:
                    frame = self.transform(frame)
                frames.append(frame)
                if len(frames) == self.sequence_length:
                    break
        except Exception as e:
            raise RuntimeError(f"Error processing video {video_path}: {e}")

        # Gestion des vidéos sans frames
        if len(frames) == 0:
            raise RuntimeError(f"No frames captured for video: {video_path}")

        # Convertit les frames en tensor
        frames = torch.stack(frames[:self.sequence_length])
        return frames, torch.tensor(label, dtype=torch.long)

    def frame_extract(self, path):
        """
        Extrait les frames d'une vidéo.
        
        Args:
            path (str): Chemin de la vidéo.
        
        Yields:
            numpy.ndarray: Frame extraite de la vidéo.
        """
        vidObj = cv2.VideoCapture(path)
        if not vidObj.isOpened():
            raise RuntimeError(f"Failed to open video file: {path}")

        while True:
            success, frame = vidObj.read()
            if not success:
                break
            yield frame
        vidObj.release()


In [50]:
import os 

video_names = os.listdir(train_cropped_videos_path)
video_names = [f for f in video_names if f.endswith("mp4")]
video_names[:5]

['dsndhujjjb.mp4',
 'dlpoieqvfb.mp4',
 'aagfhgtpmv.mp4',
 'dofusvhnib.mp4',
 'apogckdfrz.mp4']

In [21]:
!pip install torchvision



In [22]:
import torch
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset

In [51]:
im_size = 112
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

video_transforms = transforms.Compose([
                                        transforms.ToPILImage(),
                                        transforms.Resize((im_size,im_size)),
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean,std)])

cropped_videos_transformed = video_dataset(labels_df['file'],labels_df,sequence_length = 10,transform = video_transforms)

video_loader = DataLoader(cropped_videos_transformed,batch_size = 1,shuffle = True,num_workers = 0)

In [52]:
# Vérifier le fonctionnement du DataLoader
for batch_idx, (frames, label) in enumerate(video_loader):
    print(f"Batch {batch_idx + 1}:")
    print(f"Frames shape: {frames.shape}")  # Devrait être [batch_size, num_frames, channels, height, width]
    print(f"Label: {label}")  # Devrait être un tenseur avec les labels
    break  # Arrêtez après un batch pour éviter d'afficher trop d'informations


Batch 1:
Frames shape: torch.Size([1, 10, 3, 112, 112])
Label: tensor([0])


In [61]:
import torch.nn as nn
import torch

class VideoClassifier(nn.Module):
    def __init__(self, num_classes=2):
        super(VideoClassifier, self).__init__()
        # Convolution et pooling
        self.conv3d = nn.Conv3d(3, 16, kernel_size=(3, 3, 3), stride=1, padding=1)
        self.pool = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=2)

        # Taille d'entrée pour la couche linéaire (calculée dynamiquement)
        self.fc_input_size = self._calculate_flattened_size((3, 10, 112, 112))
        self.fc = nn.Linear(self.fc_input_size, num_classes)

    def _calculate_flattened_size(self, input_shape):
        """
        Calcule la taille de la sortie aplatie après les convolutions et pooling.
        """
        with torch.no_grad():
            x = torch.zeros(1, *input_shape)  # Tenseur factice
            x = self.conv3d(x)
            x = self.pool(x)
            return x.numel()

    def forward(self, x):
        x = self.conv3d(x)
        x = self.pool(x)
        #print(f"Shape after pooling: {x.shape}")  # Debugging dimensions
        x = x.view(x.size(0), -1)  # Flatten
        x = self.fc(x)
        return x


In [56]:
model = VideoClassifier(num_classes=2)

# Entrée factice
dummy_input = torch.randn(1, 3, 10, 112, 112)  # [batch_size, channels, num_frames, height, width]
outputs = model(dummy_input)

print(f"Model output shape: {outputs.shape}")  # Devrait être [1, num_classes]


Shape after pooling: torch.Size([1, 16, 5, 56, 56])
Model output shape: torch.Size([1, 2])


In [17]:
torch.cuda.is_available()

False

In [63]:
model = VideoClassifier(num_classes=2)

criterion = nn.CrossEntropyLoss()  # Pour une tâche de classification
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model.to(device)
loss_value = []
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for batch_idx, (videos, labels) in enumerate(video_loader):
        if videos is None:
            continue
        # Déplacez les données sur le GPU si disponible
        videos = videos.to(device)  # Shape: [batch_size, channels, sequence_length, height, width]
        labels = labels.to(device)

        # Réorganiser les dimensions pour correspondre aux attentes du modèle
        videos = videos.permute(0, 2, 1, 3, 4)  # Devient : [batch_size, channels, num_frames, height, width]

        # Réinitialiser les gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(videos)
        loss = criterion(outputs, labels)

        # Backward pass et optimisation
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        loss_value.append(running_loss)

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss / len(video_loader)}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x100352 and 250880x2)