In [1]:
import os
import glob
import json
import torch
import cv2
from PIL import Image
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from facenet_pytorch import MTCNN

from torch.utils.data import Dataset, DataLoader
from torch import nn, optim
from torch.nn import functional as F
from torchvision.models import resnet18
from albumentations import Normalize, Compose
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import multiprocessing as mp

# Select the compute device: the first CUDA GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'
print(f'Running on device: {device}')

  check_for_updates()


Running on device: cuda:0


# Face Extraction

In [2]:
# Input/output locations. The defaults are the original local Windows paths; set
# DFDC_TRAIN_DIR / DFDC_TMP_DIR in the environment to run the notebook elsewhere
# without editing this cell.
TRAIN_DIR = os.environ.get(
    'DFDC_TRAIN_DIR',
    'D:\\W\\VS\\VS Folder\\DFD\\DFDC\\deepfake-detection-challenge\\train_sample_videos\\',
)
# os.path.join only inserts a separator when TRAIN_DIR does not already end with one,
# so an overridden TRAIN_DIR works with or without a trailing separator.
METADATA_PATH = os.path.join(TRAIN_DIR, 'metadata.json')
TMP_DIR = os.environ.get('DFDC_TMP_DIR', 'D:\\W\\VS\\VS Folder\\DFD\\DFDC MTCNN Extracted')

# Fraction by which frames are resized before face detection (passed as `resize`).
SCALE = 0.25
# Number of evenly spaced frames to sample per video; None means every frame.
N_FRAMES = None

## Face Extractor

In [3]:
class FaceExtractor:
    """Sample frames from a video and run a face detector on every sampled frame."""

    def __init__(self, detector, n_frames=None, resize=None):
        """
        Parameters:
            detector {callable} -- Object invoked as `detector([frame], save_path=path)`
                for every sampled frame.
            n_frames {int} -- Total number of frames to load. These will be evenly spaced
                throughout the video. If not specified (i.e., None), all frames will be loaded.
                (default: {None})
            resize {float} -- Fraction by which to resize frames from original prior to face
                detection. A value less than 1 results in downsampling and a value greater than
                1 results in upsampling. (default: {None})
        """

        self.detector = detector
        self.n_frames = n_frames
        self.resize = resize

    def __call__(self, filename, save_dir):
        """Load frames from an MP4 video, detect faces and save the results.

        Every sampled frame with index `j` is passed to the detector together with
        `save_path=os.path.join(save_dir, f'{j}.png')`.

        Parameters:
            filename {str} -- Path to video.
            save_dir {str} -- The directory where results are saved.
        """

        # Create video reader; the try/finally guarantees the handle is released
        # even when the detector or the frame conversion raises.
        v_cap = cv2.VideoCapture(filename)
        try:
            v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))

            # Pick 'n_frames' evenly spaced frames to sample. None means "every frame".
            # A set gives constant-time membership tests instead of scanning an
            # array once per frame.
            if self.n_frames is None:
                sample = None
            else:
                sample = set(np.linspace(0, v_len - 1, self.n_frames).astype(int).tolist())

            # Loop through frames
            for j in range(v_len):
                # grab() is called for every index so the reader keeps advancing;
                # a frame that could not be grabbed cannot be retrieved either.
                if not v_cap.grab():
                    continue
                if sample is not None and j not in sample:
                    continue

                # Load frame
                success, frame = v_cap.retrieve()
                if not success:
                    continue
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame = Image.fromarray(frame)

                # Resize frame to desired size
                if self.resize is not None:
                    frame = frame.resize(tuple(int(d * self.resize) for d in frame.size))

                save_path = os.path.join(save_dir, f'{j}.png')

                self.detector([frame], save_path=save_path)
        finally:
            v_cap.release()

## Get Metadata

In [4]:
# Parse the per-video annotations. The encoding is pinned to UTF-8 so decoding
# does not depend on the platform's locale default.
with open(METADATA_PATH, 'r', encoding='utf-8') as f:
    metadata = json.load(f)

metadata

{'aagfhgtpmv.mp4': {'label': 'FAKE',
  'split': 'train',
  'original': 'vudstovrck.mp4'},
 'aapnvogymq.mp4': {'label': 'FAKE',
  'split': 'train',
  'original': 'jdubbvfswz.mp4'},
 'abarnvbtwb.mp4': {'label': 'REAL', 'split': 'train', 'original': None},
 'abofeumbvv.mp4': {'label': 'FAKE',
  'split': 'train',
  'original': 'atvmxvwyns.mp4'},
 'abqwwspghj.mp4': {'label': 'FAKE',
  'split': 'train',
  'original': 'qzimuostzz.mp4'},
 'acifjvzvpm.mp4': {'label': 'FAKE',
  'split': 'train',
  'original': 'kbvibjhfzo.mp4'},
 'acqfdwsrhi.mp4': {'label': 'FAKE',
  'split': 'train',
  'original': 'ccfoszqabv.mp4'},
 'acxnxvbsxk.mp4': {'label': 'FAKE',
  'split': 'train',
  'original': 'fjlyaizcwc.mp4'},
 'acxwigylke.mp4': {'label': 'FAKE',
  'split': 'train',
  'original': 'ffcwhpnpuw.mp4'},
 'aczrgyricp.mp4': {'label': 'FAKE',
  'split': 'train',
  'original': 'slwkmefgde.mp4'},
 'adhsbajydo.mp4': {'label': 'FAKE',
  'split': 'train',
  'original': 'fysyrqfguw.mp4'},
 'adohikbdaz.mp4': {'label

In [5]:
# Flatten the metadata mapping into one row per video. Entries without an
# 'original' key get an empty string in that column.
train_df = pd.DataFrame(
    [
        (name, info['label'], info['split'], info.get('original', ''))
        for name, info in metadata.items()
    ],
    columns=['filename', 'label', 'split', 'original'],
)

train_df.head()

Unnamed: 0,filename,label,split,original
0,aagfhgtpmv.mp4,FAKE,train,vudstovrck.mp4
1,aapnvogymq.mp4,FAKE,train,jdubbvfswz.mp4
2,abarnvbtwb.mp4,REAL,train,
3,abofeumbvv.mp4,FAKE,train,atvmxvwyns.mp4
4,abqwwspghj.mp4,FAKE,train,qzimuostzz.mp4


## Detection

In [None]:
# Load face detector
face_detector = MTCNN(margin=14, keep_all=True, factor=0.5, device=device).eval()

# Define face extractor
face_extractor = FaceExtractor(detector=face_detector, n_frames=N_FRAMES, resize=SCALE)

# Get the paths of all train videos. glob returns matches in arbitrary order,
# so sort them to make the processing order reproducible between runs.
all_train_videos = sorted(glob.glob(os.path.join(TRAIN_DIR, '*.mp4')))

In [None]:
# Run the whole extraction pass with gradient tracking disabled.
with torch.no_grad():
    for path in tqdm(all_train_videos):
        # os.path.basename works with the platform's separator, unlike
        # splitting on a hard-coded backslash.
        file_name = os.path.basename(path)

        # One output folder per video, named after the file without its extension.
        # splitext strips only the final extension, so names containing extra
        # dots do not collapse onto the same folder.
        save_dir = os.path.join(TMP_DIR, os.path.splitext(file_name)[0])

        # exist_ok makes the cell safe to re-run over an existing output tree.
        os.makedirs(save_dir, exist_ok=True)

        # Detect all faces that appear in the video and save them.
        face_extractor(path, save_dir)

In [None]:
# Switch the working directory to the extraction output folder. Plain Python is
# used instead of the bare `cd` automagic so the cell does not depend on the
# automagic setting and remains valid when exported as a script.
os.chdir(TMP_DIR)

In [None]:
# Save the metadata table inside the extraction output folder. The destination is
# spelled out explicitly so the result does not depend on the current working directory.
train_df.to_csv(os.path.join(TMP_DIR, 'metadata.csv'), index=False)