-------------------------------------------------------------------------------------------------  

### Preprocessing Video

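This notebook streamlines the preprocessing. It pulls a single frame (originally frame 100; the index is now set by `frame_num` below) from every video in the dataset, one `chunk` directory at a time. The plan is to use cv2 to grab the frame and collect the frames in a list, then use facenet-pytorch to detect the face and crop to it, and finally save the crop to disk.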

<img style="display: block; margin: 0 auto;" src="deepfake.jpg" alt="deepfake example">

-------------------------------------------------------------------------------------------------  

In [1]:
#!pip3 install torch --user

In [2]:
#!pip3 install pandas --user

In [3]:
# from pypi https://pypi.org/project/facenet-pytorch/
# !pip3 install facenet-pytorch --user
#
# install facenet-pytorch on kaggle without internet
# !pip install ../facenet-pytorch/facenet_pytorch-2.0.0-py3-none-any.whl --user

In [4]:
import os
import json
import pandas as pd
import cv2
import numpy as np
from matplotlib import pyplot as plt
from PIL import Image
import torch
from tqdm import tqdm
import time
from joblib import Parallel, delayed
from facenet_pytorch import MTCNN

# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

In [5]:
# Which DFDC training chunk to process; it is substituted into the
# `dfdc_train_part_{chunk}` directory name when the paths are built.
chunk = 1
# Index of the single frame to grab from every video; it also names the
# output folder (`f{frame_num}`).
frame_num = 250

In [6]:
# Input videos for the selected chunk, and the output folder for this frame index.
video_dir = f'/data/deepfake/dfdc_train_part_{chunk}'
frame_dir = f'/data/frames/f{frame_num}'
os.makedirs(frame_dir, exist_ok=True)

# metadata.json lives next to the videos. The frame is transposed so that its
# index can be iterated later as the list of video file names.
meta_file = os.path.join(video_dir, 'metadata.json')
with open(meta_file) as meta_fh:
    metadata = pd.read_json(meta_fh).transpose()

In [7]:
def grab_frames(sample, frame_num):
    """Read one frame from a video and return it in RGB channel order.

    Parameters
    ----------
    sample : str
        Video file name; it is joined onto the module-level ``video_dir``.
    frame_num : int
        Frame position to seek to before reading (OpenCV's
        ``CAP_PROP_POS_FRAMES``).

    Returns
    -------
    tuple
        ``(image, images_dict)``. ``image`` is the frame converted from BGR to
        RGB, and ``images_dict`` is the single-entry mapping
        ``{"<sample stem>.jpg": image}``. When the frame cannot be read (for
        example the video is shorter than ``frame_num`` or cannot be opened),
        a warning is issued and ``image`` is ``None`` in both places, so one
        bad video does not abort a whole parallel batch.
    """
    import warnings  # local import keeps the function self-contained

    video = os.path.join(video_dir, sample)
    # Swap the real extension for .jpg instead of assuming it is 3 characters.
    filename = os.path.splitext(sample)[0] + '.jpg'

    reader = cv2.VideoCapture(video)
    try:
        reader.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
        ok, image = reader.read()
    finally:
        # Always free the capture handle, even if seeking or decoding raised.
        reader.release()

    if not ok or image is None:
        warnings.warn(f'could not read frame {frame_num} from {video}')
        image = None
    else:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    return image, {filename: image}

In [8]:
# Build one deferred grab_frames call per video named in the metadata index,
# then run them through joblib with 20 jobs.
frame_tasks = (delayed(grab_frames)(sample, frame_num) for sample in metadata.index)
results = Parallel(n_jobs=20)(frame_tasks)

# unpack results: every entry is the (image, {filename: image}) pair returned
# by grab_frames
images = [image for image, _ in results]
# Tuple of the per-video single-entry dicts. It has no .items() of its own,
# so the dicts are merged one at a time below.
results_dict = tuple(frame_dict for _, frame_dict in results)
images_dict = {}
for frame_dict in results_dict:
    images_dict.update(frame_dict)

In [9]:
def timer(detector, detect_fn, images, *args):
    """Call ``detect_fn`` and report how long it took.

    ``detect_fn`` is invoked as ``detect_fn(detector, images, *args)`` and must
    return a ``(faces, faces_dict)`` pair. The wall-clock duration is printed
    as ``", <seconds> seconds"`` (the leading comma continues a line the caller
    has already started), and ``(faces, elapsed, faces_dict)`` is returned.
    """
    began_at = time.time()
    faces, faces_dict = detect_fn(detector, images, *args)
    finished_at = time.time()
    elapsed = finished_at - began_at
    print(f', {elapsed:.3f} seconds')
    return faces, elapsed, faces_dict

In [10]:
# Face detector shared by the cells below. The detection function keeps a
# dictionary keyed by file name so each crop can be traced back to its video.
detector = MTCNN(
    image_size=224,
    device=device,
    post_process=False,
)


def detect_facenet_pytorch(detector, images, batch_size):
    """Run the face detector over the frames, one frame per detector call.

    Parameters
    ----------
    detector : callable
        Called with a one-element list holding a PIL image; its first returned
        element is stored as the detection for that frame.
    images : dict or list
        When a dict (file name -> frame array) is given it is used directly.
        Any other value (such as a plain list, which carries no file names)
        falls back to the module-level ``images_dict``, which is what this
        function always read before.
    batch_size : int
        Currently unused; frames are always processed one at a time. Kept so
        existing calls keep working.

    Returns
    -------
    tuple
        ``(faces, faces_dict)``: the detections in processing order, and the
        same detections keyed by file name. Frames whose conversion or
        detection raises are reported with a warning and left out of both.
    """
    import warnings  # local import keeps the function self-contained

    frames = images if isinstance(images, dict) else images_dict
    faces = []
    faces_dict = {}
    for key, frame in frames.items():
        try:
            detected = detector([Image.fromarray(frame)])
            face = detected[0]
        except Exception as exc:
            # Best effort: report the frame and carry on, but let
            # KeyboardInterrupt / SystemExit propagate.
            warnings.warn(f'face detection failed for {key}: {exc!r}')
            continue
        faces.append(face)
        faces_dict[key] = face
    return faces, faces_dict

# Elapsed seconds of each non-batched detection run; appended to after every timed run.
times_facenet_pytorch_nb = list()

In [11]:
# Detect faces in the grabbed frames. The message is printed without a newline
# because timer() finishes the line with the elapsed time.
print('Detecting faces in frames', end='')
detection_run = timer(detector, detect_facenet_pytorch, images, 1)
faces, elapsed, faces_dict = detection_run
times_facenet_pytorch_nb.append(elapsed)

Detecting faces in frames, 510.758 seconds


In [12]:
# Save every detected face crop into `frame_dir`, named after its source video.
for face in faces_dict.keys():
    face_tensor = faces_dict[face]
    # None means the detector found no face. A string means this cell already
    # marked the entry on an earlier run, so re-running stays harmless.
    if face_tensor is None or isinstance(face_tensor, str):
        faces_dict[face] = 'no face detected'
        continue
    try:
        # Channels-first tensor -> channels-last uint8 array.
        rgb = face_tensor.permute(1, 2, 0).clamp(0, 255).byte().numpy()
        # The frames were converted to RGB when grabbed, but cv2.imwrite
        # expects BGR; convert back so the JPEG has the right colours.
        image = cv2.cvtColor(np.ascontiguousarray(rgb), cv2.COLOR_RGB2BGR)
        filename = os.path.join(frame_dir, face)
        # imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(filename, image):
            print(f'could not write {filename}')
    except Exception as exc:
        # Report real failures instead of mislabelling them as a missing face.
        print(f'could not save face for {face}: {exc!r}')

In [13]:
# Entries that were replaced by a string marker are the frames for which no
# face crop could be saved.
no_faces_detected = sum(isinstance(entry, str) for entry in faces_dict.values())

print(f'{no_faces_detected} frames without faces detected')

52 frames without faces detected


In [14]:
# Number of entries now in the output folder. The folder may already have
# existed, so this can include files left over from earlier runs.
len(os.listdir(frame_dir))

1647

In [15]:
# Number of entries in the video folder. metadata.json lives there too, so
# this is not purely a count of videos.
len(os.listdir(video_dir))

1704