In [None]:
# List the files in the DFDC training part directory (IPython shell escape).
!ls ../videos/dfdc_train_part_48

In [1]:
import numpy as np
import pandas as pd
import cv2
from tqdm import tqdm
from matplotlib import pyplot as plt 

In [8]:
class FaceDetector:
    """Crop faces from randomly sampled video frames using an OpenCV DNN (Caffe) detector.

    Parameters
    ----------
    modelFile : str
        Path to the Caffe weights file passed to ``cv2.dnn.readNetFromCaffe``.
    configFile : str
        Path to the Caffe prototxt passed to ``cv2.dnn.readNetFromCaffe``.
    max_interations : int
        Upper bound on the number of random frames tried per video.
        (The spelling is kept as-is so existing keyword callers keep working.)
    conf_threshold : float
        A detection is kept only when its confidence is strictly greater than this.
    normalized_dim : tuple
        Target size handed to ``cv2.resize`` for every face crop.
    """

    def __init__(self, 
                 modelFile="res10_300x300_ssd_iter_140000.caffemodel",
                 configFile = "deploy.prototxt",
                 max_interations = 300,
                 conf_threshold = 0.60,
                 normalized_dim = (32,32)):
        self.modelFile = modelFile
        self.configFile = configFile
        self.max_interations = max_interations
        self.conf_threshold = conf_threshold
        self.normalized_dim = normalized_dim
        self.net = cv2.dnn.readNetFromCaffe(self.configFile, self.modelFile)

    def extract_random_faces(self, filename, num_faces):
        """Return up to ``num_faces`` face crops taken from random frames of a video.

        Frames are drawn uniformly (with replacement) from ``[0, frame_count)``.
        Sampling stops once ``num_faces`` crops were collected or after
        ``self.max_interations`` frames were tried, whichever comes first.

        Parameters
        ----------
        filename : str
            Path of the video handed to ``cv2.VideoCapture``.
        num_faces : int
            Maximum number of crops to return.

        Returns
        -------
        list
            Face crops resized to ``self.normalized_dim``. The list is empty when
            the video reports no frames or no face passes the threshold.
        """
        captured_faces = []
        v_cap = cv2.VideoCapture(filename)
        try:
            v_length = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if v_length <= 0:
                # Unreadable or empty video: np.random.randint(0) would raise.
                return captured_faces

            iterations = 0
            while len(captured_faces) < num_faces and iterations < self.max_interations:
                iterations += 1
                # randint(v_length) already yields a valid 0-based index in
                # [0, v_length); subtracting 1 would produce -1 and skip the last frame.
                v_cap.set(cv2.CAP_PROP_POS_FRAMES, int(np.random.randint(v_length)))

                ret, img = v_cap.read()
                if not ret:
                    continue

                # A frame may contain several faces; never return more than requested.
                remaining = num_faces - len(captured_faces)
                captured_faces.extend(self._detect_faces(img)[:remaining])
        finally:
            # Release the capture even when detection or resizing raises.
            v_cap.release()

        return captured_faces

    def _detect_faces(self, img):
        """Run the detector on one frame and return its normalized face crops."""
        (h, w) = img.shape[:2]
        # The three-tuple is the per-channel mean that blobFromImage subtracts.
        blob = cv2.dnn.blobFromImage(cv2.resize(img, (300, 300)), 1.0, (300, 300), (103.93, 116.77, 123.68))
        self.net.setInput(blob)
        detections = self.net.forward()

        faces = []
        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if confidence > self.conf_threshold:
                # Box coordinates are relative; scale them to pixel coordinates.
                box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                (x1, y1, x2, y2) = box.astype("int")

                # Boxes can extend past the frame. Negative indices would wrap
                # around and empty crops make cv2.resize fail, so clip first.
                x1, x2 = max(x1, 0), min(x2, w)
                y1, y2 = max(y1, 0), min(y2, h)
                if x2 <= x1 or y2 <= y1:
                    continue

                # normalize the crop to the configured size
                faces.append(cv2.resize(img[y1:y2, x1:x2], self.normalized_dim))
        return faces

In [9]:
# Directory (relative to the notebook) that holds this training part's videos and metadata.json.
dfdc_train_part_48 = '../videos/dfdc_train_part_48'

In [10]:
# Read the part's metadata and transpose it so that each video file name is a row.
df = pd.read_json(dfdc_train_part_48 + '/metadata.json').T

In [11]:
# Preview ten randomly chosen rows whose label is REAL.
df.query("label == 'REAL'").sample(10)

Unnamed: 0,label,original,split
nlrmgdqfnr.mp4,REAL,,train
rtdogbpems.mp4,REAL,,train
kmkvxunbop.mp4,REAL,,train
sxmqvznwwq.mp4,REAL,,train
wetheuhcha.mp4,REAL,,train
kfzuekxbbb.mp4,REAL,,train
bfqlqydtam.mp4,REAL,,train
yvfoaoiclp.mp4,REAL,,train
qhwkphcmhx.mp4,REAL,,train
yuqdwjizdb.mp4,REAL,,train


In [12]:
# Build the detector with its default model files and thresholds.
fd = FaceDetector()

In [13]:
nr = 3  # number of videos sampled from the metadata table
nf = 4  # number of face crops requested per video

# Collect into plain lists and build the arrays once at the end; calling
# np.append inside the loop copies everything gathered so far on each pass.
face_list = []
label_list = []
for index, row in tqdm(df.sample(nr).iterrows(), total=nr):
    images = fd.extract_random_faces(dfdc_train_part_48+'/'+index, nf)
    if len(images) > 0:
        face_list.extend(images)
        label_list.extend([row.label] * len(images))

# cv2.resize takes (width, height), so crops have shape (height, width, 3).
face_width, face_height = fd.normalized_dim

# Pixels are 8-bit unsigned. The previous int8 buffer was silently promoted
# to int16 as soon as uint8 crops were appended to it.
faces = np.array(face_list, dtype=np.uint8).reshape(-1, face_height, face_width, 3)
labels = np.array(label_list, dtype="<U5").reshape(-1, 1)

100%|██████████| 3/3 [00:05<00:00,  2.07s/it]


In [14]:
# Every face crop must have exactly one label row.
assert faces.shape[0] == labels.shape[0]

In [None]:
# Persist both arrays to a single NumPy archive, stored under the keys `faces` and `labels`.
np.savez('train',faces=faces, labels=labels)

In [15]:
# Display the collected labels for a quick visual check.
labels

array([['FAKE'],
       ['FAKE'],
       ['FAKE'],
       ['FAKE'],
       ['FAKE'],
       ['FAKE'],
       ['FAKE'],
       ['FAKE'],
       ['FAKE'],
       ['FAKE'],
       ['FAKE'],
       ['FAKE']], dtype='<U5')