In [1]:
import os
import numpy as np
import pandas as pd
import cv2
from tqdm import tqdm
from matplotlib import pyplot as plt 

In [2]:
# Side length (pixels) of the square face crops; used for the default
# `normalized_dim` and for the (0, d, d, 3) array shapes in this notebook.
d = 128

In [3]:
class VideoProcessor:
    """Extract normalised face crops (and face frame-differences) from videos.

    Faces are located with an OpenCV DNN network loaded from a Caffe
    ``configFile`` / ``modelFile`` pair. Every crop is resized to
    ``normalized_dim`` before being returned.
    """

    def __init__(self, 
                 modelFile="../models/res10_300x300_ssd_iter_140000.caffemodel",
                 configFile = "../models/deploy.prototxt",
                 max_interations = 10,
                 conf_threshold = 0.60,
                 nframesdiff = 5,
                 normalized_dim = (d,d)):
        """Load the detector and store the sampling parameters.

        Args:
            modelFile: Path to the Caffe weights file.
            configFile: Path to the Caffe prototxt describing the network.
            max_interations: Maximum number of random frames tried per video
                (the parameter name keeps its historical spelling).
            conf_threshold: Minimum detection confidence for a face to be kept.
            nframesdiff: Frame offset between the two frames compared by
                ``extract_random_diff``.
            normalized_dim: ``(width, height)`` passed to ``cv2.resize`` for
                every face crop.
        """
        self.modelFile = modelFile
        self.configFile = configFile
        self.max_interations = max_interations
        self.conf_threshold = conf_threshold
        self.nframesdiff = nframesdiff
        self.normalized_dim = normalized_dim
        self.net = cv2.dnn.readNetFromCaffe(self.configFile, self.modelFile)
        self.mean = 128

    def _detect_face_crops(self, img):
        """Yield each confident face detection in ``img`` as a resized crop."""
        (h, w) = img.shape[:2]
        blob = cv2.dnn.blobFromImage(cv2.resize(img, (300, 300)), 1.0, (300, 300), (103.93, 116.77, 123.68))
        self.net.setInput(blob)
        detections = self.net.forward()
        for i in range(detections.shape[2]):
            if detections[0, 0, i, 2] > self.conf_threshold:
                # Box coordinates are relative; scale them to pixel positions.
                box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                (x1, y1, x2, y2) = box.astype("int")
                # Clamp to the image: a box that spills over the border would
                # otherwise produce a negative (wrapping) or empty slice and
                # make cv2.resize fail.
                x1, x2 = max(x1, 0), min(x2, w)
                y1, y2 = max(y1, 0), min(y2, h)
                if x2 <= x1 or y2 <= y1:
                    continue
                yield cv2.resize(img[y1:y2, x1:x2], self.normalized_dim)

    @staticmethod
    def _read_frame(v_cap, index):
        """Seek ``v_cap`` to frame ``index`` and return ``(ret, frame)``."""
        v_cap.set(cv2.CAP_PROP_POS_FRAMES, index)
        return v_cap.read()

    def extract_face(self, img):
        """Return the first usable face found in ``img``, or ``None``.

        Args:
            img: Image array whose first two dimensions are (height, width).

        Returns:
            The face crop resized to ``normalized_dim``, or ``None`` when no
            detection exceeds ``conf_threshold``.
        """
        return next(self._detect_face_crops(img), None)

    def extract_random_faces(self, filename, num_faces):
        """Collect face crops from randomly chosen frames of a video.

        Random frames are sampled until at least ``num_faces`` crops have been
        gathered or ``max_interations`` frames have been tried. Every face
        detected in a sampled frame is kept, so the result may contain more
        than ``num_faces`` entries.

        Args:
            filename: Path of the video file.
            num_faces: Number of faces after which sampling stops.

        Returns:
            A list of face crops; empty when the video has no readable frames
            or no face was detected.
        """
        captured_faces = []
        v_cap = cv2.VideoCapture(filename)
        try:
            v_length = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if v_length <= 0:
                # Unreadable or empty video: nothing to sample from.
                return captured_faces

            iterations = 0
            while len(captured_faces) < num_faces and iterations < self.max_interations:
                iterations += 1
                # randint(v_length) already yields a valid index 0..v_length-1.
                ret, img = self._read_frame(v_cap, np.random.randint(v_length))
                if ret:
                    captured_faces.extend(self._detect_face_crops(img))
        finally:
            # Release the capture even if decoding or detection raised.
            v_cap.release()

        return captured_faces

    def extract_random_diff(self, filename, num_diff):
        """Collect face differences between frames ``nframesdiff`` apart.

        For a random base frame, the first face of that frame and of the frame
        ``nframesdiff`` later are extracted (after BGR->RGB conversion) and
        combined as ``absdiff(mean, absdiff(face2, face1))``. Pairs where
        either frame cannot be read or has no detected face are skipped.

        Args:
            filename: Path of the video file.
            num_diff: Number of differences after which sampling stops.

        Returns:
            A uint8 array of shape ``(n, height, width, 3)`` where
            ``(width, height) == normalized_dim``; ``n`` is 0 when nothing
            could be extracted.
        """
        width, height = self.normalized_dim
        captured_diff = []
        v_cap = cv2.VideoCapture(filename)
        try:
            v_length = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
            # Only base frames whose partner frame still lies inside the video.
            n_base_frames = v_length - self.nframesdiff

            iterations = 0
            while (n_base_frames > 0
                   and len(captured_diff) < num_diff
                   and iterations < self.max_interations):
                iterations += 1
                frame = np.random.randint(n_base_frames)

                ret, img_base = self._read_frame(v_cap, frame)
                if not ret:
                    continue
                ret, img = self._read_frame(v_cap, frame + self.nframesdiff)
                if not ret:
                    continue

                img_base = cv2.cvtColor(img_base, cv2.COLOR_BGR2RGB)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                face1 = self.extract_face(img_base)
                face2 = self.extract_face(img)
                # Both faces are required; comparing types would also accept
                # the (None, None) case and crash inside cv2.absdiff.
                if face1 is None or face2 is None:
                    continue

                face_diff = cv2.absdiff(face2, face1)
                # NOTE(review): OpenCV's Python bindings appear to expand a bare
                # number to the scalar (128, 0, 0, 0), which would offset only
                # the first channel -- confirm this is the intended behaviour.
                face_diff = cv2.absdiff(self.mean, face_diff)
                captured_diff.append(face_diff)
        finally:
            # Release the capture even if decoding or detection raised.
            v_cap.release()

        if not captured_diff:
            return np.empty(shape=(0, height, width, 3), dtype=np.uint8)
        # Stack once at the end instead of re-allocating with np.append on
        # every hit; the result keeps the 8-bit unsigned image dtype.
        return np.stack(captured_diff).astype(np.uint8, copy=False)

        

In [4]:
# Build one metadata table from the per-directory metadata.json files found
# under ../videos and persist it as ../data/metadata.csv.
metadata_parts = []
for root, dirs, files in os.walk('../videos', topdown=False):
    for name in dirs:
        video_dir = os.path.join(root, name)
        metadata_path = os.path.join(video_dir, 'metadata.json')
        if not os.path.isfile(metadata_path):
            # os.walk also reports nested folders; only folders that actually
            # carry a metadata.json describe a set of videos.
            print(f'skipped (no metadata.json): {video_dir}')
            continue
        print(name)
        # metadata.json is keyed by video name; transpose so that each video
        # becomes one row.
        dfdir = pd.read_json(metadata_path).T
        # Path relative to ../videos (equals `name` for a flat layout).
        dfdir['dir'] = os.path.relpath(video_dir, '../videos')
        metadata_parts.append(dfdir)

# Concatenate once: DataFrame.append was removed in pandas 2.0 and growing a
# frame inside the loop is quadratic.
df = pd.concat(metadata_parts) if metadata_parts else pd.DataFrame()
# Boolean flag (written to the CSV as "False"), matching the
# `processed == False` filters used later.
df["processed"] = False

df.to_csv('../data/metadata.csv',index_label='video')

dfdc_train_part_1
dfdc_train_part_14
dfdc_train_part_48
dfdc_train_part_3
dfdc_train_part_2


### Prepare Data

In [4]:
# Load the video metadata table from ../data/metadata.csv into `df`.
df = pd.read_csv('../data/metadata.csv')

In [14]:
# Preview 10 randomly sampled rows whose `processed` column equals False.
df.query('processed == False').sample(10)

Unnamed: 0,video,label,original,split,dir,processed
2532,yakzirnamy.mp4,FAKE,jeyvgixnkm.mp4,train,dfdc_train_part_14,False
4407,trnznvybjo.mp4,FAKE,sudzolvppu.mp4,train,dfdc_train_part_48,False
4912,gbinyqapyk.mp4,FAKE,ozgcmnllow.mp4,train,dfdc_train_part_48,False
6295,yjfdnylpab.mp4,FAKE,ndqxtifvbw.mp4,train,dfdc_train_part_48,False
8736,aaujbbfhqu.mp4,FAKE,qiyzfjrloz.mp4,train,dfdc_train_part_2,False
4368,hqeldyhmpu.mp4,FAKE,vdqritvjfl.mp4,train,dfdc_train_part_48,False
9447,rhncgvckxz.mp4,FAKE,fanibwbmoq.mp4,train,dfdc_train_part_2,False
9680,jlluezfnyr.mp4,FAKE,txnmkabufs.mp4,train,dfdc_train_part_2,False
9460,wxvtaveqvb.mp4,FAKE,gpsxfxrjrr.mp4,train,dfdc_train_part_2,False
3893,rgkauxyqtf.mp4,FAKE,gwzeubnydg.mp4,train,dfdc_train_part_14,False


In [7]:
nsample = 300

vp = VideoProcessor()

# Hold the flag column as generic objects so that 'train_N' labels can be
# written next to the initial False values without a dtype clash.
df['processed'] = df['processed'].astype(object)

# Continue numbering after the highest batch already recorded in the metadata,
# so re-running this cell never overwrites an earlier train_Xception_N file.
finished = df['processed'].astype(str).str.extract(r'^train_(\d+)$', expand=False).dropna()
batch = int(finished.astype(int).max()) if len(finished) else 0


def _as_face_batch(faces):
    """Stack a list of (d, d, 3) face crops into one uint8 array (n, d, d, 3)."""
    if not faces:
        return np.empty(shape=(0,d,d,3), dtype=np.uint8)
    return np.array(faces, dtype=np.uint8)


while True:
    # Unprocessed rows carry boolean False in memory but the string 'False'
    # once the CSV has been reloaded; comparing the string form covers both.
    pending = df[(df['label'] == 'FAKE') & (df['processed'].astype(str) == 'False')]
    if pending.empty:
        break
    # The last batch may be smaller than nsample; sample() raises when asked
    # for more rows than are available.
    sample = pending.sample(min(nsample, len(pending)))

    # Collect crops in lists and stack once per batch instead of re-allocating
    # the whole array with np.append for every video.
    fakes = []
    reals = []

    batch += 1
    for index, row in tqdm(sample.iterrows(), total=len(sample)):
        # Two faces from the fake video and two from the original it was made from.
        fakes.extend(vp.extract_random_faces('../videos/' + row['dir'] + '/' + row['video'],2))
        reals.extend(vp.extract_random_faces('../videos/' + row['dir'] + '/' + row['original'],2))

    # Save the images first, then mark the rows, so an interrupted batch is
    # simply retried on the next run.
    np.savez(f'../data/train_Xception_{batch}',
             fakes=_as_face_batch(fakes), reals=_as_face_batch(reals))
    df.loc[sample.index,'processed'] = f'train_{batch}'
    df.to_csv('../data/metadata.csv',index = False)
    print(f'saved batch: {batch}')

100%|██████████| 300/300 [13:20<00:00,  2.42s/it]
  0%|          | 0/300 [00:00<?, ?it/s]

saved batch: 1


100%|██████████| 300/300 [13:13<00:00,  3.10s/it]
  0%|          | 0/300 [00:00<?, ?it/s]

saved batch: 2


100%|██████████| 300/300 [13:07<00:00,  2.84s/it]
  0%|          | 0/300 [00:00<?, ?it/s]

saved batch: 3


100%|██████████| 300/300 [12:43<00:00,  2.36s/it]
  0%|          | 0/300 [00:00<?, ?it/s]

saved batch: 4





KeyboardInterrupt: 

In [None]:
# Ad-hoc check: shape of the first face in `f`, the list left behind by the
# last iteration of the batch loop above (raises IndexError if it is empty).
f[0].shape