In [4]:
import os
import numpy as np
import pandas as pd
import cv2
from tqdm import tqdm
from matplotlib import pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg
from matplotlib.figure import Figure
from PIL import Image
import dlib

In [5]:
# Edge length, in pixels, used for square (d, d, 3) image buffers.
d = 128

In [79]:
class VideoProcessor:
    """Face-centred frame extraction helpers for video clips.

    Wraps an OpenCV DNN face detector, a dlib landmark predictor and three
    Haar cascades, and offers helpers that turn video frames into face crops,
    frame-difference images, optical-flow images, skin masks and a
    side-by-side "skin crop + HSV histogram" video.

    All image-returning helpers work on RGB images: frames read from
    ``cv2.VideoCapture`` are converted from BGR to RGB before any processing.
    """

    # Input size and per-channel mean handed to cv2.dnn.blobFromImage.
    # NOTE(review): the detector is fed RGB frames with this mean triple;
    # confirm the channel order matches what the model was trained with.
    _BLOB_SIZE = (300, 300)
    _BLOB_MEAN = (103.93, 116.77, 123.68)

    # Resolution used when rasterising the histogram figure.
    _HIST_DPI = 100

    def __init__(self,
                 modelFile="../models/res10_300x300_ssd_iter_140000.caffemodel",
                 configFile="../models/deploy.prototxt",
                 landmarkFile="../models/shape_predictor_68_face_landmarks.dat",
                 right_eye_file='../models/haarcascade_righteye_2splits.xml',
                 left_eye_file='../models/haarcascade_lefteye_2splits.xml',
                 mouth_file='../models/haarcascade_smile.xml',
                 max_interations=10,
                 conf_threshold=0.60,
                 nframesdiff=2,
                 normalized_dim=500):
        """Load the detector, landmark predictor and cascades.

        Args:
            modelFile: Caffe weights file for the face detector.
            configFile: Caffe prototxt describing the face detector.
            landmarkFile: dlib shape-predictor model file.
            right_eye_file: Haar cascade XML used by ``right_eye``.
            left_eye_file: Haar cascade XML used by ``left_eye``.
            mouth_file: Haar cascade XML used by ``mouth``.
            max_interations: maximum number of frame-read attempts made by
                ``extract_random_faces``, ``extract_diff`` and
                ``extract_motion`` (spelling kept so existing keyword
                callers keep working).
            conf_threshold: minimum detector confidence for a detection to
                be used.
            nframesdiff: frame offset between the two frames compared by
                ``extract_diff`` and ``extract_motion``.
            normalized_dim: edge length, in pixels, of the square images
                produced by ``extract_diff``, ``extract_motion`` and each
                panel written by ``extract``.
        """
        self.modelFile = modelFile
        self.configFile = configFile
        self.max_interations = max_interations
        self.conf_threshold = conf_threshold
        self.nframesdiff = nframesdiff
        self.normalized_dim = normalized_dim

        self.net = cv2.dnn.readNetFromCaffe(self.configFile, self.modelFile)
        self.shape_predictor = dlib.shape_predictor(landmarkFile)
        self.right_eye_casc = cv2.CascadeClassifier(right_eye_file)
        self.left_eye_casc = cv2.CascadeClassifier(left_eye_file)
        self.mouth_casc = cv2.CascadeClassifier(mouth_file)

        self.mean = 128

        # Inclusive lower/upper bounds used by extract_skin (YCrCb space).
        self.min_YCrCb = np.array([0, 133, 77], np.uint8)
        self.max_YCrCb = np.array([235, 173, 127], np.uint8)

        # Height/width of the fixed window that extract() cuts around the
        # centre of the detected face box.
        self.h = 260
        self.w = 208

        # Inclusive lower/upper bounds used by extract_skin_hue (HSV space).
        self.min_HSV = np.array([0, 58, 30], dtype="uint8")
        self.max_HSV = np.array([33, 255, 255], dtype="uint8")

    # ------------------------------------------------------------------
    # Face detection
    # ------------------------------------------------------------------
    def _detect_boxes(self, img):
        """Yield integer ``(x1, y1, x2, y2)`` boxes above ``conf_threshold``.

        Boxes are scaled to the pixel size of ``img`` and yielded in the
        order the network reports them. They are NOT clipped to the image.
        """
        (h, w) = img.shape[:2]
        blob = cv2.dnn.blobFromImage(cv2.resize(img, self._BLOB_SIZE), 1.0,
                                     self._BLOB_SIZE, self._BLOB_MEAN)
        self.net.setInput(blob)
        detections = self.net.forward()
        scale = np.array([w, h, w, h])
        for i in range(detections.shape[2]):
            if detections[0, 0, i, 2] > self.conf_threshold:
                yield (detections[0, 0, i, 3:7] * scale).astype("int")

    @staticmethod
    def _crop(img, box):
        """Return ``img`` cropped to ``box`` clipped to the image, or None.

        Clipping matters because a negative start index would otherwise wrap
        around in numpy slicing and silently yield an empty or wrong crop.
        ``None`` is returned when the clipped box has no area.
        """
        (h, w) = img.shape[:2]
        x1, y1, x2, y2 = (int(v) for v in box)
        x1, x2 = max(0, x1), min(w, x2)
        y1, y2 = max(0, y1), min(h, y2)
        if x2 <= x1 or y2 <= y1:
            return None
        return img[y1:y2, x1:x2]

    def extract_face_box(self, img):
        """Return the first detection box above the threshold, or ``None``.

        Args:
            img: image array of shape (height, width, channels).

        Returns:
            Integer array ``[x1, y1, x2, y2]`` in pixel coordinates of
            ``img`` (not clipped to the image), or ``None`` if no detection
            exceeds ``conf_threshold``.
        """
        return next(self._detect_boxes(img), None)

    def extract_face(self, img):
        """Return the crop of the first detected face in ``img``, or ``None``.

        ``None`` is returned both when nothing is detected and when the
        detected box lies entirely outside the image.
        """
        box = self.extract_face_box(img)
        if box is None:
            return None
        return self._crop(img, box)

    def extract_random_faces(self, filename, nframe=100, num_faces=1):
        """Collect face crops starting at frame ``nframe`` of a video.

        Each attempt reads one frame and appends every detection above the
        threshold. Attempts start at ``nframe`` and advance by one frame, so
        a retry looks at new data instead of re-reading the same frame.
        Attempts stop once at least ``num_faces`` crops were collected or
        after ``max_interations`` attempts.

        Args:
            filename: path of the video to read.
            nframe: index of the first frame to inspect.
            num_faces: number of crops after which no further frame is read.

        Returns:
            List of RGB face crops. It may be empty, and may hold more than
            ``num_faces`` entries when one frame contains several faces.
        """
        captured_faces = []
        v_cap = cv2.VideoCapture(filename)
        try:
            for attempt in range(self.max_interations):
                if len(captured_faces) >= num_faces:
                    break
                v_cap.set(cv2.CAP_PROP_POS_FRAMES, nframe + attempt)
                ret, img = v_cap.read()
                if not ret:
                    continue
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                for box in self._detect_boxes(img):
                    face = self._crop(img, box)
                    if face is not None:
                        captured_faces.append(face)
        finally:
            # Release the capture even if detection raised.
            v_cap.release()

        return captured_faces

    # ------------------------------------------------------------------
    # Landmarks and facial parts
    # ------------------------------------------------------------------
    def landmark(self, face):
        """Return the dlib landmark coordinates for a face crop.

        The whole crop is used as the face rectangle.

        Returns:
            Integer array of shape (num_parts, 2) holding (x, y) per part.
        """
        shape = self.shape_predictor(
            face, dlib.rectangle(0, 0, face.shape[1], face.shape[0]))
        coords = np.zeros((shape.num_parts, 2), dtype="int")
        for i in range(shape.num_parts):
            part = shape.part(i)
            coords[i] = (part.x, part.y)
        return coords

    @staticmethod
    def _first_detection(cascade, face):
        """Run ``cascade`` on ``face``; return the first hit or ``None``."""
        found = cascade.detectMultiScale(
            face,
            scaleFactor=1.1,
            minNeighbors=4,
            flags=cv2.CASCADE_FIND_BIGGEST_OBJECT)
        # Guard against an empty result: indexing it would raise IndexError.
        return found[0] if len(found) > 0 else None

    def right_eye(self, face):
        """Return the first right-eye cascade detection, or ``None``."""
        return self._first_detection(self.right_eye_casc, face)

    def left_eye(self, face):
        """Return the first left-eye cascade detection, or ``None``."""
        return self._first_detection(self.left_eye_casc, face)

    def mouth(self, face):
        """Return the first mouth cascade detection, or ``None``."""
        return self._first_detection(self.mouth_casc, face)

    # ------------------------------------------------------------------
    # Frame-pair features
    # ------------------------------------------------------------------
    def _read_face_pair(self, v_cap, nframe):
        """Return resized faces from frames ``nframe`` and ``nframe + nframesdiff``.

        Both faces are resized to ``normalized_dim`` x ``normalized_dim``.
        Returns ``None`` if either frame cannot be read or has no face.
        """
        size = (self.normalized_dim, self.normalized_dim)
        faces = []
        for index in (nframe, nframe + self.nframesdiff):
            v_cap.set(cv2.CAP_PROP_POS_FRAMES, index)
            ret, img = v_cap.read()
            if not ret:
                return None
            face = self.extract_face(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            if face is None:
                return None
            faces.append(cv2.resize(face, size))
        return faces[0], faces[1]

    def _stack_frames(self, frames):
        """Stack equally sized images into one (n, dim, dim, 3) uint8 array."""
        dim = self.normalized_dim
        if not frames:
            return np.empty(shape=(0, dim, dim, 3), dtype=np.uint8)
        return np.stack(frames).astype(np.uint8, copy=False)

    def extract_diff(self, filename, nframe=42, num_diff=1):
        """Compute min-max normalised absolute face differences.

        Each attempt compares the face at a start frame with the face
        ``nframesdiff`` frames later. The start frame is ``nframe`` for the
        first attempt and advances by one per attempt. Attempts stop after
        ``num_diff`` results or ``max_interations`` attempts.

        Returns:
            uint8 array of shape (n, normalized_dim, normalized_dim, 3)
            with ``0 <= n <= num_diff``.
        """
        captured = []
        v_cap = cv2.VideoCapture(filename)
        try:
            for attempt in range(self.max_interations):
                if len(captured) >= num_diff:
                    break
                pair = self._read_face_pair(v_cap, nframe + attempt)
                if pair is None:
                    continue
                face1, face2 = pair
                face_diff = cv2.absdiff(face2, face1)
                captured.append(
                    cv2.normalize(face_diff, None, 0, 255, cv2.NORM_MINMAX))
        finally:
            v_cap.release()

        cv2.destroyAllWindows()

        return self._stack_frames(captured)

    def extract_motion(self, filename, nframe=42, num_motions=1):
        """Render dense optical flow between two face crops as RGB images.

        Frame selection and retry behaviour match ``extract_diff``. Flow
        direction is written to the hue channel, saturation is fixed at 255
        and the min-max normalised flow magnitude is written to the value
        channel before converting HSV to RGB.

        Returns:
            uint8 array of shape (n, normalized_dim, normalized_dim, 3)
            with ``0 <= n <= num_motions``.
        """
        dim = self.normalized_dim
        captured = []
        v_cap = cv2.VideoCapture(filename)
        try:
            for attempt in range(self.max_interations):
                if len(captured) >= num_motions:
                    break
                pair = self._read_face_pair(v_cap, nframe + attempt)
                if pair is None:
                    continue
                gray1 = cv2.cvtColor(pair[0], cv2.COLOR_RGB2GRAY)
                gray2 = cv2.cvtColor(pair[1], cv2.COLOR_RGB2GRAY)

                flow = cv2.calcOpticalFlowFarneback(
                    gray1, gray2, None, 0.5, 3, 15, 3, 5, 1.2, 0)
                mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])

                # Fresh buffer per result so stored images never alias.
                hsv = np.empty(shape=(dim, dim, 3), dtype=np.uint8)
                hsv[..., 0] = ang * 180 / np.pi / 2
                hsv[..., 1] = 255
                hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
                captured.append(cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB))
        finally:
            v_cap.release()

        cv2.destroyAllWindows()

        return self._stack_frames(captured)

    # ------------------------------------------------------------------
    # Skin segmentation
    # ------------------------------------------------------------------
    def extract_skin(self, image):
        """Mask ``image`` to pixels inside the YCrCb bounds.

        Returns:
            Tuple ``(count, masked)``: the number of pixels inside the
            bounds, and ``image`` with every other pixel set to zero.
        """
        ycrcb = cv2.cvtColor(image, cv2.COLOR_RGB2YCR_CB)
        mask = cv2.inRange(ycrcb, self.min_YCrCb, self.max_YCrCb)
        return (mask / 255).sum(), cv2.bitwise_and(image, image, mask=mask)

    def extract_skin_hue(self, image):
        """Return ``image`` with pixels outside the HSV bounds set to zero."""
        hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
        mask = cv2.inRange(hsv, self.min_HSV, self.max_HSV)
        return cv2.bitwise_and(image, image, mask=mask)

    # ------------------------------------------------------------------
    # Side-by-side video export
    # ------------------------------------------------------------------
    def _render_histogram(self, face, npixels):
        """Rasterise the per-channel HSV histogram of ``face`` as a BGR image.

        The three curves are each channel's 256-bin histogram divided by
        ``npixels``. When ``face`` is ``None`` or ``npixels`` is zero an
        empty plot is drawn instead of failing or dividing by zero.

        Returns:
            BGR uint8 image of shape (normalized_dim, normalized_dim, 3).
        """
        dim = self.normalized_dim
        inches = dim / self._HIST_DPI
        fig = Figure(figsize=(inches, inches), dpi=self._HIST_DPI)
        canvas = FigureCanvasAgg(fig)
        ax = fig.add_subplot(111)

        if face is not None and npixels > 0:
            face_hsv = cv2.cvtColor(face, cv2.COLOR_RGB2HSV)
            for channel, col in enumerate(('b', 'y', 'r')):
                hist = cv2.calcHist(
                    [face_hsv], [channel], None, [256], [0, 256]) / npixels
                ax.plot(hist, color=col)
        ax.set_ylim([.0, .3])

        canvas.draw()
        s, (width, height) = canvas.print_to_buffer()
        im = Image.frombytes("RGBA", (width, height), s)
        fhist = cv2.cvtColor(np.array(im), cv2.COLOR_RGBA2BGR)
        # Rounding in the figure size could leave the raster a pixel off;
        # the panel must match the face panel exactly for np.hstack.
        if fhist.shape[:2] != (dim, dim):
            fhist = cv2.resize(fhist, (dim, dim))
        return fhist

    def extract(self, filename, output):
        """Write a video showing the skin-masked face next to its histogram.

        For every input frame a fixed ``self.w`` x ``self.h`` window centred
        on the detected face box is skin-masked and pasted into the top-left
        corner of a black ``normalized_dim`` square. If a frame has no
        detection the most recent box is reused. The right half of each
        output frame is the HSV histogram plot of that masked crop. Frames
        before the first detection get a black left panel and an empty plot.

        Prints ``faces = N`` where N counts frames processed with a box,
        including reused ones.

        Args:
            filename: path of the input video.
            output: path of the video to write (30 fps, size
                ``2 * normalized_dim`` x ``normalized_dim``).
        """
        dim = self.normalized_dim
        nfaces = 0
        last_face_box = None
        v_cap = cv2.VideoCapture(filename)
        fourcc = cv2.VideoWriter_fourcc(*'MP4V')
        v_out = cv2.VideoWriter(output, fourcc, 30, (2 * dim, dim))

        try:
            while True:
                ret, img = v_cap.read()
                if not ret:
                    break

                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                face_box = self.extract_face_box(img)
                if face_box is None:
                    face_box = last_face_box

                # Reset per frame so a frame without a usable crop never
                # reuses the previous frame's pixels or pixel count.
                face, npixels = None, 0
                panel = np.zeros((dim, dim, 3), dtype="uint8")

                if face_box is not None:
                    (x1, y1, x2, y2) = face_box
                    cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
                    window = (cx - self.w // 2, cy - self.h // 2,
                              cx + self.w // 2, cy + self.h // 2)
                    crop = self._crop(img, window)
                    if crop is not None:
                        (npixels, face) = self.extract_skin(crop)
                        # Never paste more than the panel can hold.
                        rows = min(face.shape[0], dim)
                        cols = min(face.shape[1], dim)
                        panel[:rows, :cols] = face[:rows, :cols]
                    last_face_box = face_box
                    nfaces += 1

                frame = cv2.cvtColor(panel, cv2.COLOR_RGB2BGR)
                fhist = self._render_histogram(face, npixels)
                v_out.write(np.hstack([frame, fhist]))
        finally:
            # Always finalise the output file and free the capture.
            v_out.release()
            v_cap.release()

        cv2.destroyAllWindows()
        print(f'faces = {nfaces}')


In [80]:
# Build one processor with the default model paths and thresholds; the
# constructor loads the detector, landmark predictor and cascades from disk.
vp = VideoProcessor()

In [81]:
# REAL 2/hszwwswewp.mp4
# FAKE 2/szfiektjqw.mp4

# REAL tejfudfgpq.mp4
# FAKE imvbxbuhbp.mp4

# REAL 48/cdpnnebwfa.mp4
# FAKE 48/yyaicxrixg.mp4

# REAL 20/ftxqeraryj.mp4
# FAKE 20/kbjtcfcbys.mp4

# REAL 14/urloiqxdwi.mp4
# FAKE 14/ffztqatacr.mp4

# REAL 20/stqcnfwzrv.mp4
# FAKE 20/qelvshsgnv.mp4

# REAL 2/oobeaklccb.mp4
# FAKE 2/arcviozhqq.mp4

# REAL 14/xdxfmvnghz.mp4 (side)
# FAKE 14/xwymxohvup.mp4

In [93]:
# Render the skin/histogram video for the clip listed above as REAL (part 14).
video_filename = '../videos/dfdc_train_part_14/xdxfmvnghz.mp4'
output_filename = '../data/xdxfmvnghz_real.mp4'
vp.extract(filename=video_filename, output=output_filename)

faces = 300


In [94]:
# Render the skin/histogram video for the clip listed above as FAKE (part 14).
video_filename = '../videos/dfdc_train_part_14/xwymxohvup.mp4'
output_filename = '../data/xwymxohvup_fake.mp4'
vp.extract(filename=video_filename, output=output_filename)

faces = 300
