# Data Preparation

- Pix2Pix GAN is a paired setup: every training data point must consist of an input sample together with its corresponding output sample
- For the current Deep-Fakes pipeline, our training pair has facial landmarks as the input and the corresponding video frame as the output
- We will make use of ``opencv`` and ``dlib`` for handling video frames and getting facial landmarks respectively

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/raghavbali/deep_fakes_tutorial/blob/main/notebooks/hands_on_2/01_data_prep.ipynb)

In [None]:
import os
import cv2
import dlib
import numpy as np
from imutils import video

In [None]:
# Default shrink factor for frames before face/landmark detection:
# frames are resized by 1 / DOWNSAMPLE_RATIO and the detected landmark
# coordinates are multiplied by it again to map back to the full-size frame.
DOWNSAMPLE_RATIO = 4

In [None]:
def reshape_array(array):
    """Pack a sequence of (x, y) points into an int32 array of shape (N, 1, 2).

    Parameters:
        array : sequence of 2-element points (anything ``np.array`` accepts)
    Returns:
        a new ``numpy.ndarray`` with dtype int32 and shape (N, 1, 2)
    """
    points = np.array(array, dtype=np.int32)
    return points.reshape(-1, 1, 2)


def _draw_landmarks(canvas, landmarks, color=(255, 255, 255), thickness=3):
    """Draw the facial-feature outlines described by ``landmarks`` onto ``canvas`` in place.

    Parameters:
        canvas : image array to draw on (modified in place)
        landmarks : list of [x, y] points, indexed as produced by the
                    68-point shape predictor, already in canvas coordinates
        color : line colour passed to ``cv2.polylines``
        thickness : line thickness passed to ``cv2.polylines``
    """
    # (index range into the landmark list, whether the outline is closed)
    features = (
        (slice(0, 17), False),   # jaw
        (slice(22, 27), False),  # left eyebrow
        (slice(17, 22), False),  # right eyebrow
        (slice(27, 31), False),  # nose bridge
        (slice(30, 35), True),   # lower nose
        (slice(42, 48), True),   # left eye
        (slice(36, 42), True),   # right eye
        (slice(48, 60), True),   # outer lip
        (slice(60, 68), True),   # inner lip
    )
    for span, is_closed in features:
        outline = reshape_array(landmarks[span])
        cv2.polylines(canvas, [outline], is_closed, color, thickness)


def prepare_data(video_file_path, detector, predictor, num_samples=400, downsample_ratio=DOWNSAMPLE_RATIO):
    """
    Utility to prepare data for pix2pix based deepfake.
    Output is a set of directories ('original' and 'landmarks', created in
    the current working directory) holding the original frames and their
    corresponding facial-landmark drawings, saved as '<count>.png'.
    Only frames in which exactly one face is detected are saved.
    Parameters:
        video_file_path : path to video to be analysed
        detector : callable face detector, invoked as ``detector(gray, 1)``
                   (e.g. ``dlib.get_frontal_face_detector()``)
        predictor : callable landmark predictor, invoked as
                    ``predictor(gray, face)``; its result must expose
                    ``.parts()`` yielding points with ``.x`` / ``.y``
        num_samples : number of frames/samples to be extracted; extraction
                      also stops early when the video runs out of frames
        downsample_ratio : factor by which frames are shrunk before
                           detection; landmarks are scaled back up by it
    """

    # create output directories
    os.makedirs('original', exist_ok=True)
    os.makedirs('landmarks', exist_ok=True)

    # get video capture object
    cap = cv2.VideoCapture(video_file_path)
    fps = video.FPS().start()

    count = 0
    try:
        # iterate through video frame by frame
        while cap.isOpened():
            ret, frame = cap.read()

            # no frame could be read (e.g. end of video): stop instead of
            # passing an empty frame to cv2.resize
            if not ret:
                break

            # resize frame
            frame_resize = cv2.resize(frame,
                                      None,
                                      fx=1 / downsample_ratio,
                                      fy=1 / downsample_ratio)

            # gray scale
            gray = cv2.cvtColor(frame_resize, cv2.COLOR_BGR2GRAY)

            # detect face
            faces = detector(gray, 1)

            # proceed only if exactly one face is detected
            if len(faces) != 1:
                if len(faces) == 0:
                    print("No face detected")
                else:
                    print("Multiple faces detected")
                continue

            # black background, same size as the full-resolution frame
            black_image = np.zeros(frame.shape, np.uint8)

            # get landmarks and scale them back to full-resolution coordinates
            detected_landmarks = predictor(gray, faces[0]).parts()
            landmarks = [[p.x * downsample_ratio, p.y * downsample_ratio]
                         for p in detected_landmarks]

            # plot landmarks
            _draw_landmarks(black_image, landmarks)

            # save the frame / landmark pair
            count += 1
            cv2.imwrite("original/{}.png".format(count), frame)
            cv2.imwrite("landmarks/{}.png".format(count), black_image)
            fps.update()

            # stop after num_samples
            if count == num_samples:
                break
            # allow a manual stop with the 'q' key
            elif cv2.waitKey(1) & 0xFF == ord('q'):
                break

        fps.stop()
        print('Total time: {:.2f}'.format(fps.elapsed()))
        print('Approx. FPS: {:.2f}'.format(fps.fps()))
    finally:
        # always release the capture, even if processing failed midway
        cap.release()
        cv2.destroyAllWindows()

In [None]:
# download and extract the 68-point face landmarks model from dlib.net
# NOTE: both commands run unconditionally; skip this cell if
# 'shape_predictor_68_face_landmarks.dat' is already present
!wget http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
!bunzip2 "shape_predictor_68_face_landmarks.dat.bz2"

In [None]:
# instantiate objects for face and landmark detection
# detector  : dlib's frontal face detector
# predictor : shape predictor loaded from the 68-point landmarks model file
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')

In [None]:
# prepare data: run the extraction on 'obama.mp4', requesting 400
# frame/landmark pairs, written to the 'original' and 'landmarks' directories
prepare_data('obama.mp4', 
             detector, 
             predictor,
             num_samples=400, 
             downsample_ratio = DOWNSAMPLE_RATIO)

In [None]:
# zip the generated landmark images into landmarks.zip
!zip landmarks.zip landmarks/*.*

In [None]:
# zip the original frames into original.zip
!zip original.zip original/*.*