# DeepFakes 101: Facial Swap (1) - Data Preprocessing

This notebook generates clean training data for deepfake generation by extracting the faces of the training subjects with the dlib-based face_recognition library. It is based on the implementation by Ovalery16, which is in turn based on the original implementation by ShoanLu.

The main improvements focus on compatibility with the Google Colaboratory environment, so that the notebook works as a stand-alone module.


This notebook downloads the auxiliary image-processing scripts it depends on from Google Drive. However, this could be replaced with a simple git clone command.


1. Load dependencies

In [1]:
#Download auxilliary components first


!gdown https://drive.google.com/uc?id=1O0jrWmtAoSN-W8AwmO0GrqhcYPzkZB2Y
  
!unzip deepfake_aux.zip

Downloading...
From: https://drive.google.com/uc?id=1O0jrWmtAoSN-W8AwmO0GrqhcYPzkZB2Y
To: /content/deepfake_aux.zip
7.60MB [00:00, 13.5MB/s]
Archive:  deepfake_aux.zip
   creating: data/
  inflating: data/bond.jpg           
  inflating: data/Casino-Royale-Eva-Green-Daniel-Craig.jpg  
  inflating: data/Casino-Royale-Three-Piece-1000x524.jpg  
  inflating: data/Casino-Royale.jpg  
  inflating: data/casino_royale_movie_image_james_bond__1_.jpg  
  inflating: data/CR_2012.jpg        
  inflating: data/daniel-craig-casino-royale.jpg  
  inflating: data/james-bond-casino-royale.jpg  
  inflating: data/skyfall.jpg        
   creating: filter/
  inflating: filter/000001.jpg       
   creating: image/
  inflating: image/bond.jpg          
  inflating: image/bond_v2.jpg       
  inflating: image/Casino-Royale-Eva-Green-Daniel-Craig.jpg  
  inflating: image/Casino-Royale-Eva-Green-Daniel-Craig_v2.jpg  
  inflating: image/Casino-Royale.jpg  
  inflating: image/casino_royale_movie_image_james_bond

In [2]:
#Install dependencies if missing

!pip install face_recognition

!pip install scandir
!pip install h5py
!pip install opencv-python
!pip install scikit-image
!pip install dlib
!pip install tqdm



Collecting face_recognition
  Downloading https://files.pythonhosted.org/packages/3f/ed/ad9a28042f373d4633fc8b49109b623597d6f193d3bbbef7780a5ee8eef2/face_recognition-1.2.3-py2.py3-none-any.whl
Collecting face-recognition-models>=0.3.0 (from face_recognition)
[?25l  Downloading https://files.pythonhosted.org/packages/cf/3b/4fd8c534f6c0d1b80ce0973d01331525538045084c73c153ee6df20224cf/face_recognition_models-0.3.0.tar.gz (100.1MB)
[K     |████████████████████████████████| 100.2MB 297kB/s 
Building wheels for collected packages: face-recognition-models
  Building wheel for face-recognition-models (setup.py) ... [?25l[?25hdone
  Stored in directory: /root/.cache/pip/wheels/d2/99/18/59c6c8f01e39810415c0e63f5bede7d83dfb0ffc039865465f
Successfully built face-recognition-models
Installing collected packages: face-recognition-models, face-recognition
Successfully installed face-recognition-1.2.3 face-recognition-models-0.3.0
Collecting scandir
  Downloading https://files.pythonhosted.org/pac

In [72]:
import cv2
from pathlib import Path
import face_recognition
from lib_1.PluginLoader import PluginLoader
from lib_1.faces_detect import detect_faces
from lib_1.FaceFilter import FaceFilter
import os
# Create the output folder; exist_ok keeps the cell re-runnable
# (a plain `mkdir` fails with "File exists" on the second run).
os.makedirs("extracted", exist_ok=True)
from os import path

mkdir: cannot create directory ‘extracted’: File exists


2. Define directories

In [0]:
# Folder holding the source images that are scanned for faces.
input_directory="../content/data/"



# Folder that receives the extracted face images.
output_directory="../content/extracted/"

Define extraction functions

In [0]:
def load_filter(filter_file='../content/filter/000001.jpg'):
    """Build a FaceFilter from a reference image, if that image exists.

    Args:
        filter_file: Path of the reference image. Defaults to the image
            shipped in the downloaded auxiliary archive.

    Returns:
        A ``FaceFilter`` built from ``filter_file``, or ``None`` when the
        file does not exist (callers then skip filtering).
    """
    if not os.path.exists(filter_file):
        print("Filter not detected")
        return None

    print('Loading reference image for filtering')
    return FaceFilter(filter_file)

def get_faces(image):
    """Yield ``(index, face)`` for every accepted face detected in ``image``.

    Faces come from ``detect_faces``. When a reference filter is available
    (see ``load_filter``), faces rejected by its ``check`` method are skipped.

    Args:
        image: The image to search, in whatever form ``detect_faces`` accepts.

    Yields:
        Tuples ``(faces_count, face)`` where ``faces_count`` is the running
        0-based index of the accepted faces within this image.
    """
    faces_count = 0
    filterDeepFake = load_filter()

    for face in detect_faces(image):
        if filterDeepFake is not None and not filterDeepFake.check(face):
            print('Skipping not recognized face!')
            continue

        yield faces_count, face
        # Advance the index so each accepted face in the same image gets a
        # distinct number; callers use it to build unique output file names.
        faces_count += 1


In [46]:
# Display the entries of the input directory as a quick sanity check.
os.listdir(input_directory)

['Casino-Royale-Three-Piece-1000x524.jpg',
 'james-bond-casino-royale.jpg',
 'bond.jpg',
 'skyfall.jpg',
 'casino_royale_movie_image_james_bond__1_.jpg',
 'CR_2012.jpg',
 'Casino-Royale-Eva-Green-Daniel-Craig.jpg',
 'daniel-craig-casino-royale.jpg',
 'Casino-Royale.jpg']

We list the images in the input directory and extract the faces from each of them.

In [78]:
# Bare entry names found in the input directory.
files = list(os.listdir(input_directory))

from matplotlib import pyplot as plt
from google.colab.patches import cv2_imshow

# Look up the extractor plugin by name, then call the result to obtain
# the extractor object used by the extraction loop.
extractor_name = "Align" # TODO Pass as argument
extractor_factory = PluginLoader.get_extractor(extractor_name)
extractor = extractor_factory()

"""

#Single Example test

example  ="../content/data/CR_2012.jpg"


image = cv2.imread(example)

for idx, face in get_faces(image):
           resized_image = extractor.extract(image, face, 256)
           output_file = output_directory+"/"+str(Path(example).stem)
           cv2.imwrite(str(output_file) + str(idx) + Path(example).suffix, resized_image)
"""
#os.listdir only returns bare file names, but imread needs full paths, so walk the folder and yield the full path of each file.

def find_all_files(directory):
    """Yield the path of every file under ``directory``, recursively.

    Args:
        directory: Root folder to walk.

    Yields:
        ``os.path.join(root, file)`` for each file reported by ``os.walk``,
        so each path starts with ``directory`` as it was given.
    """
    for root, _dirs, files in os.walk(directory):
        for file in files:
            yield os.path.join(root, file)
folder_img = find_all_files(input_directory)

for filename in folder_img:
    # Errors are handled per file so that one bad image does not abort the
    # extraction of all the remaining ones.
    try:
        image = cv2.imread(filename)
        if image is None:
            # cv2.imread signals an unreadable file by returning None
            # instead of raising.
            print('Failed to extract from image: {}. Reason: {}'.format(
                filename, 'file could not be read as an image'))
            continue

        source = Path(filename)
        for idx, face in get_faces(image):
            resized_image = extractor.extract(image, face, 256)
            # Output name: <original stem><face index><original suffix>.
            output_file = os.path.join(
                output_directory,
                '{}{}{}'.format(source.stem, idx, source.suffix))
            cv2.imwrite(output_file, resized_image)

    except Exception as e:
        print('Failed to extract from image: {}. Reason: {}'.format(filename, e))
    
   
   

Loading Extract from Extract_Align plugin...
Loading reference image for filtering
-----
check
[0.54722875]
Loading reference image for filtering
-----
check
[0.50327047]
check
[0.72714366]
Skipping not recognized face!
check
[0.90924023]
Skipping not recognized face!
Loading reference image for filtering
-----
check
[0.43341391]
Loading reference image for filtering
-----
check
[0.58806588]
check
[0.65114009]
Skipping not recognized face!
Loading reference image for filtering
-----
check
[0.945535]
Skipping not recognized face!
check
[0.4803462]
check
[0.97828736]
Skipping not recognized face!
Loading reference image for filtering
-----
check
[0.57495226]
Loading reference image for filtering
-----
check
[0.5175336]
Loading reference image for filtering
-----
check
[0.45176478]
check
[0.77825574]
Skipping not recognized face!
Loading reference image for filtering
-----
check
[0.48051887]
check
[0.74737404]
Skipping not recognized face!


In [79]:
# Zip up results for use later

!zip -r extracted.zip extracted

  adding: extracted/ (stored 0%)
  adding: extracted/Casino-Royale0.jpg (deflated 1%)
  adding: extracted/daniel-craig-casino-royale0.jpg (deflated 1%)
  adding: extracted/Casino-Royale-Three-Piece-1000x5240.jpg (deflated 1%)
  adding: extracted/Casino-Royale-Eva-Green-Daniel-Craig0.jpg (deflated 1%)
  adding: extracted/james-bond-casino-royale0.jpg (deflated 1%)
  adding: extracted/casino_royale_movie_image_james_bond__1_0.jpg (deflated 1%)
  adding: extracted/CR_20120.jpg (deflated 1%)
  adding: extracted/skyfall0.jpg (deflated 1%)
  adding: extracted/.ipynb_checkpoints/ (stored 0%)
  adding: extracted/bond0.jpg (deflated 1%)
