### Detect key points on facial images from a captured video. Implemented in Keras - [article](https://towardsdatascience.com/facial-keypoints-detection-deep-learning-737547f73515), [code](https://github.com/acl21/Selfie_Filters_OpenCV), [article](http://danielnouri.org/notes/2014/12/17/using-convolutional-neural-nets-to-detect-facial-keypoints-tutorial/)

**Todos**
*   Image augmentation
*   Dropout
*   Batchnorm
*   Resnet transfer learning
*   Use the data rows which were discarded in dropna. Do specialists as in the Daniel Nouri tutorial
*   Save subset data for use in data_lib
*   One cycle? parameter groups?
*   Move keypt and video utils 






### Imports

In [0]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [0]:
# Make the notebook echo every top-level expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [0]:
import IPython.core.debugger as db
from pathlib import Path
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import math

from sklearn.utils import shuffle
from keras.models import Sequential
#from keras.models import load_model
from keras.layers import Convolution2D, MaxPooling2D, Dropout
from keras.layers import Flatten, Dense
#from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax, Nadam

### Fetch Data from Kaggle

In [0]:
# Run this cell and select the kaggle.json file downloaded
# from the Kaggle account settings page.
from google.colab import files
files.upload()

In [0]:
# Let's make sure the kaggle.json file is present.
!ls -lha kaggle.json

In [0]:
# Next, install the Kaggle API client after forcing an upgrade
!pip uninstall -y kaggle
!pip install --upgrade pip
!pip install kaggle==1.5.6
!kaggle -v

# Reason for doing a force-upgrade. The underlying problem: Colab installs both py2 and py3 
# packages, and (for historical reasons) the py2 packages are installed second. kaggle is a 
# wrapper installed by the kaggle python package; since we do py2 second, the py2 wrapper 
# is in /usr/local/bin, and happens to be an older version.

In [0]:
# The Kaggle API client expects this file to be in ~/.kaggle,
# so move it there.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

# This permissions change avoids a warning on Kaggle tool startup.
!chmod 600 ~/.kaggle/kaggle.json

In [0]:
# List available competitions.
!kaggle competitions list

In [0]:
# First, you have to login to Kaggle, go to that competition's page, navigate to 
# the Rules tab and accept the terms and conditions. Unless you do that, you will get
# a 403-Forbidden error when you run the command below

# Download the facial-keypoints-detection competition data locally.
!kaggle competitions download -c facial-keypoints-detection

### Pre-process Kaggle data

In [0]:
!zipinfo facial-keypoints-detection.zip

!unzip facial-keypoints-detection.zip -d facial >> /dev/null
!unzip "facial/*.zip" -d facial >> /dev/null
!ls -l facial

### Define Data File Paths

In [0]:
# All Kaggle files live under ./facial, relative to the current working directory.
root_path = Path.cwd()
data_path = root_path/'facial'
data_path.mkdir(exist_ok=True)  # no error if the directory already exists

# CSV files read by the cells below
metadata_file_path = data_path/'IdLookupTable.csv'
training_imgs_file_path = data_path/'training.csv'
test_imgs_file_path = data_path/'test.csv'

# Show what is currently in the data directory
list(data_path.iterdir())

### Explore Data

In [0]:
# Peek at the id lookup table
meta_df = pd.read_csv(metadata_file_path)
meta_df.head()

# Load the training CSV and list its column names
imgs_df = pd.read_csv(training_imgs_file_path)
imgs_df.columns

# Show the first four columns next to the last column
# (presumably the eye-centre coordinates and the 'Image' pixel string - TODO confirm)
lr_eye_pts = list(imgs_df.columns[:4])
img_data = [imgs_df.columns[-1]]
imgs_df[lr_eye_pts + img_data]

In [0]:
# Load the test CSV, then display its column names and its contents
test_df = pd.read_csv(test_imgs_file_path)
test_df.columns
test_df

### Build Architecture

In [0]:
#----------------------------------------------------
# Keypoint-regression architecture (a small CNN; the U-Net builder is still a stub)
#----------------------------------------------------
class ArchFacialKeypoints():
  # The Keras model lives in self.model and stays None until a builder
  # method (currently only create_trivial_model) assigns it.
  def __init__(self):
    self.model = None

  # ----------------------------
  # Placeholder: no U-Net is implemented yet, so this only resets the model
  # to None. Use create_trivial_model() to obtain a trainable network.
  # ----------------------------
  def unet_model(self):
    self.model = None

  # ----------------------------
  # Return the model, or fail with a readable message instead of an
  # AttributeError on None when no model has been built yet.
  # ----------------------------
  def _require_model(self):
    if self.model is None:
      raise RuntimeError('No model has been built; call create_trivial_model() first')
    return self.model

  # ----------------------------
  # Configure the model for training. The arguments are forwarded unchanged
  # to the model's compile() method.
  # ----------------------------
  def compile_model(self, optimizer, loss, metrics):
    self._require_model().compile(optimizer=optimizer, loss=loss, metrics=metrics)

  # ----------------------------
  # Returns a History object. History.history attribute is a record of training loss values and metrics
  # values at successive epochs, as well as validation loss values and validation metrics values (if applicable).
  #
  # batch_size and validation_split default to the values that used to be
  # hard-coded (200 and 0.2), so existing callers behave as before.
  # ----------------------------
  def train_model(self, X_train, y_train, num_epochs, batch_size=200, validation_split=0.2):
    model = self._require_model()
    return model.fit(X_train, y_train, epochs=num_epochs, batch_size=batch_size,
                     verbose=1, validation_split=validation_split)

  # ----------------------------
  # Create a trivial model
  # Accept a 96x96 grayscale image as input, and output points with 30 entries,
  # for the predicted (horizontal and vertical) locations of 15 facial keypoints.
  # ----------------------------
  def create_trivial_model(self):
    model = Sequential()

    # Stem: 5x5 convolution on the single-channel input, then 2x2 pooling
    model.add(Convolution2D(32, (5, 5), input_shape=(96, 96, 1), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Three conv/pool/dropout stages with increasing dropout
    for filters, drop_p in ((64, 0.1), (128, 0.2), (30, 0.3)):
      self._conv_block(model, filters=filters, drop_p=drop_p)

    model.add(Flatten())

    # Fully connected head; the last layer has no activation because the
    # outputs are regression targets.
    for units in (64, 128, 256, 64):
      model.add(Dense(units, activation='relu'))
    model.add(Dense(30))

    self.model = model

  # ----------------------------
  # Append one 3x3 convolution + 2x2 max-pool + dropout stage to 'model'
  # ----------------------------
  def _conv_block(self, model, filters, drop_p):
    model.add(Convolution2D(filters, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(drop_p))


### Define Data Preparation class

In [0]:
class DataPrep():
  # ----------------------------
  # Loads a facial-keypoints CSV and turns it into scaled numpy arrays.
  #   data_file : path of the CSV; it must contain an 'Image' column holding
  #               space-separated pixel values
  #   test      : True for files without keypoint columns (np_pts stays None)
  # After prep(), np_img holds the images and np_pts the keypoints.
  # ----------------------------
  def __init__(self, data_file, test=False):
    self.data_file = data_file
    self.test = test
    # Filled in by prep(); defined here so that display() can report a clear
    # error instead of an AttributeError when prep() has not been called.
    self.np_img = None
    self.np_pts = None

  # ----------------------------
  # Run the full pipeline: load the CSV, extract the arrays and scale them
  # ----------------------------
  def prep(self):
    data_df = self._load(self.data_file)
    np_img, np_pts = self._extract(data_df)
    self.np_img, self.np_pts = self._scale(np_img, np_pts)

  # ----------------------------
  # Read the CSV and keep only the rows that are fully populated
  # ----------------------------
  def _load(self, data_file):
    data_df = pd.read_csv(data_file)
    return data_df.dropna()  # drop all rows that have missing values in them

  # ----------------------------
  # Returns (np_img, np_pts).
  #   np_img : (num_rows, w, w) array parsed from the 'Image' strings; images
  #            are assumed to be square
  #   np_pts : values of every column except the last one (the last column is
  #            assumed to be 'Image'), or None for test data
  # ----------------------------
  def _extract(self, data_df):
    img_df = data_df['Image'].apply(lambda im_txt: np.fromstring(im_txt, sep = ' '))
    np_img = np.stack(img_df.values, axis=0)
    img_w = int(math.sqrt(np_img.shape[1]))
    np_img = np_img.reshape(np_img.shape[0], img_w, -1)

    np_pts = None
    if (not self.test):
      pts_cols = data_df.columns[:-1]
      np_pts = data_df[pts_cols].values

    return np_img, np_pts

  # ----------------------------
  # Scale pixel values to [0, 1] and, for training data, map coordinates
  # from [0, 96] to [-1, 1]. Test data has no keypoints, so np_pts is None.
  # ----------------------------
  def _scale(self, np_img, np_pts):
    np_img = np_img / 255.
    np_pts = None if self.test else (np_pts - 48) / 48
    return np_img, np_pts

  # ----------------------------
  # Reverse _scale before display. np_pts may be None (images only).
  # ----------------------------
  def _unscale(self, np_img, np_pts):
    np_img = np_img * 255.
    if (np_pts is not None):
      np_pts = (np_pts * 48.) + 48.
    return np_img, np_pts

  # ----------------------------
  # Draw one image on 'ax', with red keypoints when pts_x is given
  # ----------------------------
  def _show_img_pts(self, ax, img, pts_x, pts_y):
    ax.axis('off')
    # The images are single-channel, so use a gray colormap rather than
    # matplotlib's default false-colour map.
    ax.imshow(img, cmap='gray')
    if (pts_x is not None):
      ax.scatter(pts_x, pts_y, color='red')

  # ----------------------------
  # Lay the images out on a grid that is 10 columns wide.
  #   disp_img : (n, h, w) images
  #   disp_pts : (n, num_points, 2) keypoints, or None to show images only
  # ----------------------------
  def _show_grid(self, disp_img, disp_pts):
    num_imgs = disp_img.shape[0]
    num_cols = 10
    # Always keep at least one row: a zero-row grid cannot be created
    num_rows = max(1, int(math.ceil(num_imgs / num_cols)))
    figsize = (num_cols * 3, num_rows * 3)
    fig, axes = plt.subplots(num_rows, num_cols, figsize=figsize)
    all_axes = list(axes.flat)

    if (disp_pts is None):
      disp_pts = [None] * num_imgs

    for img, pts, ax in zip(disp_img, disp_pts, all_axes):
      pts_x, pts_y = (None, None) if (pts is None) else pts.T
      self._show_img_pts(ax, img, pts_x, pts_y)

    # Hide the grid cells that have no image
    for ax in all_axes[num_imgs:]:
      ax.set_visible(False)

  # ----------------------------
  # Show images with their keypoints.
  #   np_img, np_pts : scaled arrays; default to the ones produced by prep()
  #   idxs           : indices to show; defaults to the first 16 images, or
  #                    fewer when less data is available
  # Raises ValueError when there are no images to show.
  # ----------------------------
  def display(self, np_img=None, np_pts=None, idxs=None):
    np_img = np_img if (np_img is not None) else self.np_img
    np_pts = np_pts if (np_pts is not None) else self.np_pts
    if (np_img is None):
      raise ValueError('No images to display; call prep() first or pass np_img')

    if (idxs is None):
      num_batch = min(16, len(np_img))
      idxs = list(range(num_batch))

    disp_img = np_img[idxs]
    disp_pts = np_pts[idxs] if (np_pts is not None) else None
    disp_img, disp_pts = self._unscale(disp_img, disp_pts)
    if (disp_pts is not None):
      # one (x, y) row per keypoint
      disp_pts = disp_pts.reshape(disp_pts.shape[0], -1, 2)
    self._show_grid(disp_img, disp_pts)


### Define Facial Keypoint application class 

In [0]:
#----------------------------------------------------
# Facial Keypoints Application
#----------------------------------------------------
class AppFacialKeypoints():

  def __init__(self):
    self._arch = None  # ArchFacialKeypoints, set by create_arch / create_trivial_arch
    self.db = None     # DataPrep with the training data, set by load_data

  # ----------------------------
  # Pre-process images by reducing the image size to a manageable size
  # (placeholder: currently does nothing)
  # ----------------------------
  def pre_process_data(self):
    pass

  # ----------------------------
  # Load the data using the Data Prep class
  # ----------------------------
  def load_data(self, data_file):
    self.db = DataPrep(data_file)
    self.db.prep()

  # ----------------------------
  # Create the architecture.
  # NOTE: unet_model() is currently a stub that leaves the model unset, so
  # use create_trivial_arch() to get something that can be trained.
  # ----------------------------
  def create_arch(self):
    self._arch = ArchFacialKeypoints()
    self._arch.unet_model()
    return self._arch

  # ----------------------------
  # Create the simple convolutional architecture (conv blocks followed by
  # dense layers that regress the keypoint coordinates)
  # ----------------------------
  def create_trivial_arch(self):
    self._arch = ArchFacialKeypoints()
    self._arch.create_trivial_model()
    return self._arch

  # ----------------------------
  # Return the Keras model, or raise a readable error when none was created
  # ----------------------------
  def _require_model(self):
    if (self._arch is None) or (self._arch.model is None):
      raise RuntimeError('No model available; call create_trivial_arch() first')
    return self._arch.model

  # ----------------------------
  # Train the model for num_epochs epochs and return whatever
  # ArchFacialKeypoints.train_model returns (the training history).
  # Raises RuntimeError when no data was loaded or no model was created.
  # ----------------------------
  def run_train(self, num_epochs=1):
    if (self.db is None):
      raise RuntimeError('No training data loaded; call load_data() first')
    self._require_model()

    # Load training set
    X_train, y_train = self.db.np_img, self.db.np_pts
    X_train, y_train = shuffle(X_train, y_train, random_state=42)  # shuffle train data
    X_train = X_train.reshape(-1, 96, 96, 1) # reshape each image as 96 x 96 x 1

    # Keypoint prediction is a regression, so report mean absolute error;
    # classification accuracy carries no meaning for continuous targets.
    # NOTE(review): the model is re-compiled on every call - confirm whether
    # resetting the optimizer between successive run_train calls is intended.
    self._arch.compile_model(optimizer = 'adam', loss = 'mean_squared_error', metrics = ['mae'])
    return self._arch.train_model(X_train, y_train, num_epochs)

  # ----------------------------
  # Predict keypoints for the first num_imgs images of a test CSV
  # (num_imgs=None predicts all of them).
  # Returns (test_imgs, test_pts): the model inputs reshaped to
  # (n, 96, 96, 1) and the model's predictions for them.
  # ----------------------------
  def run_predict(self, test_file, num_imgs=2):
    model = self._require_model()

    test_prep = DataPrep(test_file, test=True)
    test_prep.prep()

    # Predict the keypoints using the model
    test_imgs = test_prep.np_img[:num_imgs]
    test_imgs = test_imgs.reshape(-1, 96, 96, 1)
    test_pts = model.predict(test_imgs)
    return test_imgs, test_pts

  # ----------------------------
  # NOTE(review): legacy PyTorch-style prediction loop. It needs 'torch' and
  # a 'valid_dl' attribute on self.db, neither of which is provided by the
  # visible notebook code, so it cannot run as-is. Kept for reference only.
  # ----------------------------
  def OLD_run_predict(self, test_file):
    valid_dl = self.db.valid_dl
    self._arch.model.eval()
    device = list(self._arch.model.parameters())[0].device

    inps, outs, targs = [], [], []
    with torch.no_grad():
      for _, (xb, yb) in enumerate(valid_dl):
        xb = xb.to(device)
        yhat = self._arch.model(xb)

        for x, y, p in zip (xb, yb, yhat):
          inps.append(x.cpu())
          outs.append(p.cpu())
          targs.append(y.cpu())
    return inps, outs, targs

### Load Data

In [0]:
# Create the application object and load + scale the training CSV
fk_app = AppFacialKeypoints()
fk_app.load_data(training_imgs_file_path)
# Sanity-check the shapes and dtypes of the prepared arrays
fk_app.db.np_img.shape, fk_app.db.np_pts.shape, fk_app.db.np_img.dtype, fk_app.db.np_pts.dtype
# Show a sample of training images with their keypoints
fk_app.db.display()

### Test run the model

In [0]:
# Build the simple CNN and train for a single epoch as a smoke test
fk_app.create_trivial_arch()
#fk_app._arch.model
fk_app.run_train(num_epochs=1)

In [0]:
# Longer training run. run_train reuses the model created above, so this
# presumably continues from the one-epoch smoke test - TODO confirm.
fk_app.run_train(num_epochs=70)

In [0]:
# Predict keypoints for images from the test CSV and check the result shapes
test_imgs, test_pts = fk_app.run_predict(test_imgs_file_path)
test_imgs.shape, test_pts.shape

In [0]:
# Plot the predicted keypoints for one test image.
# NOTE(review): simg is only defined in later cells of this notebook, so one
# of those cells has to be run before this one.
img_i = 0
foo_img = test_imgs[img_i].reshape(96, 96)
foo_img = foo_img * 255.  # undo the [0, 1] pixel scaling
foo_pts = test_pts[img_i].reshape(-1, 2)  # one (x, y) row per keypoint
foo_pts = (foo_pts * 48.) + 48.  # undo the [-1, 1] coordinate scaling
foopx, foopy = foo_pts.T
#foop[img_i], foo_pts, foopx, foopy
simg(foo_img, foopx, foopy)

### Video processing utilities

In [0]:
import cv2

#----------------------------------------------------
# Video Download and Display utility
#----------------------------------------------------
class ShowVid():
  # video_file is the local video to work on; it may be left as None and set
  # later by youtube_download(). youtube_id records the last downloaded id.
  def __init__(self, video_file=None):
    self.youtube_id = None
    self.video_file = video_file

  #----------------------------------------------------
  # Return the current video file, or raise ValueError when none is set
  #----------------------------------------------------
  def _require_video_file(self):
    if (self.video_file is None):
      raise ValueError('No video file set; pass video_file or call youtube_download() first')
    return self.video_file

  #----------------------------------------------------
  # Run an external command given as an argument list (no shell involved, so
  # paths with spaces are safe), print its combined output and raise
  # subprocess.CalledProcessError when it exits with a non-zero status.
  #----------------------------------------------------
  def _run_cmd(self, cmd):
    import subprocess

    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            universal_newlines=True)
    print(result.stdout)
    result.check_returncode()

  #----------------------------------------------------
  # Get stats on a Youtube video
  #----------------------------------------------------
  def _youtube_info(self, youtube):
    # get video information
    print (f'ID: {youtube.video_id}, Title: {youtube.title}, Length: {youtube.length}\n')

    # Get video stream formats
    # NOTE(review): newer pytube releases may deprecate streams.all() -
    # confirm against the installed version.
    print ('Streams:\n')
    for stream in youtube.streams.all():
      print(stream)

    # Thumbnail image URL
    return youtube.thumbnail_url

  #----------------------------------------------------
  # Download a video from Youtube to a local file using Pytube library.
  # Returns the path of the downloaded file and remembers it in
  # self.video_file.
  #----------------------------------------------------
  def youtube_download(self, video_id, download_dir, video_stem):
    # Install pytube only when it is not available yet
    try:
      import pytube
    except ImportError:
      import importlib
      import sys
      self._run_cmd([sys.executable, '-m', 'pip', 'install', 'pytube3'])
      importlib.invalidate_caches()  # let the import system see the new package
      import pytube

    # Get Pytube's youtube object for the video
    video_url = f'https://www.youtube.com/watch?v={video_id}'
    youtube = pytube.YouTube(video_url)

    # Show some stats about the video
    self._youtube_info(youtube)

    # Choose the first stream format (format contains mime_type, resolution, fps, vcodec, acodec)
    video = youtube.streams.first()

    # Download the video
    self.video_file = video.download(download_dir, video_stem)
    self.youtube_id = video_id

    return self.video_file

  #----------------------------------------------------
  # Extract just the first few 'duration' seconds of a video into
  # 'short_file', which then becomes the current video file.
  # Requires the ffmpeg command-line tool.
  #----------------------------------------------------
  def extract_short(self, short_file, duration):
    video_file = self._require_video_file()
    self._run_cmd(['ffmpeg', '-y', '-loglevel', 'info', '-i', str(video_file),
                   '-t', str(duration), str(short_file)])
    self.video_file = short_file

  #----------------------------------------------------
  # Display a locally downloaded video file.
  # Returns an IPython HTML object with the video embedded as base64.
  #----------------------------------------------------
  def show_mp4(self, width=640, height=480):
    video_file = self._require_video_file()

    import base64
    from IPython.display import HTML

    # 'with' closes the file handle once the bytes have been read
    with open(video_file, 'rb') as vid_fh:
      video_encoded = base64.b64encode(vid_fh.read())

    return HTML(data='''<video width="{0}" height="{1}" alt="test" controls>
                          <source src="data:video/mp4;base64,{2}" type="video/mp4" />
                        </video>'''.format(width, height, video_encoded.decode('ascii')))

  #----------------------------------------------------
  # Display a video on Youtube
  #----------------------------------------------------
  def show_youtube_video(self, youtube_id):
    from IPython.display import YouTubeVideo
    # Return the widget: an object created inside a method is only rendered
    # by the notebook when it is the value of the calling expression.
    return YouTubeVideo(youtube_id)

  #----------------------------------------------------
  # Read a video frame-by-frame and call a callback function to process
  # each frame. Stop when we have completed the given number of frames
  # or when the video ends, whichever comes first.
  # Write an output video file if required: frames go to
  # '<vid_out_name>.avi', which is then converted to '<vid_out_name>.mp4'.
  #----------------------------------------------------
  def process_video(self, process_cb, num_frames, vid_out_name=None):
    video_file = self._require_video_file()

    vid = cv2.VideoCapture(str(video_file))

    # Get video width, height and frames per second
    width, height = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = vid.get(cv2.CAP_PROP_FPS)
    print (f'Video size {width}x{height} at {fps} fps')

    vid_out = None
    vid_out_avi = None
    if (vid_out_name is not None):
      # Write an AVI file and convert to MP4 later
      vid_out_avi = f'{vid_out_name}.avi'
      vid_out = cv2.VideoWriter(vid_out_avi, cv2.VideoWriter_fourcc(*'MJPG'), fps, (width, height))

    i_frames = 0
    try:
      while (vid.isOpened() and i_frames < num_frames):
        ret, frame = vid.read()

        # Bail out when the video file ends
        if not ret:
          break

        frame_out = process_cb(frame)

        if (vid_out is not None):
          vid_out.write(frame_out)

        i_frames += 1
    finally:
      # Release capture and writer on every exit path (frame limit reached,
      # end of video, or an error raised by the callback)
      vid.release()
      if (vid_out is not None):
        vid_out.release()

    # convert AVI to MP4 (skipped when nothing was written)
    if (vid_out is not None) and (i_frames > 0):
      vid_out_mp4 = f'{vid_out_name}.mp4'
      self._run_cmd(['ffmpeg', '-y', '-loglevel', 'info', '-i', vid_out_avi, vid_out_mp4])

### Download Facial Video. Define Video IDs, File Paths

In [0]:
# Downloaded and derived videos are kept under ./videos
root_path = Path.cwd()
video_dir = root_path/'videos'
video_dir.mkdir(exist_ok=True)

# File name stem for the download, and the path of the shortened clip
video_stem = 'myvideo'
short_file = video_dir/'short.mp4'

# YouTube ids of the candidate videos
obama_youtube_id = '9V7yi2Q8mJo'
tom_jerry_youtube_id = 'tXOIvjbNhts'

In [0]:
# Select which video, directory and file names the facial-keypoint demo uses
facial_video_id = obama_youtube_id
facial_video_dir = video_dir
facial_stem = video_stem
facial_short = short_file

# Optional preview; 'sv' is only created in the next cell
#sv.show_youtube_video(facial_video_id)

In [0]:
sv = ShowVid()

# Download the selected video, then keep only its first 10 seconds;
# extract_short makes the short clip the current file of 'sv'
facial_video = sv.youtube_download(facial_video_id, facial_video_dir, facial_stem)
sv.extract_short(facial_short, duration=10)

In [0]:
# Play the shortened clip inline
sv.show_mp4(width=480, height=360)

In [0]:
# Show a local file (not downloaded from youtube)
# NOTE(review): nothing visible in this notebook creates videos/tj.mp4;
# presumably it has to be uploaded by hand first.
sv_tj = ShowVid(video_dir/'tj.mp4')
sv_tj.show_mp4(width=480, height=360)

### Video facial keypoints

In [0]:
!wget 'https://raw.githubusercontent.com/acl21/Selfie_Filters_OpenCV/master/cascades/haarcascade_frontalface_default.xml' -P facial
face_cascade = 'facial/haarcascade_frontalface_default.xml'
from functools import partial
import matplotlib.patches as patches


In [0]:
import matplotlib.patches as patches
def simg(img, pts_x=None, pts_y=None, bbox=None, figsize=(20, 5)):
    # Show a single image without axes. Optionally overlay red scatter points
    # (pts_x / pts_y) and a red, unfilled rectangle given as bbox = (x, y, w, h).
    axis = plt.subplots(1, 1, figsize=figsize)[1]
    axis.axis('off')
    axis.imshow(img)

    has_points = pts_x is not None
    if has_points:
        axis.scatter(pts_x, pts_y, color='red')

    # Nothing more to draw without a bounding box
    if bbox is None:
        return

    left, top, box_w, box_h = bbox
    outline = patches.Rectangle((left, top), box_w, box_h,
                                linewidth=1, edgecolor='r', facecolor='none')
    axis.add_patch(outline)

In [0]:
def add_keypts(frame, face_coords, resized_face_pts):
  # Draw keypoints onto the face region of a video frame.
  #   frame            : image array; it is modified in place and returned
  #   face_coords      : (x, y, w, h) of the face box inside the frame
  #   resized_face_pts : keypoint coordinates in the 96x96 face image, as
  #                      x, y pairs in any shape that reshapes to (-1, 2)
  face_x, face_y, face_w, face_h = face_coords

  # Work on a 96x96 copy of the face so the points can be drawn in the same
  # coordinate system they were predicted in
  face_img = frame[face_y: face_y + face_h, face_x: face_x + face_w]
  resized_face_img = cv2.resize(face_img, (96, 96), interpolation = cv2.INTER_AREA)

  green = (0,255,0)
  for pt_x, pt_y in resized_face_pts.reshape(-1, 2):
    # cv2.circle needs integer pixel coordinates; the predicted points are
    # floats, so round them to the nearest pixel first
    center = (int(round(float(pt_x))), int(round(float(pt_y))))
    cv2.circle(resized_face_img, center, 1, green, 1)

  # Scale the annotated face back to its original size and paste it in place
  resized_face_img = cv2.resize(resized_face_img, (face_w, face_h), interpolation = cv2.INTER_CUBIC)
  frame[face_y:face_y + face_h, face_x:face_x + face_w] = resized_face_img
  return (frame)

def myfunc(frame, cascade, model=None):
  # Per-frame callback: detect faces in 'frame' and draw the predicted
  # keypoints on each of them.
  #   frame   : BGR video frame
  #   cascade : face detector providing detectMultiScale
  #   model   : keypoint model providing predict(); defaults to the notebook's
  #             global fk_app model so existing callers keep working
  # Returns the annotated frame.
  if model is None:
    model = fk_app._arch.model

  # Detect faces using the haar cascade object
  gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
  faces = cascade.detectMultiScale(gray, 1.25, 6)

  for (face_x, face_y, face_w, face_h) in faces:
    gray_face = gray[face_y: face_y + face_h, face_x: face_x + face_w]

    # Normalize to match the input format of the model - Range of pixel to [0, 1]
    gray_normalized = gray_face / 255

    # Resize it to 96x96 to match the input format of the model
    gray_resized = cv2.resize(gray_normalized, (96, 96), interpolation = cv2.INTER_AREA)
    gray_resized = gray_resized.reshape(1, 96, 96, 1)

    # Predict the keypoints using the model
    keypoints = model.predict(gray_resized)

    # De-Normalize the keypoints values
    keypoints = keypoints * 48 + 48

    frame = add_keypts(frame, (face_x, face_y, face_w, face_h), keypoints)

  return frame

# Face cascade to detect faces
face_cascade = cv2.CascadeClassifier('facial/haarcascade_frontalface_default.xml')
# Bind the cascade so the callback only takes the frame, as process_video expects
facial_func = partial(myfunc, cascade=face_cascade)
# Annotate the short clip (frame limit 250) and write the result as 'outvid'
svf = ShowVid(facial_short)
svf.process_video(facial_func, 250, 'outvid')

In [0]:
# Play the annotated video written by the previous cell
svo = ShowVid('outvid.mp4')
svo.show_mp4(width=720, height=720)

### Temp

In [0]:
# Scratch: show a sample of training images with their keypoints again
fk_app.db.display()

In [0]:
def simg(img, pts_x=None, pts_y=None, bbox=None, figsize=None):
    # Show a single image without axes, optionally with red keypoints and a
    # red bounding box given as bbox = (x, y, w, h).
    #
    # This cell redefines the simg helper from the 'Video facial keypoints'
    # section. The signature is kept compatible with that earlier definition
    # so that running this cell does not break calls that pass bbox or
    # figsize; figsize=None keeps matplotlib's default figure size.
    _, ax = plt.subplots(1, 1, figsize=figsize)
    ax.axis('off')
    ax.imshow(img)
    if pts_x is not None:
        ax.scatter(pts_x, pts_y, color='red')
    if bbox is not None:
        import matplotlib.patches as patches

        x, y, w, h = bbox
        ax.add_patch(patches.Rectangle((x, y), w, h, linewidth=1,
                                       edgecolor='r', facecolor='none'))

# Scratch: plot one image with its keypoints.
# NOTE(review): 'foop' and 'foon' are not defined anywhere in the visible
# notebook, so this cell presumably relies on leftovers from an earlier
# session - confirm or remove.
img_i = 3
foo_pts = foop[img_i].reshape(-1, 2)  # one (x, y) row per keypoint
foopx, foopy = foo_pts.T
#foop[img_i], foo_pts, foopx, foopy
simg(foon[img_i], foopx, foopy)