## Preprocess and save input data

In [1]:
from preprocess import * 
import os
import glob
import numpy as np
from scipy.io import loadmat, savemat
import imutils
from imutils import face_utils
import dlib
import cv2
# import menpo
# import menpofit

import pickle
from pathlib import Path
import gzip
from tqdm.notebook import tqdm

data_dir = './data/lipread_20_mp4/'
modes = ['train', 'val', 'test']

# Face detector and landmark predictor are created once and handed to the
# `preprocess` helper for every video.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('./shape_predictor_68_face_landmarks.dat')

for mode in tqdm(modes):

    # Data is organized: data_dir > WORD > MODE > WORD_ID.txt, with the
    # matching WORD_ID.mp4 video expected next to each .txt file.
    print(f'Collecting {mode} filenames...')
    data_files = []

    for word in os.listdir(data_dir):
        for path in glob.glob(os.path.join(data_dir, word, mode, '*.txt')):
            fname = os.path.splitext(os.path.basename(path))[0]
            # Keep the directory name with the sample so the label never has
            # to be re-derived by splitting the file name on '_'.
            data_files.append((word, fname))

    print('Done collecting filenames!')

    print(f'Starting extracting features from {mode} video...')

    for label_vocab, fname in tqdm(data_files):
        sample_dir = os.path.join(data_dir, label_vocab, mode)
        vid_path = os.path.join(sample_dir, f'{fname}.mp4')
        arr_save_path = os.path.join(sample_dir, f'{fname}_mouth_frames.npy')

        # Skip samples that already have a saved array so the cell can be
        # re-run after an interruption without redoing finished videos.
        if os.path.isfile(arr_save_path):
            continue

        # NOTE: an alternative pipeline calls
        # `dlib_features_from_vid_path(vid_path, detector, predictor, normalize=True)`
        # and saves its result to f'{fname}.npy' in the same directory.
        mouth_frames = dlib_get_frames_mouth_only(vid_path, detector, predictor, normalize=True)

        np.save(arr_save_path, mouth_frames)

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))

Collecting train filenames...
Done collecting filenames!
Starting extracting features from train video...


HBox(children=(FloatProgress(value=0.0, max=19520.0), HTML(value='')))


Collecting val filenames...
Done collecting filenames!
Starting extracting features from val video...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


Collecting test filenames...
Done collecting filenames!
Starting extracting features from test video...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))





In [8]:
# Debugging cell: load one saved array for a single sample and show its shape.
mode, label_vocab, fname = 'train', 'ABOUT', 'ABOUT_00001'
data_dir = './data/lipread_20_mp4/'

# Reads the plain `<fname>.npy` file (not `<fname>_mouth_frames.npy`).
load_path = f'{data_dir}/{label_vocab}/{mode}/{fname}.npy'
features = np.load(load_path)

print(features.shape)

(29, 68, 2)


## Functions to extract facial features using a supervised descent fitter

In [1]:
import menpo
import menpofit
import pickle
from pathlib import Path
import gzip

from menpodetect.dlib.detect import DlibDetector
import dlib

import cv2
import numpy as np

model_path = './keypoint_model.pkl.gz'

# Load the Bob model from its gzip-compressed pickle.
# NOTE: unpickling can execute arbitrary code, so only point `model_path`
# at a model file from a trusted source.
with gzip.open(model_path, 'rb') as model_file:
    bob_model = pickle.load(model_file)

# Create the face detector: dlib's frontal face detector wrapped in
# menpodetect's DlibDetector.
ff_detector = DlibDetector(dlib.get_frontal_face_detector())

def features_from_vid_path(path, ff_detector, model):
    """Extract facial keypoints from every frame of a video.

    Args:
        path: Location of the video, passed to ``cv2.VideoCapture``.
        ff_detector: Face detector forwarded to ``features_from_frame``.
        model: Fitter forwarded to ``features_from_frame``.

    Returns:
        A list with one entry per decoded frame, in playback order; each
        entry is the value returned by ``features_from_frame``. The list is
        empty when the capture cannot be opened or yields no frames.

    Raises:
        Whatever ``features_from_frame`` raises for a frame it cannot
        process; the capture is released before the error propagates.
    """
    cap = cv2.VideoCapture(path)
    feature_vecs = []

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No more frames (or a read error): stop decoding.
                break
            feature_vecs.append(features_from_frame(frame, ff_detector, model))
    finally:
        # Always free the underlying video handle, even if a frame fails.
        cap.release()
    return feature_vecs
    
def features_from_frame(frame, ff_detector, model):
    """Fit the keypoint model to the first face detected in one frame.

    Args:
        frame: Image in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before detection).
        ff_detector: Callable that takes a menpo image and returns a
            sequence of face bounding boxes.
        model: Fitter exposing ``fit_from_bb(image, bounding_box, max_iters=...)``.

    Returns:
        The fitted shape as an array of coordinate pairs, produced by
        ``format_vec`` from the flattened final shape.

    Raises:
        IndexError: If the detector finds no face in the frame.
    """
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_img = menpo.image.Image.init_from_channels_at_back(frame)
    bboxes = ff_detector(frame_img)
    if len(bboxes) == 0:
        # Same exception type that `bboxes[0]` would raise, but with a
        # message that explains what actually went wrong.
        raise IndexError('no face detected in frame')

    # Only the first detection is used, even if several faces were found.
    fr = model.fit_from_bb(frame_img, bboxes[0], max_iters=100)
    flat_vec = fr.final_shape.as_vector()
    # `normalize_vec(flat_vec, fr)` is the alternative that rescales the
    # points by the fitted shape's bounds instead of returning them as-is.
    return format_vec(flat_vec)

def format_vec(vec):
    """Reshape a flat, interleaved coordinate vector into pairs.

    Consecutive values are grouped two at a time, so ``[a0, b0, a1, b1]``
    becomes ``[[a0, b0], [a1, b1]]``. A trailing value with no partner is
    dropped.

    Args:
        vec: One-dimensional sequence or array of interleaved coordinates.

    Returns:
        A new ``(n, 2)`` array that does not share memory with ``vec``;
        empty input yields shape ``(0, 2)``.
    """
    flat = np.asarray(vec)
    n_points = flat.shape[0] // 2
    # Copy so that callers mutating the result never touch the input buffer.
    return flat[:2 * n_points].reshape(n_points, 2).copy()
    
def normalize_vec(vec, fr):
    """Rescale a flat, interleaved coordinate vector by the fitted shape's bounds.

    Even-indexed values are shifted and scaled with component 0 of the
    bounds, odd-indexed values with component 1, so points lying inside the
    bounds land in [0, 1] on each axis.

    Args:
        vec: Flat array of interleaved coordinates (array arithmetic is
            applied to its slices).
        fr: Fitting result whose ``final_shape.bounds()`` returns a
            ``(min_bounds, max_bounds)`` pair.
            NOTE(review): which image axis component 0 refers to depends on
            the shape's own convention - confirm before relying on x/y order.

    Returns:
        Array of ``[first, second]`` normalized coordinate pairs.
    """
    evens, odds = vec[::2], vec[1::2]
    lower, upper = fr.final_shape.bounds()

    evens_unit = (evens - lower[0]) / (upper[0] - lower[0])
    odds_unit = (odds - lower[1]) / (upper[1] - lower[1])

    return np.array([list(pair) for pair in zip(evens_unit, odds_unit)])

In [None]:
## Debugging cell: run the extractor on a single LRS2 sample clip.
lrs2_path = '/Volumes/T7/lrs2/'
sample_clip = f'{lrs2_path}/mvlrs_v1/main/5535415699068794046/00001.mp4'
features_from_vid_path(sample_clip, ff_detector, bob_model)