In [3]:
import glob

import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
from random import shuffle
import imutils
import os
import time
import timeit
import itertools
import dlib

from imutils import face_utils

from sklearn.metrics import classification_report, confusion_matrix

from keras.models import Sequential
from keras.layers import Dense, Convolution3D, ZeroPadding3D, Activation, MaxPooling3D, Flatten, Dropout, BatchNormalization
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, CSVLogger, ReduceLROnPlateau
from keras.optimizers import SGD, rmsprop, Adam
from keras.models import model_from_json
from keras.utils import to_categorical, plot_model

# Seed NumPy's global RNG so any NumPy-based randomness is repeatable.
np.random.seed(7)
# Print floats with three decimals (and a leading space in place of a "+" sign).
np.set_printoptions(formatter={'float': '{: 0.3f}'.format})

# sizes of mouth region -> input shape
# Each cropped frame is resized to IMG_COLS x IMG_ROWS (width x height);
# VID_DEPTH is the frame-count threshold checked in convert_video_to_3d_array.
IMG_ROWS = 35
IMG_COLS = 50
VID_DEPTH = 28


# initialize dlib's face detector (HOG-based) and then create
# the facial landmark predictor
# NOTE: the path is relative, so the .dat file must be in the working directory.
p = "shape_predictor_68_face_landmarks.dat"
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(p)


# selected subset of words for training
# The list position doubles as the class index: the argmax of the model
# output is used to index into this list.
selected = ["AGREEMENT", "BENEFIT", "CONSERVATIVE", "CUSTOMERS", "EXPECTED"]
print(len(selected))


# Base file name and directory of the saved model; combined below into
# '<models_dir><model_name>.json' and '<models_dir><model_name>.h5'.
model_name = "model"
models_dir = 'models/'


def load_model(model_path, model_weights_path):
    """Rebuild a Keras model from a JSON architecture file and a weights file.

    Args:
        model_path: Path to the JSON file holding the serialized architecture
            (the text handed to ``model_from_json``).
        model_weights_path: Path to the weights file handed to
            ``load_weights``.

    Returns:
        The restored model, compiled with categorical cross-entropy loss,
        the ``'sgd'`` optimizer and an accuracy metric.
    """
    # The context manager closes the file even if reading it fails.
    with open(model_path, 'r') as json_file:
        architecture_json = json_file.read()
    loaded_model = model_from_json(architecture_json)
    print("Loaded Model from disk")

    # load weights into new model
    loaded_model.load_weights(model_weights_path)
    print("Loaded Model Weights from disk")

    # compile the loaded model so it is ready for evaluation / prediction
    loaded_model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

    return loaded_model


# Restore the trained network; the architecture (.json) and the weights (.h5)
# share the same directory and base file name.
_model_stem = models_dir + model_name
model = load_model(_model_stem + '.json', _model_stem + '.h5')


def convert_video_to_3d_array(video):
    """Turn a video into a fixed-length stack of grayscale face-region crops.

    Frames are read in order. For every frame the dlib detector and landmark
    predictor are run, the rectangle spanned by landmarks 14/11 (top/bottom
    rows) and 6/10 (left/right columns) is cut out of the grayscale frame and
    resized to ``IMG_COLS`` x ``IMG_ROWS`` pixels.
    NOTE(review): in the 68-point layout these indices look like jawline
    points framing the mouth area - confirm against the training pipeline.

    Frames in which no face is detected reuse the landmarks of the most
    recent detection; frames that precede the first detection are skipped.

    Args:
        video: Source passed to ``cv2.VideoCapture`` (e.g. a file path).

    Returns:
        A ``uint8`` array of shape ``(VID_DEPTH + 1, IMG_ROWS, IMG_COLS)``,
        or ``None`` if the video cannot be opened or ends before that many
        crops were collected.
    """
    cap = cv2.VideoCapture(video)
    lips_frames = []
    # Landmarks of the most recent detection; None until a face is found.
    landmarks = None

    try:
        while cap.isOpened():
            # Capture frame-by-frame
            ret, frame = cap.read()
            if not ret:
                # The stream ended before enough frames were collected,
                # so the sample is unusable.
                return None

            # convert frame to grayscale
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Detect faces and convert the landmarks to an (x, y) NumPy array.
            # With several faces in the frame, the last detection wins.
            for rect in detector(gray, 0):
                landmarks = face_utils.shape_to_np(predictor(gray, rect))

            if landmarks is None:
                # No face has been seen yet, so there is nothing to crop.
                continue

            # crop_img = frame[top:bottom, left:right]
            crop = gray[landmarks[14][1]:landmarks[11][1],
                        landmarks[6][0]:landmarks[10][0]]
            # cv2.resize takes the target size as (width, height).
            lips_frames.append(cv2.resize(crop, (IMG_COLS, IMG_ROWS)))

            # The strict comparison means VID_DEPTH + 1 frames are collected.
            if len(lips_frames) > VID_DEPTH:
                return np.array(lips_frames, dtype="uint8")
    finally:
        # Release the capture handle even when a frame fails to process.
        cap.release()

    return None


# File the extracted frame stack is written to and read back from.
# A raw string keeps the backslashes literal: "\o" and "\p" are not valid
# escape sequences and only work by accident in a normal string literal.
SAMPLE_PATH = r'c:\odev\predict_word.npy'

np_frame_array = convert_video_to_3d_array('predict_word.mp4')
if np_frame_array is None:
    # The converter returns None when it could not collect enough frames;
    # fail here with a clear message instead of saving a useless file.
    raise ValueError("could not extract enough frames from 'predict_word.mp4'")
np.save(SAMPLE_PATH, np_frame_array)


sample = np.load(SAMPLE_PATH)
# Scale the uint8 pixel values from [0, 255] to roughly [-1, 1).
sample = (sample.astype("float16") - 128) / 128

# Add a leading and a trailing axis of length 1: (frames, rows, cols) ->
# (1, frames, rows, cols, 1). Presumably batch and channel axes for the
# 3D-convolution model - confirm against the training input shape.
sample = np.expand_dims(sample, axis=0)
sample = sample.reshape(sample.shape + (1,))


prediction = model.predict(sample)
print(prediction)

# Index of the highest score for each sample in the batch.
prediction_class = np.argmax(prediction, axis=1)
print(prediction_class)

# Map the class index back to its word, then show the raw index.
print(selected[prediction_class[0]])
print(prediction_class[0])


5
Loaded Model from disk
Loaded Model Weights from disk
[[ 0.001  0.993  0.000  0.000  0.006]]
[1]
BENEFIT
1
