# Test np model

> See if a numpy model, using state trained with fastai/numpy, is any good.

In [1]:
from expoco.core import *
from expoco.viseme_tabular.data import *
from expoco.viseme_tabular.model import *
import ipywidgets as widgets
import numpy as np
from pathlib import Path
import cv2, time, math, json, shutil
import win32api, win32con
import mediapipe as mp
mp_face_mesh = mp.solutions.face_mesh

# Load model and data

In [2]:
# model_path = Path('test/data/viseme_tabular_dataset_20211116_113131/processed_20211117_200746/model_20211117_201151')
# relative_landmark_id=FaceLandmarks.tip_of_nose
# model = load_tabular_model(model_path)

In [3]:
# Select which trained model to evaluate. The commented-out assignments are
# alternative model directories kept for reference; only one `model_path`
# should be active at a time.
# model_path = Path('data/viseme_dataset_20211115_144008/processed_20211115_144617/model_20211115_144950')
model_path = Path('data/viseme_tabular_dataset_20211130_163506/processed_20211130_171906/model_20211130_172300')
# Passed as `relative_landmark_id` to `inference_data_from_landmarks` in the live test below.
relative_landmark_id=FaceLandmarks.tip_of_nose
# model_path = Path('data/viseme_dataset_20211116_113131/processed_20211117_200746/model_20211117_201151')
# relative_landmark_id=None # tried model without making points relative - didn't really work
# `load_tabular_model` is not defined in this notebook — presumably it comes from
# the `expoco.viseme_tabular.model` star import.
model = load_tabular_model(model_path)

# How long will inference take

The worst case would be running inference one sample at a time ...

In [4]:
# Width of the dummy input rows, read from the first module's weight matrix.
# NOTE(review): this assumes the weights are stored as (inputs, outputs) — confirm
# against the model class returned by `load_tabular_model`.
input_size = model.modules[0].weight.shape[0]

In [5]:
# Dummy single-row batch for the timing cell below: features are uniform in [0, 1).
# The label array is not used by the timing loop; it holds one random class id out
# of 4 (the length of the `vocab` list defined further down in this notebook).
# `size=` is required: `np.random.randint([1,1])` treats the list as per-element
# exclusive upper bounds and therefore always returns `array([0, 0])` (shape (2,)),
# not a (1, 1) array of labels.
one_row_x, one_row_y = np.random.random([1,input_size]), np.random.randint(0, 4, size=[1,1])

In [6]:
# Time `how_many` forward passes of a single-row batch.
# `time.perf_counter()` is used instead of `time.time()`: it is the monotonic,
# highest-resolution clock available and is the one intended for measuring short
# durations, whereas the wall clock can be coarse (notably on Windows).
how_many = 1000
start_time = time.perf_counter()
for i in range(how_many):
    model(one_row_x)
elapsed = time.perf_counter() - start_time
print(how_many, 'calls with', one_row_x.shape[0], 'rows took', elapsed, 'seconds')

1000 calls with 1 rows took 0.2810027599334717 seconds


&uarr; easily quick enough to not worry about.

out of interest, how much quicker would processing 2 samples at a time be?

In [7]:
# Dummy two-row batch for the timing cell below: features are uniform in [0, 1).
# The label array is not used by the timing loop; it holds one random class id per
# row out of 4 (the length of the `vocab` list defined further down in this notebook).
# `size=` is required: `np.random.randint([2,1])` treats the list as per-element
# exclusive upper bounds and returns a shape-(2,) array, not a (2, 1) array of labels.
two_rows_x, two_rows_y = np.random.random([2,input_size]), np.random.randint(0, 4, size=[2,1])

In [8]:
# Time `how_many` forward passes of a two-row batch (same total number of rows as
# 1000 single-row calls).
# `time.perf_counter()` is used instead of `time.time()`: it is the monotonic,
# highest-resolution clock available and is the one intended for measuring short
# durations, whereas the wall clock can be coarse (notably on Windows).
how_many = 500
start_time = time.perf_counter()
for i in range(how_many):
    model(two_rows_x)
elapsed = time.perf_counter() - start_time
print(how_many, 'calls with', two_rows_x.shape[0], 'rows took', elapsed, 'seconds')

500 calls with 2 rows took 0.08499741554260254 seconds


&uarr; about 3.3x quicker for the same 1000 rows (0.085s vs 0.281s)!

# Live test

In [9]:
def _capture_and_process(video_capture, face_mesh): # TODO: DRY
    retval, image = video_capture.read() # TODO: check retval
    image = cv2.flip(image, 1)
    return image, face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

In [10]:
def _update_image(image, image_widget, text, text2=None): # TODO: DRY
    """Draw one or two captions on `image` and push the result to `image_widget`.

    `text` is drawn at (20, 40) with font scale 1; `text2`, when given, is drawn
    below it at (20, 60) with font scale .5. The annotated image is PNG-encoded
    and assigned to `image_widget.value`.
    """
    colour = (200,200,200)
    # (caption, origin, font scale) for every line that has to be drawn
    captions = [(text, (20,40), 1)]
    if text2 is not None:
        captions.append((text2, (20,60), .5))
    for caption, origin, scale in captions:
        image = cv2.putText(image, caption, origin, cv2.FONT_HERSHEY_COMPLEX, scale, colour)
    # `imencode` returns (success flag, buffer); only the buffer is used here.
    encoded = cv2.imencode('.png', image)[1]
    image_widget.value = encoded.tobytes()

In [11]:
# The metadata and normalisation stats are read from the model directory's parent.
processed_path = model_path.parent
with (processed_path/'metadata.json').open() as f:
    metadata = json.load(f)
column_names, y_name = metadata['column_names'], 'viseme_class'
stats = np.load(processed_path/'stats.npz')
# Maps a predicted class index to its label (see `vocab[class_id]` in `live_test`).
# NOTE(review): hard-coded — presumably this must match the class order used in training; confirm.
vocab = ['AH', 'EE', 'NO_EXPRESSION', 'OO']

In [12]:
def live_test():
    """Classify live webcam frames with `model` until Escape is pressed.

    Each frame is captured from device 0, run through MediaPipe FaceMesh and, when
    a face is found, converted with `inference_data_from_landmarks` and passed to
    the notebook-level `model`. The predicted class index, its `vocab` label and
    the rounded model output are drawn on the frame shown in an image widget.

    Relies on the notebook-level names `model`, `stats`, `vocab` and
    `relative_landmark_id`.

    Raises
    ------
    RuntimeError
        If video capture device 0 cannot be opened.
    """
    # The return value is discarded — presumably this clears any Escape key press
    # recorded before the loop starts, so that it does not end the test immediately.
    win32api.GetAsyncKeyState(win32con.VK_ESCAPE)
    video_capture = cv2.VideoCapture(0)
    try:
        if not video_capture.isOpened():
            raise RuntimeError('Could not open video capture device 0')
        # The context manager closes the FaceMesh graph when the loop ends or fails.
        with mp_face_mesh.FaceMesh(max_num_faces=1) as face_mesh:
            # First frame is only used to create the widget with a valid image.
            image, results = _capture_and_process(video_capture, face_mesh)
            image_widget = widgets.Image(value=cv2.imencode('.png', image)[1].tobytes())
            display(image_widget)
            while True:
                if win32api.GetAsyncKeyState(win32con.VK_ESCAPE):
                    break
                image, results = _capture_and_process(video_capture, face_mesh)
                if results.multi_face_landmarks is None:
                    _update_image(image, image_widget, 'No face found')
                else:
                    data = inference_data_from_landmarks(
                            landmarks=results.multi_face_landmarks[0].landmark,
                            landmark_ids=FaceLandmarks.pointer + FaceLandmarks.mouth,
                            relative_landmark_id=relative_landmark_id,
                            coords=['x', 'y'],
                            stats=stats)
                    output = model(data)
                    class_id = np.argmax(output)
                    class_label = vocab[class_id]
                    _update_image(image, image_widget, f'{class_id}: {class_label}', f'{np.round(output,2)[0]}')
                time.sleep(.05)
    finally:
        # Always give the camera back, even if the loop raised.
        video_capture.release()
#         image_widget.close()

In [13]:
# Runs the webcam loop defined above; press Escape to stop it.
live_test()

Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02\x80\x00\x00\x01\xe0\x08\x02\x00\x00\x00\xba\xb3K…

# Inference using saved images of different quality

See: `_tmp_re_process_image_to_face_mesh.ipynb`

TODO: move &uarr; logic to this notebook - or add some static data to the test folder

In [None]:
# Run the model on saved copies of an image named img_0.jpeg ... img_100.jpeg
# (step 10) and print the prediction for each.
# NOTE(review): `temp_path` is a machine-specific temporary directory; the images
# must be regenerated (see `_tmp_re_process_image_to_face_mesh.ipynb`) and this
# path updated before the cell can run elsewhere.
temp_path = Path('/Users/Butterp/AppData/Local/Temp/tmpkrplc969')
viseme_config = VisemeConfig()
# The context manager closes the FaceMesh graph once all images are processed.
# NOTE(review): FaceMesh is created with its default `static_image_mode`; for
# independent still images `static_image_mode=True` may be more appropriate — confirm.
with mp_face_mesh.FaceMesh(max_num_faces=1) as face_mesh:
    for i in range(0,110,10):
        img_name = f'img_{i}.jpeg'
        image = cv2.imread(str(temp_path/img_name))
        # `cv2.imread` returns None instead of raising when a file is missing or unreadable.
        if image is None:
            print(img_name, 'could not be read')
            continue
        results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        if results.multi_face_landmarks is None:
            print(img_name, 'no face found')
            continue
        # Use the same relative landmark as the loaded model / live test
        # (currently `FaceLandmarks.tip_of_nose`).
        data = inference_data_from_landmarks(
                landmarks=results.multi_face_landmarks[0].landmark,
                landmark_ids=FaceLandmarks.pointer + FaceLandmarks.mouth,
                relative_landmark_id=relative_landmark_id,
                coords=['x', 'y'],
                stats=stats)
        output = model(data)
        class_id = np.argmax(output)
        class_label = viseme_config.get_class_label(class_id)
        print(img_name, class_label, class_id, output)