In [2]:
import os
import random
import re
import string

import tensorflow as tf
from tensorflow.keras.preprocessing import image
import keras
from keras.models import load_model
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

from resources.data_utils import DataGenerator
from resources.utils import prediction_standardized

In [3]:
from keras.initializers import glorot_normal
from keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Reshape
from keras.models import Sequential

def build_model(nb_classes: int, image_length: int, seed: int = 25):
    """Build the two-stage convolutional classifier for square, single-channel images.

    Layer stack: Reshape (add channel axis) -> Conv2D(64, 5x5, 'same') ->
    MaxPooling2D(10) -> ReLU -> Conv2D(128, 5x5, 'same') -> MaxPooling2D(8) ->
    ReLU -> flatten -> Dense(50, ReLU) -> Dense(nb_classes, softmax).

    Args:
        nb_classes: Width of the final softmax layer (number of classes).
        image_length: Side length of the square input images. Must be at
            least 80 so that both pooling stages leave a non-empty feature map.
        seed: Seed given to the Glorot-normal initializer shared by every
            trainable layer.

    Returns:
        The assembled, uncompiled ``Sequential`` model.

    Raises:
        ValueError: If ``image_length`` is too small for the two pooling stages.
    """
    first_pool, second_pool = 10, 8

    # The 'same'-padded convolutions keep the spatial size, and each max-pooling
    # layer (default stride == pool size, no padding) floor-divides it by its
    # pool size. For image_length == 80 this leaves a 1x1x128 feature map.
    final_side = (image_length // first_pool) // second_pool
    if final_side < 1:
        raise ValueError(
            "image_length must be at least {} for the {}x and {}x pooling stages, "
            "got {}".format(first_pool * second_pool, first_pool, second_pool, image_length))

    initializer = glorot_normal(seed=seed)
    model = Sequential()
    # Add the trailing channel axis expected by Conv2D.
    model.add(Reshape((image_length, image_length, 1), input_shape=(image_length, image_length,)))

    model.add(
        Conv2D(64, kernel_size=(5, 5), padding='same', kernel_initializer=initializer))
    model.add(MaxPooling2D(pool_size=(first_pool, first_pool)))
    model.add(Activation('relu'))

    model.add(
        Conv2D(128, kernel_size=(5, 5), padding='same', kernel_initializer=initializer))
    model.add(MaxPooling2D(pool_size=(second_pool, second_pool)))
    model.add(Activation('relu'))

    # Flatten the remaining feature map; the size is derived from image_length
    # instead of being hard-coded to the 1x1 map that only image_length 80-159 yields.
    model.add(Reshape((128 * final_side * final_side,), input_shape=(final_side, final_side, 128)))
    model.add(Dense(50, activation='relu', kernel_initializer=initializer))
    model.add(Dense(nb_classes, activation='softmax', kernel_initializer=initializer))

    return model

In [4]:
def binary_label_to_decimal(labels: np.ndarray) -> np.ndarray:
    """Map each row of ``labels`` to the 1-based position of its largest entry.

    Args:
        labels: Array whose first axis indexes the samples; each row is
            typically a one-hot (binary) vector.

    Returns:
        Integer array with one entry per row: ``argmax(row) + 1``.
    """
    n_rows = labels.shape[0]
    one_based_positions = (np.argmax(row) + 1 for row in labels)
    return np.fromiter(one_based_positions, dtype=int, count=n_rows)

In [5]:
# Load trained model
# load_model() returns the complete saved model from the .h5 file, so the model
# is not built with build_model(...) first: that freshly built network would be
# discarded by the assignment below.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
path_model = '/Users/jialu/Documents/IE_website/Keyboard/model_1.h5' # where the trained model is stored
model = load_model(path_model)

In [9]:
# Get all extracted, preprocessed images
path = '/Users/jialu/Documents/Encoding_Project/Encoding_simulation/dataset/dataset_keyboard_127/preprocessed_80/' # where the images extracted from the video are stored


def _natural_sort_key(file_name):
    """Sort key that orders embedded numbers numerically ('2.png' before '10.png')."""
    # re.split with a capturing group alternates text, digits, text, ...,
    # so the parts at odd positions are exactly the digit runs.
    return [int(part) if position % 2 else part
            for position, part in enumerate(re.split(r'(\d+)', file_name))]


# os.listdir() returns entries in arbitrary order, but the images are paired
# with the characters of the original text by position, so the order has to be
# deterministic. Hidden entries (e.g. macOS '.DS_Store') and sub-directories are
# skipped because they are not images.
# NOTE(review): assumes the file names encode the frame order - confirm.
img_arr = sorted(
    (name for name in os.listdir(path)
     if not name.startswith('.') and os.path.isfile(os.path.join(path, name))),
    key=_natural_sort_key,
)
decoded_text = []
# Full path of every image, in the same order as img_arr.
img_arr_full = [os.path.join(path, img_name) for img_name in img_arr]

In [7]:
# Get the true labels, i.e. the original text
text = 'hello' # The original text, entered by the user
splitted_text = list(text)

# Create dictionaries for converting the text into numeric representations.
# The keyboard is string.printable without its last five characters
# (tab, newline, carriage return, vertical tab, form feed); the space is kept.
keyboard = list(string.printable)[0:-5]
# Labels are 1-based and follow the keyboard order.
keyboard_dict = {char: label for label, char in enumerate(keyboard, start=1)}
keyboard_dict_r = {label: char for char, label in keyboard_dict.items()}


def encode_text(chars, char_to_label):
    """Translate characters into their numeric labels.

    Args:
        chars: Iterable of single characters.
        char_to_label: Mapping from character to numeric label.

    Returns:
        List with the label of every character, in order.

    Raises:
        ValueError: If a character has no label; failing here avoids passing
            ``None`` labels on to the data generator.
    """
    chars = list(chars)
    unsupported = sorted({char for char in chars if char not in char_to_label})
    if unsupported:
        raise ValueError("Characters not on the keyboard: {!r}".format(unsupported))
    return [char_to_label[char] for char in chars]


main_labels = encode_text(splitted_text, keyboard_dict) #the numeric representation of the original text

['h', 'e', 'l', 'l', 'o']


In [10]:
# Get the decoded text
# Every image is paired with one character of the original text by position,
# so the two lists must have the same length.
if len(img_arr_full) != len(main_labels):
    raise ValueError(
        "Found {} images but the original text has {} characters; "
        "they must match one-to-one.".format(len(img_arr_full), len(main_labels)))

df_video = pd.DataFrame({"img_path": img_arr_full, "label": main_labels})
generation_params = {"dim": (80,80),"nb_classes": 127,"column_img": "img_path","column_label": "label"}
# A single batch holding every image, in the original order (shuffle=False),
# so that predictions line up with main_labels.
test_generator = DataGenerator(data_frame=df_video, batch_size=len(img_arr_full), shuffle=False, **generation_params)
predictions = model.predict_generator(generator=test_generator)
# NOTE(review): prediction_standardized is a project helper; presumably it turns
# the softmax output into one-hot rows - confirm.
predictions_dec = binary_label_to_decimal(prediction_standardized(predictions))

# Compute accuracy
acc_test = accuracy_score(main_labels, predictions_dec)
print(acc_test)

# Convert the numerical predictions into the original format.
# The generator is configured for 127 classes while the keyboard dictionary,
# built from string.printable, has fewer entries; a predicted class without a
# character is shown as U+FFFD instead of raising KeyError.
predited_text = ''.join(keyboard_dict_r.get(int(i), '\ufffd') for i in predictions_dec)
print("The original text:", text)
print("The predicted text:", predited_text)

1.0
The original text: hello
The predicted text: hello


