## Importing the libraries for data preprocessing

In [1]:
import numpy as np
import pandas as pd
import json
import os
import csv
from tqdm import tqdm

## Reproducibility of the results

Setting the random seed for the random number generators used in the code, to ensure that random processes, like data shuffling or weight initialization, produce the same results every time we run the notebook.

In [2]:
import random

# One shared seed for Python's and NumPy's global random number generators
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

## Loading the data

### Accessing the landmarks
- Set up the paths of folders to locate the landmarks and the list (csv file) that associates the name of the video to the corresponding word in English.
- Creation of a dictionary to associate the words to a unique number.

In [3]:
# Folder that holds the extracted landmark files and the dataset csv.
# Set the LANDMARKS_PATH environment variable to point somewhere else; when it is
# unset (or empty) the location used so far is kept.
landmarks_path = os.environ.get('LANDMARKS_PATH') or 'C:/Users/marcu/OneDrive/Documentos/Loyalist_College/AISC2006/predictions_wlasl_for_gd/landmarks/'

# File names are built from this value by plain string concatenation,
# so it has to end with a path separator.
if not landmarks_path.endswith(('/', '\\')):
    landmarks_path += '/'

In [4]:
# Reading "Updated Dataset_dog_happy_jump.csv": column 0 is the landmark file id,
# column 1 is the English word signed in that file.
# (The word -> index dictionary is not built here; it is read from
# sign_to_prediction_index_map.json in a later cell.)
file, word = [], []

# newline="" lets the csv module do its own line-ending handling, as its docs require
with open(landmarks_path + "Updated Dataset_dog_happy_jump.csv", "r", newline="") as csv_file:
    csv_reader = csv.reader(csv_file)  # reading the data
    next(csv_reader, None)  # to skip the header (no error if the file is empty)
    for row in csv_reader:
        if not row:  # blank lines are returned as empty lists; nothing to load
            continue
        file.append(landmarks_path + str(row[0]) + '.parquet')  # getting the files to load
        word.append(row[1])  # getting the words corresponding to the files to load

# Arrays (rather than lists) so later cells can use .shape and integer indexing
file = np.array(file)
word = np.array(word)

In [5]:
# Reading the json map that associates each word (sign) with its unique number
with open('sign_to_prediction_index_map.json', 'r') as map_file:
    sign_dict = json.load(map_file)

del map_file

### Loading and shaping the landmarks to the desired number of frames

#### Desired number of frames
- Each video will be reshaped to have the number of rows (or frames) equal to the desired number of frames defined below.

In [6]:
# Every video is brought to exactly this many frames when it is loaded:
# longer clips are subsampled, shorter ones are zero-padded.
NUM_FRAMES = 30

#### Landmark points to keep
- The objective is to reduce the number of features.
- All the landmarks from the hands will be kept.

In [7]:
# Number of rows the face, left-hand and pose groups occupy in a frame; each group
# starts where the previous one ends (face, then left hand, then pose, then right hand).
face_rows, left_hand_rows, pose_rows = 468, 21, 33

left_hand_offset = face_rows
pose_offset = left_hand_offset + left_hand_rows
right_hand_offset = pose_offset + pose_rows

# Points kept from each group, numbered relative to the start of the group
face_points = sorted([0, 267, 269, 270, 409, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146, 61, 185, 40, 39, 37])
pose_points = [0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 16, 23, 24]
hand_points = range(21)  # every hand landmark is kept

# Row indices within one frame, in group order
landmarks_to_keep = (
    face_points
    + [left_hand_offset + point for point in hand_points]
    + [pose_offset + point for point in pose_points]
    + [right_hand_offset + point for point in hand_points]
)

# Only landmarks_to_keep is needed from here on
del face_rows, left_hand_rows, pose_rows
del left_hand_offset, pose_offset, right_hand_offset
del face_points, pose_points, hand_points

In [8]:
# Rows that make up one frame in a landmark file; used to split the flat table into frames
TOTAL_ROWS = 543
# Width of the per-frame feature vector: one x and one y value for every kept landmark.
# NOTE(review): despite its name this is a number of feature columns, not of rows.
desired_num_rows = len(landmarks_to_keep) * 2

#### Loading all the landmarks
- The 'z' coordinate will be dropped because, according to Google, it “should be discarded as currently the model is not fully trained to predict depth”.

In [9]:
def load_landmarks():
    """Load every landmark file listed in ``file`` as a fixed-length sequence.

    Uses the notebook-level names ``file``, ``word``, ``sign_dict``, ``NUM_FRAMES``,
    ``TOTAL_ROWS``, ``landmarks_to_keep`` and ``desired_num_rows``.

    For each parquet file the 'x' and 'y' columns are read (missing values become 0),
    split into frames of ``TOTAL_ROWS`` rows, reduced to the rows in
    ``landmarks_to_keep`` and brought to exactly ``NUM_FRAMES`` frames: longer clips
    are subsampled with a constant stride, shorter clips are zero-padded at the front.

    Returns
    -------
    landmarks : np.ndarray of float, shape (len(file), NUM_FRAMES, desired_num_rows)
        Each frame holds the x values of all kept landmarks followed by their y values.
    labels : np.ndarray
        ``sign_dict[word[i]]`` for every loaded file, in the same order.

    Raises
    ------
    ValueError
        If a file's row count is not a whole number of ``TOTAL_ROWS``-row frames.
    KeyError
        If a word is missing from ``sign_dict``.
    """
    # Columns to upload
    data_columns = ['x', 'y']
    num_features = len(landmarks_to_keep) * len(data_columns)

    landmarks = np.empty((file.shape[0], NUM_FRAMES, desired_num_rows), dtype=float)
    labels = []

    for i in tqdm(range(file.shape[0])):

        # Loading the file and splitting the flat table into frames
        table = pd.read_parquet(file[i], columns=data_columns).fillna(0)
        num_frames, leftover = divmod(len(table), TOTAL_ROWS)
        if leftover:
            raise ValueError(
                f"{file[i]}: {len(table)} rows is not a multiple of {TOTAL_ROWS} rows per frame"
            )
        # NOTE: no dtype cast is applied. ndarray.astype returns a new array, so a bare
        # `data.astype(np.float32)` call has no effect; the values are stored in the
        # float output buffer below either way.
        data = table.values.reshape(num_frames, TOTAL_ROWS, len(data_columns))

        # Dropping undesired points
        data = data[:, landmarks_to_keep]

        # Adjusting the number of frames
        if num_frames > NUM_FRAMES:  # time-based sampling
            # NOTE(review): for clips shorter than 2 * NUM_FRAMES the stride is 1, so
            # this keeps the first NUM_FRAMES frames and drops the tail. Left unchanged
            # on the assumption that it must match the preprocessing used in training.
            stride = num_frames // NUM_FRAMES
            indices = np.arange(0, num_frames, stride)[:NUM_FRAMES]
            data = data[indices]
        elif num_frames < NUM_FRAMES:  # padding the videos with leading all-zero frames
            padding = np.zeros((NUM_FRAMES - num_frames, len(landmarks_to_keep), len(data_columns)))
            data = np.concatenate((padding, data), axis=0)

        # Reshaping the data: order='F' lays out all x values first, then all y values
        landmarks[i] = data.reshape(NUM_FRAMES, num_features, order='F')

        # Creating the labels dataset
        labels.append(sign_dict[word[i]])

    return landmarks, np.array(labels)

# x_test: one (NUM_FRAMES, desired_num_rows) matrix per listed file; y_test: the matching integer labels
x_test, y_test = load_landmarks()

  0%|          | 0/22 [00:00<?, ?it/s]

100%|██████████| 22/22 [00:00<00:00, 33.21it/s]


## Normalizing the x datasets

In [10]:
# # Normalizing the data
# def normalize(array):
#     return (array - np.min(array)) / (np.max(array) - np.min(array))
# x_test = normalize(x_test)

## One-hot encoding the y datasets

In [11]:
# from sklearn.preprocessing import LabelBinarizer

# label_binarizer = LabelBinarizer()
# y_test = label_binarizer.fit_transform(y_test)

# del label_binarizer

In [12]:
# Width of the one-hot rows; must equal the number of classes the model outputs
NUM_CLASSES = 250

# y_test is replaced by its one-hot version below. The ndim guard makes the cell safe
# to re-run: once y_test is already 2-D it must not be used as a class index again.
if y_test.ndim == 1:
    y_encoded = np.zeros((x_test.shape[0], NUM_CLASSES))
    # Set column y_test[i] of row i to 1 for every sample in one indexing step
    y_encoded[np.arange(x_test.shape[0]), y_test] = 1

    y_test = y_encoded

## Importing the libraries for modelling

In [13]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Masking
# from tensorflow.keras.utils import to_categorical
# from tensorflow.keras.callbacks import TensorBoard
# import keras_tuner as kt

# from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
# from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

## Model build

In [14]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, LayerNormalization, Activation, Dropout, LSTM, Input

def build_model(input_shape, num_classes):
    """Build the sequence classifier: three dense stacks, an LSTM and a softmax head.

    Parameters
    ----------
    input_shape : tuple
        Shape of one sample without the batch axis, passed to ``Input``.
    num_classes : int
        Number of units in the final softmax layer.

    Returns
    -------
    tf.keras.Model
        The model, not compiled and with freshly initialized weights.
    """
    inputs = Input(shape=input_shape)

    # Three identical stacks (Dense -> LayerNormalization -> ReLU -> Dropout 0.5),
    # differing only in width. Layers are created in the same order as when the
    # stacks are written out one after another.
    hidden = inputs
    for units in (256, 128, 256):
        hidden = Dense(units)(hidden)
        hidden = LayerNormalization()(hidden)
        hidden = Activation('relu')(hidden)
        hidden = Dropout(0.5)(hidden)

    # The LSTM consumes the sequence of dense features; the head scores each class
    lstm_output = LSTM(256)(hidden)
    outputs = Dense(num_classes, activation='softmax')(lstm_output)

    return tf.keras.Model(inputs=inputs, outputs=outputs)

# Each time step carries 158 features (79 kept landmarks x 2 coordinates); the sequence
# length is left as None. num_classes is 250, the width of the one-hot labels.
input_shape = (None, 158)
num_classes = 250
model = build_model(input_shape, num_classes)

model.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None, 158)]       0         
                                                                 
 dense (Dense)               (None, None, 256)         40704     
                                                                 
 layer_normalization (LayerN  (None, None, 256)        512       
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, None, 256)         0         
                                                                 
 dropout (Dropout)           (None, None, 256)         0         
                                                                 
 dense_1 (Dense)             (None, None, 128)         32896     
                                                             

## Loading the weights

In [15]:
# Load saved weights into the model built above; the .h5 path is relative to the working directory
model.load_weights('07-25_ARM_GD_baseline.h5')

## Predictions

In [16]:
# Making predictions: one row of class scores per test sample
predicted_probs = model.predict(x_test)

# Getting the predicted labels (index of the highest score in each row)
predicted_labels = np.argmax(predicted_probs, axis=1)

# Getting the predicted words: invert sign_dict once (number -> word) instead of
# rebuilding the key and value lists for every sample. setdefault keeps the first
# word if two words ever share a number, which is what list.index() returned.
index_to_sign = {}
for sign, index in sign_dict.items():
    index_to_sign.setdefault(index, sign)
predicted_words = np.array([index_to_sign[label] for label in predicted_labels])

# Printing the predicted words
print('Predicted words:')
print(predicted_words)

2023-08-06 12:09:58.204941: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Predicted words:
['sticky' 'drawer' 'vacuum' 'red' 'cheek' 'red' 'zipper' 'beside'
 'hesheit' 'black' 'giraffe' 'thirsty' 'hungry' 'hungry' 'shhh' 'black'
 'zipper' 'puppy' 'dog' 'shhh' 'beside' 'black']


In [17]:
from sklearn.metrics import accuracy_score

In [18]:
# Turning the one-hot rows of y_test back into class numbers
true_labels = np.argmax(y_test, axis=1)

# Share of samples whose predicted class number equals the true one
accuracy = accuracy_score(true_labels, predicted_labels)

# Reporting the score
print("Test Dataset Accuracy:", accuracy)

Test Dataset Accuracy: 0.0
