In [None]:
import pyarrow.parquet as pq
import json
import numpy as np
import pandas as pd
import pyarrow as pa
from joblib import Parallel, delayed
from tqdm_joblib import tqdm_joblib
import timeit

In [None]:
# Load the training index. Its 'path' and 'sign' columns are consumed further
# down when the landmark files are converted into features and labels.
df = pd.read_csv('asl-signs/train.csv')
df

In [None]:
# Lookup table used by convert_label() to turn a raw sign value into its label.
# The context manager guarantees the file handle is closed once parsing is
# done, instead of leaving it to the garbage collector.
with open('asl-signs/sign_to_prediction_index_map.json') as label_map_file:
    label_map = json.load(label_map_file)
print(label_map)

In [None]:
def frame_check(df):
    """Tell whether a recording spans enough frames to be usable.

    Args:
        df: DataFrame with a 'frame' column.

    Returns:
        False when ``max(frame) - min(frame)`` is below 5, True otherwise.
    """
    first_frame = df['frame'].min()
    last_frame = df['frame'].max()
    too_short = (last_frame - first_frame) < 5
    return not too_short

In [None]:
def frame_count(df):
    """Return the extreme values of the 'frame' column.

    Args:
        df: DataFrame with a 'frame' column.

    Returns:
        A ``(max_frame, min_frame)`` tuple -- note the order, maximum first.
    """
    frame_col = df['frame']
    return frame_col.max(), frame_col.min()

In [None]:

def extract_file(filename, n_frames=20, n_features=279):
    """Convert one landmark parquet file into a fixed-size feature matrix.

    The file is read from ``asl-signs/<filename>``. Only hand rows plus a
    fixed subset of face and pose landmarks are kept. For each of the last
    ``n_frames`` frame indices the x, y and z columns are concatenated (all x
    values, then all y, then all z) into one float32 vector. Recordings
    shorter than ``n_frames`` are zero-padded at the front.

    Args:
        filename: Path of the parquet file relative to ``asl-signs/``.
        n_frames: Number of rows (time steps) in the returned matrix.
        n_features: Expected length of every per-frame vector. The default
            279 equals 3 coordinates x 93 landmarks (39 face + 12 pose +
            presumably 21 per hand -- confirm against the dataset).

    Returns:
        ``(features, True)`` with ``features`` a float32 array of shape
        ``(n_frames, n_features)`` on success, or ``(np.array([0]), False)``
        when the file is empty, fails ``frame_check``, or any frame in the
        window does not yield exactly ``n_features`` values (for example a
        frame index that is missing from the file).
    """
    landmarks = pq.read_table(f'asl-signs/{filename}').to_pandas()
    if landmarks.empty or not frame_check(landmarks):
        return np.array([0]), False
    # Undetected landmarks are stored as NaN; treat them as zeros.
    landmarks = landmarks.fillna(0)

    face_landmarks = [61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
                      291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
                      191, 80, 81, 82, 13, 312, 311, 310, 415, 308,
                      95, 88, 178, 87, 14, 317, 402, 318, 324]
    pose_landmarks = [11, 12, 13, 14, 15, 16,
                      17, 18, 19, 20, 21, 22]
    hand = ['left_hand', 'right_hand']

    # frame_count returns (max, min); plain ints keep range() and the
    # arithmetic below independent of the column's integer width.
    last_frame, first_frame = (int(value) for value in frame_count(landmarks))

    # Keep every hand row, and only the selected face / pose landmarks.
    keep = (
        landmarks['type'].isin(hand)
        | ((landmarks['type'] == 'face') & landmarks['landmark_index'].isin(face_landmarks))
        | ((landmarks['type'] == 'pose') & landmarks['landmark_index'].isin(pose_landmarks))
    )
    landmarks = landmarks[keep]

    # Only the last n_frames frame indices are used; narrowing the frame once
    # makes the per-frame selection below cheaper.
    window_start = max(first_frame, last_frame - n_frames + 1)
    landmarks = landmarks[landmarks['frame'] >= window_start]

    frame_vectors = []
    for frame_idx in range(window_start, last_frame + 1):
        rows = landmarks[landmarks['frame'] == frame_idx]
        vector = np.concatenate(
            [rows[axis].to_numpy(dtype=np.float32) for axis in ('x', 'y', 'z')]
        )
        # Validate every frame explicitly instead of relying on NumPy's
        # handling of ragged nested sequences, which raises ValueError on
        # recent NumPy releases.
        if vector.size != n_features:
            return np.array([0]), False
        frame_vectors.append(vector)

    # Zero rows at the front act as padding for short recordings; the array is
    # float32 throughout so padded and unpadded samples share one dtype.
    features = np.zeros((n_frames, n_features), dtype=np.float32)
    features[n_frames - len(frame_vectors):] = np.stack(frame_vectors)
    return features, True


In [None]:
def convert_label(raw):
    """Look up ``raw`` in the module-level ``label_map`` and return its value.

    Raises:
        KeyError: if ``raw`` is not a key of ``label_map``.
    """
    label_index = label_map[raw]
    return label_index

In [None]:
# Parallelism

path = df['path'].to_numpy().astype(str)
raw_label = df['sign'].to_numpy().astype(str)

# A dictionary lookup is far cheaper than dispatching a joblib task, so the
# labels are converted in-process rather than through a worker pool.
label = [convert_label(sign) for sign in raw_label]

# Reading and reshaping the parquet files is the expensive step, so that one
# is spread over all cores. The progress total follows the actual number of
# files instead of a hard-coded count.
with tqdm_joblib(desc="Data conversion", total=len(path)) as progress_bar:
    data, cond = zip(*Parallel(n_jobs=-1)(delayed(extract_file)(file_path) for file_path in path))


In [None]:
# Removing false data
# Each input sequence becomes one labelled row; transposing turns those rows
# into the 'data', 'cond' and 'label' columns.
new_df = pd.DataFrame(data=[data, cond, label], index=['data', 'cond', 'label']).T
new_df

In [None]:
# Keep only the rows whose 'cond' flag is not False, then drop the flag column
# and renumber the index from zero.
new_df = (
    new_df.loc[new_df['cond'] != False]
    .drop(columns=['cond'])
    .reset_index(drop=True)
)
new_df

In [None]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical


In [None]:
# Stack the per-sample matrices into one array; np.stack fails loudly if any
# sample has a different shape from the rest.
X_data = np.stack(list(new_df['data']))
print(X_data.shape)

# Pin the one-hot width to the label map. Without num_classes, to_categorical
# sizes the output from the largest label that survived filtering, which can
# be narrower than the model's output layer.
Y_data = new_df['label'].to_numpy().astype(int)
Y_data = to_categorical(Y_data, num_classes=max(label_map.values()) + 1).astype(int)
print(Y_data.shape)

In [None]:
# Persist the feature and label arrays so the conversion above does not have
# to be repeated before training.
np.save("feature_data.npy", X_data)
np.save("label_data.npy", Y_data)

In [None]:
# Reload the arrays written by the np.save cell; training can start from here
# once those files exist.
X_data = np.load("feature_data.npy")
Y_data = np.load("label_data.npy")

In [None]:
# 90 % train; the remaining 10 % is halved into test and validation sets.
# A fixed random_state makes both splits reproducible across kernel restarts.
X_train, X, y_train, y = train_test_split(X_data, Y_data, test_size=0.1, shuffle=True, random_state=42)
X_test, X_val, y_test, y_val = train_test_split(X, y, test_size=0.5, shuffle=True, random_state=42)

In [None]:
# Sanity check: shapes of the train, test and validation arrays, in that order.
print(*(split.shape for split in (X_train, y_train, X_test, y_test, X_val, y_val)))

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Flatten, GRU, Conv1D
from tensorflow.keras.callbacks import TensorBoard, Callback
from tensorflow.keras.optimizers import Adam

In [None]:
class trainingCallback(Callback):
    """Stop training once the training accuracy is considered good enough.

    Training is stopped at the end of an epoch when either
    * ``categorical_accuracy`` exceeds 0.95, or
    * ``categorical_accuracy`` exceeds 0.92 and ``loss`` is greater than
      ``val_loss``.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Inspect the epoch metrics and request a stop when a rule matches.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric dictionary supplied by Keras; may be None.
        """
        # A mutable default argument would be shared between calls, so the
        # dict is created here instead.
        logs = logs or {}

        accuracy = logs.get('categorical_accuracy')
        if accuracy is None:
            # The metric was not compiled into the model; nothing to judge.
            return

        loss = logs.get('loss')
        val_loss = logs.get('val_loss')
        # val_loss is absent when fit() runs without validation data; the
        # second rule is simply skipped then instead of comparing with None.
        loss_above_val_loss = (
            loss is not None and val_loss is not None and loss > val_loss
        )

        if accuracy > 0.95 or (accuracy > 0.92 and loss_above_val_loss):
            # Stop if threshold is met
            print("\nAccuracy grater than 0.92 so cancelling training!")
            self.model.stop_training = True


# Instantiate class
callbacks = trainingCallback()

In [None]:
# LSTM Model
# Single-LSTM classifier: input sequences of shape (20, 279) -> LSTM(256) ->
# dropout -> concatenation with the LSTM's returned states -> Dense(512) ->
# dropout -> 250-way softmax.
inputs = tf.keras.layers.Input(shape=(20,279))
# return_state=True makes the layer return three tensors, unpacked as
# x_1, w, h. NOTE(review): per the Keras docs these are presumably
# (output, hidden state, cell state), so `w` and `h` would hold the hidden
# and cell states respectively -- confirm.
x_1, w, h = tf.keras.layers.LSTM(256, return_sequences=False, activation='relu', return_state=True)(inputs)
x = tf.keras.layers.Dropout(0.2)(x_1)
# x = tf.keras.layers.LSTM(128, return_sequences=False, activation='relu')(x, initial_state=[w, h])
# The dropped-out output and both returned states feed the dense head.
concat = tf.keras.layers.concatenate([x, w, h])
# flatten = tf.keras.layers.Flatten(concat)
x = tf.keras.layers.Dense(512, activation='relu')(concat)
x = tf.keras.layers.Dropout(0.2)(x)
# x = tf.keras.layers.Dense(64, activation='relu')(x)
# x = tf.keras.layers.Dropout(0.5)(x)
# One softmax unit per class (250 outputs).
out = tf.keras.layers.Dense(250, activation='softmax', name='outputs')(x)
model_LSTM = tf.keras.Model(inputs, out)
model_LSTM.summary()

In [None]:
# One-hot targets with a softmax output: categorical cross-entropy loss,
# tracked with categorical accuracy (the metric the early-stop callback reads).
model_LSTM.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

In [None]:
# Train for at most 200 epochs; the callback may stop the run earlier.
model_train = model_LSTM.fit(
    X_train,
    y_train,
    epochs=200,
    batch_size=64,
    validation_data=(X_val, y_val),
    callbacks=[callbacks],
)


In [None]:
# LSTM Model
# Two stacked LSTMs: input sequences of shape (20, 279) -> LSTM(256, full
# sequence) -> dropout -> LSTM(512, last step plus states) -> concatenation ->
# Dense(1024) -> dropout -> 250-way softmax. Every layer is explicitly named.
inputs = tf.keras.layers.Input(shape=(20,279), name='inputs')
# return_sequences=True keeps the per-step outputs so the second LSTM receives
# a sequence.
x_1= tf.keras.layers.LSTM(256, return_sequences=True, activation='relu', name='lstm_1')(inputs)
x = tf.keras.layers.Dropout(0.2, name='drop_1')(x_1)
# return_state=True makes the layer return three tensors, unpacked as x, w, h.
# NOTE(review): per the Keras docs these are presumably (output, hidden state,
# cell state) -- confirm.
x, w, h= tf.keras.layers.LSTM(512, return_sequences=False, activation='relu', return_state=True, name='lstm_2')(x)
# x = tf.keras.layers.LSTM(128, return_sequences=False, activation='relu')(x, initial_state=[w, h])
# The second LSTM's output and both returned states feed the dense head.
concat = tf.keras.layers.concatenate([x, w, h], name='concat_1')
# flatten = tf.keras.layers.Flatten(concat)
x = tf.keras.layers.Dense(1024, activation='relu', name='dense_1')(concat)
x = tf.keras.layers.Dropout(0.2, name='drop_2')(x)
# x = tf.keras.layers.Dense(64, activation='relu')(x)
# x = tf.keras.layers.Dropout(0.5)(x)
# One softmax unit per class (250 outputs).
out = tf.keras.layers.Dense(250, activation='softmax', name='outputs')(x)
model_LSTM_2 = tf.keras.Model(inputs, out)
model_LSTM_2.summary()

In [None]:
# Same optimizer, loss and metric as the first model.
model_LSTM_2.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)
# Short 10-epoch run; note that this rebinds `model_train`, replacing the
# history object of the first model.
model_train = model_LSTM_2.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=64,
    validation_data=(X_val, y_val),
    callbacks=[callbacks],
)
