# Data Preprocessing

This script preprocess the landmarks in the Action Recognition Approach for ASLD.

Created by:
- Marcus Vinicius da Silva Fernandes.

2023-07-15.

In [1]:
import numpy as np
import pandas as pd
import os
import csv

## Loading the data

### Accessing the landmarks
Set up the paths of folders to locate the landmarks and the list (csv file) that associates the name of the video to the corresponding word in English.

In [2]:
# Set up of the extracted landmarks save path
landmarks_path = 'C:/Users/marcu/OneDrive/Documentos/Loyalist_College/AISC2006/train_dir/'

### Loading and shaping the landmarks to the desired number of frames

#### Desired number of frames
- Each video will be reshaped to have the number of rows (or frames) equal to the desired number of frames defined below.

In [3]:
NUM_FRAMES = 60

#### Loading all the landmarks

def load_landmarks(path):
    videos = []
    i = 0

    for item in os.listdir(path):
        if item.endswith('.npy'):  # working with npy files only
            data = np.load(os.path.join(path, item))  # loading the numpy array into memory
            if data.shape[0] > NUM_FRAMES:  # time-based sampling
                indices = np.arange(0, data.shape[0], data.shape[0] // NUM_FRAMES)[:NUM_FRAMES]
                data = data[indices]
                videos.append(data)
            elif data.shape[0] < NUM_FRAMES:  # padding the videos
                data = np.pad(data, ((0, NUM_FRAMES - data.shape[0]), (0, 0)), mode='constant')
                videos.append(data)
            else:  # no change
                videos.append(data)
        i += 1
        if i == 1000:
            break

    return np.array(videos)

landmarks = load_landmarks(landmarks_path)
landmarks.shape

In [4]:
# Opening the file dataset_analysis.csv to load the association of landmark ids to words and its number of frames
id_dict = {}  # initializing the dictionary that will receive the data
num_frames = []  # initializing the list that will contain the number of frames of each landmark

with open(landmarks_path + "Updated Dataset.csv", "r") as csv_file:
    csv_reader = csv.reader(csv_file)  # reading the data
    next(csv_reader)  # to skip the header
    for row in csv_reader:
        if int(row[0]) <= 10000:
            id_dict['0' * (5 - len(row[0])) + row[0]] = row[1]  # storing the content into a dictionary
        else:
            id_dict[row[0]] = row[1]  # storing the content into a dictionary
        num_frames.append(int(row[7]))


def load_landmarks(path):
    videos, labels = [], []
    i = 0

    for item in os.listdir(path):
        if item.endswith('.npy') and item.split('.npy')[0] in id_dict:  # working with npy files only
            if i == 1000:
                return np.array(videos), labels
            data = np.load(os.path.join(path, item))  # loading the numpy array into memory
            if data.shape[0] > NUM_FRAMES:  # time-based sampling
                indices = np.arange(0, data.shape[0], data.shape[0] // NUM_FRAMES)[:NUM_FRAMES]
                data = data[indices]
                videos.append(data)
            elif data.shape[0] < NUM_FRAMES:  # padding the videos
                data = np.pad(data, ((0, NUM_FRAMES - data.shape[0]), (0, 0)), mode='constant')
                videos.append(data)
            else:  # no change
                videos.append(data)
            labels.append(id_dict[item.split('.npy')[0]])
            i += 1

    return np.array(videos), np.array(labels)

landmarks, train_labels = load_landmarks(landmarks_path)

## Dropping landmarks

Each column is composed of coordinates in the folllwing order of models:
- Pose: 2 coordinates x 33 landmarks = 66 values.
- Left hand: 2 coordinates x 21 landmarks = 42 values.
- Right hand: 2 coordinates x 21 landmarks = 42 values.
- Face: 2 coordinates x 468 landmarks = 936 values.

Each row (each frame) have a total of 1086 values.

### Pose Model

Dropping the points 9, 10, 17, 18, 19, 20, 21, 22, 25, 25, 27, 28, 29, 30, 31, and 32.

In [5]:
# Points to drop
pose_drop_points = [9, 10, 17, 18, 19, 20, 21, 22, 25, 26, 27, 28, 29, 30, 31, 32]

# Indexes of the points to drop
pose_drop_index = []
for i in pose_drop_points:
    # pose_drop_index.append([(i - 1) * 2, (i - 1) * 2 + 1, (i - 1) * 4 + 2, (i - 1) * 4 + 3])
    pose_drop_index.append([(i - 1) * 2, (i - 1) * 2 + 1])

pose_drop_index = np.array(pose_drop_index).flatten()

In [6]:
# Dropping the columns
landmarks = np.delete(landmarks, pose_drop_index, 2)
landmarks.shape

(1000, 60, 1054)

### Current composition of the columns

Each column is composed of coordinates in the folllwing order of models:
- Pose: 2 coordinates x 17 landmarks = 34 values.
- Left hand: 2 coordinates x 21 landmarks = 42 values.
- Right hand: 2 coordinates x 21 landmarks = 42 values.
- Face: 2 coordinates x 468 landmarks = 936 values.

Each row (each frame) will have a total of 1054 values after concatenation.

### Left and Right Hands Models

These models will be kept as they are. No columns will be dropped.

### Face Model

Only the landmark coordinates corresponding to the outline of the lips will be kept? 0, 267, 269, 270, 409, 291, 375, 321, 403, 314, 17, 84, 181, 91, 146, 61, 185, 40, 39, and 27.

In [7]:
# Points to keep
face_keep_points = [0, 267, 269, 270, 409, 291, 375, 321, 403, 314, 17, 84, 181, 91, 146, 61, 185, 40, 39, 27]
face_keep_points.sort()

# Points to drop
face_drop_points = np.delete([i for i in range(468)], face_keep_points, 0)

# Indexes of the points to drop
face_drop_index = []
for i in face_drop_points:
    face_drop_index.append([(i - 1) * 2 + 120, (i - 1) * 2 + 120 + 1])

face_drop_index = np.array(face_drop_index).flatten()

In [8]:
# Dropping the columns
landmarks = np.delete(landmarks, face_drop_index, 2)
landmarks.shape

(1000, 60, 158)

### Final composition of the columns

Each column is composed of coordinates in the folllwing order of models:
- Pose: 2 coordinates x 17 landmarks = 34 values.
- Left hand: 2 coordinates x 21 landmarks = 42 values.
- Right hand: 2 coordinates x 21 landmarks = 42 values.
- Face: 2 coordinates x 20 landmarks = 40 values.

Each row (each frame) will have a total of 158 values after concatenation.

# LSTM Model

In [9]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Masking
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import TensorBoard
import keras_tuner as kt

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [10]:
x_train, x_val, y_train, y_val = train_test_split(landmarks, train_labels, test_size=0.2, random_state=42)

In [11]:
from sklearn.preprocessing import LabelBinarizer

label_binarizer = LabelBinarizer()
y_train = label_binarizer.fit_transform(y_train)
y_val = label_binarizer.transform(y_val)

In [12]:
def normalize(array):
    return (array - np.min(array)) / (np.max(array) - np.min(array))

In [13]:
x_train = normalize(x_train)
x_val = normalize(x_val)

In [14]:

# Optimized model

model = Sequential()

# Masking Layer
model.add(Masking(mask_value=0, input_shape=(x_train.shape[1], x_train.shape[2])))
model.add(LSTM(64,activation='relu'))
model.add(Dense(64,activation='relu'))
model.add(Dense(units=1, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [15]:
history = model.fit(x_train, y_train, epochs=500, validation_data=(x_val, y_val))

Epoch 1/500


  return dispatch_target(*args, **kwargs)


ValueError: in user code:

    File "c:\Users\marcu\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1338, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\marcu\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1322, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\marcu\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1303, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\marcu\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1081, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "c:\Users\marcu\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1139, in compute_loss
        return self.compiled_loss(
    File "c:\Users\marcu\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\compile_utils.py", line 265, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "c:\Users\marcu\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\losses.py", line 142, in __call__
        losses = call_fn(y_true, y_pred)
    File "c:\Users\marcu\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\losses.py", line 268, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "c:\Users\marcu\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\losses.py", line 2122, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "c:\Users\marcu\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\backend.py", line 5560, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (32, 269) and (32, 1) are incompatible
