# Code for training the holistic model

# Pembuatan Model

## Import Libraries 


In [1]:
from datetime import datetime
import os
import sys
import io
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import TensorBoard
from keras.optimizers import Adam

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, log_loss, confusion_matrix

## Setup Variables

In [2]:
# Variables
n = 1  # Data duplication
handsOnly = True  # Whether to use only hands or not
learning_rate = 0.0001
epoch = 10

FOLDER_NAME = 'dataset'
ALL_CLASSES = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

In [3]:
# Create label map, representing each class as a number
label_map = {}
for (root, folders, files) in os.walk(FOLDER_NAME):
    for foldername in folders:
        if foldername in ALL_CLASSES:
            label_map[foldername] = ALL_CLASSES.index(foldername)

print(label_map)

{'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4, 'f': 5, 'g': 6, 'h': 7, 'i': 8, 'j': 9, 'k': 10, 'l': 11, 'm': 12, 'n': 13, 'o': 14, 'p': 15, 'q': 16, 'r': 17, 's': 18, 't': 19, 'u': 20, 'v': 21, 'w': 22, 'x': 23, 'y': 24, 'z': 25}


In [4]:
# Get all datset data with its label and put it in a list
sequence, label = [], []
target_length = 14
for (root, folders, files) in os.walk(FOLDER_NAME):
    total_file = 0
    for filename in files:
        file_path = os.path.join(os.path.relpath(
            root, FOLDER_NAME), filename)
        if (filename.endswith('.npy') and os.path.split(file_path)[0] in ALL_CLASSES):
            res = np.load(f'{FOLDER_NAME}/{file_path}')
            for _ in range(target_length-res.shape[0]):
                res = np.vstack((res, res[-1, :]))
            if(handsOnly):
                res = res[:, -126:]
            sequence.append(np.array(res))
            label.append(label_map[os.path.basename(root[-1])])
            total_file += 1
    print(f"Total files: {total_file} --- {root}")

print(np.array(sequence).shape)
print(np.array(label).shape)

Total files: 0 --- dataset
Total files: 50 --- dataset\a
Total files: 50 --- dataset\b
Total files: 50 --- dataset\c
Total files: 50 --- dataset\d
Total files: 50 --- dataset\e
Total files: 50 --- dataset\f
Total files: 50 --- dataset\g
Total files: 50 --- dataset\h
Total files: 50 --- dataset\i
Total files: 50 --- dataset\j
Total files: 50 --- dataset\k
Total files: 50 --- dataset\l
Total files: 50 --- dataset\m
Total files: 50 --- dataset\n
Total files: 50 --- dataset\o
Total files: 50 --- dataset\p
Total files: 50 --- dataset\q
Total files: 50 --- dataset\r
Total files: 50 --- dataset\s
Total files: 50 --- dataset\t
Total files: 50 --- dataset\u
Total files: 50 --- dataset\v
Total files: 50 --- dataset\w
Total files: 50 --- dataset\x
Total files: 50 --- dataset\y
Total files: 50 --- dataset\z
(1300, 14, 126)
(1300,)


In [5]:
sequence = np.concatenate([sequence] * n, axis=0)
label = np.concatenate([label] * n, axis=0)


print(np.array(sequence).shape)
print(np.array(label).shape)

(1300, 14, 126)
(1300,)


In [6]:
tf.config.list_physical_devices('GPU')


[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

## Training Data

In [7]:
X_train, X_test, y_train, y_test = train_test_split(np.array(sequence), tf.keras.utils.to_categorical(
    np.array(label).astype(int), num_classes=np.array(ALL_CLASSES).shape[0], dtype='float32'), test_size=0.2)

print(X_train.shape, X_test.shape)


(1040, 14, 126) (260, 14, 126)


In [8]:
training_phase = str(np.array(sequence).shape[2]) + "-tanh-lr-" + str(learning_rate).replace("0.", "") + "-dupli-" + str(n) + "-" + str(epoch) + "-epoch-" + datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = os.path.join('Logs', training_phase)
tb_callback = TensorBoard(log_dir=log_dir)

print(log_dir)

model = Sequential()
model.add(LSTM(64, return_sequences=True,
               activation='tanh', input_shape=(14, np.array(sequence).shape[2])))
model.add(LSTM(64, return_sequences=True, activation='tanh'))
model.add(LSTM(64, return_sequences=False, activation='tanh'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(np.array(ALL_CLASSES).shape[0], activation='softmax'))

optimizer = Adam(learning_rate=learning_rate)

model.compile(optimizer=optimizer, loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])

Logs\126-tanh-lr-0001-dupli-1-10-epoch-20240929-115115


In [9]:
model.fit(X_train, y_train, epochs=epoch, callbacks=[
          tb_callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x21a0fc8a850>

In [10]:
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 14, 64)            48896     
                                                                 
 lstm_1 (LSTM)               (None, 14, 64)            33024     
                                                                 
 lstm_2 (LSTM)               (None, 64)                33024     
                                                                 
 dense (Dense)               (None, 64)                4160      
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 26)                858       
                                                                 
Total params: 122,042
Trainable params: 122,042
Non-trai

In [11]:
model.save(f'{log_dir}/action.h5')

# Evaluasi Model

In [12]:
phase_dir = f'Logs/{training_phase}'
if not os.path.exists(phase_dir):
    os.makedirs(phase_dir)

y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)
classes = np.unique(y_true)

accuracy = accuracy_score(y_true, y_pred_classes)
precision = precision_score(y_true, y_pred_classes, average='weighted')
recall = recall_score(y_true, y_pred_classes, average='weighted')
f1 = f1_score(y_true, y_pred_classes, average='weighted')
loss = log_loss(y_true, y_pred, labels=classes)

# Redirect stdout to a string buffer
old_stdout = sys.stdout
sys.stdout = buffer = io.StringIO()

print(f"Training Phase: {training_phase}\n\n")
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print(f"Loss: {loss}")

report = classification_report(y_true, y_pred_classes)

sys.stdout = old_stdout
output = buffer.getvalue()

# Save the output to a uniquely named text file in the Logs directory
log_filename = f'{phase_dir}/summary.txt'

with open(log_filename, 'w') as f:
    f.write(output)
    f.write("\n")
    f.write(report)
    
# Create the confusion matrix
cm = confusion_matrix(y_true, y_pred_classes)

plt.figure(figsize=(12, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=True, square=True, linewidths=0,
            xticklabels=[f'{ALL_CLASSES[cls]}' for cls in np.unique(y_true)],
            yticklabels=[f'{ALL_CLASSES[cls]}' for cls in np.unique(y_true)])


# Add labels for axes
plt.xlabel('Predicted Label', fontsize=12)
plt.ylabel('True Label', fontsize=12)

# Save the figure as a PDF
plt.savefig(f'{phase_dir}/confusion_matrix.pdf', format='pdf')
plt.close()



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
