In [None]:
# Setup environment
# !apt-get -qq install xxd
# !pip3 install pandas numpy matplotlib
# !pip3 install tensorflow==2.13.0


In [None]:
import glob
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

# if tf.test.is_gpu_available():
#     print("GPU is available")
#     # Additional GPU information
#     gpu_devices = tf.config.experimental.list_physical_devices('GPU')
#     print("Available GPU devices:", gpu_devices)
# else:
#     print("GPU is NOT available")

In [None]:
file_path = "processed_data_small_1/*"

print(f"TensorFlow version = {tf.__version__}\n")

# Set a fixed random seed value so NumPy and TensorFlow produce the same
# random numbers each time the notebook is run.
SEED = 1337
np.random.seed(SEED)
tf.random.set_seed(SEED)

# The list of gestures that data is available for. A recording's label is the
# first character of its file name.
GESTURES = "abcdefghijklmnopqrstuvwxyz"

SAMPLES_PER_GESTURE = 150

# Derived from GESTURES so the two can never disagree.
NUM_GESTURES = len(GESTURES)

# Row i of this identity matrix is the one-hot target vector for GESTURES[i].
ONE_HOT_ENCODED_GESTURES = np.eye(NUM_GESTURES)

inputs = []
outputs = []

# glob returns files in an OS-dependent order; sorting makes the seeded shuffle
# in the next cell produce the same split on every machine.
datafiles = sorted(glob.glob(file_path))
print("total files", len(datafiles))
if not datafiles:
    raise FileNotFoundError(f"No data files match {file_path!r}")

for datafile in datafiles:
    # basename() copes with both "/" and "\" separators, unlike rfind("/").
    letter_label = os.path.basename(datafile)[:1]
    if len(letter_label) != 1 or letter_label not in GESTURES:
        # Such files used to be silently labelled as GESTURES[0]; leave them
        # out of the data set instead and say so.
        print("skipping", datafile, "- file name does not start with a known gesture")
        continue

    tensor = pd.read_csv(datafile)
    inputs.append(tensor)
    outputs.append(ONE_HOT_ENCODED_GESTURES[GESTURES.index(letter_label)])

print("total ", len(inputs))
# convert the lists to numpy arrays
inputs = np.array(inputs)
outputs = np.array(outputs)

print("input shape: ", inputs.shape, " output shape", outputs.shape)
print("Data set parsing and preparation complete.")

In [None]:
# Shuffle the recordings so that they are spread evenly over the training,
# testing and validation sets.
# https://stackoverflow.com/a/37710486/2020087
num_inputs = len(inputs)
randomize = np.random.permutation(num_inputs)

# Apply the same permutation to both arrays so each input keeps its label
inputs, outputs = inputs[randomize], outputs[randomize]

# Cut points: the first 60% is used for training, the next 20% for testing and
# whatever remains for validation
TRAIN_SPLIT = int(0.6 * num_inputs)
TEST_SPLIT = int(0.2 * num_inputs + TRAIN_SPLIT)
split_points = [TRAIN_SPLIT, TEST_SPLIT]

inputs_train, inputs_test, inputs_validate = np.split(inputs, split_points)
outputs_train, outputs_test, outputs_validate = np.split(outputs, split_points)

print("Data set randomization and splitting complete.")

In [None]:
# Build the gesture classifier (it is compiled and trained in a later cell).
# Each recording is fed in as a SAMPLES_PER_GESTURE x 9 single-channel image:
# one strided convolution, then two small dense layers with light dropout, then
# a softmax over the gestures.

model = tf.keras.Sequential(
    [
        tf.keras.layers.Conv2D(
            9,
            (3, 3),
            padding="same",
            strides=(3, 3),
            activation="relu",
            input_shape=(SAMPLES_PER_GESTURE, 9, 1),
        ),
        # tf.keras.layers.MaxPooling2D((2, 3), strides=(2,3)),
        # tf.keras.layers.Conv2D(25, (1,3), padding='same', strides=(1,3) ,activation="relu"),
        # tf.keras.layers.MaxPooling2D((2, 2), strides=(2,1)),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(58, activation="relu"),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(55, activation="relu"),
        tf.keras.layers.Dropout(0.1),
        # One output per gesture, matching the width of the one-hot targets.
        tf.keras.layers.Dense(NUM_GESTURES, activation="softmax"),
    ]
)
# summary() prints the table itself and returns None, so wrapping it in print()
# only appended a stray "None" to the output.
model.summary()

In [None]:
# # build the model and train it

# model = tf.keras.Sequential(
#     [
#     # tf.keras.layers.Conv1D(filters=500, kernel_size=3, padding='same', strides=3, activation="relu",input_shape=(SAMPLES_PER_GESTURE, 9)),
#     # tf.keras.layers.MaxPooling1D(pool_size=2),
#     tf.keras.layers.LSTM(units=100, input_shape=(SAMPLES_PER_GESTURE, 9), return_sequences=False),
#     # tf.keras.layers.Conv2D(25, (1,3), padding='same', strides=(1,3) ,activation="relu"),
#     # tf.keras.layers.MaxPooling2D((2, 2), strides=(2,1)),


#     # tf.keras.layers.GlobalMaxPooling1D(),
#     # tf.keras.layers.Dense(58, activation="relu"),
#     # tf.keras.layers.Dropout(0.1),
#     # tf.keras.layers.Dense(55, activation="relu"),
#     # tf.keras.layers.Dropout(0.1),    
#     tf.keras.layers.Dense(26, activation="softmax")
# ]
# )
# print(model.summary())


In [None]:
# model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
# Multi-class classification against one-hot targets: categorical
# cross-entropy loss, with accuracy reported as the metric.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Train for 10 epochs, one recording per batch; the validation set is
# evaluated at the end of every epoch and recorded in `history`.
history = model.fit(
    x=inputs_train,
    y=outputs_train,
    batch_size=1,
    epochs=10,
    validation_data=(inputs_validate, outputs_validate),
)

In [None]:
# import numpy as np
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Bidirectional, LSTM, Dense
# from tensorflow.keras.optimizers import Adam
# from tensorflow.keras.losses import SparseCategoricalCrossentropy

# # Assuming you have your input data and labels
# # Replace X_train and y_train with your actual data and labels
# X_train = np.random.rand(7911, 150, 9)
# y_train = np.random.randint(0, 26, size=(7911, 26))

# print (X_train.shape)
# print (y_train.shape)
# print (y_train[0])
# # Define the model
# model = Sequential()
# model.add(LSTM(units=50, return_sequences=True, input_shape=(150, 9)))
# model.add(Dense(units=26, activation='softmax'))

# # Compile the model
# model.compile(optimizer=Adam(), loss=SparseCategoricalCrossentropy(), metrics=['accuracy'])


# # Train the model
# batch_size = 32
# epochs = 10
# print(model.summary())
# model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.2)


In [None]:
# Set the figure size (width, height in inches) used for the graphs below.
plt.rcParams["figure.figsize"] = (5,3)

# Per-epoch loss recorded by model.fit(). The loss is the one passed to
# model.compile() above (categorical cross-entropy).
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)

# Training loss as green dots, validation loss as a blue line.
fig, ax = plt.subplots()
ax.plot(epochs, loss, 'g.', label='Training loss')
ax.plot(epochs, val_loss, 'b', label='Validation loss')
ax.set_title('Training and validation loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

print(plt.rcParams["figure.figsize"])

In [None]:
# Graph the loss again, skipping a bit of the start so the rest of the curve is
# easier to read.
# The skip is capped at half of the recorded epochs: with a fixed skip of 50
# and a shorter training run, both slices were empty and the graph was blank.
SKIP = min(50, len(loss) // 2)
plt.plot(epochs[SKIP:], loss[SKIP:], 'g.', label='Training loss')
plt.plot(epochs[SKIP:], val_loss[SKIP:], 'b.', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# graph of mean absolute error
# mae = history.history['mae']
# val_mae = history.history['val_mae']
# plt.plot(epochs[SKIP:], mae[SKIP:], 'g.', label='Training MAE')
# plt.plot(epochs[SKIP:], val_mae[SKIP:], 'b.', label='Validation MAE')
# plt.title('Training and validation mean absolute error')
# plt.xlabel('Epochs')
# plt.ylabel('MAE')
# plt.legend()
# plt.show()


In [None]:
# Quick sanity check of the test inputs before running inference on them.
test_shape = inputs_test.shape
test_type = type(inputs_test)
print("input shape", test_shape)
print(test_type)
# print (len(data1))
# inputs_test = np.concatenate((inputs_test, [np.array(data1)]), axis=0)
# print("input shape" , inputs_test.shape)

In [None]:
# t1 = pd.read_csv("processed_data/a_1.dat")
# print(t1)
# single_sample = np.array(t1.values.ravel())
# single_sample1 = (t1.melt().value.tolist())
# print(type(single_sample))
# prediction = model.predict(np.expand_dims(single_sample, axis=0))
# prediction1 = model.predict(np.expand_dims(single_sample1, axis=0))

# formatted_numbers = ["{:.2f}".format(number) for number in prediction[0]]

# print("Formatted Numbers:", formatted_numbers)
# print("Prediction:", prediction)
# prediction = model.predict([t1])
# print("predictions =\n", np.round(prediction, decimals=3))
# print(single_sample)
# print(single_sample1)

In [None]:
# t1 = pd.read_csv("processed_data/a_1.dat")
# [np.array(t1.values.ravel())]

# inputs_test = np.concatenate((inputs_test, [np.array(t1.values.ravel())]), axis=0)
# # 

In [None]:
# Run the trained model on the test recordings; each row of the result holds
# the softmax output for one recording.
predictions = model.predict(x=inputs_test)
print("pred shape", predictions.shape)
# predictions = model.predict(inputs_test[0].reshape(1,1248))
# print(predictions[0])
# print(outputs_test[0])
# print (inputs_test[0])

# pd.DataFrame(inputs_test[0]).to_csv("test1.h", sep=',', encoding='utf-8', index=False, header=False)
# !echo "const unsigned char tt[] = {" > ./tt.h
# !cat "test1.csv" | xxd -i      >> ./tt.h
# !echo "};"                              >> ./tt.h

# Show the predictions (rounded to 3 decimals) followed by the expected outputs.
print("predictions =\n", predictions.round(decimals=3))
print("actual =\n", outputs_test)

# Element-wise difference rounded to whole numbers.
a = (predictions - outputs_test).round()
print(a)
# Plot the predictions along with to the test data
# plt.clf()
# plt.title('Training data predicted vs actual values')
# plt.plot( outputs_test, 'b.', label='Actual')
# plt.plot( predictions, 'r.', label='Predicted')
# plt.show()


In [None]:
# Print the element type of the expected outputs, then of the predictions.
for array in (outputs_test, predictions):
    print(array[:, 0].dtype)

In [None]:
# Per-letter error statistics on the test set.
#
# The predicted letter is the class with the highest probability. The earlier
# approach thresholded every probability at 0.5 and subtracted the one-hot
# targets, which left rows with no prediction at all when no probability
# reached 0.5, and counted every mistake twice: once under the letter that was
# missed and once under the letter predicted instead.
num_letters = outputs_test.shape[1]
true_letter = outputs_test.argmax(axis=1)
predicted_letter = predictions.argmax(axis=1)

# One-hot form of the predictions (same shape as outputs_test) and its
# difference from the targets, kept under their existing names.
pre = np.eye(num_letters, dtype=int)[predicted_letter]
a = pre - outputs_test

# Number of test recordings per letter.
tests_by_letter = np.bincount(true_letter, minlength=num_letters)
print("tests by letter: ", tests_by_letter)

# Number of test recordings of each letter that were classified as another letter.
is_wrong = true_letter != predicted_letter
wrong_predict_by_letter = np.bincount(true_letter[is_wrong], minlength=num_letters)
print("wrong predict by letter: ", wrong_predict_by_letter)

# Letters without any test recording get NaN rather than a divide-by-zero warning.
wrong_rate = np.divide(
    wrong_predict_by_letter,
    tests_by_letter,
    out=np.full(num_letters, np.nan),
    where=tests_by_letter > 0,
)
print("wrong rate: ", np.round(wrong_rate, decimals=2))

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
# accuracy = accuracy_score(outputs_test, predictions)
# precision = precision_score(outputs_test, predictions)
# recall = recall_score(outputs_test, predictions)
# f1 = f1_score(outputs_test, predictions)
# conf_matrix = confusion_matrix(outputs_test, predictions)

# print(f"Accuracy: {accuracy}")
# print(f"Precision: {precision}")
# print(f"Recall: {recall}")
# print(f"F1-Score: {f1}")
# print(f"Confusion Matrix:\n{conf_matrix}")
# Initialize an array to store accuracy for each class

# class_accuracies = []

# # Iterate over each class (assuming axis 1 represents classes)
# for class_index in range(26):
#     true_labels_class = outputs_test[:, class_index]
#     predicted_labels_class = pre[:, class_index]
#     accuracy = accuracy_score(true_labels_class, predicted_labels_class)
#     class_accuracies.append(accuracy)

# # Calculate micro-average accuracy (overall accuracy)
# micro_average_accuracy = accuracy_score(outputs_test, predictions)

# # Calculate macro-average accuracy (average accuracy across classes)
# macro_average_accuracy = sum(class_accuracies) / len(class_accuracies)

# # Print individual class accuracies and the macro/micro averages
# for class_index, accuracy in enumerate(class_accuracies):
#     print(f"Class {class_index}: Accuracy = {accuracy}")

# print(f"Micro-average accuracy: {micro_average_accuracy}")
# print(f"Macro-average accuracy: {macro_average_accuracy}")
# In the code above:

# We iterate over each class, treating it as a binary classification problem by selecting the true labels and predicted labels for that class.
# We calculate the accuracy for each class separately and store it in the class_accuracies list.
# We compute the micro-average accuracy, which is the overall accuracy across all samples and classes.
# We compute the macro-average accuracy, which is the average accuracy across all classes.
# This approach allows you to evaluate the performance of your multi-class classification model for each individual class and provides overall accuracy metrics as well.







In [None]:
# import matplotlib.pyplot as plt

# plt.scatter(outputs_test, predictions)
# plt.xlabel("True Values")
# plt.ylabel("Predictions")
# plt.show()

from sklearn.metrics import r2_score


# R^2 score of the predicted probabilities against the one-hot targets.
r2 = r2_score(y_true=outputs_test, y_pred=predictions)
print("r2=", r2)

In [None]:
# Display the expected one-hot output for the first test recording.
outputs_test[0]

In [None]:
# Display the first prediction with 0.5 added to every entry and the result
# truncated to int, for comparison with the expected output.
(predictions[0]+0.5).astype(int)

In [None]:
# tf.saved_model.save(model, 'my_saved_model')

In [None]:
# Convert the model to the TensorFlow Lite format without quantization
import os

converter = tf.lite.TFLiteConverter.from_keras_model(model)
# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
tflite_model = converter.convert()

# Save the model to disk. The context manager closes (and therefore flushes)
# the file before its size is read below; the bare open(...).write(...) left
# the handle open.
with open("gesture_model.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_model)

basic_model_size = os.path.getsize("gesture_model.tflite")
print("Model is %d bytes" % basic_model_size)
  
  

In [None]:
# Create a TensorFlow Lite interpreter for the converted model
# interpreter = tf.lite.Interpreter(model_content=tflite_model)
# interpreter.allocate_tensors()

# # Get the details of the TensorFlow Lite model
# ops_details = interpreter.get_tensor_details()

# # Print the details of each operator in the TensorFlow Lite model
# for op in ops_details:
#     print("Operator Name:", op['name'])
#     print("Operator Index:", op['index'])
#     print("Operator Shape:", op['shape'])
#     print("Operator Type:", op['dtype'])
#     print()

In [None]:
!echo "const unsigned char model[] = {" > ./content/model.h
!cat gesture_model.tflite | xxd -i      >> ./content/model.h
!echo "};"                              >> ./content/model.h

import os
model_h_size = os.path.getsize("./content/model.h")
print(f"Header file, model.h, is {model_h_size:,} bytes.")

In [None]:
from sklearn.metrics import f1_score

# Assuming 'y_true' contains the true labels and 'y_pred' contains the predicted labels
# f1 = f1_score(outputs_test, pre)

In [None]:


# model.compile(optimizer='adam',
#               loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
#               metrics=['accuracy'])