# F. Real-time Prediction with Text-To-Speech Translation

1. Adjusts the logging level for TensorFlow to suppress warnings and control the verbosity of log messages generated by TensorFlow

In [1]:
import os

# Set the TensorFlow C++ logging level so that INFO and WARNING messages are hidden.
# This must run before TensorFlow is imported for the setting to take effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# '0' (default) shows all logs,
# '1' filters out INFO logs,
# '2' additionally filters out WARNING logs (errors are still shown),
# '3' additionally filters out ERROR logs

2. Import libraries/modules

In [2]:
# serial module for serial communication with arduino mega connected to the computer's serial ports
import serial

# pandas library for data manipulation and analysis
import pandas as pd

# numpy library for numerical computations and array operations
import numpy as np

#time module for various time-related functions eg: adding delays or timestamping 
import time

# loads a previously saved model from a file
from tensorflow.keras.models import load_model

# Google Text-to-Speech Python library and CLI tool that interfaces with Google Text-to-Speech API, easily convert text to speech
from gtts import gTTS

# provides a way to display audio files, among other types of media, directly within an IPython environment
from IPython.display import Audio, display

3. Load the saved model from data training section

In [3]:
# Location of the trained Keras model (.h5) produced by the data training section.
# The path can be overridden without editing the notebook by setting the
# GESTURE_MODEL_PATH environment variable; when it is not set, the original
# location is used, so existing setups keep working unchanged.
saved_model_path = os.environ.get(
    'GESTURE_MODEL_PATH',
    '/Users/sunilamaharjan/Desktop/London Metropolitan University/Summer Semester/mscProject/codes/Dataset/Number/sensor_data_training_model_withoutFs_3axis.h5',
)
loaded_model = load_model(saved_model_path)

Metal device set to: Apple M2


4. Set up the initial configuration and data structures for collecting and processing sensor data

In [4]:
# How long (in seconds) each gesture is recorded for
gesture_duration = 5

# How many gestures are recorded and predicted in one run
num_of_gesture = 5

# Device node of the Arduino Mega; list candidates with "ls /dev/tty.*" in a terminal
serial_port = '/dev/tty.usbmodem101'

# Transfer speed of the serial link, in baud
baud_rate = 9600

# Open the serial connection to the glove using the settings above
ser = serial.Serial(port=serial_port, baudrate=baud_rate)

# Ordinal words used in the spoken/printed prompts, keyed by the 1-based gesture number
loop_list = dict(enumerate(
    ["First", "Second", "Third", "Fourth", "Fifth", "Sixth", "Seventh", "Eighth", "Ninth"],
    start=1,
))

# Human-readable label for each predicted class index (digits 0 through 9)
class_mapping = {digit: f"Gesture for Number {digit}" for digit in range(10)}


5. Function for data collection from smart glove

In [5]:
def collect_gesture_data(duration_seconds, port=None, read_interval=0.1, num_fields=12):
    """Collect glove sensor readings from a serial port for a fixed time window.

    Parameters
    ----------
    duration_seconds : float
        Length of the recording window in seconds.
    port : object, optional
        Serial-like object providing ``reset_input_buffer()`` and ``readline()``.
        Defaults to the module-level ``ser`` connection.
    read_interval : float, optional
        Pause in seconds after each accepted reading (default 0.1, as before).
    num_fields : int, optional
        Number of comma-separated values kept from each line (default 12:
        the flex sensor and 3-axis gyroscope/accelerometer readings).

    Returns
    -------
    list[list[float]]
        One list of ``num_fields`` floats per accepted line. Empty fields are
        stored as 0.0. Lines that are too short, not decodable, or contain
        non-numeric values are skipped, so every returned row has the same length.
    """
    if port is None:
        port = ser  # serial connection opened in the configuration cell

    # Bytes received before this call (e.g. while a prompt was playing) are still
    # queued in the input buffer and would otherwise be read as if they belonged
    # to this gesture. Drop them, then discard the first line because the flush
    # may have cut it in half.
    port.reset_input_buffer()
    port.readline()

    sensor_data = []  # accepted readings, one list of floats per line
    start_time = time.monotonic()  # monotonic clock: unaffected by system clock changes

    while (time.monotonic() - start_time) < duration_seconds:
        raw_line = port.readline()
        try:
            # Keep only the first num_fields comma-separated values of the line
            fields = raw_line.decode().strip().split(',')[:num_fields]
            if len(fields) < num_fields:
                continue  # truncated or empty line (e.g. a read timeout)
            reading = [float(val) if val != '' else 0.0 for val in fields]
        except (UnicodeDecodeError, ValueError):
            continue  # line noise or non-numeric text: ignore this line

        sensor_data.append(reading)
        time.sleep(read_interval)  # small delay between consecutive readings

    return sensor_data

6. Performing gesture prediction in real time with text to voice output

In [6]:
# Column layout of one glove reading, in the order the values arrive over serial
SENSOR_COLUMNS = ['Thumb', 'IndexFinger', 'MiddleFinger', 'RingFinger', 'LittleFinger',
                  'accelerometer_x', 'accelerometer_y', 'accelerometer_z',
                  'temperature', 'gyro_x', 'gyro_y', 'gyro_z']

# Columns kept as model input; every other column is dropped before prediction
FEATURE_COLUMNS = ['Thumb', 'IndexFinger', 'MiddleFinger', 'RingFinger', 'LittleFinger']


def _announce(text, filename="msg.mp3"):
    """Print `text`, convert it to speech with gTTS, save it to `filename` and autoplay it."""
    print(text)
    gTTS(text).save(filename)
    display(Audio(filename, autoplay=True))


def _predict_gesture(gesture_rows):
    """Classify every reading and return (per-reading labels, majority gesture text).

    Rows that do not have exactly one value per entry of SENSOR_COLUMNS are ignored.
    Returns (None, None) when no usable reading is left, so the caller can report
    the problem instead of failing inside pandas or the model.
    """
    complete_rows = [row for row in gesture_rows if len(row) == len(SENSOR_COLUMNS)]
    if not complete_rows:
        return None, None

    df_gesture_data = pd.DataFrame(complete_rows, columns=SENSOR_COLUMNS)

    # Keep only the model input columns and reshape to (readings, 1, features)
    X_gesture = df_gesture_data[FEATURE_COLUMNS]
    X_gesture_reshaped = X_gesture.to_numpy(dtype=np.float32).reshape(
        len(X_gesture), 1, len(FEATURE_COLUMNS)
    )

    # One prediction per reading; the class with the highest score is its label
    predictions = loaded_model.predict(X_gesture_reshaped)
    predicted_labels = np.argmax(predictions, axis=1)

    # Majority vote: the label that occurs most often over the whole window wins
    unique_labels, label_counts = np.unique(predicted_labels, return_counts=True)
    winning_label = int(unique_labels[np.argmax(label_counts)])
    return predicted_labels, class_mapping[winning_label]


# Ask the user to get ready
_announce("Sign recognition has been started. Please make gesture!")
time.sleep(gesture_duration)

# Perform num_of_gesture different gestures
for gesture_number in range(1, num_of_gesture + 1):
    # Ordinal word for the prompts; fall back to the number itself when
    # num_of_gesture exceeds the entries available in loop_list
    ordinal = loop_list.get(gesture_number, f"Number {gesture_number}")

    # Prompt for the gesture, then wait for the audio to finish playing
    _announce(f"Please perform {ordinal} Gesture")
    time.sleep(gesture_duration)

    # Collect data for gesture_duration seconds per gesture
    gesture_data = collect_gesture_data(duration_seconds=gesture_duration)

    predicted_labels, most_common_value = _predict_gesture(gesture_data)
    if most_common_value is None:
        # Nothing usable was read from the glove during the window
        _announce(f"No sensor data was received for {ordinal} Gesture", "predicted_gesture.mp3")
    else:
        print(predicted_labels)
        _announce(
            f"Predicted Sign Language for {ordinal} Gesture is {most_common_value}",
            "predicted_gesture.mp3",
        )

    # Wait for the audio to finish playing
    time.sleep(gesture_duration)


# Play completion message
_announce("Gesture Recognition completed.")

Sign recognition has been started. Please make gesture!


Please perform First Gesture


[0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0]
Predicted Sign Language for First Gesture is Gesture for Number 0


Please perform Second Gesture


[0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
Predicted Sign Language for Second Gesture is Gesture for Number 1


Please perform Third Gesture


[1 4 3 2 2 2 2 3 3 3 3 3 3 3 3 2 2]
Predicted Sign Language for Third Gesture is Gesture for Number 3


Please perform Fourth Gesture


[2 4 4 3 3 3 5 3 3 3 3 3 3 3 3 3]
Predicted Sign Language for Fourth Gesture is Gesture for Number 3


Please perform Fifth Gesture


[3 3 3 7 3 3 3 3 4 4 7 7 7 7 7 7 7]
Predicted Sign Language for Fifth Gesture is Gesture for Number 7


Gesture Recognition completed.
