# Loading the Data

In [14]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Set the path to your dataset directory
DATA_PATH = 'Basic_Words'
actions = np.array(['Alright', 'Hello', 'Indian', 'Namaste', 'Sign'])  # update with your actual folder names
no_of_frames = 30  # as given

# Prepare lists for data and labels
sequences, labels = [], []

# Loop through each action folder
for action_idx, action in enumerate(actions):
    action_path = os.path.join(DATA_PATH, action)
    # Loop through each video folder inside the action folder
    for video in os.listdir(action_path):
        video_path = os.path.join(action_path, video)
        if not os.path.isdir(video_path):
            continue
        sequence = []
        # Sort file names to ensure frames are in order
        frame_files = sorted(os.listdir(video_path))
        for frame_num, frame_file in enumerate(frame_files):
            if frame_file.endswith('.npy'):
                frame_path = os.path.join(video_path, frame_file)
                frame = np.load(frame_path)
                sequence.append(frame)
        # Ensure we have the desired number of frames
        if len(sequence) == no_of_frames:
            sequences.append(sequence)
            labels.append(action_idx)

# Convert to numpy arrays
X = np.array(sequences)  # Shape: (num_samples, 30, 1662)
y = to_categorical(labels, num_classes=len(actions))

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

print("Training data shape:", X_train.shape)
print("Test data shape:", X_test.shape)

Training data shape: (450, 30, 1662)
Test data shape: (50, 30, 1662)


# Transformer Model Architecture

In [17]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, LayerNormalization, Dropout, GlobalAveragePooling1D
from tensorflow.keras.models import Model

def positional_encoding(sequence_length, d_model):
    # Create a positional encoding as a learnable parameter or use sinusoidal formulas.
    # Here we create a simple learnable positional embedding.
    pos_embedding = tf.Variable(tf.random.normal([sequence_length, d_model]), trainable=True)
    return pos_embedding

def transformer_block(inputs, head_size, num_heads, ff_dim, dropout=0.1):
    # Multi-head attention layer
    attn_output = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=head_size)(inputs, inputs)
    attn_output = Dropout(dropout)(attn_output)
    out1 = LayerNormalization(epsilon=1e-6)(inputs + attn_output)
    
    # Feed-forward layer
    ffn_output = Dense(ff_dim, activation='relu')(out1)
    ffn_output = Dense(inputs.shape[-1])(ffn_output)
    ffn_output = Dropout(dropout)(ffn_output)
    out2 = LayerNormalization(epsilon=1e-6)(out1 + ffn_output)
    return out2

def build_transformer_model(sequence_length=30, feature_dim=1662, projection_dim=128,
                            head_size=32, num_heads=4, ff_dim=128, num_transformer_blocks=2,
                            dropout=0.1, num_classes=5):
    # Input layer
    inputs = Input(shape=(sequence_length, feature_dim))
    
    # Project to a lower dimension for computational efficiency
    x = Dense(projection_dim)(inputs)
    
    # Add positional encoding
    pos_encoding = positional_encoding(sequence_length, projection_dim)
    x = x + pos_encoding  # broadcasting over the batch dimension
    
    # Add transformer blocks
    for _ in range(num_transformer_blocks):
        x = transformer_block(x, head_size, num_heads, ff_dim, dropout)
    
    # Global pooling over time axis
    x = GlobalAveragePooling1D()(x)
    
    # Classification dense layers
    x = Dense(64, activation='relu')(x)
    x = Dropout(dropout)(x)
    x = Dense(32, activation='relu')(x)
    outputs = Dense(num_classes, activation='softmax')(x)
    
    model = Model(inputs, outputs)
    return model

# Build and compile the model
model = build_transformer_model(sequence_length=no_of_frames, feature_dim=1662,
                                projection_dim=128, head_size=32, num_heads=4, ff_dim=128,
                                num_transformer_blocks=2, dropout=0.1, num_classes=len(actions))
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()


# Model Training

In [20]:
# Training parameters
epochs = 30
batch_size = 8  # Keep it small to reduce memory usage

# Train the model
history = model.fit(X_train, y_train, 
                    validation_data=(X_test, y_test),
                    epochs=epochs,
                    batch_size=batch_size)

# Save your model if training goes well
model.save('isl_transformer_model.h5')


Epoch 1/30
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 24ms/step - accuracy: 0.2297 - loss: 1.6287 - val_accuracy: 0.4400 - val_loss: 1.5534
Epoch 2/30
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.2644 - loss: 1.5437 - val_accuracy: 0.3600 - val_loss: 1.4597
Epoch 3/30
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.4493 - loss: 1.3791 - val_accuracy: 0.5000 - val_loss: 1.2705
Epoch 4/30
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.5472 - loss: 1.1751 - val_accuracy: 0.4800 - val_loss: 1.1059
Epoch 5/30
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.5169 - loss: 1.0822 - val_accuracy: 0.5800 - val_loss: 0.9529
Epoch 6/30
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.6066 - loss: 0.9046 - val_accuracy: 0.6800 - val_loss: 0.8262
Epoch 7/30
[1m57/57[0m [32m━━━━



# Real-Time Testing / Inference

In [25]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization, MultiHeadAttention
from tensorflow.keras.models import Model
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import random

In [27]:
import cv2
import mediapipe as mp

# Load the trained model
model = tf.keras.models.load_model('isl_transformer_model.h5')

# Initialize MediaPipe Hands or Pose (depending on your keypoint extraction)
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.5)

# A helper function to extract keypoints from a frame
def extract_keypoints(frame):
    # Convert the frame to RGB
    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(image)
    keypoints = np.zeros(1662)  # Adjust based on your MediaPipe extraction details
    if results.multi_hand_landmarks:
        # Flatten landmarks (example: this is illustrative; adjust based on actual extraction)
        keypoints = np.array([[lm.x, lm.y, lm.z] for hand_landmarks in results.multi_hand_landmarks for lm in hand_landmarks.landmark]).flatten()
    return keypoints

# Variables for real-time processing
sequence = []  # to store 30 frames
predicted_text = ''

cap = cv2.VideoCapture(0)  # open webcam
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Extract keypoints
    keypoints = extract_keypoints(frame)
    # Resize or pad keypoints if necessary to ensure length = 1662
    if keypoints.shape[0] != 1662:
        # For instance, pad with zeros
        keypoints = np.pad(keypoints, (0, 1662 - keypoints.shape[0]), 'constant')
    
    # Append the keypoints frame to our sequence
    sequence.append(keypoints)
    
    # Ensure we only keep the last 30 frames
    if len(sequence) > no_of_frames:
        sequence = sequence[-no_of_frames:]
    
    # When we have a full sequence, make a prediction
    if len(sequence) == no_of_frames:
        # Expand dims to match model input shape: (1, 30, 1662)
        input_sequence = np.expand_dims(sequence, axis=0)
        prediction = model.predict(input_sequence)
        predicted_class = np.argmax(prediction)
        predicted_text = actions[predicted_class]
    
    # Display the prediction on the frame
    cv2.putText(frame, f'Prediction: {predicted_text}', (10, 30), 
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2, cv2.LINE_AA)
    cv2.imshow('ISL Real-time Prediction', frame)
    
    # Break loop with 'q' key
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

TypeError: too many positional arguments

# testing 

In [47]:
model.load_weights('isl_transformer_model.h5')

In [49]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [51]:
yhat = model.predict(X_test)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step


In [53]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [55]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[34,  1],
        [ 0, 15]],

       [[43,  0],
        [ 0,  7]],

       [[47,  0],
        [ 0,  3]],

       [[38,  0],
        [ 1, 11]],

       [[36,  1],
        [ 1, 12]]], dtype=int64)

In [57]:
accuracy_score(ytrue, yhat)

0.96

# Testing in real time

## Mediapipe detection

In [67]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert from BGR to RGB

    image.flags.writeable = False  # Mark image as not writeable for performance
    results = model.process(image)  # Make predictions
    image.flags.writeable = True  # Mark image as writeable again
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # Convert back to BGR
    return image, results

In [73]:
def draw_styled_landmarks(image,
                          results):  # this function is just for funsies and change the color of the lines and dots. There is no compulson to use this function
    if results.face_landmarks:
        mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
                                  mp_drawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1),
                                  mp_drawing.DrawingSpec(color=(80, 256, 121), thickness=1,
                                                         circle_radius=1))  # Draw face mesh
    if results.pose_landmarks:
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(80, 22, 10), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(80, 44, 121), thickness=2,
                                                         circle_radius=2))  # Draw pose connections
    if results.left_hand_landmarks:
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2,
                                                         circle_radius=2))  # Draw left hand connections
    if results.right_hand_landmarks:
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2,
                                                         circle_radius=2))  # Draw right hand connections

In [77]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in
                     results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33 * 4)
    face = np.array([[res.x, res.y, res.z] for res in
                     results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468 * 3)
    lh = np.array([[res.x, res.y, res.z] for res in
                   results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(
        21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in
                   results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(
        21 * 3)
    return np.concatenate([pose, face, lh, rh])

In [60]:
from scipy import stats

In [62]:
plt.figure(figsize=(18,18))
plt.imshow(prob_viz(res, actions, image, colors))

NameError: name 'prob_viz' is not defined

<Figure size 1800x1800 with 0 Axes>

In [86]:
# 1. New detection variables
sequence = []
sentence = []
predictions = []
threshold = 0.5

# Assume these are predefined
actions = ['Alright', 'Hello', 'Indian', 'Namaste', 'Sign']  # Example actions
colors = [(245, 117, 16)] * len(actions)  # Create enough colors for each action

# Load pre-trained model (assumed as 'model')
# model = ...

# Set up Mediapipe Holistic model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

cap = cv2.VideoCapture(0)
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        #print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-30:]
        
        if len(sequence) == 30:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])
            predictions.append(np.argmax(res))
            
            
        #3. Viz logic
            if np.unique(predictions[-10:])[0]==np.argmax(res): 
                if res[np.argmax(res)] > threshold: 
                    
                    if len(sentence) > 0: 
                        if actions[np.argmax(res)] != sentence[-1]:
                            sentence.append(actions[np.argmax(res)])
                    else:
                        sentence.append(actions[np.argmax(res)])

            if len(sentence) > 5: 
                sentence = sentence[-5:]
 
            # Viz probabilities
            #image = prob_viz(res, actions, image, colors)
            
        # Display sentence on screen
        cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3, 30), 
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Indian
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Indian
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Indian
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Indian
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Namaste
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Namaste
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Namaste
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Namaste
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Namaste
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Namaste
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Namaste
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Namaste
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

# Translation

In [89]:
pip install googletrans==4.0.0-rc1

Defaulting to user installation because normal site-packages is not writeable
Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl.metadata (25 kB)
Collecting hstspreload (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading hstspreload-2025.1.1-py3-none-any.whl.metadata (2.1 kB)
Collecting chardet==3.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading chardet-3.0.4-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting idna==2.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading idna-2.10-py2.py3-none-any.whl.metadata (9.1 kB)
Collecting rfc3986<2,>=1.3 (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading rfc3986-1.5.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting httpcore==0.9.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Down



In [91]:
from googletrans import Translator

def translate_text(text, target_lang):
    translator = Translator()
    translated = translator.translate(text, dest=target_lang)
    return translated.text

# Example usage
generated_text = "Hello, how are you?"

# Convert to Gujarati
gujarati_text = translate_text(generated_text, 'gu')
print("Gujarati:", gujarati_text)

# Convert to Hindi
hindi_text = translate_text(generated_text, 'hi')
print("Hindi:", hindi_text)

Gujarati: હેલો, તમે કેમ છો?
Hindi: नमस्ते, आप कैसे हैं?


# updated real time testing code

In [98]:
pip install opencv-python numpy mediapipe googletrans==4.0.0-rc1 pillow

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


## Load Fonts in Your Python Script

In [103]:
from PIL import ImageFont, ImageDraw, Image
import cv2
import numpy as np

# Load fonts for Gujarati & Hindi
gujarati_font = ImageFont.truetype("NotoSansGujarati-Regular.ttf", 28)
hindi_font = ImageFont.truetype("NotoSansDevanagari-Regular.ttf", 28)

# Create a blank image
image = np.zeros((200, 600, 3), dtype=np.uint8)  # Black background
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB for PIL

# Convert OpenCV image to PIL image
pil_img = Image.fromarray(image)
draw = ImageDraw.Draw(pil_img)

# Text to Display
english_text = "Hello"
gujarati_text = "હેલો"
hindi_text = "नमस्ते"

# Draw Text on Image
draw.text((50, 50), "EN: " + english_text, (255, 255, 255), font=gujarati_font)
draw.text((50, 100), "GU: " + gujarati_text, (255, 255, 255), font=gujarati_font)
draw.text((50, 150), "HI: " + hindi_text, (255, 255, 255), font=hindi_font)

# Convert back to OpenCV image
image = np.array(pil_img)
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

# Show the image
cv2.imshow("Text Rendering Test", image)
cv2.waitKey(0)
cv2.destroyAllWindows()

## Integrate This in Your Video Processing Code

In [None]:
import cv2
import numpy as np
import mediapipe as mp
from googletrans import Translator
import threading
from PIL import ImageFont, ImageDraw, Image

# Initialize Google Translator
translator = Translator()

# Load fonts for Gujarati & Hindi
gujarati_font = ImageFont.truetype("NotoSansGujarati-Regular.ttf", 28)
hindi_font = ImageFont.truetype("NotoSansDevanagari-Regular.ttf", 28)

# New detection variables
sequence = []
sentence = []
predictions = []
threshold = 0.5

# Predefined sign language actions
actions = ['Alright', 'Hello', 'Indian', 'Namaste', 'Sign']  
colors = [(245, 117, 16)] * len(actions)  

# Load pre-trained model
# model = ...

# Set up Mediapipe Holistic model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Global translation variables
translated_gujarati = ""
translated_hindi = ""

# Function to translate text in a separate thread
def translate_text(text):
    global translated_gujarati, translated_hindi
    translated_gujarati = translator.translate(text, dest='gu').text
    translated_hindi = translator.translate(text, dest='hi').text

# OpenCV Webcam Capture
cap = cv2.VideoCapture(0)
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        # Prediction logic
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-30:]
        
        if len(sequence) == 30:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            detected_text = actions[np.argmax(res)]
            print("Detected:", detected_text)
            
            predictions.append(np.argmax(res))
            
            # Ensure stable predictions
            if np.unique(predictions[-10:])[0] == np.argmax(res): 
                if res[np.argmax(res)] > threshold: 
                    
                    if len(sentence) > 0: 
                        if detected_text != sentence[-1]:
                            sentence.append(detected_text)
                            threading.Thread(target=translate_text, args=(' '.join(sentence),)).start()
                    else:
                        sentence.append(detected_text)
                        threading.Thread(target=translate_text, args=(' '.join(sentence),)).start()

            if len(sentence) > 5: 
                sentence = sentence[-5:]

        # **Display Sentence on Screen using PIL for Gujarati & Hindi Support**
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB for PIL
        pil_img = Image.fromarray(image)
        draw = ImageDraw.Draw(pil_img)
        
        # Draw text
        draw.text((10, 30), "EN: " + ' '.join(sentence), (0, 0, 0), font=gujarati_font)
        draw.text((10, 70), "GU: " + translated_gujarati, (0, 0, 0), font=gujarati_font)
        draw.text((10, 110), "HI: " + translated_hindi, (0, 0, 0), font=hindi_font)
        
        # Convert back to OpenCV image
        image = np.array(pil_img)
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        # Show to screen
        cv2.imshow('Real-Time Sign Language Translation', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Detected: Hello
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Detected: Hello
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Detected: Hello
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Detected: Hello
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Detected: Hello
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Detected: Hello
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Detected: Hello
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Detected: Hello
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Detected: Hello
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Detected: Hello
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Detected: Hello
[1m1/1[0m [32m━━━━━━━━━━━━━━━