In [1]:
pip install mediapipe opencv-python tensorflow


Note: you may need to restart the kernel to use updated packages.


In [13]:
import mediapipe as mp
import cv2
import os
import csv

# Mediapipe hands module setup
# MediaPipe hands module setup with advanced configurations
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=False,  # Set to False for video processing
    max_num_hands=2,
    model_complexity=1,  # 0=Faster/less accurate, 1=Balanced, 2=More accurate/slower
    min_detection_confidence=0.7,  # Minimum confidence for hand detection
    min_tracking_confidence=0.5,   # Minimum confidence for hand tracking
)
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Customize drawing specifications
drawing_spec = mp_drawing.DrawingSpec(
    color=(0, 255, 0),  # Green color for landmarks
    thickness=2,
    circle_radius=2
)

connection_spec = mp_drawing.DrawingSpec(
    color=(255, 0, 0),  # Red color for connections
    thickness=2
)


# Path to your dataset
data_path = "./original_images"

# CSV file to save the landmarks
csv_file = "C:/Users/rs222/OneDrive/Desktop/Major_Project/nikki_version/colored_images/features.csv"
# Create or open the CSV file for writing landmarks
with open(csv_file, mode='w', newline='') as f:
    csv_writer = csv.writer(f)
    
    # Write the header (21 landmarks x 2 coordinates + label)
    header = [f'x{i}' for i in range(21)] + [f'y{i}' for i in range(21)] + [f'z{i}' for i in range(21)]
    header.append('label')
    csv_writer.writerow(header)

    # Loop through each subfolder (0-9, A-Z)
    for label in os.listdir(data_path):
        folder_path = os.path.join(data_path, label)
        for img_file in os.listdir(folder_path):
            img_path = os.path.join(folder_path, img_file)
            image = cv2.imread(img_path)
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            
            # Process the image to get hand landmarks
            result = hands.process(image_rgb)

            if result.multi_hand_landmarks:
                for hand_landmarks in result.multi_hand_landmarks:
                    # Extract the 21 hand landmark points
                    landmarks = []
                    for landmark in hand_landmarks.landmark:
                        landmarks.append(landmark.x)
                        landmarks.append(landmark.y)
                        landmarks.append(landmark.z)
                    
                    # Add label to the landmarks
                    landmarks.append(label)
                    
                    # Write the landmarks and label to CSV
                    csv_writer.writerow(landmarks)

print("Landmark extraction completed and saved to CSV.")


Landmark extraction completed and saved to CSV.


In [4]:
pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.5.2-cp39-cp39-win_amd64.whl.metadata (13 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.5.2-cp39-cp39-win_amd64.whl (11.0 MB)
   ---------------------------------------- 0.0/11.0 MB ? eta -:--:--
    --------------------------------------- 0.3/11.0 MB ? eta -:--:--
   -- ------------------------------------- 0.8/11.0 MB 2.2 MB/s eta 0:00:05
   --- ------------------------------------ 1.0/11.0 MB 2.2 MB/s eta 0:00:05
   ----- ---------------------------------- 1.6/11.0 MB 2.1 MB/s eta 0:00:05
   ------- -------------------------------- 2.1/11.0 MB 2.1 MB/s eta 0:00:05
   -------- ------------------------------- 2.4/11.0 MB 2.0 MB/s eta 0:00:05
   --------- ------------------------------ 2.6/11.0 MB 1.9 MB/s eta 0:00:05
   ---


[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [18]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

# Load the landmarks dataset from CSV
# Load the landmarks dataset from CSV with explicit settings
data = pd.read_csv(
    "C:/Users/rs222/OneDrive/Desktop/Major_Project/nikki_version/colored_images/features.csv",
    low_memory=False,  # Prevents dtype warnings
    dtype={
        # Specify all feature columns as float
        **{f'x{i}': np.float32 for i in range(21)},
        **{f'y{i}': np.float32 for i in range(21)},
        **{f'z{i}': np.float32 for i in range(21)},
        # Specify label column as string/object
        'label': str
    }
)
# Separate features and labels
X = data.iloc[:, :-1].values  # All columns except the last (features)
y = data.iloc[:, -1].values   # Last column (label)

# Convert labels to one-hot encoding
label_binarizer = LabelBinarizer()
y = label_binarizer.fit_transform(y)
print(y)

[[1 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 1]]


In [19]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

# Load the landmarks dataset from CSV
# Load the landmarks dataset from CSV with explicit settings
data = pd.read_csv(
    "C:/Users/rs222/OneDrive/Desktop/Major_Project/nikki_version/colored_images/features.csv",
    low_memory=False,  # Prevents dtype warnings
    dtype={
        # Specify all feature columns as float
        **{f'x{i}': np.float32 for i in range(21)},
        **{f'y{i}': np.float32 for i in range(21)},
        **{f'z{i}': np.float32 for i in range(21)},
        # Specify label column as string/object
        'label': str
    }
)
# Separate features and labels
X = data.iloc[:, :-1].values  # All columns except the last (features)
y = data.iloc[:, -1].values   # Last column (label)

# Convert labels to one-hot encoding
label_binarizer = LabelBinarizer()
y = label_binarizer.fit_transform(y)
print(y)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=500)

# Create the CNN model (or a simpler dense model)
model = tf.keras.Sequential([
    tf.keras.layers.Dense(256, activation='relu', input_shape=(63,)),  # 21 landmarks * 2 coordinates (x, y)
    tf.keras.layers.Dense(128, activation='swish'),
    tf.keras.layers.Dense(36, activation='softmax')  # 36 classes (0-9 and A-Z)
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20)

# Save the trained model
model.save('sign_language_model.h5')




[[1 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 1]]
Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1103/1103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.3876 - loss: 2.1063 - val_accuracy: 0.7469 - val_loss: 0.7498
Epoch 2/20
[1m1103/1103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7736 - loss: 0.6876 - val_accuracy: 0.8361 - val_loss: 0.4979
Epoch 3/20
[1m1103/1103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8323 - loss: 0.4958 - val_accuracy: 0.8438 - val_loss: 0.4378
Epoch 4/20
[1m1103/1103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8666 - loss: 0.3942 - val_accuracy: 0.8677 - val_loss: 0.3515
Epoch 5/20
[1m1103/1103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8831 - loss: 0.3345 - val_accuracy: 0.8944 - val_loss: 0.2959
Epoch 6/20
[1m1103/1103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8999 - loss: 0.2940 - val_accuracy: 0.8856 - val_loss: 0.3010
Epoch 7/20
[1m1103/1103[0



In [14]:
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf

# Load Mediapipe Hands and the trained model
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.7)
mp_drawing = mp.solutions.drawing_utils

model = tf.keras.models.load_model('sign_language_model.h5')

# Label map (0-9, A-Z)
labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')

# Webcam input
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        continue

    # Flip the frame horizontally for a later selfie-view display
    frame = cv2.flip(frame, 1)
    image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Process the frame with Mediapipe
    result = hands.process(image_rgb)

    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            # Extract the landmarks
            landmarks = []
            for landmark in hand_landmarks.landmark:
                landmarks.append(landmark.x)
                landmarks.append(landmark.y)

            # Convert the landmarks to numpy array and predict
            landmarks = np.array(landmarks).flatten().reshape(1, -1)
            prediction = model.predict(landmarks)
            class_id = np.argmax(prediction)
            predicted_label = labels[class_id]

            # Display the predicted label on the screen
            cv2.putText(frame, predicted_label, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            # Draw the hand landmarks on the frame
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

    cv2.imshow('Sign Language Detection', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break





: 

In [1]:
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
from collections import deque, Counter

# Load Mediapipe Hands and the trained model
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.7)
mp_drawing = mp.solutions.drawing_utils

model = tf.keras.models.load_model('sign_language_model.h5')

# Label map (0-9, A-Z)
labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')

# Webcam input
cap = cv2.VideoCapture(0)

# Buffers for debouncing and word formation
letter_buffer = deque(maxlen=15)  # Buffer to store letters for word formation
prediction_buffer = deque(maxlen=10)  # Buffer to smooth predictions over time
word = ''  # To hold the current word being formed
sentence = ''  # Sentence starts as empty and gets dynamically built
last_letter = ''  # To store the last letter to avoid duplicates
cooldown_counter = 0  # Counter for cooldown to avoid rapid repeated inputs
no_gesture_counter = 0  # Counter to detect no gestures, which can trigger word separation
stabilization_counter = 0  # Counter to ensure a stable prediction before accepting it

def process_gesture_input(predicted_label):
    global word, sentence, last_letter, cooldown_counter, no_gesture_counter, stabilization_counter

    # Check if the letter has changed and add a cooldown to prevent fast repetitions
    if predicted_label != last_letter and cooldown_counter == 0:
        if predicted_label == 'SPACE':  # Assuming you have a gesture for space
            sentence += word + ' '  # Append the current word with space to the sentence
            word = ''  # Clear the word after adding it to the sentence
        elif predicted_label == 'DELETE':  # Assuming you have a gesture for delete/backspace
            word = word[:-1]  # Remove the last character from the current word
        elif predicted_label == 'END':  # Assuming a gesture for ending a sentence
            sentence += word + '. '  # Add a period at the end of the sentence
            word = ''  # Reset the word buffer after completing the sentence
        else:
            word += predicted_label  # Add letter to the word
            last_letter = predicted_label  # Update last letter to avoid duplicates
            cooldown_counter = 10  # Start cooldown period to prevent duplicates

    # Optional: Show real-time word and sentence in terminal (for debugging)
    print(f"Current word: {word}")
    print(f"Current sentence: {sentence}")

def get_most_common_prediction(predictions):
    """Returns the most common prediction in the buffer."""
    if predictions:
        common_prediction = Counter(predictions).most_common(1)[0][0]
        return common_prediction
    return ''

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        continue

    # Flip the frame horizontally for a later selfie-view display
    frame = cv2.flip(frame, 1)
    image_rgb1 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image_rgb  =cv2.resize(image_rgb1, (250,250))
    # Process the frame with Mediapipe
    result = hands.process(image_rgb)

    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            # Reset no_gesture_counter when a gesture is detected
            no_gesture_counter = 0

            # Extract the landmarks
            landmarks = []
            for landmark in hand_landmarks.landmark:
                landmarks.append(landmark.x)
                landmarks.append(landmark.y)
                landmarks.append(landmark.z)

            # Convert the landmarks to numpy array and predict
            landmarks = np.array(landmarks).flatten().reshape(1, -1)
            prediction = model.predict(landmarks)
            class_id = np.argmax(prediction)
            predicted_label = labels[class_id]

            # Add prediction to the buffer
            prediction_buffer.append(predicted_label)

            # Get the most common prediction from the buffer
            final_prediction = get_most_common_prediction(prediction_buffer)

            # Stabilization logic: Only accept a prediction if it appears consistently
            # over a few frames to avoid random initial noise
            if final_prediction != '':
                if prediction_buffer.count(final_prediction) >= 7:  # Example threshold: 7 stable predictions
                    stabilization_counter += 1
                else:
                    stabilization_counter = 0

                # Accept the prediction only if the stabilization counter is sufficient
                if stabilization_counter > 2:  # Example threshold: 2 stabilized frames
                    process_gesture_input(final_prediction)
                    stabilization_counter = 0  # Reset stabilization counter after accepting a prediction

            # Reduce the cooldown counter
            if cooldown_counter > 0:
                cooldown_counter -= 1

            # Display the predicted label, current word, and sentence
            cv2.putText(frame, f"Letter: {final_prediction}", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(frame, f"Word: {word}", (10, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
            cv2.putText(frame, f"Sentence: {sentence}", (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)

            # Draw the hand landmarks on the frame
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

    else:
        # Increment no_gesture_counter when no gesture is detected
        no_gesture_counter += 1
        if no_gesture_counter > 30:  # Example threshold for word separation
            # Assume a word is completed if no gesture is seen for a certain number of frames
            if word != '':
                sentence += word + ' '  # Append word to the sentence with space
                word = ''  # Clear the word buffer after appending it to the sentence
            no_gesture_counter = 0  # Reset the counter for the next word

    # Show the frame
    cv2.imshow('Sign Language Detection', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
cap.release()
cv2.destroyAllWindows()




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 133ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Current word: 5
Current sentence: 
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Current word: 5
Current sentence: 
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms

KeyboardInterrupt: 

In [None]:
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
from collections import deque, Counter

print(f"OpenCV version: {cv2.__version__}")
print(f"NumPy version: {np.__version__}")
print(f"TensorFlow version: {tf.__version__}")
print(f"Mediapipe version: {mp.__version__}")

# Load Mediapipe Hands and the trained model
# MediaPipe hands module setup with advanced configurations
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=False,  # Set to False for video processing
    max_num_hands=2,
    model_complexity=1,  # 0=Faster/less accurate, 1=Balanced, 2=More accurate/slower
    min_detection_confidence=0.7,  # Minimum confidence for hand detection
    min_tracking_confidence=0.5,   # Minimum confidence for hand tracking
)
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Customize drawing specifications
drawing_spec = mp_drawing.DrawingSpec(
    color=(0, 255, 0),  # Green color for landmarks
    thickness=2,
    circle_radius=2
)

connection_spec = mp_drawing.DrawingSpec(
    color=(255, 0, 0),  # Red color for connections
    thickness=2
)
mp_drawing = mp.solutions.drawing_utils

model = tf.keras.models.load_model('sign_language_model.h5')

# Label map (0-9, A-Z)
labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')

# Webcam input
cap = cv2.VideoCapture(0)

# Buffers for debouncing and word formation
letter_buffer = deque(maxlen=15)  # Buffer to store letters for word formation
prediction_buffer = deque(maxlen=10)  # Buffer to smooth predictions over time
word = ''  # To hold the current word being formed
sentence = ''  # Sentence starts as empty and gets dynamically built
last_letter = ''  # To store the last letter to avoid duplicates
cooldown_counter = 0  # Counter for cooldown to avoid rapid repeated inputs
no_gesture_counter = 0  # Counter to detect no gestures, which can trigger word separation
stabilization_counter = 0  # Counter to ensure a stable prediction before accepting it

def process_gesture_input(predicted_label):
    global word, sentence, last_letter, cooldown_counter, no_gesture_counter, stabilization_counter

    # Check if the letter has changed and add a cooldown to prevent fast repetitions
    if predicted_label != last_letter and cooldown_counter == 0:
        if predicted_label == 'SPACE':  # Assuming you have a gesture for space
            sentence += word + ' '  # Append the current word with space to the sentence
            word = ''  # Clear the word after adding it to the sentence
        elif predicted_label == 'DELETE':  # Assuming you have a gesture for delete/backspace
            word = word[:-1]  # Remove the last character from the current word
        elif predicted_label == 'END':  # Assuming a gesture for ending a sentence
            sentence += word + '. '  # Add a period at the end of the sentence
            word = ''  # Reset the word buffer after completing the sentence
        else:
            word += predicted_label  # Add letter to the word
            last_letter = predicted_label  # Update last letter to avoid duplicates
            cooldown_counter = 10  # Start cooldown period to prevent duplicates

    # Optional: Show real-time word and sentence in terminal (for debugging)
    print(f"Current word: {word}")
    print(f"Current sentence: {sentence}")

def preprocess_frame(frame):
    """Preprocess the frame before landmark extraction"""
    # Convert to grayscale for better contrast
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    
    # Apply Gaussian blur to reduce noise
    blurred = cv2.GaussianBlur(gray, (7, 7), 0)
    
    # Apply adaptive thresholding to handle different lighting conditions
    thresh = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, 
        cv2.THRESH_BINARY_INV, 11, 2
    )
    
    # Enhance contrast
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    enhanced = clahe.apply(gray)
    
    return enhanced

def get_most_common_prediction(predictions):
    """Returns the most common prediction in the buffer."""
    if predictions:
        common_prediction = Counter(predictions).most_common(1)[0][0]
        return common_prediction
    return ''

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        continue

    # Flip the frame horizontally for a later selfie-view display
    frame = cv2.flip(frame, 1)
    processed_frame = preprocess_frame(frame)

    image_rgb1 = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
    image_rgb  =cv2.resize(image_rgb1, (250,250))
    # Process the frame with Mediapipe
    result = hands.process(image_rgb)
    
    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            # Reset no_gesture_counter when a gesture is detected
            no_gesture_counter = 0

            # Extract the landmarks
            landmarks = []
            for landmark in hand_landmarks.landmark:
                landmarks.append(landmark.x)
                landmarks.append(landmark.y)
                landmarks.append(landmark.z)

            # Convert the landmarks to numpy array and predict
            landmarks = np.array(landmarks).flatten().reshape(1, -1)
            prediction = model.predict(landmarks)
            class_id = np.argmax(prediction)
            predicted_label = labels[class_id]

            # Add prediction to the buffer
            prediction_buffer.append(predicted_label)

            # Get the most common prediction from the buffer
            final_prediction = get_most_common_prediction(prediction_buffer)

            # Stabilization logic: Only accept a prediction if it appears consistently
            # over a few frames to avoid random initial noise
            if final_prediction != '':
                if prediction_buffer.count(final_prediction) >= 7:  # Example threshold: 7 stable predictions
                    stabilization_counter += 1
                else:
                    stabilization_counter = 0

                # Accept the prediction only if the stabilization counter is sufficient
                if stabilization_counter > 2:  # Example threshold: 2 stabilized frames
                    process_gesture_input(final_prediction)
                    stabilization_counter = 0  # Reset stabilization counter after accepting a prediction

            # Reduce the cooldown counter
            if cooldown_counter > 0:
                cooldown_counter -= 1

            # Display the predicted label, current word, and sentence
            cv2.putText(frame, f"Letter: {final_prediction}", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(frame, f"Word: {word}", (10, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
            cv2.putText(frame, f"Sentence: {sentence}", (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)

            # Draw the hand landmarks on the frame
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

    else:
        # Increment no_gesture_counter when no gesture is detected
        no_gesture_counter += 1
        if no_gesture_counter > 30:  # Example threshold for word separation
            # Assume a word is completed if no gesture is seen for a certain number of frames
            if word != '':
                sentence += word + ' '  # Append word to the sentence with space
                word = ''  # Clear the word buffer after appending it to the sentence
            no_gesture_counter = 0  # Reset the counter for the next word

    # Show the frame
    cv2.imshow('Sign Language Detection', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
cap.release()
cv2.destroyAllWindows()


OpenCV version: 4.10.0


AttributeError: module 'numpy' has no attribute '_version_'