In [None]:
from tensorflow.keras.models import load_model

# Load the saved Keras model from disk and print its architecture.
# NOTE(review): this is an absolute, user-specific Windows path; the cell only
# runs on a machine where that exact file exists. Consider a path relative to
# the notebook directory.
model_path = r"C:\Users\shaga\Desktop\machn learning\sign_language_model.h5"  
model = load_model(model_path)

# Print the layer-by-layer summary (layer names, output shapes, parameter counts).
model.summary()



Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 512)               1573376   
                                                                 
 dense_5 (Dense)             (None, 256)               131328    
                                                                 
 dense_6 (Dense)             (None, 128)               32896     
                                                                 
 dense_7 (Dense)             (None, 30)                3870      
                                                                 
Total params: 1741470 (6.64 MB)
Trainable params: 1741470 (6.64 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model
import joblib  
import time  

# Load the feed-forward (FNN) model trained on face + hand landmarks.
# The previous value wrapped the file name in a free-text note (with a leading
# space), which Keras would treat as a non-existent path; only the file name
# is kept. Adjust it if the file is not in the notebook's working directory.
model_path = r"sign_language_model2.h5"
model = load_model(model_path)

# Initialise the MediaPipe hand tracker.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Initialise the MediaPipe face mesh in video (non-static) mode, one face at most.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.5)

# Helper used later to draw the detected landmarks on the preview frame.
mp_draw = mp.solutions.drawing_utils

# Load the scaler saved for the hands + face feature layout.
# The previous value was a descriptive note ending in "scaler.pkl"; only the
# file name is kept here.
# NOTE: joblib.load unpickles the file, so only load scalers you trust.
scaler = joblib.load(r"scaler.pkl")

# Class index -> label; the index returned by argmax on the model output is
# looked up in this list.
labels = ["all", "before", "black", "book", "candy", "chair", "clothes", "computer", "cool", "cousin", 
          "deaf", "drink", "fine", "finish", "go", "help", "hot", "like", "many", "mother", "no", 
          "now", "orange", "table", "thanksgiving", "thin", "walk", "who", "year", "yes"]

# Flatten landmarks into a flat [x, y, z, ...] list and pad/truncate it to a fixed length, as was done during preprocessing
def flatten_landmarks(landmarks, expected_length, missing_value=0.0):
    """Turn a sequence of landmarks into a fixed-length flat coordinate list.

    Args:
        landmarks: Sized iterable of objects exposing ``x``, ``y`` and ``z``
            attributes, or ``None``.
        expected_length: Number of values the returned list must contain.
        missing_value: Filler used when there are not enough coordinates.

    Returns:
        A list ``[x0, y0, z0, x1, y1, z1, ...]`` padded with ``missing_value``
        or truncated so that it holds ``expected_length`` values. ``None`` or
        an empty input yields a list made only of ``missing_value``.
    """
    # Nothing detected: the whole vector is filler.
    if landmarks is None or len(landmarks) == 0:
        return [missing_value] * expected_length

    coords = [value for point in landmarks for value in (point.x, point.y, point.z)]

    # Pad on the right when the landmarks supplied fewer values than required.
    shortfall = expected_length - len(coords)
    if shortfall > 0:
        coords.extend([missing_value] * shortfall)

    return coords[:expected_length]

# Live FNN inference (hands + face): read webcam frames, extract MediaPipe
# landmarks, and classify the current frame at most once every
# `prediction_wait_time` seconds. Press 'q' in the preview window to stop.

# set up video capture
cap = cv2.VideoCapture(0)

# timer for the 3 second delay after each prediction
prediction_wait_time = 3
last_prediction_time = time.time()  # track the time of the last prediction

# The latest prediction is kept outside the loop so it stays on screen until
# the next one replaces it (resetting it every frame showed each prediction
# for a single frame only).
predicted_label = "Waiting for prediction"
predicted_class = -1

# Overlay text settings; identical for every frame, so defined once.
countdown_position = (20, 40)  # Top-left position
font_scale = 0.8
font_thickness = 2
font = cv2.FONT_HERSHEY_SIMPLEX

try:
    while cap.isOpened():
        ret, frame = cap.read()

        if not ret:
            break

        # frame must be converted to rgb since mediapipe works with that
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # process the frame with mediapipe
        hand_results = hands.process(rgb_frame)
        face_results = face_mesh.process(rgb_frame)

        # landmarks default to "not detected" for this frame
        right_hand_landmarks = None
        left_hand_landmarks = None
        face_landmarks = None

        # process hand landmarks
        if hand_results.multi_hand_landmarks:
            # NOTE(review): the slot is chosen by detection order (first hand ->
            # "right", second -> "left"), not by actual handedness. Kept as is
            # because the training preprocessing presumably used the same
            # convention - confirm before switching to `multi_handedness`.
            for idx, hand_landmarks in enumerate(hand_results.multi_hand_landmarks):
                if idx == 0:
                    right_hand_landmarks = flatten_landmarks(hand_landmarks.landmark, expected_length=63)  # 21 landmarks * 3
                elif idx == 1:
                    left_hand_landmarks = flatten_landmarks(hand_landmarks.landmark, expected_length=63)  # 21 landmarks * 3

        # process face landmarks (only the first detected face is used)
        if face_results.multi_face_landmarks:
            face_landmarks = flatten_landmarks(face_results.multi_face_landmarks[0].landmark, expected_length=1404)  # 468 landmarks * 3

        # build the feature vector: 63 + 63 + 1404 = 1530 values, zeros for anything not detected
        features = []
        features.extend(right_hand_landmarks if right_hand_landmarks else [0] * 63)
        features.extend(left_hand_landmarks if left_hand_landmarks else [0] * 63)
        features.extend(face_landmarks if face_landmarks else [0] * 1404)

        # check if the feature vector is the correct length 1530
        if len(features) != 1530:
            print(f"Warning: The feature vector length is {len(features)}, expected 1530.")

        # predict only when enough time has passed since the last prediction
        if (time.time() - last_prediction_time) >= prediction_wait_time:
            # scale with the pre-fitted scaler; done here so frames that are
            # not classified do not pay for the transform
            scaled_features = scaler.transform([features])[0]

            # make prediction
            prediction = model.predict(np.array([scaled_features]))
            predicted_class = np.argmax(prediction)  # class index with the highest probability

            # map the predicted class index to the correct label
            predicted_label = labels[predicted_class]

            # print the predicted label and its confidence
            print(f"Predicted Label: {predicted_label}, Confidence: {prediction[0][predicted_class]:.2f}")

            # update the time of the last prediction
            last_prediction_time = time.time()

        # Countdown timer before the next prediction
        remaining_time = prediction_wait_time - int(time.time() - last_prediction_time)

        # Get frame dimensions
        height, width, _ = frame.shape

        # Display countdown
        cv2.putText(frame, f"Next Prediction in {remaining_time}s", countdown_position, font, font_scale, (0, 255, 0), font_thickness)

        # Display predicted label and class number at bottom-right
        predicted_label_position = (width - 300, height - 50)
        predicted_class_number_position = (width - 300, height - 100)

        cv2.putText(frame, f"Predicted: {predicted_label}", predicted_label_position, font, 1, (0, 255, 0), font_thickness)
        cv2.putText(frame, f"Class No: {predicted_class}", predicted_class_number_position, font, 1, (0, 255, 0), font_thickness)

        # Draw hand landmarks on the frame
        if hand_results.multi_hand_landmarks:
            for hand_landmark in hand_results.multi_hand_landmarks:
                mp_draw.draw_landmarks(frame, hand_landmark, mp_hands.HAND_CONNECTIONS)

        # Draw face landmarks on the frame
        if face_results.multi_face_landmarks:
            for face_landmark in face_results.multi_face_landmarks:
                mp_draw.draw_landmarks(frame, face_landmark, mp_face_mesh.FACEMESH_TESSELATION)

        # Show the frame (you can exit by pressing 'q')
        cv2.imshow("Hand and Face Tracking with Prediction", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()


Predicted Label: before, Confidence: 1.00
Predicted Label: drink, Confidence: 0.93
Predicted Label: before, Confidence: 0.65
Predicted Label: before, Confidence: 1.00
Predicted Label: no, Confidence: 1.00
Predicted Label: before, Confidence: 0.84
Predicted Label: no, Confidence: 1.00
Predicted Label: no, Confidence: 1.00
Predicted Label: no, Confidence: 1.00
Predicted Label: who, Confidence: 0.99
Predicted Label: before, Confidence: 0.56
Predicted Label: before, Confidence: 1.00
Predicted Label: before, Confidence: 0.72
Predicted Label: who, Confidence: 0.88
Predicted Label: who, Confidence: 0.87
Predicted Label: who, Confidence: 0.90
Predicted Label: who, Confidence: 0.56
Predicted Label: before, Confidence: 0.65
Predicted Label: who, Confidence: 1.00
Predicted Label: who, Confidence: 0.80
Predicted Label: who, Confidence: 1.00
Predicted Label: no, Confidence: 1.00
Predicted Label: who, Confidence: 1.00
Predicted Label: who, Confidence: 0.93
Predicted Label: no, Confidence: 1.00
Predi

In [None]:
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model
import joblib  
import time  

# Class index -> label; the argmax of the model output is looked up here.
labels = [
    "all", "before", "black", "book", "candy", "chair",
    "clothes", "computer", "cool", "cousin", "deaf", "drink",
    "fine", "finish", "go", "help", "hot", "like",
    "many", "mother", "no", "now", "orange", "table",
    "thanksgiving", "thin", "walk", "who", "year", "yes",
]

# MediaPipe modules used by this cell: hand tracking and landmark drawing.
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

# Hands-only FNN model.
# TODO: the value below is a placeholder note, not a real path - replace it
# with the location of the saved hands-only model before running.
model_path = r"update correct path where model is = FNN model with hands only landmarks"
model = load_model(model_path)

# Hand tracker with 0.5 detection / tracking confidence thresholds.
hands = mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Scaler saved for the hands-only feature layout.
scaler = joblib.load(r'scaler_hands_only.pkl')

# Function to flatten hand landmarks and ensure correct padding
def flatten_hand_landmarks(landmarks, expected_length, missing_value=0.0):
    """Flatten up to 21 hand landmarks into a list of fixed length.

    Args:
        landmarks: Sliceable, sized sequence of objects with ``x``, ``y`` and
            ``z`` attributes, or ``None``.
        expected_length: Length of the returned list.
        missing_value: Value used to fill positions with no coordinate.

    Returns:
        ``[x0, y0, z0, ...]`` for the first 21 landmarks, right-padded with
        ``missing_value`` or truncated to ``expected_length`` items.
    """
    values = []
    if landmarks is not None and len(landmarks) > 0:
        # Only the first 21 entries are read.
        for point in landmarks[:21]:
            values += [point.x, point.y, point.z]

    # Right-pad with the filler, then cut to the requested size.
    filler = [missing_value] * max(0, expected_length - len(values))
    return (values + filler)[:expected_length]

# Live FNN inference (hands only): read webcam frames, extract MediaPipe hand
# landmarks, and classify the current frame at most once every
# `prediction_wait_time` seconds. Press 'q' in the preview window to stop.

# Set up video capture
cap = cv2.VideoCapture(0)

# Timer for the 3-second delay after each prediction
prediction_wait_time = 3
last_prediction_time = time.time()

# The latest prediction is kept outside the loop so it stays on screen until
# the next one replaces it (resetting it every frame showed each prediction
# for a single frame only).
predicted_label = "Waiting for prediction"
predicted_class = -1

# Overlay text settings; identical for every frame, so defined once.
countdown_position = (20, 40)
font_scale = 0.8
font_thickness = 2
font = cv2.FONT_HERSHEY_SIMPLEX

try:
    while cap.isOpened():
        ret, frame = cap.read()

        if not ret:
            break

        # Convert frame to RGB before handing it to MediaPipe
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Process frame with MediaPipe Hands
        hand_results = hands.process(rgb_frame)

        # Landmarks default to "not detected" for this frame
        right_hand_landmarks = None
        left_hand_landmarks = None

        # Process hand landmarks
        if hand_results.multi_hand_landmarks:
            # NOTE(review): the slot is chosen by detection order (first hand ->
            # "right", second -> "left"), not by actual handedness. Kept as is
            # because the training preprocessing presumably used the same
            # convention - confirm before switching to `multi_handedness`.
            for idx, hand_landmarks in enumerate(hand_results.multi_hand_landmarks):
                if idx == 0:
                    right_hand_landmarks = flatten_hand_landmarks(hand_landmarks.landmark, expected_length=63)  # 21 landmarks * 3
                elif idx == 1:
                    left_hand_landmarks = flatten_hand_landmarks(hand_landmarks.landmark, expected_length=63)  # 21 landmarks * 3

        # Build the feature vector: 63 + 63 = 126 values, zeros for a missing hand
        features = []
        features.extend(right_hand_landmarks if right_hand_landmarks else [0] * 63)
        features.extend(left_hand_landmarks if left_hand_landmarks else [0] * 63)

        # Check if the feature vector is the correct length (126)
        if len(features) != 126:
            print(f"Warning: The feature vector length is {len(features)}, expected 126.")

        # Predict only when enough time has passed since the last prediction
        if (time.time() - last_prediction_time) >= prediction_wait_time:
            # Scale with the pre-fitted scaler; done here so frames that are
            # not classified do not pay for the transform
            scaled_features = scaler.transform([features])[0]

            # Make prediction
            prediction = model.predict(np.array([scaled_features]))
            predicted_class = np.argmax(prediction)  # class index with the highest probability

            # Map the predicted class index to the corresponding label
            predicted_label = labels[predicted_class]

            # Print the predicted label and its confidence
            print(f"Predicted Label: {predicted_label}, Confidence: {prediction[0][predicted_class]:.2f}")

            # Update the time of the last prediction
            last_prediction_time = time.time()

        # Countdown timer before the next prediction
        remaining_time = prediction_wait_time - int(time.time() - last_prediction_time)

        # Get frame dimensions
        height, width, _ = frame.shape

        # Display countdown
        cv2.putText(frame, f"Next Prediction in {remaining_time}s", countdown_position, font, font_scale, (0, 255, 0), font_thickness)

        # Display predicted label and class number at bottom-right
        predicted_label_position = (width - 300, height - 50)
        predicted_class_number_position = (width - 300, height - 100)

        cv2.putText(frame, f"Predicted: {predicted_label}", predicted_label_position, font, 1, (0, 255, 0), font_thickness)
        cv2.putText(frame, f"Class No: {predicted_class}", predicted_class_number_position, font, 1, (0, 255, 0), font_thickness)

        # Draw hand landmarks on the frame
        if hand_results.multi_hand_landmarks:
            for hand_landmark in hand_results.multi_hand_landmarks:
                mp_draw.draw_landmarks(frame, hand_landmark, mp_hands.HAND_CONNECTIONS)

        # Show the frame; you can exit by pressing 'q'
        cv2.imshow("Hand Tracking with Prediction", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()


Predicted Label: go, Confidence: 0.12
Predicted Label: go, Confidence: 0.24
Predicted Label: computer, Confidence: 0.06
Predicted Label: computer, Confidence: 0.06
Predicted Label: computer, Confidence: 0.06
Predicted Label: finish, Confidence: 0.37
Predicted Label: go, Confidence: 0.50
Predicted Label: book, Confidence: 1.00
Predicted Label: book, Confidence: 1.00
Predicted Label: computer, Confidence: 0.06
Predicted Label: go, Confidence: 0.80
Predicted Label: finish, Confidence: 0.83
Predicted Label: finish, Confidence: 0.81
Predicted Label: finish, Confidence: 0.81
Predicted Label: finish, Confidence: 0.82
Predicted Label: finish, Confidence: 0.82
Predicted Label: finish, Confidence: 0.80
Predicted Label: finish, Confidence: 0.82
Predicted Label: finish, Confidence: 0.82
Predicted Label: help, Confidence: 0.57
Predicted Label: thanksgiving, Confidence: 0.90
Predicted Label: thanksgiving, Confidence: 0.44
Predicted Label: book, Confidence: 0.70
Predicted Label: book, Confidence: 0.8

RNN 

In [None]:
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model
import joblib  
import time  

# Load the RNN model trained on face + hand landmarks.
# The previous value appended a free-text note to the file name, which Keras
# would treat as a non-existent path; only the file name is kept. Adjust it if
# the file is not in the notebook's working directory.
model_path = r"RNN.h5"
model = load_model(model_path)

# Initialize the MediaPipe hand tracker.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Initialize the MediaPipe face mesh in video (non-static) mode, one face at most.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.5)

# Helper used later to draw the detected landmarks on the preview frame.
mp_draw = mp.solutions.drawing_utils

# Load the scaler saved for the hands + face feature layout.
# The previous value was "scaler.pkl" followed by a descriptive note; only the
# file name is kept here.
# NOTE: joblib.load unpickles the file, so only load scalers you trust.
scaler = joblib.load(r"scaler.pkl")

# Class index -> label; the index returned by argmax on the model output is
# looked up in this list.
labels = ["all", "before", "black", "book", "candy", "chair", "clothes", "computer", "cool", "cousin", 
          "deaf", "drink", "fine", "finish", "go", "help", "hot", "like", "many", "mother", "no", 
          "now", "orange", "table", "thanksgiving", "thin", "walk", "who", "year", "yes"]

# Function to flatten hand landmarks and ensure correct padding
def flatten_hand_landmarks(landmarks, expected_length, missing_value=0.0):
    """Return the x/y/z values of the first 21 landmarks as one flat list.

    Args:
        landmarks: Sliceable, sized sequence of objects with ``x``, ``y`` and
            ``z`` attributes, or ``None``.
        expected_length: Required length of the result.
        missing_value: Filler for positions that have no coordinate.

    Returns:
        A list of exactly ``expected_length`` values: the flattened
        coordinates, padded on the right with ``missing_value`` or truncated.
    """
    if landmarks is None or len(landmarks) == 0:
        coords = []
    else:
        # Read at most 21 landmarks, three values each.
        coords = [axis for point in landmarks[:21] for axis in (point.x, point.y, point.z)]

    # A non-positive difference multiplies to an empty list, i.e. no padding.
    coords += [missing_value] * (expected_length - len(coords))
    return coords[:expected_length]

# Function to flatten face landmarks and ensure correct padding
def flatten_face_landmarks(landmarks, expected_length, missing_value=0.0):
    """Flatten every face landmark into a fixed-length coordinate list.

    Args:
        landmarks: Sized iterable of objects with ``x``, ``y`` and ``z``
            attributes, or ``None``.
        expected_length: Required length of the result.
        missing_value: Filler for positions that have no coordinate.

    Returns:
        ``[x0, y0, z0, x1, ...]`` padded on the right with ``missing_value``
        or truncated so that it has ``expected_length`` items.
    """
    has_points = landmarks is not None and len(landmarks) != 0

    flat = []
    for point in (landmarks if has_points else ()):
        flat.append(point.x)
        flat.append(point.y)
        flat.append(point.z)

    # Top up with the filler when the face supplied too few values.
    missing = expected_length - len(flat)
    if missing > 0:
        flat.extend([missing_value] * missing)

    return flat[:expected_length]

# Live RNN inference (hands + face): keep a rolling window of the last
# `sequence_length` per-frame feature vectors and classify that window at most
# once every `prediction_wait_time` seconds. Press 'q' in the preview to stop.

# Set up video capture
cap = cv2.VideoCapture(0)  # Use 0 for the default webcam

# Timer for the 3-second delay after each prediction
prediction_wait_time = 3
last_prediction_time = time.time()  # Track the time of the last prediction

# Number of time steps fed to the model, and the rolling window that holds them
sequence_length = 30
frame_sequence = []

# Latest prediction; shown on screen until the next one replaces it
predicted_label = "Waiting for prediction"
predicted_class = -1

# Overlay text settings; identical for every frame, so defined once.
countdown_position = (20, 40)
font_scale = 0.8
font_thickness = 2
font = cv2.FONT_HERSHEY_SIMPLEX

try:
    while cap.isOpened():
        ret, frame = cap.read()

        if not ret:
            break

        # Convert frame to RGB before handing it to MediaPipe
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Process frame with MediaPipe Hands and FaceMesh
        hand_results = hands.process(rgb_frame)
        face_results = face_mesh.process(rgb_frame)

        # Landmarks default to "not detected" for this frame
        right_hand_landmarks = None
        left_hand_landmarks = None
        face_landmarks = None

        # Process hand landmarks
        if hand_results.multi_hand_landmarks:
            # NOTE(review): the slot is chosen by detection order (first hand ->
            # "right", second -> "left"), not by actual handedness. Kept as is
            # because the training preprocessing presumably used the same
            # convention - confirm before switching to `multi_handedness`.
            for idx, hand_landmarks in enumerate(hand_results.multi_hand_landmarks):
                if idx == 0:
                    right_hand_landmarks = flatten_hand_landmarks(hand_landmarks.landmark, expected_length=63)  # 21 landmarks * 3
                elif idx == 1:
                    left_hand_landmarks = flatten_hand_landmarks(hand_landmarks.landmark, expected_length=63)  # 21 landmarks * 3

        # Process face landmarks (only the first detected face is used)
        if face_results.multi_face_landmarks:
            face_landmarks = flatten_face_landmarks(face_results.multi_face_landmarks[0].landmark, expected_length=1404)  # 468 landmarks * 3

        # Build the feature vector: 63 + 63 + 1404 = 1530 values, zeros for anything not detected
        features = []
        features.extend(right_hand_landmarks if right_hand_landmarks else [0] * 63)
        features.extend(left_hand_landmarks if left_hand_landmarks else [0] * 63)
        features.extend(face_landmarks if face_landmarks else [0] * 1404)

        if len(features) != 1530:
            print(f"Warning: The feature vector length is {len(features)}, expected 1530.")

        # Append the newest frame and drop anything older than the window.
        # (Previously a full buffer was scaled and then discarded whenever the
        # cooldown had not elapsed, delaying the prediction to the next buffer
        # boundary and wasting the transform.)
        frame_sequence.append(features)
        del frame_sequence[:-sequence_length]

        window_ready = len(frame_sequence) == sequence_length
        cooldown_over = (time.time() - last_prediction_time) >= prediction_wait_time

        if window_ready and cooldown_over:
            # Scale the window with the pre-fitted scaler (one row per frame)
            scaled_features = scaler.transform(frame_sequence)

            # Reshape the sequence to (1, 30, 1530) for the model
            scaled_features_reshaped = np.reshape(scaled_features, (1, sequence_length, 1530))

            # Make prediction with the reshaped sequence
            prediction = model.predict(scaled_features_reshaped)
            predicted_class = np.argmax(prediction)  # class index with the highest probability

            # Map the predicted class index to the corresponding label
            predicted_label = labels[predicted_class]

            # Print the predicted label and its confidence
            print(f"Predicted Label: {predicted_label}, Confidence: {prediction[0][predicted_class]:.2f}")

            # Update the time of the last prediction
            last_prediction_time = time.time()

            # Start collecting a fresh sequence for the next prediction
            frame_sequence = []

        # Countdown timer before the next prediction; clamped at 0 because the
        # cooldown can expire while the window is still filling up
        remaining_time = max(0, prediction_wait_time - int(time.time() - last_prediction_time))

        height, width, _ = frame.shape

        cv2.putText(frame, f"Next Prediction in {remaining_time}s", countdown_position, font, font_scale, (0, 255, 0), font_thickness)

        predicted_label_position = (width - 300, height - 50)
        predicted_class_number_position = (width - 300, height - 100)

        cv2.putText(frame, f"Predicted: {predicted_label}", predicted_label_position, font, 1, (0, 255, 0), font_thickness)
        cv2.putText(frame, f"Class No: {predicted_class}", predicted_class_number_position, font, 1, (0, 255, 0), font_thickness)

        # Draw hand landmarks on the frame
        if hand_results.multi_hand_landmarks:
            for hand_landmark in hand_results.multi_hand_landmarks:
                mp_draw.draw_landmarks(frame, hand_landmark, mp_hands.HAND_CONNECTIONS)

        # Draw face landmarks on the frame
        if face_results.multi_face_landmarks:
            for face_landmark in face_results.multi_face_landmarks:
                mp_draw.draw_landmarks(frame, face_landmark, mp_face_mesh.FACEMESH_TESSELATION)

        cv2.imshow("Hand and Face Tracking with Prediction", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()


Predicted Label: many, Confidence: 0.08
Predicted Label: many, Confidence: 0.08
Predicted Label: many, Confidence: 0.08
Predicted Label: many, Confidence: 0.08
Predicted Label: many, Confidence: 0.08
Predicted Label: many, Confidence: 0.08
Predicted Label: many, Confidence: 0.08
Predicted Label: many, Confidence: 0.08
Predicted Label: many, Confidence: 0.08
Predicted Label: many, Confidence: 0.08
Predicted Label: many, Confidence: 0.08
Predicted Label: many, Confidence: 0.08
Predicted Label: many, Confidence: 0.08
Predicted Label: many, Confidence: 0.07
Predicted Label: before, Confidence: 0.19
Predicted Label: before, Confidence: 0.14
Predicted Label: before, Confidence: 0.21
Predicted Label: before, Confidence: 0.22
Predicted Label: before, Confidence: 0.21
Predicted Label: before, Confidence: 0.21
Predicted Label: before, Confidence: 0.20
Predicted Label: before, Confidence: 0.21
Predicted Label: before, Confidence: 0.20
Predicted Label: before, Confidence: 0.21
Predicted Label: bef

In [None]:
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model
import joblib  
import time 

# Class index -> label; the argmax of the model output is looked up here.
labels = [
    "all", "before", "black", "book", "candy", "chair",
    "clothes", "computer", "cool", "cousin", "deaf", "drink",
    "fine", "finish", "go", "help", "hot", "like",
    "many", "mother", "no", "now", "orange", "table",
    "thanksgiving", "thin", "walk", "who", "year", "yes",
]

# MediaPipe modules used by this cell: hand tracking and landmark drawing.
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

# Hands-only RNN model; the path is relative to the working directory.
model_path = r"RNN_no_face.h5"
model = load_model(model_path)

# Hand tracker with 0.5 detection / tracking confidence thresholds.
hands = mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Scaler saved for the hands-only feature layout.
scaler = joblib.load(r'scaler_hands_only.pkl')

# Function to flatten hand landmarks and ensure correct padding
def flatten_hand_landmarks(landmarks, expected_length, missing_value=0.0):
    """Build a fixed-length coordinate list from the first 21 hand landmarks.

    Args:
        landmarks: Sliceable, sized sequence of objects with ``x``, ``y`` and
            ``z`` attributes, or ``None``.
        expected_length: Number of items the result must have.
        missing_value: Filler for positions that have no coordinate.

    Returns:
        The flattened ``x, y, z`` values, right-padded with ``missing_value``
        or truncated to ``expected_length`` items.
    """
    # Start from a list that is entirely filler, then overwrite its head with
    # whatever real coordinates are available.
    result = [missing_value] * expected_length
    if landmarks is None or len(landmarks) == 0:
        return result

    coords = []
    for point in landmarks[:21]:
        coords.extend((point.x, point.y, point.z))

    if len(coords) >= expected_length:
        return coords[:expected_length]

    result[:len(coords)] = coords
    return result

# Live RNN inference (hands only): keep a rolling window of the last
# `sequence_length` per-frame feature vectors and classify that window at most
# once every `prediction_wait_time` seconds. Press 'q' in the preview to stop.

cap = cv2.VideoCapture(0)

# Minimum number of seconds between two predictions
prediction_wait_time = 3
last_prediction_time = time.time()

# Number of time steps fed to the model, and the rolling window that holds them
sequence_length = 30
frame_sequence = []

# Latest prediction; shown on screen until the next one replaces it
predicted_label = "Waiting for prediction"
predicted_class = -1

# Overlay text settings; identical for every frame, so defined once.
countdown_position = (20, 40)
font_scale = 0.8
font_thickness = 2
font = cv2.FONT_HERSHEY_SIMPLEX

try:
    while cap.isOpened():
        ret, frame = cap.read()

        if not ret:
            break

        # Convert frame to RGB before handing it to MediaPipe
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        hand_results = hands.process(rgb_frame)

        # Landmarks default to "not detected" for this frame
        right_hand_landmarks = None
        left_hand_landmarks = None

        # Process hand landmarks
        if hand_results.multi_hand_landmarks:
            # NOTE(review): the slot is chosen by detection order (first hand ->
            # "right", second -> "left"), not by actual handedness. Kept as is
            # because the training preprocessing presumably used the same
            # convention - confirm before switching to `multi_handedness`.
            for idx, hand_landmarks in enumerate(hand_results.multi_hand_landmarks):
                if idx == 0:
                    right_hand_landmarks = flatten_hand_landmarks(hand_landmarks.landmark, expected_length=63)  # 21 landmarks * 3
                elif idx == 1:
                    left_hand_landmarks = flatten_hand_landmarks(hand_landmarks.landmark, expected_length=63)  # 21 landmarks * 3

        # Build the feature vector: 63 + 63 = 126 values, zeros for a missing hand
        features = []
        features.extend(right_hand_landmarks if right_hand_landmarks else [0] * 63)
        features.extend(left_hand_landmarks if left_hand_landmarks else [0] * 63)

        # Check if the feature vector is the correct length (126)
        if len(features) != 126:
            print(f"Warning: The feature vector length is {len(features)}, expected 126.")

        # Append the newest frame and drop anything older than the window.
        # (Previously a full buffer was scaled and then discarded whenever the
        # cooldown had not elapsed, delaying the prediction to the next buffer
        # boundary and wasting the transform.)
        frame_sequence.append(features)
        del frame_sequence[:-sequence_length]

        window_ready = len(frame_sequence) == sequence_length
        cooldown_over = (time.time() - last_prediction_time) >= prediction_wait_time

        if window_ready and cooldown_over:
            # Scale the window with the pre-fitted scaler (one row per frame)
            scaled_features = scaler.transform(frame_sequence)

            # Reshape the sequence to (1, 30, 126) for the model
            scaled_features_reshaped = np.reshape(scaled_features, (1, sequence_length, 126))

            # Make prediction with the reshaped sequence
            prediction = model.predict(scaled_features_reshaped)
            predicted_class = np.argmax(prediction)  # class index with the highest probability

            # Map the predicted class index to the corresponding label
            predicted_label = labels[predicted_class]

            # Print the predicted label and its confidence
            print(f"Predicted Label: {predicted_label}, Confidence: {prediction[0][predicted_class]:.2f}")

            # Update the time of the last prediction
            last_prediction_time = time.time()

            # Start collecting a fresh sequence for the next prediction
            frame_sequence = []

        # Countdown timer before the next prediction; clamped at 0 because the
        # cooldown can expire while the window is still filling up
        remaining_time = max(0, prediction_wait_time - int(time.time() - last_prediction_time))

        # Get frame dimensions
        height, width, _ = frame.shape

        # Display countdown
        cv2.putText(frame, f"Next Prediction in {remaining_time}s", countdown_position, font, font_scale, (0, 255, 0), font_thickness)

        # Display predicted label and class number at bottom-right
        predicted_label_position = (width - 300, height - 50)
        predicted_class_number_position = (width - 300, height - 100)

        cv2.putText(frame, f"Predicted: {predicted_label}", predicted_label_position, font, 1, (0, 255, 0), font_thickness)
        cv2.putText(frame, f"Class No: {predicted_class}", predicted_class_number_position, font, 1, (0, 255, 0), font_thickness)

        # Draw hand landmarks on the frame
        if hand_results.multi_hand_landmarks:
            for hand_landmark in hand_results.multi_hand_landmarks:
                mp_draw.draw_landmarks(frame, hand_landmark, mp_hands.HAND_CONNECTIONS)

        # Show the frame (you can exit by pressing 'q')
        cv2.imshow("Hand Tracking with Prediction", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()


Predicted Label: mother, Confidence: 0.06
Predicted Label: cousin, Confidence: 0.08
Predicted Label: computer, Confidence: 0.07
Predicted Label: computer, Confidence: 0.08
Predicted Label: computer, Confidence: 0.08
Predicted Label: computer, Confidence: 0.06
Predicted Label: computer, Confidence: 0.06
Predicted Label: computer, Confidence: 0.07
Predicted Label: computer, Confidence: 0.08
Predicted Label: help, Confidence: 0.06
Predicted Label: cousin, Confidence: 0.07
Predicted Label: many, Confidence: 0.05
Predicted Label: mother, Confidence: 0.05
Predicted Label: computer, Confidence: 0.08
Predicted Label: mother, Confidence: 0.06
Predicted Label: mother, Confidence: 0.05
Predicted Label: mother, Confidence: 0.05
Predicted Label: help, Confidence: 0.06
Predicted Label: computer, Confidence: 0.06
Predicted Label: computer, Confidence: 0.07
Predicted Label: computer, Confidence: 0.07
Predicted Label: computer, Confidence: 0.07
Predicted Label: computer, Confidence: 0.08
Predicted Labe

Alphabet Programs

In [None]:
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf

# Live alphabet recognition: detect hands with MediaPipe, crop a padded box
# around each hand, resize it to the model's input size and classify it.
# Press 'q' in the preview window to stop.

# Load trained model
model = tf.keras.models.load_model('alphabet_model_1.h5')

# Custom class labels (A-Z + space); indexed by the argmax of the model output
class_names = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "space"]

# Initialize MediaPipe Hands module
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.8)
mp_drawing = mp.solutions.drawing_utils

cap = cv2.VideoCapture(0)

# Size the hand crop is resized to before it is fed to the model
frame_width, frame_height = 200, 200
# Extra pixels added on every side of the landmark bounding box
padding = 55

# Predictions whose top probability is below this are shown as "Uncertain"
threshold = 0.6

try:
    while True:
        ret, frame = cap.read()

        if not ret:
            break

        # Flip the frame horizontally for a mirror view
        frame = cv2.flip(frame, 1)

        # Convert the frame to RGB for MediaPipe
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Perform hand detection
        results = hands.process(rgb_frame)

        if results.multi_hand_landmarks:
            img_h, img_w = frame.shape[:2]

            # Crops are taken from an untouched copy: text, boxes and landmarks
            # drawn on `frame` for one hand must not leak into the model input
            # of the next hand.
            clean_frame = frame.copy()

            for hand_landmarks in results.multi_hand_landmarks:
                # Pixel bounding box around the landmarks
                x_min, y_min = img_w, img_h
                x_max, y_max = 0, 0
                for landmark in hand_landmarks.landmark:
                    x, y = int(landmark.x * img_w), int(landmark.y * img_h)
                    x_min, y_min = min(x_min, x), min(y_min, y)
                    x_max, y_max = max(x_max, x), max(y_max, y)

                # Grow the box by the padding and clamp it to the frame
                x_min = max(x_min - padding, 0)
                y_min = max(y_min - padding, 0)
                x_max = min(x_max + padding, img_w)
                y_max = min(y_max + padding, img_h)

                # Ensure the bounding box is valid and non-zero
                if x_max > x_min and y_max > y_min:
                    # Crop the hand region from the clean copy
                    hand_roi = clean_frame[y_min:y_max, x_min:x_max]

                    if hand_roi.size > 0:  # Ensure that the hand region is not empty
                        # NOTE(review): the crop is in OpenCV's BGR channel order
                        # and is passed to the model without rescaling - confirm
                        # this matches the preprocessing used during training.
                        hand_roi_resized = cv2.resize(hand_roi, (frame_width, frame_height))
                        frame_input = np.expand_dims(hand_roi_resized, axis=0)

                        # Display the resized image that is being used by the model
                        cv2.imshow("Model Input Image", hand_roi_resized)

                        # Make prediction
                        predictions = model.predict(frame_input)
                        predicted_class_prob = np.max(predictions, axis=1)[0]  # highest class probability

                        # Only report a class if the probability reaches the threshold
                        if predicted_class_prob >= threshold:
                            predicted_class = np.argmax(predictions, axis=1)[0]
                            predicted_label = class_names[predicted_class]
                        else:
                            predicted_label = "Uncertain"

                        # Display the predicted label on the screen
                        cv2.putText(frame, predicted_label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                        # Draw landmarks and a green padded bounding box around the hand
                        mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
                    else:
                        print("Hand ROI is empty!")
                else:
                    print("Invalid bounding box!")

        # Show the frame with hand landmarks and bounding box
        cv2.imshow("Hand Detection with Prediction", frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()




In [None]:
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf

# Load the trained alphabet classifier
model = tf.keras.models.load_model('alphabet_model_inception.h5')

# Custom class labels (A-Z + space); entry i is the label displayed when the
# model's highest-scoring output is index i
class_names = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "space"]

# Initialize MediaPipe Hands module
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)
mp_drawing = mp.solutions.drawing_utils

# Open the webcam
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this message the loop below would simply end on the first failed read
    print("Could not open webcam (device 0).")

# Size (in pixels) every hand crop is resized to before it is given to the model
frame_width, frame_height = 200, 200
padding = 25  # pixels added on every side of the landmark bounding box

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Flip the frame horizontally for a mirror view
        frame = cv2.flip(frame, 1)

        # MediaPipe is given an RGB copy; the crop passed to the model stays BGR
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Perform hand detection
        results = hands.process(rgb_frame)

        if results.multi_hand_landmarks:
            img_h, img_w = frame.shape[:2]

            # Crop from an untouched copy: `frame` receives text, landmarks and boxes
            # for every hand, and those overlays must not leak into the model input
            # of a hand processed later in the same frame.
            clean_frame = frame.copy()

            for hand_idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
                # Landmark coordinates are normalized; scale them to pixel positions
                xs = [int(lm.x * img_w) for lm in hand_landmarks.landmark]
                ys = [int(lm.y * img_h) for lm in hand_landmarks.landmark]

                # Raw extents are limited to the frame first (landmarks can fall
                # outside the image), then padded and clipped to the frame again
                x_min = max(min(img_w, *xs) - padding, 0)
                y_min = max(min(img_h, *ys) - padding, 0)
                x_max = min(max(0, *xs) + padding, img_w)
                y_max = min(max(0, *ys) + padding, img_h)

                # A box with positive width and height inside the frame always gives a
                # non-empty crop, so this is the only validity check required
                if x_max <= x_min or y_max <= y_min:
                    print("Invalid bounding box!")
                    continue

                # Crop the hand region and resize it to the model's input size
                hand_roi = clean_frame[y_min:y_max, x_min:x_max]
                hand_roi_resized = cv2.resize(hand_roi, (frame_width, frame_height))
                frame_input = np.expand_dims(hand_roi_resized, axis=0)  # add batch axis

                # NOTE(review): the crop is fed as BGR pixels with no scaling applied here;
                # confirm this matches the preprocessing the model was trained with.
                # verbose=0 stops Keras printing a progress bar for every single frame.
                predictions = model.predict(frame_input, verbose=0)
                predicted_class = np.argmax(predictions, axis=1)[0]
                predicted_label = class_names[predicted_class]

                # One text row per hand so labels of several hands do not overlap
                cv2.putText(frame, predicted_label, (10, 30 + 40 * hand_idx),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                # Draws landmarks and a green padded bounding box around the hand
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

        # Shows the frame with hand landmarks and bounding box
        cv2.imshow("Hand Detection with Prediction", frame)

        # Breaks the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even when the loop raises
    # or the kernel is interrupted, so the webcam is not left locked
    cap.release()
    cv2.destroyAllWindows()
    hands.close()




MediaPipe

In [None]:
import cv2
import numpy as np
import tensorflow as tf
import mediapipe as mp
import pickle
import time

# Load the trained model
model = tf.keras.models.load_model('Mediapipe_Alphabet.h5')

# Load the pickled dataset; only its 'labels' entry is used below.
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file,
# so only ever point this at a pickle that comes from a trusted source.
with open('hand_landmarks_data.pickle', 'rb') as f:
    dataset = pickle.load(f)

# Rebuild the class-index -> label mapping from the sorted unique labels.
# NOTE(review): assumes training assigned class indices in sorted label order - confirm.
labels = dataset['labels']
unique_labels = sorted(set(labels))
label_dict = dict(enumerate(unique_labels))  # Map indices to letters

# Initialize MediaPipe Hands module
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Open webcam
cap = cv2.VideoCapture(0)

# Stores the predicted letters to form a word
predicted_word = ""
last_prediction_time = time.time()  # Timer to control prediction updates
prediction_interval = 5  # Time interval for reading letters (seconds)
countdown_time = prediction_interval  # On-screen countdown always tracks the interval

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Flip the frame horizontally for natural movement
        frame = cv2.flip(frame, 1)
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convert to RGB

        # Process the frame to detect hands
        results = hands.process(frame_rgb)

        # Check if it's time to make a new prediction
        current_time = time.time()
        if current_time - last_prediction_time >= prediction_interval:
            last_prediction_time = current_time  # Resets timer (even if no hand is visible)

            if results.multi_hand_landmarks:
                # NOTE(review): every detected hand appends its own letter, so two
                # visible hands add two letters per interval - confirm this is intended.
                for hand_landmarks in results.multi_hand_landmarks:
                    # Extract landmark coordinates
                    x_vals = [lm.x for lm in hand_landmarks.landmark]
                    y_vals = [lm.y for lm in hand_landmarks.landmark]

                    # Computed once per hand instead of once per landmark
                    x_origin, y_origin = min(x_vals), min(y_vals)

                    # Interleave x and y, each shifted so the hand's smallest
                    # coordinate on that axis becomes zero
                    landmarks = []
                    for x_val, y_val in zip(x_vals, y_vals):
                        landmarks.append(x_val - x_origin)  # Normalize X
                        landmarks.append(y_val - y_origin)  # Normalize Y

                    # Only predict when the feature vector has the expected 42 values
                    if len(landmarks) == 42:
                        # Convert to a NumPy array with a leading batch axis for the model
                        landmarks = np.array(landmarks).reshape(1, -1)

                        # verbose=0 keeps Keras from printing a progress bar per prediction
                        prediction = model.predict(landmarks, verbose=0)
                        predicted_index = int(np.argmax(prediction))  # Get class index
                        predicted_letter = label_dict[predicted_index]  # Maps to letter

                        # The "space" class inserts a blank between words;
                        # every other class appends its letter
                        if predicted_letter == "space":
                            predicted_word += " "
                        else:
                            predicted_word += predicted_letter

        # Draws landmarks if detected
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

        # Whole seconds left before the next prediction (counts countdown_time .. 1)
        remaining_time = countdown_time - int(time.time() - last_prediction_time) % countdown_time

        # Displays the countdown
        cv2.putText(frame, f"Next Prediction in {remaining_time}s", (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Displays the prediction and word
        cv2.putText(frame, f"Prediction: {predicted_word}", (50, 100),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)
        cv2.putText(frame, "Press 'C' to clear, 'D' to delete last letter", (50, 150),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

        # Shows the webcam feed
        cv2.imshow("Hand Sign Detection", frame)

        # Keyboard controls; both cases are accepted because the on-screen hint
        # shows capital letters and caps lock would otherwise disable the keys
        key = cv2.waitKey(1) & 0xFF
        if key in (ord('q'), ord('Q')):  # Quit
            break
        elif key in (ord('c'), ord('C')):  # Clear the entire word
            predicted_word = ""
        elif key in (ord('d'), ord('D')):  # Delete the last letter
            predicted_word = predicted_word[:-1]  # No-op when the word is already empty
finally:
    # Always free the camera and close the window, even when the loop raises
    # or the kernel is interrupted, so the webcam is not left locked
    cap.release()
    cv2.destroyAllWindows()
    hands.close()

# Saves the predicted word to a file
with open("predicted_word.txt", "w") as f:
    f.write(predicted_word)

print(f"Final Predicted Word: {predicted_word}")
print("Word saved to predicted_word.txt")


Final Predicted Word: HELLO
Word saved to predicted_word.txt
