In [None]:

import numpy as np
import mediapipe as mp
import joblib
from sklearn.preprocessing import LabelEncoder
import tkinter as tk
from tkinter import messagebox
from PIL import Image, ImageTk
import os
import time
from datetime import datetime
import cv2

# Load model and labels
model = joblib.load('mlp_tsl_static.pkl')
le = LabelEncoder()
le.fit([chr(i) for i in range(ord('A'), ord('Z') + 1)])

# MediaPipe setup
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1,
                       min_detection_confidence=0.7, min_tracking_confidence=0.5)

# Normalize landmarks
def normalize_landmarks(landmarks):
    coords = np.array(landmarks).reshape(-1, 3).astype(np.float32)
    coords_min = coords.min(axis=0)
    coords_max = coords.max(axis=0)
    norm_coords = (coords - coords_min) / (coords_max - coords_min + 1e-6)
    return norm_coords.flatten().reshape(1, -1)

# Save to file
def save_output_to_file(text):
    os.makedirs('sound', exist_ok=True)
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    path = f'sound/output_{timestamp}.txt'
    with open(path, 'w', encoding='utf-8') as f:
        f.write(text)

# Main app
class TSLApp:
    def __init__(self, root):
        self.root = root
        self.root.title("TSL - Bridging Silence")
        self.root.configure(bg="#f0f4f8")

        self.video_running = False
        self.cap = None

        self.prev_letter = ""
        self.letter_hold_start = None
        self.last_seen_time = time.time()
        self.word = ""
        self.sentence = ""
        self.saved_sentences = []

        # UI
        self.video_label = tk.Label(root, bg="#e6ecf0")
        self.video_label.pack(padx=10, pady=10)

        self.prediction_label = tk.Label(root, text="Letter: ", font=("Arial", 18), fg="#007acc", bg="#f0f4f8")
        self.prediction_label.pack()

        self.controls = tk.Frame(root, bg="#f0f4f8")
        self.controls.pack(pady=10)

        tk.Button(self.controls, text="Start", command=self.start_video,
                  bg="#28a745", fg="white", font=("Arial", 12)).grid(row=0, column=0, padx=5)
        tk.Button(self.controls, text="Stop", command=self.stop_video,
                  bg="#dc3545", fg="white", font=("Arial", 12)).grid(row=0, column=1, padx=5)
        tk.Button(self.controls, text="Clear", command=self.clear_predictions,
                  bg="#ffc107", font=("Arial", 12)).grid(row=0, column=2, padx=5)
        tk.Button(self.controls, text="Speak", command=self.speak_text,
                  bg="#17a2b8", fg="white", font=("Arial", 12)).grid(row=0, column=3, padx=5)

    def start_video(self):
        if not self.video_running:
            self.cap = cv2.VideoCapture(0)
            self.video_running = True
            self.update_video()

    def stop_video(self):
        self.video_running = False
        if self.cap:
            self.cap.release()
        self.video_label.config(image='')

    def clear_predictions(self):
        self.word = ""
        self.sentence = ""
        self.saved_sentences.clear()
        self.prediction_label.config(text="Letter: ")

    def speak_text(self):
        # Disabled pyttsx3: only save to file and clear text
        full_sentence = (self.sentence + self.word).strip()
        if full_sentence:
            save_output_to_file(full_sentence)
            self.saved_sentences.append(full_sentence)
            self.word = ""
            self.sentence = ""
            self.prediction_label.config(text="Letter: ")

    def update_video(self):
        if not self.video_running:
            return

        ret, frame = self.cap.read()
        if not ret:
            self.stop_video()
            return

        frame = cv2.flip(frame, 1)
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb)

        current_time = time.time()
        hand_detected = False
        current_letter = ""

        if results.multi_hand_landmarks:
            hand_detected = True
            self.last_seen_time = current_time

            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                landmarks = [(lm.x, lm.y, lm.z) for lm in hand_landmarks.landmark]

                try:
                    X = normalize_landmarks(landmarks)
                    pred_index = model.predict(X)[0]
                    current_letter = le.inverse_transform([pred_index])[0]

                    if current_letter == self.prev_letter:
                        if not self.letter_hold_start:
                            self.letter_hold_start = current_time
                        if current_time - self.letter_hold_start >= 1:
                            if not self.word or self.word[-1] != current_letter:
                                self.word += current_letter
                    else:
                        self.letter_hold_start = current_time

                    self.prev_letter = current_letter

                except Exception as e:
                    print("Prediction error:", e)

        else:
            # No hand detected
            time_since_last = current_time - self.last_seen_time
            if time_since_last >= 2 and self.word and (not self.word.endswith(" ")):
                self.word += " "
            if time_since_last >= 5 and self.word.strip():
                self.sentence += self.word.strip() + " "
                self.word = ""

        display_text = f"Letter: {current_letter}\nWord: {self.word}\nSentence: {self.sentence}"
        self.prediction_label.config(text=display_text)

        img = Image.fromarray(rgb)
        imgtk = ImageTk.PhotoImage(image=img)
        self.video_label.imgtk = imgtk
        self.video_label.configure(image=imgtk)

        self.root.after(10, self.update_video)

# Run app
if __name__ == "__main__":
    root = tk.Tk()
    app = TSLApp(root)
    root.mainloop()
    

In [3]:
%pip  install  numpy 2.0.2 mediapipe joblib scikit-learn opencv-python Pillow


Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement 2.0.2 (from versions: none)
ERROR: No matching distribution found for 2.0.2
