In [2]:
import cv2
import numpy as np
import tensorflow as tf
import mediapipe as mp
from PIL import ImageFont, ImageDraw, Image

# Load model
model = tf.keras.models.load_model('nslt_model.h5')

# Load class names in Nepali
class_names = ["नमस्कार", "म", "घर", "धन्यवाद"]

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.7)

# Initialize webcam
cap = cv2.VideoCapture(0)

# Load a Nepali-compatible font (Download and place in your working directory)
fontpath = "NotoSansDevanagari-Regular.ttf"
font = ImageFont.truetype(fontpath, 30)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
        
    # Flip frame horizontally for mirror effect
    frame = cv2.flip(frame, 1)
    
    # Process frame with MediaPipe Hands
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(rgb_frame)

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            # Get hand coordinates
            x = [lm.x for lm in hand_landmarks.landmark]
            y = [lm.y for lm in hand_landmarks.landmark]
            
            # Convert normalized coordinates to pixel values
            height, width = frame.shape[:2]
            x_min = int(min(x) * width)
            x_max = int(max(x) * width)
            y_min = int(min(y) * height)
            y_max = int(max(y) * height)
            
            # Expand bounding box
            expand = 0.1
            x_min = max(0, x_min - int((x_max - x_min) * expand))
            x_max = min(width, x_max + int((x_max - x_min) * expand))
            y_min = max(0, y_min - int((y_max - y_min) * expand))
            y_max = min(height, y_max + int((y_max - y_min) * expand))
            
            # Crop and preprocess hand image
            hand_image = frame[y_min:y_max, x_min:x_max]
            if hand_image.size != 0:
                hand_image = cv2.resize(hand_image, (224, 224))
                hand_image = np.expand_dims(hand_image, axis=0) / 255.0
                
                # Make prediction
                pred = model.predict(hand_image)
                pred_class = class_names[np.argmax(pred)]
                confidence = np.max(pred)
                
                # Convert OpenCV image to PIL for Nepali text rendering
                pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                draw = ImageDraw.Draw(pil_img)

                # Draw bounding box
                draw.rectangle([x_min, y_min, x_max, y_max], outline="green", width=2)

                # Draw text using Nepali font
                text = f'{pred_class} ({confidence:.2f})'
                draw.text((x_min + 10, y_min - 40), text, font=font, fill=(0, 255, 0))

                # Convert back to OpenCV format
                frame = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)

    # Show the frame
    cv2.imshow('Sign Language Detection', frame)

    # Exit on 'q' press
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Cleanup
cap.release()
cv2.destroyAllWindows()




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 422ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 307ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 214ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 202ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 210ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 209ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 252ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 194ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 204ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 212ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 238ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 184ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 199ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [3]:
import cv2
import numpy as np
import tensorflow as tf
import mediapipe as mp
from PIL import ImageFont, ImageDraw, Image

# Load the trained model
model = tf.keras.models.load_model('DSML3model.h5')

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.5)

# Class mapping
CLASS_MAP = {
    0: 'क', 1: 'ख', 2: 'ग', 3: 'घ', 4: 'ङ', 5: 'च',
    6: 'छ', 7: 'ज', 8: 'झ', 9: 'ञ', 10: 'ट', 11: 'ठ',
    12: 'ड', 13: 'ढ', 14: 'ण', 15: 'त', 16: 'थ', 17: 'द',
    18: 'ध', 19: 'न', 20: 'प', 21: 'फ', 22: 'ब', 23: 'भ',
    24: 'म', 25: 'य', 26: 'र', 27: 'ल', 28: 'व', 29: 'श',
    30: 'ष', 31: 'स', 32: 'ह', 33: 'क्ष', 34: 'त्र', 35: 'ज्ञ'
}

# Initialize webcam
cap = cv2.VideoCapture(0)

# Load a Nepali-compatible font
fontpath = "NotoSansDevanagari-Regular.ttf"  # Ensure the font file is in the working directory
font = ImageFont.truetype(fontpath, 30)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
        
    # Mirror the frame
    frame = cv2.flip(frame, 1)
    
    # Convert to RGB for MediaPipe
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    # Process with MediaPipe Hands
    results = hands.process(rgb_frame)
    
    if results.multi_hand_landmarks:
        hand_landmarks = results.multi_hand_landmarks[0]
        
        # Get bounding box coordinates
        x_coords = [lm.x * frame.shape[1] for lm in hand_landmarks.landmark]
        y_coords = [lm.y * frame.shape[0] for lm in hand_landmarks.landmark]
        
        x_min, x_max = int(min(x_coords)), int(max(x_coords))
        y_min, y_max = int(min(y_coords)), int(max(y_coords))
        
        # Add padding
        padding = int(0.2 * max(x_max - x_min, y_max - y_min))
        x_min = max(0, x_min - padding)
        y_min = max(0, y_min - padding)
        x_max = min(frame.shape[1], x_max + padding)
        y_max = min(frame.shape[0], y_max + padding)
        
        # Crop and preprocess hand region
        hand_roi = frame[y_min:y_max, x_min:x_max]
        if hand_roi.size > 0:
            processed = cv2.resize(hand_roi, (128, 128)) / 255.0
            prediction = model.predict(np.expand_dims(processed, 0), verbose=0)
            pred_class = np.argmax(prediction)
            
            # Convert OpenCV frame to PIL format
            pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            draw = ImageDraw.Draw(pil_img)

            # Draw bounding box
            draw.rectangle([x_min, y_min, x_max, y_max], outline="green", width=2)

            # Draw text using Nepali font
            draw.text((x_min + 10, y_min - 40), CLASS_MAP[pred_class], font=font, fill=(0, 255, 0))

            # Convert back to OpenCV format
            frame = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
    
    # Show frame
    cv2.imshow('Nepali Sign Language Detection', frame)
    
    # Break on 'q' key
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Cleanup
cap.release()
cv2.destroyAllWindows()




In [4]:
import cv2
import numpy as np
import tensorflow as tf
import mediapipe as mp
from PIL import ImageFont, ImageDraw, Image

# Load the trained models
word_model = tf.keras.models.load_model('nslt_model.h5')  # Model for common words
alphabet_model = tf.keras.models.load_model('DSML3model.h5')  # Model for alphabets

# Define class names for words
word_class_names = ["नमस्कार", "म", "घर", "धन्यवाद"]

# Define class names for Nepali alphabets
alphabet_class_map = {
    0: 'क', 1: 'ख', 2: 'ग', 3: 'घ', 4: 'ङ', 5: 'च',
    6: 'छ', 7: 'ज', 8: 'झ', 9: 'ञ', 10: 'ट', 11: 'ठ',
    12: 'ड', 13: 'ढ', 14: 'ण', 15: 'त', 16: 'थ', 17: 'द',
    18: 'ध', 19: 'न', 20: 'प', 21: 'फ', 22: 'ब', 23: 'भ',
    24: 'म', 25: 'य', 26: 'र', 27: 'ल', 28: 'व', 29: 'श',
    30: 'ष', 31: 'स', 32: 'ह', 33: 'क्ष', 34: 'त्र', 35: 'ज्ञ'
}

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.7)

# Initialize webcam
cap = cv2.VideoCapture(0)

# Load a Nepali-compatible font (Ensure the font file is in the working directory)
font_path = "NotoSansDevanagari-Regular.ttf"
font = ImageFont.truetype(font_path, 30)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Flip frame horizontally for mirror effect
    frame = cv2.flip(frame, 1)

    # Convert to RGB for MediaPipe
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Process with MediaPipe Hands
    results = hands.process(rgb_frame)

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            # Get bounding box coordinates
            x = [lm.x for lm in hand_landmarks.landmark]
            y = [lm.y for lm in hand_landmarks.landmark]

            # Convert normalized coordinates to pixel values
            height, width = frame.shape[:2]
            x_min, x_max = int(min(x) * width), int(max(x) * width)
            y_min, y_max = int(min(y) * height), int(max(y) * height)

            # Add padding
            padding = int(0.2 * max(x_max - x_min, y_max - y_min))
            x_min = max(0, x_min - padding)
            y_min = max(0, y_min - padding)
            x_max = min(width, x_max + padding)
            y_max = min(height, y_max + padding)

            # Crop and preprocess hand region
            hand_roi = frame[y_min:y_max, x_min:x_max]
            if hand_roi.size > 0:
                # Resize separately for each model
                word_processed = cv2.resize(hand_roi, (224, 224)) / 255.0
                alphabet_processed = cv2.resize(hand_roi, (128, 128)) / 255.0

                # Expand dimensions
                word_processed = np.expand_dims(word_processed, axis=0)
                alphabet_processed = np.expand_dims(alphabet_processed, axis=0)

                # Predict using both models
                word_pred = word_model.predict(word_processed, verbose=0)
                alphabet_pred = alphabet_model.predict(alphabet_processed, verbose=0)

                # Select the best prediction
                word_confidence = np.max(word_pred)
                alphabet_confidence = np.max(alphabet_pred)

                if word_confidence > alphabet_confidence:
                    pred_class = word_class_names[np.argmax(word_pred)]
                    confidence = word_confidence
                else:
                    pred_class = alphabet_class_map[np.argmax(alphabet_pred)]
                    confidence = alphabet_confidence


                # Convert OpenCV frame to PIL format
                pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                draw = ImageDraw.Draw(pil_img)

                # Draw bounding box
                draw.rectangle([x_min, y_min, x_max, y_max], outline="green", width=2)

                # Draw text using Nepali font
                text = f'{pred_class} ({confidence:.2f})'
                draw.text((x_min + 10, y_min - 40), text, font=font, fill=(0, 255, 0))

                # Convert back to OpenCV format
                frame = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)

    # Show the frame
    cv2.imshow('Nepali Sign Language Detection', frame)

    # Exit on 'q' press
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Cleanup
cap.release()
cv2.destroyAllWindows()


