In [1]:
import os

import cv2
import mediapipe as mp
import numpy as np
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
from mediapipe.tasks import python
from mediapipe.tasks.python import vision


In [2]:
def draw_keyboard(frame, alpha=0.5):
    """Blend a semi-transparent on-screen keyboard into ``frame`` in place.

    Four rows of keys are painted on a copy of ``frame`` and that copy is then
    alpha-blended back into ``frame`` itself (``frame`` is the destination of
    the blend, so the caller's image is modified).

    Args:
        frame: Image to draw on; must provide ``copy()`` and be accepted by
            the OpenCV drawing functions.
        alpha: Weight of the keyboard layer in the blend; the untouched frame
            gets ``1 - alpha``.

    Returns:
        A list of ``(label, top_left, bottom_right)`` tuples, one per key, in
        drawing order (row by row, left to right). Corners are ``(x, y)``
        pixel tuples.
    """
    # Layout: where the grid starts, how big a key is, and the gap between keys.
    origin_x, origin_y = 50, 50
    key_width, key_height = 60, 60
    padding = 10
    rows = ["1234567890-=", "QWERTYUIOP[]\\", "ASDFGHJKL;'", "ZXCVBNM,./"]

    fill_color = (0, 0, 200)
    line_color = (255, 255, 255)  # shared by key labels and key borders

    # Distance between the top-left corners of neighbouring keys.
    step_x = key_width + padding
    step_y = key_height + padding

    canvas = frame.copy()
    key_positions = []

    for row_idx, labels in enumerate(rows):
        top = origin_y + row_idx * step_y
        text_y = top + int(key_height / 2) + 10
        for col_idx, label in enumerate(labels):
            left = origin_x + col_idx * step_x
            top_left = (left, top)
            bottom_right = (left + key_width, top + key_height)

            # Remember the hit box so callers can test a fingertip against it.
            key_positions.append((label, top_left, bottom_right))

            # Filled body first, then the outline, then the label on top.
            cv2.rectangle(canvas, top_left, bottom_right, fill_color, -1)
            cv2.rectangle(canvas, top_left, bottom_right, line_color, 2)
            cv2.putText(canvas, label, (left + 15, text_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, line_color, 1, cv2.LINE_AA)

    # Write the blended result straight back into the caller's frame.
    cv2.addWeighted(canvas, alpha, frame, 1 - alpha, 0, frame)
    return key_positions

In [3]:
# Short aliases for the MediaPipe Tasks classes used in this notebook.
_vision = mp.tasks.vision
BaseOptions = mp.tasks.BaseOptions
GestureRecognizer = _vision.GestureRecognizer
GestureRecognizerOptions = _vision.GestureRecognizerOptions
GestureRecognizerResult = _vision.GestureRecognizerResult
VisionRunningMode = _vision.RunningMode

# Shared state: written by the recognizer callback (print_result) and read by
# the rendering code. Starts out empty until the first result arrives.
recognized_gesture, landmarks = "", []
def print_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
    """Store the latest recognition result in the module-level state.

    Intended to be used as the recognizer's ``result_callback``. It updates two
    globals that the rendering code reads:

    * ``recognized_gesture`` - one ``"<name> (<score>), "`` fragment per entry
      of ``result.gestures`` (the first category of each entry), or
      ``"NOT FOUND"`` when no gesture was reported.
    * ``landmarks`` - ``result.hand_landmarks``, or an empty list when no hand
      was reported, so stale landmarks are not kept after the hand disappears.

    Args:
        result: Recognition result; only ``gestures`` and ``hand_landmarks``
            are read.
        output_image: Unused; part of the callback signature.
        timestamp_ms: Unused; part of the callback signature.
    """
    global recognized_gesture
    global landmarks

    if result.gestures:
        # Build the full string first and assign once, so a reader never sees
        # a half-built value (the original reset to "" and appended in place).
        recognized_gesture = "".join(
            f"{gesture[0].category_name} ({gesture[0].score:.2f}), "
            for gesture in result.gestures
            if gesture
        )
    else:
        recognized_gesture = "NOT FOUND"

    # Always overwrite: keeping the previous value when no hand is present
    # would leave the last hand drawn on screen indefinitely.
    landmarks = result.hand_landmarks if result.hand_landmarks else []


# Location of the gesture-recognizer model bundle. Set the GESTURE_MODEL_PATH
# environment variable to run on another machine; the default is the original
# author's local path.
MODEL_PATH = os.environ.get(
    "GESTURE_MODEL_PATH",
    'C:\\Users\\Marco\\Desktop\\GeReco\\gesture_recognizer.task',
)

# Live-stream configuration: results are delivered to print_result rather than
# returned from the recognize call, and up to two hands are tracked.
options = GestureRecognizerOptions(
    base_options=BaseOptions(model_asset_path=MODEL_PATH),
    running_mode=VisionRunningMode.LIVE_STREAM,
    num_hands=2,
    result_callback=print_result)

# Aliases used when wrapping OpenCV frames as MediaPipe images.
mp_image = mp.Image
mp_image_format = mp.ImageFormat

In [4]:
import time

TIME_THRESHOLD = 2  # seconds the fingertip must stay on one key to press it
key_timer = None  # time.time() at which the current hover started, or None
key_timer_label = None  # label of the key that key_timer belongs to, or None


def is_finger_over_key(finger_x, finger_y, key_positions):
    """Dwell-based key press detection for a single fingertip.

    Finds the key whose rectangle (edges inclusive) contains the point and
    tracks how long the fingertip has stayed on that same key across calls.
    The dwell timer restarts whenever the fingertip moves to a different key
    and is cleared when it is over no key at all, so only an uninterrupted
    hover of ``TIME_THRESHOLD`` seconds on one key counts as a press.

    Args:
        finger_x: Fingertip x coordinate, in the same units as the key boxes.
        finger_y: Fingertip y coordinate, in the same units as the key boxes.
        key_positions: Iterable of ``(label, top_left, bottom_right)`` tuples
            with ``(x, y)`` corners.

    Returns:
        The label of the key that was just pressed, or ``None`` if no press
        happened on this call. After a press the timer is cleared, so a
        continued hover starts a new dwell period.
    """
    global key_timer, key_timer_label

    hovered = None
    for key, top_left, bottom_right in key_positions:
        if top_left[0] <= finger_x <= bottom_right[0] and top_left[1] <= finger_y <= bottom_right[1]:
            hovered = key
            break

    if hovered is None:
        # Fingertip left the keyboard: abandon any dwell in progress.
        key_timer = None
        key_timer_label = None
        return None

    now = time.time()
    if key_timer is None or hovered != key_timer_label:
        # New hover, or the fingertip slid onto a different key: restart.
        key_timer = now
        key_timer_label = hovered
        print(f"Start timer for key: {hovered}, start_time: {key_timer}")
        return None

    if now - key_timer >= TIME_THRESHOLD:
        print(f"Finger over key: {hovered}")
        key_timer = None
        key_timer_label = None
        return hovered

    return None

    

In [7]:
INDEX_TIP = 8  # index of the index-fingertip entry within one hand's landmark list

# Landmark index pairs joined by a line when drawing each hand.
connections = [
    (0, 1), (1, 2), (2, 3), (3, 4),  # Thumb
    (0, 5), (5, 6), (6, 7), (7, 8),  # Index finger
    (0, 9), (9, 10), (10, 11), (11, 12),  # Middle finger
    (0, 13), (13, 14), (14, 15), (15, 16),  # Ring finger
    (0, 17), (17, 18), (18, 19), (19, 20)  # Pinky
]

# Requested capture resolution. The camera may deliver a different size, so
# all drawing below scales by the actual frame shape instead of these values.
frame_width = 1280  # Width in pixels
frame_height = 720  # Height in pixels

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.release()
    # Raise instead of calling exit() so the failure shows up as an ordinary
    # error in the notebook.
    raise RuntimeError("Cannot open camera")

cap.set(cv2.CAP_PROP_FRAME_WIDTH, frame_width)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, frame_height)

try:
    # Create the recognizer once for the whole session; building it per frame
    # reloads the model on every iteration.
    with GestureRecognizer.create_from_options(options) as recognizer:
        start_time = time.monotonic()  # reference point for frame timestamps
        last_timestamp_ms = -1

        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("Can't receive frame (stream end?). Exiting ...")
                break

            height, width = frame.shape[:2]

            # Convert the OpenCV frame (BGR) to a MediaPipe image (RGB).
            numpy_frame_from_opencv = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image_object = mp_image(image_format=mp_image_format.SRGB, data=numpy_frame_from_opencv)

            # Timestamps sent to one recognizer must keep increasing, so force
            # at least +1 ms even if two frames land in the same millisecond.
            frame_timestamp_ms = max(int((time.monotonic() - start_time) * 1000), last_timestamp_ms + 1)
            last_timestamp_ms = frame_timestamp_ms
            recognizer.recognize_async(mp_image_object, timestamp_ms=frame_timestamp_ms)

            # Snapshot the shared state once per frame. The callback rebinds
            # these globals (presumably from another thread - TODO confirm), so
            # reading them repeatedly could mix two different results.
            gesture_text = recognized_gesture
            hands = landmarks

            # Overlay the recognized gesture near the bottom of the frame.
            cv2.putText(frame, f"Gesture: {gesture_text}", (10, height - 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            for it, hand in enumerate(hands):
                tip = hand[INDEX_TIP]
                cv2.putText(frame, f"Index Tip Position Y: {tip.y * height:.2f}, X: {tip.x * width:.2f}", (10, height - 120 + it * 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                # Landmark coordinates are multiplied by the frame size to get pixels.
                points = [(int(landmark.x * width), int(landmark.y * height)) for landmark in hand]
                for point in points:
                    cv2.circle(frame, point, 5, (0, 255, 0), -1)
                for start, end in connections:
                    cv2.line(frame, points[start], points[end], (255, 0, 0), 2)

            keyposition = draw_keyboard(frame, alpha=0.5)

            # Only treat the first hand as typing while it is pointing; the
            # emptiness check guards against a gesture string without landmarks.
            if hands and "Pointing_Up" in gesture_text:
                tip = hands[0][INDEX_TIP]
                is_finger_over_key(tip.x * width, tip.y * height, keyposition)

            cv2.imshow('Virtual Keyboard', frame)

            # Mask to the low byte: some platforms set extra high bits in the key code.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()
