In [17]:
import cv2 as cv
import mediapipe as mp
import numpy as np
import time
from threading import Thread

In [8]:
# Regular way of video capturing (without threading)

vs1 = cv.VideoCapture('http://192.168.0.114:4747/video')
vs2 = cv.VideoCapture('http://192.168.0.155:4747/video')

try:
    # The two captures are read one after the other on this single thread,
    # so every iteration waits for camera 1 and then for camera 2.
    while vs1.isOpened() and vs2.isOpened():
        ret1, frame1 = vs1.read()
        ret2, frame2 = vs2.read()

        # Stop as soon as either camera fails to deliver a frame.
        if not ret1 or not ret2:
            break

        cv.imshow('Camera 1', frame1)
        cv.imshow('Camera 2', frame2)

        # Press 'q' in one of the windows to quit.
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Runs on normal exit, on an exception and on a kernel interrupt, so the
    # camera connections and the windows are never left open.
    vs1.release()
    vs2.release()
    cv.destroyAllWindows()

One of the feeds seems to lag a bit, so threaded capture might work better.

In [18]:
## Threaded video capture class:

class VideoStream:
    """Keep the most recent frame of a video source available without blocking.

    A daemon thread started by ``start()`` repeatedly reads the underlying
    ``cv.VideoCapture`` and stores the last successfully read frame;
    ``read()`` simply hands that frame back.
    """

    # Upper bound, in seconds, that stop() waits for the reader thread to end.
    JOIN_TIMEOUT = 2.0

    def __init__(self, url):
        """Open ``url`` with ``cv.VideoCapture``; reading begins with ``start()``."""
        self.stream = cv.VideoCapture(url)
        self.frame = None       # latest frame; None until the first good read
        self.stopped = False    # set by stop() to end the reader loop
        self._thread = None     # reader thread created by start()

    def start(self):
        """Start the background reader (at most one at a time) and return ``self``."""
        if self._thread is None or not self._thread.is_alive():
            self._thread = Thread(target=self.update, daemon=True)
            self._thread.start()
        return self

    def update(self):
        """Reader loop: store every successfully read frame until stopped."""
        while not self.stopped:
            if self.stream.isOpened():
                ret, frame = self.stream.read()
                if ret:
                    self.frame = frame
            # Short pause between reads so the loop does not spin flat out.
            time.sleep(0.01)

    def isOpened(self):
        """Return whether the underlying capture is open.

        Spelled like ``cv.VideoCapture.isOpened`` so a ``VideoStream`` can be
        used in loops written for a plain capture object.
        """
        return self.stream.isOpened()

    # Original spelling, kept so existing callers keep working.
    is_Opened = isOpened

    def read(self):
        """Return the latest stored frame, or ``None`` if none has been read yet."""
        return self.frame

    def stop(self):
        """Stop the reader thread, then release the capture.

        The reader is given up to ``JOIN_TIMEOUT`` seconds to leave its loop
        before the capture is released, so ``release()`` does not normally
        overlap with a ``read()`` in progress on the other thread (a likely
        cause of ``cv2.error`` being raised inside ``update``).
        """
        self.stopped = True
        reader = self._thread
        if reader is not None and reader.is_alive():
            reader.join(timeout=self.JOIN_TIMEOUT)
        self.stream.release()



In [19]:
## Video capturing from two cameras
# Each VideoStream opens its URL; start() launches the background reader
# thread and returns the stream object itself, so vs1/vs2 are the streams.
# NOTE(review): camera 2 is ...0.177 here but ...0.155 in the non-threaded
# cell — confirm which address is current.

vs1 = VideoStream('http://192.168.0.114:4747/video').start()
vs2 = VideoStream('http://192.168.0.177:4747/video').start()


In [19]:
#Testing the threaded video capture

try:
    # VideoStream names its open-check `is_Opened`; calling `isOpened()` on it
    # raises AttributeError.
    while vs1.is_Opened() and vs2.is_Opened():
        frame1 = vs1.read()
        frame2 = vs2.read()

        # read() returns None until the reader thread has stored a first
        # frame; wait briefly instead of passing None to imshow.
        if frame1 is None or frame2 is None:
            time.sleep(0.01)
            continue

        cv.imshow('Camera 1', frame1)
        cv.imshow('Camera 2', frame2)

        # Press 'q' in one of the windows to quit.
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always stop the reader threads and close the windows, including when
    # the loop raises or the kernel is interrupted.
    vs1.stop()
    vs2.stop()
    cv.destroyAllWindows()

It seems to be working a lot better now. There's still some lag, but it's almost unnoticeable.

In [21]:
def draw_box(frame, box_color=(0, 0, 255), box_size=300):
    """Outline a square centred on ``frame``, drawing directly onto it.

    Args:
        frame: Image array; only its first two ``shape`` entries
            (rows, columns) are used.
        box_color: Colour passed to ``cv.rectangle`` (the default is red in
            OpenCV's BGR order).
        box_size: Nominal side length in pixels. Each side extends
            ``box_size // 2`` pixels from the centre.

    Returns:
        None. The frame is modified in place.
    """
    rows, cols = frame.shape[:2]
    half_side = box_size // 2
    centre_x, centre_y = cols // 2, rows // 2

    upper_left = (centre_x - half_side, centre_y - half_side)
    lower_right = (centre_x + half_side, centre_y + half_side)

    # Outline only, 2 px thick.
    cv.rectangle(frame, upper_left, lower_right, box_color, 2)
    
    

In [None]:
# Testing the box drawing function

try:
    # VideoStream names its open-check `is_Opened`; calling `isOpened()` on it
    # raises AttributeError.
    while vs1.is_Opened() and vs2.is_Opened():
        frame1 = vs1.read()
        frame2 = vs2.read()

        # read() returns None until the reader thread has stored a first
        # frame. Check before touching .shape, and wait rather than quit.
        if frame1 is None or frame2 is None:
            time.sleep(0.01)
            continue

        # Resize frame2 to match frame1
        frame2 = cv.resize(frame2, (frame1.shape[1], frame1.shape[0]))

        draw_box(frame1)
        draw_box(frame2)

        cv.imshow('Camera 1', frame1)
        cv.imshow('Camera 2', frame2)

        # Press 'q' in one of the windows to quit.
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always stop the reader threads and close the windows, including when
    # the loop raises or the kernel is interrupted.
    vs1.stop()
    vs2.stop()
    cv.destroyAllWindows()

In [20]:
# Draw a red guide box
def draw_box(frame, box_color=(0, 0, 255), scale=0.6, thickness=2):
    """Outline a centred guide box covering ``scale`` of each frame dimension.

    The rectangle is drawn directly onto ``frame``.

    Args:
        frame: Image array. Only the first two ``shape`` entries
            (rows, columns) are read, so 2-D frames work as well as
            3-channel ones.
        box_color: Colour passed to ``cv.rectangle`` (the default is red in
            OpenCV's BGR order). Accepted so calls written for the
            fixed-size ``draw_box`` defined earlier in the notebook still
            work.
        scale: Fraction of the frame width and height the box spans.
        thickness: Outline thickness in pixels.

    Returns:
        None. The frame is modified in place.
    """
    h, w = frame.shape[:2]
    box_w, box_h = int(w * scale), int(h * scale)
    x1, y1 = (w - box_w) // 2, (h - box_h) // 2
    x2, y2 = x1 + box_w, y1 + box_h
    cv.rectangle(frame, (x1, y1), (x2, y2), box_color, thickness)


# Bitwise AND overlay of two frames
# def bitwise_and_frames(frame1, frame2):
#     if frame1 is None or frame2 is None:
#         return None
#     if frame1.shape != frame2.shape:
#         frame2 = cv.resize(frame2, (frame1.shape[1], frame1.shape[0]))
#     return cv.bitwise_and(frame1, frame2)


# MediaPipe Setup
# Module shortcuts plus one shared hand detector, configured for video input
# with up to two hands per frame.
# NOTE(review): this single `hands` object is later fed frames from both
# cameras; with static_image_mode=False MediaPipe treats its input as one
# video stream — confirm that alternating cameras through it is acceptable.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=2,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)
mp_draw = mp.solutions.drawing_utils

# Classify gesture based on landmarks
def classify_pose(hand_landmarks):
    """Map one hand's landmarks to a gesture label.

    Three flags are derived by comparing landmark ``y`` values (a smaller
    ``y`` counts as "extended"):

    * index:  index fingertip vs. index MCP joint
    * middle: middle fingertip vs. middle MCP joint
    * thumb:  thumb tip vs. index MCP joint

    Args:
        hand_landmarks: Object whose ``landmark`` sequence is indexed by
            ``mp_hands.HandLandmark`` members.

    Returns:
        "VICTORY", "FIST", "THUMBS_UP", "Open" or "Unknown".
    """
    points = hand_landmarks.landmark
    joint = mp_hands.HandLandmark

    index_knuckle_y = points[joint.INDEX_FINGER_MCP].y
    index_up = points[joint.INDEX_FINGER_TIP].y < index_knuckle_y
    middle_up = points[joint.MIDDLE_FINGER_TIP].y < points[joint.MIDDLE_FINGER_MCP].y
    thumb_up = points[joint.THUMB_TIP].y < index_knuckle_y

    # (index, middle, thumb) -> label; any other combination is "Unknown".
    labels = {
        (True, True, False): "VICTORY",
        (False, False, False): "FIST",
        (False, False, True): "THUMBS_UP",
        (True, True, True): "Open",
    }
    return labels.get((index_up, middle_up, thumb_up), "Unknown")

# Detect hands and classify gesture
def detect_and_classify(frame):
    """Detect a hand in ``frame``, draw it, and classify its gesture.

    The frame is converted from BGR to RGB and passed to the shared ``hands``
    detector. Only the first detected hand is used: its landmarks are drawn
    onto ``frame`` in place and handed to ``classify_pose``.

    Args:
        frame: BGR image to analyse; annotated in place when a hand is found.

    Returns:
        ``(pose, hand_landmarks)``. This is ``("Unknown", None)`` when no
        hand is detected.
    """
    detection = hands.process(cv.cvtColor(frame, cv.COLOR_BGR2RGB))
    found = detection.multi_hand_landmarks
    if not found:
        return "Unknown", None

    first_hand = found[0]
    mp_draw.draw_landmarks(frame, first_hand, mp_hands.HAND_CONNECTIONS)
    return classify_pose(first_hand), first_hand

# Get edge-detected hand region
def get_edge_hand(frame, hand_landmarks, margin=30, out_size=300):
    """Return a square Canny edge image of the hand's bounding region.

    The landmark coordinates are scaled to pixels and their bounding box is
    grown by ``margin`` and clamped to the frame. That crop is converted to
    grayscale, edge-detected, converted back to 3 channels and resized.

    Args:
        frame: BGR image the landmarks were detected in.
        hand_landmarks: Object whose ``landmark`` items expose ``x``/``y``
            as fractions of the frame width/height.
        margin: Pixels added around the landmark bounding box.
        out_size: Side length in pixels of the returned square image.

    Returns:
        ``(out_size, out_size, 3)`` uint8 edge image. If the clamped box is
        empty (all landmarks fall outside the frame) an all-black image of
        the same shape is returned instead of raising.
    """
    h, w = frame.shape[:2]
    xs = [int(lm.x * w) for lm in hand_landmarks.landmark]
    ys = [int(lm.y * h) for lm in hand_landmarks.landmark]

    # Clamp every edge into the frame. A negative slice end would otherwise
    # silently select the wrong region.
    x_min = min(max(min(xs) - margin, 0), w)
    y_min = min(max(min(ys) - margin, 0), h)
    x_max = min(max(max(xs) + margin, 0), w)
    y_max = min(max(max(ys) + margin, 0), h)

    hand_crop = frame[y_min:y_max, x_min:x_max]
    if hand_crop.size == 0:
        # Nothing to process; match the black placeholder tile callers use.
        return np.zeros((out_size, out_size, 3), dtype=np.uint8)

    gray = cv.cvtColor(hand_crop, cv.COLOR_BGR2GRAY)
    edges = cv.Canny(gray, 100, 200)
    edges_bgr = cv.cvtColor(edges, cv.COLOR_GRAY2BGR)
    return cv.resize(edges_bgr, (out_size, out_size))

# Main Loop
try:
    while True:
        frame1 = vs1.read()
        frame2 = vs2.read()

        # read() returns None until each reader thread has stored a first
        # frame. Check before touching .shape/.copy(), and pause briefly
        # instead of spinning.
        if frame1 is None or frame2 is None:
            time.sleep(0.01)
            continue

        frame2 = cv.resize(frame2, (frame1.shape[1], frame1.shape[0]))  # Resize frame2 to match frame1

        # Unannotated copies: edge detection must not see the guide box,
        # landmarks or text drawn onto frame1/frame2 below.
        frame11 = frame1.copy()
        frame22 = frame2.copy()

        draw_box(frame1)
        draw_box(frame2)

        gesture1, landmarks1 = detect_and_classify(frame1)
        gesture2, landmarks2 = detect_and_classify(frame2)

        cv.putText(frame1, f'P1: {gesture1}', (10, 60), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv.putText(frame2, f'P2: {gesture2}', (10, 30), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # A match needs identical labels that are not "Unknown".
        match_text = "MATCH!" if gesture1 == gesture2 and "Unknown" not in gesture1 else "NO MATCH"
        match_color = (0, 255, 0) if match_text == "MATCH!" else (0, 0, 255)

        # result_frame = bitwise_and_frames(frame1, frame2)
        # if result_frame is not None:
        #     cv.putText(result_frame, match_text, (10, 100), cv.FONT_HERSHEY_SIMPLEX, 1.2, match_color, 3)

        # Edge detection output (black 300x300 tile when no hand was found)
        if landmarks1 is not None:
            edge1 = get_edge_hand(frame11, landmarks1)
        else:
            edge1 = np.zeros((300, 300, 3), dtype=np.uint8)

        if landmarks2 is not None:
            edge2 = get_edge_hand(frame22, landmarks2)
        else:
            edge2 = np.zeros((300, 300, 3), dtype=np.uint8)

        combined_edge = np.hstack((edge1, edge2))
        cv.putText(combined_edge, match_text, (10, 40), cv.FONT_HERSHEY_SIMPLEX, 1.2, match_color, 3)

        # Show all windows
        cv.imshow("Player 1", frame1)
        cv.imshow("Player 2", frame2)
        # if result_frame is not None:
        #     cv.imshow("Hand Match Overlay", result_frame)
        cv.imshow("Edge Detection View", combined_edge)

        # Press 'q' in one of the windows to quit.
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Cleanup: runs on normal exit, on an exception and on a kernel interrupt.
    vs1.stop()
    vs2.stop()
    cv.destroyAllWindows()

Exception in thread Thread-22:
Traceback (most recent call last):
  File "c:\Users\khare\Anaconda3\lib\threading.py", line 980, in _bootstrap_inner
    self.run()
  File "c:\Users\khare\Anaconda3\lib\threading.py", line 917, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\khare\AppData\Local\Temp\ipykernel_42564\3824750881.py", line 16, in update
cv2.error: Unknown C++ exception from OpenCV code


In [None]:


# ========== MediaPipe Setup for Gesture Detection ==========
# Module shortcuts plus one shared hand detector: video mode, a single hand
# per frame, and stricter confidence thresholds (0.7 detection / 0.6 tracking).
# NOTE(review): this single `hands` object is fed frames from both cameras in
# the main loop; with static_image_mode=False MediaPipe treats its input as
# one video stream — confirm that alternating cameras through it is acceptable.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1,
                       min_detection_confidence=0.7, min_tracking_confidence=0.6)

# ========== Threaded Camera Stream Class ==========
class VideoStream:
    """Keep the most recent frame of a video source available without blocking.

    A daemon thread started by ``start()`` repeatedly reads the underlying
    ``cv.VideoCapture`` and stores the last successfully read frame;
    ``read()`` simply hands that frame back.
    """

    # Upper bound, in seconds, that stop() waits for the reader thread to end.
    JOIN_TIMEOUT = 2.0

    def __init__(self, url):
        """Open ``url`` with ``cv.VideoCapture``; reading begins with ``start()``."""
        self.stream = cv.VideoCapture(url)
        self.frame = None       # latest frame; None until the first good read
        self.stopped = False    # set by stop() to end the reader loop
        self._thread = None     # reader thread created by start()

    def start(self):
        """Start the background reader (at most one at a time) and return ``self``."""
        if self._thread is None or not self._thread.is_alive():
            self._thread = Thread(target=self.update, daemon=True)
            self._thread.start()
        return self

    def update(self):
        """Reader loop: store every successfully read frame until stopped."""
        while not self.stopped:
            if self.stream.isOpened():
                ret, frame = self.stream.read()
                if ret:
                    self.frame = frame
            # Short pause between reads so the loop does not spin flat out.
            time.sleep(0.01)

    def isOpened(self):
        """Return whether the underlying capture is open.

        Spelled like ``cv.VideoCapture.isOpened`` so a ``VideoStream`` can be
        used in loops written for a plain capture object.
        """
        return self.stream.isOpened()

    # Alternative spelling used by the earlier definition of this class.
    is_Opened = isOpened

    def read(self):
        """Return the latest stored frame, or ``None`` if none has been read yet."""
        return self.frame

    def stop(self):
        """Stop the reader thread, then release the capture.

        The reader is given up to ``JOIN_TIMEOUT`` seconds to leave its loop
        before the capture is released, so ``release()`` does not normally
        overlap with a ``read()`` in progress on the other thread (a likely
        cause of ``cv2.error`` being raised inside ``update``).
        """
        self.stopped = True
        reader = self._thread
        if reader is not None and reader.is_alive():
            reader.join(timeout=self.JOIN_TIMEOUT)
        self.stream.release()

# ========== Pose Classifier ==========
def classify_pose(hand_landmarks):
    """Map one hand's landmarks to "VICTORY", "FIST", "THUMBS_UP" or "OPEN".

    Flags used:

    * index / middle: fingertip ``y`` is smaller than its MCP joint's ``y``
    * thumb: thumb tip ``x`` is smaller than the index MCP joint's ``x``

    Rules (everything not listed is "OPEN"):

    * index and middle, no thumb -> "VICTORY"
    * neither index nor middle (thumb ignored) -> "FIST"
    * index without middle, with thumb -> "THUMBS_UP"

    NOTE(review): "THUMBS_UP" requires an extended index finger here, whereas
    the earlier version of this function required index and middle folded —
    confirm which rule is intended. The x-based thumb test presumably depends
    on which hand is shown and on mirroring; verify with both hands.

    Args:
        hand_landmarks: Object whose ``landmark`` sequence is indexed by
            ``mp_hands.HandLandmark`` members.
    """
    points = hand_landmarks.landmark
    joint = mp_hands.HandLandmark

    index_knuckle = points[joint.INDEX_FINGER_MCP]
    index_up = points[joint.INDEX_FINGER_TIP].y < index_knuckle.y
    middle_up = points[joint.MIDDLE_FINGER_TIP].y < points[joint.MIDDLE_FINGER_MCP].y
    thumb_out = points[joint.THUMB_TIP].x < index_knuckle.x

    if index_up:
        if middle_up:
            return "OPEN" if thumb_out else "VICTORY"
        return "THUMBS_UP" if thumb_out else "OPEN"
    return "OPEN" if middle_up else "FIST"

# ========== Start the Two Streams ==========
# Each VideoStream opens its URL; start() launches the background reader
# thread and returns the stream object itself, so vs1/vs2 are the streams.
# NOTE(review): the camera addresses differ from the earlier cells (vs1 was
# ...0.114 there) — confirm which devices are player 1 and player 2.
vs1 = VideoStream('http://192.168.0.177:4747/video').start()
vs2 = VideoStream('http://192.168.0.155:4747/video').start()

# ========== Create Display Windows ==========
# All three windows are created as resizable first, then each is set to
# 640x480. The loop variable is removed afterwards so no extra name is left
# in the notebook namespace.
for _title in ("Player 1 View", "Player 2 View", "Blended View"):
    cv.namedWindow(_title, cv.WINDOW_NORMAL)
for _title in ("Player 1 View", "Player 2 View", "Blended View"):
    cv.resizeWindow(_title, 640, 480)
del _title

# ========== Main Loop ==========
try:
    while True:
        frame1 = vs1.read()
        frame2 = vs2.read()

        # read() returns None until each reader thread has stored a first
        # frame; pause briefly instead of spinning at full speed.
        if frame1 is None or frame2 is None:
            time.sleep(0.01)
            continue

        # Match frame sizes
        if frame1.shape != frame2.shape:
            frame2 = cv.resize(frame2, (frame1.shape[1], frame1.shape[0]))

        # === Gesture Detection ===
        # NOTE(review): both cameras go through the same `hands` object, which
        # is configured for video (tracking) input — confirm that alternating
        # two streams through one tracker is acceptable.
        p1_pose, p2_pose = None, None

        results1 = hands.process(cv.cvtColor(frame1, cv.COLOR_BGR2RGB))
        if results1.multi_hand_landmarks:
            hand_landmarks = results1.multi_hand_landmarks[0]
            p1_pose = classify_pose(hand_landmarks)
            mp_drawing.draw_landmarks(frame1, hand_landmarks, mp_hands.HAND_CONNECTIONS)

        results2 = hands.process(cv.cvtColor(frame2, cv.COLOR_BGR2RGB))
        if results2.multi_hand_landmarks:
            hand_landmarks = results2.multi_hand_landmarks[0]
            p2_pose = classify_pose(hand_landmarks)
            mp_drawing.draw_landmarks(frame2, hand_landmarks, mp_hands.HAND_CONNECTIONS)

        # === Blended Channel View: Green from Frame1, Red from Frame2 ===
        # OpenCV frames are BGR, so split() yields (blue, green, red).
        # Unpacking them as (r, g, b) swapped red and blue in the blend.
        b1, g1, r1 = cv.split(frame1)
        b2, g2, r2 = cv.split(frame2)

        # Combine: Green channel from Player 1, Red from Player 2, Blue = average.
        # The average is computed in uint16 so the sum cannot overflow uint8.
        blended = cv.merge([
            ((b1.astype(np.uint16) + b2.astype(np.uint16)) // 2).astype(np.uint8),  # Blue
            g1,  # Green from P1
            r2   # Red from P2
        ])

        # === Annotate the Blended View ===
        cv.putText(blended, f"P1 Gesture: {p1_pose or 'None'}", (10, 30), cv.FONT_HERSHEY_SIMPLEX, 0.8, (0,255,0), 2)
        cv.putText(blended, f"P2 Gesture: {p2_pose or 'None'}", (10, 60), cv.FONT_HERSHEY_SIMPLEX, 0.8, (0,0,255), 2)

        # The verdict is only shown when both players have a detected gesture.
        if p1_pose and p2_pose:
            match = "MATCH!" if p1_pose == p2_pose else "NO MATCH!"
            color = (0,255,0) if p1_pose == p2_pose else (0,0,255)
            cv.putText(blended, match, (10, 100), cv.FONT_HERSHEY_SIMPLEX, 1, color, 2)

        # === Display Output ===
        cv.imshow("Player 1 View", frame1)
        cv.imshow("Player 2 View", frame2)
        cv.imshow("Blended View", blended)

        # Press 'q' in one of the windows to quit.
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # ========== Cleanup ==========
    # Runs on normal exit, on an exception and on a kernel interrupt.
    vs1.stop()
    vs2.stop()
    hands.close()
    cv.destroyAllWindows()


In [15]:

# Single-camera landmark demo: print every hand landmark's pixel position and
# highlight the thumb tip.
cap = cv.VideoCapture(1)
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=2,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.6
)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frameRGB = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
        results = hands.process(frameRGB)

        if results.multi_hand_landmarks:
            # The frame size is the same for every landmark, so read it once.
            h, w = frame.shape[:2]
            for hand_landmarks in results.multi_hand_landmarks:
                # `landmark_id` instead of `id`, which would shadow the
                # builtin for the rest of the notebook.
                for landmark_id, lm in enumerate(hand_landmarks.landmark):
                    # Landmark x/y are fractions of width/height; scale to pixels.
                    cx, cy = int(lm.x * w), int(lm.y * h)
                    # NOTE: prints 21 lines per detected hand per frame.
                    print(f"Landmark ID: {landmark_id}, Coordinates: ({cx}, {cy})")
                    if landmark_id == mp_hands.HandLandmark.THUMB_TIP:
                        cv.circle(frame, (cx, cy), 15, (255, 255, 0), -1)

                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

        cv.imshow('Webcam Feed', frame)
        # Press 'q' in the window to quit.
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Runs on normal exit, on an exception and on a kernel interrupt, so the
    # camera and the detector are always released.
    cap.release()
    hands.close()
    cv.destroyAllWindows()


Landmark ID: 0, Coordinates: (171, 448)
Landmark ID: 1, Coordinates: (238, 440)
Landmark ID: 2, Coordinates: (299, 392)
Landmark ID: 3, Coordinates: (336, 342)
Landmark ID: 4, Coordinates: (370, 309)
Landmark ID: 5, Coordinates: (258, 282)
Landmark ID: 6, Coordinates: (283, 215)
Landmark ID: 7, Coordinates: (295, 169)
Landmark ID: 8, Coordinates: (301, 130)
Landmark ID: 9, Coordinates: (219, 267)
Landmark ID: 10, Coordinates: (237, 187)
Landmark ID: 11, Coordinates: (246, 136)
Landmark ID: 12, Coordinates: (251, 96)
Landmark ID: 13, Coordinates: (177, 268)
Landmark ID: 14, Coordinates: (191, 189)
Landmark ID: 15, Coordinates: (198, 142)
Landmark ID: 16, Coordinates: (202, 105)
Landmark ID: 17, Coordinates: (131, 284)
Landmark ID: 18, Coordinates: (118, 226)
Landmark ID: 19, Coordinates: (109, 189)
Landmark ID: 20, Coordinates: (101, 157)
Landmark ID: 0, Coordinates: (167, 453)
Landmark ID: 1, Coordinates: (236, 440)
Landmark ID: 2, Coordinates: (300, 390)
Landmark ID: 3, Coordinates: (