In [None]:
import cv2
import mediapipe as mp
from fer import FER
import math
import warnings

In [7]:
# Using FER for face emotion recognition.
# One detector instance is created here and reused by detect() for every frame.
# NOTE(review): mtcnn=True presumably selects FER's MTCNN-based face detector
# rather than its default one — confirm against the FER documentation.
detector = FER(mtcnn=True)

def detect(frame):
    """Run the module-level FER ``detector`` on *frame*.

    Args:
        frame: Image to analyse, handed unchanged to
            ``detector.detect_emotions``.

    Returns:
        Whatever ``detector.detect_emotions`` returns for the frame. The
        capture loop in this file iterates over it and reads the ``"box"``
        and ``"emotions"`` keys of each entry.
    """
    return detector.detect_emotions(frame)

In [8]:
# Initialize MediaPipe Hands and Drawing modules.
# mp_hands supplies the Hands tracker, the HandLandmark indices and
# HAND_CONNECTIONS used further down in this file.
mp_hands = mp.solutions.hands
# mp_drawing supplies draw_landmarks() for overlaying the detected hand.
mp_drawing = mp.solutions.drawing_utils

In [9]:
class HandTracker:
    """Classify a hand pose from its five fingertip landmarks and label the frame.

    The tracker stores the most recent fingertip landmarks (objects exposing
    ``x`` and ``y``) and a ``flag`` that marks "drawing mode".  A thumbs-up
    pose switches drawing mode on; the "OK" hand shape switches it off.
    While drawing mode is on, only the 'Drawing!' label is rendered.

    A fingertip counts as "above" another when its ``y`` value is smaller.
    Coordinates are multiplied by the frame width/height before drawing, so
    they are presumably normalised to [0, 1] — verify against the caller.
    """

    # BGR colour used for every gesture label.
    _TEXT_COLOR = (0, 255, 0)
    # Thumb-to-index distance, as a fraction of the shorter frame side,
    # below which the "OK" pose is considered a pinch.
    _PINCH_RATIO = 0.3

    def __init__(self):
        # Fingertip landmarks; populated by get_coordinates().
        self.thumb_tip = None
        self.index_tip = None
        self.middle_tip = None
        self.ring_tip = None
        self.pinky_tip = None
        # True while drawing mode is active.
        self.flag = False

    def get_coordinates(self, landmarks):
        """Store the five fingertip landmarks of one detected hand.

        Args:
            landmarks: Sequence indexable by ``mp_hands.HandLandmark``
                members (the capture loop passes ``hand_landmarks.landmark``).
        """
        tips = mp_hands.HandLandmark
        self.thumb_tip = landmarks[tips.THUMB_TIP]
        self.index_tip = landmarks[tips.INDEX_FINGER_TIP]
        self.middle_tip = landmarks[tips.MIDDLE_FINGER_TIP]
        self.ring_tip = landmarks[tips.RING_FINGER_TIP]
        self.pinky_tip = landmarks[tips.PINKY_TIP]

    @staticmethod
    def _above(tip, *others):
        """Return True when *tip* has a strictly smaller ``y`` than every other tip."""
        return all(tip.y < other.y for other in others)

    def _label(self, image, text, tip):
        """Draw *text* on *image* just up and to the right of *tip*."""
        h, w, _ = image.shape
        origin = (int(tip.x * w) + 10, int(tip.y * h) - 10)
        cv2.putText(image, text, origin, cv2.FONT_HERSHEY_SIMPLEX, 1,
                    self._TEXT_COLOR, 2, cv2.LINE_AA)

    def gesture(self, image):
        """Recognise the current pose, update drawing mode and label *image*.

        Poses are tested in this order and the first match wins:
        thumbs up, victory, OK, then middle / index / pinky / ring finger
        raised above all others.

        Args:
            image: Frame to annotate in place; must expose ``shape`` as
                ``(height, width, channels)``.

        Returns:
            None.  Does nothing if no landmarks have been stored yet.
        """
        thumb, index, middle, ring, pinky = (
            self.thumb_tip, self.index_tip, self.middle_tip,
            self.ring_tip, self.pinky_tip,
        )
        # Guard: gesture() may be called before any hand has been seen.
        if any(tip is None for tip in (thumb, index, middle, ring, pinky)):
            return

        if self.flag:
            self._label(image, 'Drawing!', index)

        # Thumbs up: thumb above every other fingertip -> enter drawing mode.
        if self._above(thumb, index, middle, ring, pinky):
            # Remember the previous state *before* switching, so the label
            # is shown on the frame that activates drawing mode (previously
            # the flag was set first, which made the label unreachable).
            was_drawing = self.flag
            self.flag = True
            if not was_drawing:
                self._label(image, 'Thumbs Up!', thumb)
            return

        # Victory: index and middle both above thumb, ring and pinky.
        if (self._above(index, thumb, ring, pinky)
                and self._above(middle, thumb, ring, pinky)):
            if not self.flag:
                self._label(image, 'Victory!', index)
            return

        # OK shape: middle, ring and pinky all above both index and thumb.
        if (self._above(middle, index, thumb)
                and self._above(ring, index, thumb)
                and self._above(pinky, index, thumb)):
            h, w, _ = image.shape
            # Pixel distance between thumb tip and index tip.
            distance = math.hypot((index.x - thumb.x) * w,
                                  (index.y - thumb.y) * h)
            # This hand shape always leaves drawing mode, pinched or not.
            self.flag = False
            if distance < self._PINCH_RATIO * min(w, h):
                self._label(image, 'OK!', middle)
            return

        # Single finger raised above the four others.
        single_finger_poses = (
            ('Middle', middle, (index, thumb, ring, pinky)),
            ('Index', index, (thumb, middle, ring, pinky)),
            ('Pinky', pinky, (index, middle, ring, thumb)),
            ('Ring', ring, (index, middle, thumb, pinky)),
        )
        for text, tip, others in single_finger_poses:
            if self._above(tip, *others):
                if not self.flag:
                    self._label(image, text, tip)
                return


In [10]:
# Webcam loop: overlays the strongest detected emotion on each face, draws
# hand landmarks, labels the recognised gesture and, while drawing mode is
# active, traces the index fingertip path.  Press 'q' in the window to quit.
with warnings.catch_warnings():
    # Suppress all warnings raised while the capture session runs.
    warnings.simplefilter('ignore')

    # Initialize webcam
    cap = cv2.VideoCapture(0)

    # Index fingertip positions (pixel coordinates) of the current drawing
    index_finger_positions = []

    hand_tracker = HandTracker()

    # try/finally guarantees the camera and window are released even if a
    # frame fails to process or the kernel is interrupted mid-loop.
    try:
        with mp_hands.Hands(
                max_num_hands=2,
                min_detection_confidence=0.7,
                min_tracking_confidence=0.7) as hands:

            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    # A failed read is skipped and the loop tries again.
                    print("Ignoring empty camera frame.")
                    continue

                # Flip the frame horizontally for a selfie-view display
                frame = cv2.flip(frame, 1)

                # Convert the BGR frame to RGB for emotion detection
                emotion_results = detect(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

                for result in emotion_results:
                    bounding_box = result["box"]
                    emotions = result["emotions"]

                    # Draw bounding box
                    x, y, w, h = bounding_box
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)

                    # Find the emotion with the highest score
                    max_emotion = max(emotions, key=emotions.get)
                    max_score = emotions[max_emotion]

                    # Display the highest scoring emotion above the bounding box
                    color = (0, 255, 0)    # emotion text
                    cv2.putText(frame, f'{max_emotion}: {max_score:.2f}', (x, y - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

                # Convert the (already annotated) frame to RGB for hand tracking
                image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                hands_results = hands.process(image_rgb)

                # Leaving drawing mode discards the path drawn so far
                if not hand_tracker.flag:
                    index_finger_positions = []

                if hands_results.multi_hand_landmarks:
                    for hand_landmarks in hands_results.multi_hand_landmarks:
                        # Draw hand landmarks on the original frame
                        mp_drawing.draw_landmarks(
                            frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                        if hand_tracker.flag:
                            # Get the position of the index finger tip
                            index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
                            h, w, _ = frame.shape
                            index_tip_x, index_tip_y = int(index_tip.x * w), int(index_tip.y * h)

                            # Store the position in the list
                            index_finger_positions.append((index_tip_x, index_tip_y))

                            # Draw the path as segments between consecutive points
                            for start, end in zip(index_finger_positions, index_finger_positions[1:]):
                                cv2.line(frame, start, end, (187, 102, 255), 2)  # drawing color

                        # Classify the gesture after the path update, so a mode
                        # change takes effect from the next hand/frame onwards.
                        hand_tracker.get_coordinates(hand_landmarks.landmark)
                        hand_tracker.gesture(frame)

                # Display the combined frame
                cv2.imshow('Emotion Detection and Hand Tracking', frame)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        cap.release()
        cv2.destroyAllWindows()