In [1]:
!pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl (35.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m52.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.21 sounddevice-0.5.1


In [11]:
import base64
import math
import mimetypes
import time
from pathlib import Path

import cv2
import ipywidgets as widgets
import mediapipe as mp
import numpy as np
from IPython.display import display, Javascript

# -----------------------------
# Input Media Locations
# -----------------------------
# Audio track that the HTML <audio> element points at.
audio_path = "/content/Osad3eny_HellMakers.Com.mp3"
# Video that is analysed frame by frame (0 may be used instead for a webcam).
video_path = "/content/volume3.18_30c4f49c.mp4"

# -----------------------------
# MediaPipe Hand Tracking
# -----------------------------
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

# Track one hand only, with a 0.7 confidence threshold for both the initial
# detection and the frame-to-frame tracking.
hand_tracker_options = dict(
    max_num_hands=1,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.7,
)
hands = mp_hands.Hands(**hand_tracker_options)

# -----------------------------
# Helper Function: Map Distance to Volume
# -----------------------------
def map_distance_to_volume(distance, min_d=30, max_d=200):
    """
    Convert a thumb-to-index pixel distance into a volume percentage.

    The distance is linearly interpolated from the interval
    [min_d, max_d] onto [0, 100]; distances outside that interval are
    clamped to the nearest end. The result is truncated to an int.
    """
    distance_range = (min_d, max_d)
    percent_range = (0, 100)
    return int(np.interp(distance, distance_range, percent_range))

# -----------------------------
# Setup Widgets for Output in Colab
# -----------------------------
# HTML Audio Element: auto-loops and has controls.
# Audio playback requires a user gesture, so a start button is provided.
#
# audio_path is a path on the kernel's filesystem. The <audio> element is
# rendered by the browser, which resolves `src` as a URL and cannot read
# kernel-side files directly, so the file content is embedded as a base64
# data URI instead. If audio_path is not a local file (for example it is
# already a URL), it is used as `src` unchanged.
audio_file = Path(audio_path)
if audio_file.is_file():
    # Fall back to the MP3 MIME type when the extension is not recognised.
    audio_mime = mimetypes.guess_type(audio_file.name)[0] or "audio/mpeg"
    audio_b64 = base64.b64encode(audio_file.read_bytes()).decode("ascii")
    audio_src = f"data:{audio_mime};base64,{audio_b64}"
else:
    audio_src = audio_path

audio_widget = widgets.HTML(value=f"""
<audio id="audioPlayer" src="{audio_src}" loop controls style="width:100%;">
Your browser does not support the audio element.
</audio>
""")
display(audio_widget)

# Click handler for the start button: asks the browser to play the audio
# element, hides the button, and reports that playback was requested.
def on_start_button_clicked(_clicked_button):
    """Start the 'audioPlayer' element via JavaScript and hide the button."""
    play_audio_js = Javascript("document.getElementById('audioPlayer').play();")
    display(play_audio_js)
    start_button.layout.visibility = 'hidden'
    print("Audio playback started!")

# The button itself; it is looked up by name inside the handler at click time.
start_button = widgets.Button(description="Start Audio (Click Me)")
start_button.on_click(on_start_button_clicked)
display(start_button)

# Create both output widgets first, then show them in order:
#   video_widget - JPEG image that receives each processed frame
#   js_output    - output area used to run JavaScript volume updates
video_widget = widgets.Image(format='jpeg')
js_output = widgets.Output()

display(video_widget)
display(js_output)

# -----------------------------
# Video Capture Setup
# -----------------------------
cap = cv2.VideoCapture(video_path)  # Use video_path; change to 0 for webcam if needed

# Set desired output resolution (reduced width for faster processing)
desired_width = 400
desired_height = 600

# Named landmark indices instead of the bare numbers 4 and 8.
thumb_tip_id = mp_hands.HandLandmark.THUMB_TIP
index_tip_id = mp_hands.HandLandmark.INDEX_FINGER_TIP

# Last volume pushed to the browser; lets the loop skip redundant JS updates.
last_volume = None

if not cap.isOpened():
    # Without this message a bad path would skip the loop silently and only
    # "Processing complete." would be printed.
    print(f"Could not open video source: {video_path!r}")

# -----------------------------
# Main Processing Loop
# -----------------------------
# For every frame: detect the hand, measure the thumb-to-index distance,
# map it to a volume, draw the overlay, and push the frame to video_widget.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Stream ended or no frame captured.")
            break

        # Resize frame to desired resolution and flip for a mirror view.
        frame = cv2.resize(frame, (desired_width, desired_height))
        frame = cv2.flip(frame, 1)

        # MediaPipe expects RGB input, OpenCV delivers BGR.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)
        h, w, _ = frame.shape

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                # Draw hand landmarks for visual feedback.
                mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                landmarks = hand_landmarks.landmark

                # Landmark coordinates are normalised; scale them to pixels.
                thumb_tip = landmarks[thumb_tip_id]
                index_tip = landmarks[index_tip_id]
                x_thumb, y_thumb = int(thumb_tip.x * w), int(thumb_tip.y * h)
                x_index, y_index = int(index_tip.x * w), int(index_tip.y * h)

                # Draw blue circles at the finger tips and a connecting line.
                cv2.circle(frame, (x_thumb, y_thumb), 5, (255, 0, 0), -1)
                cv2.circle(frame, (x_index, y_index), 5, (255, 0, 0), -1)
                cv2.line(frame, (x_thumb, y_thumb), (x_index, y_index), (255, 0, 0), 2)

                # Calculate the Euclidean distance and map it to volume.
                distance = math.hypot(x_index - x_thumb, y_index - y_thumb)
                volume = map_distance_to_volume(distance)

                # Overlay the current volume percentage on the video frame.
                cv2.putText(frame, f'Volume: {volume}%', (10, 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

                # Update the HTML audio element's volume via JavaScript.
                # Volume values range between 0.0 and 1.0. Only send an update
                # when the value actually changed, so the front end is not
                # flooded with an identical script on every frame.
                if volume != last_volume:
                    js_code = f"document.getElementById('audioPlayer').volume = {volume/100.0};"
                    with js_output:
                        js_output.clear_output(wait=True)
                        display(Javascript(js_code))
                    last_volume = volume

        # Encode the processed frame as JPEG and update the video widget.
        ret2, buffer = cv2.imencode('.jpg', frame)
        if ret2:
            video_widget.value = buffer.tobytes()

        # Short delay for smoother updating.
        time.sleep(0.001)
finally:
    # Release the capture even if the cell is interrupted or a frame fails,
    # so the file/device handle is not left open in the kernel.
    cap.release()

print("Processing complete.")


HTML(value='\n<audio id="audioPlayer" src="/content/Osad3eny_HellMakers.Com.mp3" loop controls style="width:10…

Button(description='Start Audio (Click Me)', style=ButtonStyle())

Image(value=b'', format='jpeg')

Output()

Stream ended or no frame captured.
Processing complete.
