In [1]:
import cv2
import mediapipe as mp
import numpy as np
import google.generativeai as genai
import time
import os
from dotenv import load_dotenv
load_dotenv('.env')

mp_drawing = mp.solutions.drawing_utils # Add landmarks and drawing elements to video
mp_pose = mp.solutions.pose             # Model to detect poses (pose estimation model)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def left_arm_bicep_curl(landmarks, frame, height, width):
    # Get shoulder and wrist coordinates
    left_shoulder = landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value]
    left_hip = landmarks[mp_pose.PoseLandmark.LEFT_HIP.value]
    left_wrist = landmarks[mp_pose.PoseLandmark.LEFT_WRIST.value]

    shoulder_coords = (int(left_shoulder.x * width), int(left_shoulder.y * height))
    hip_coords = (int(left_hip.x * width), int(left_hip.y * height))
    wrist_coords = (int(left_wrist.x * width), int(left_wrist.y * height))

    # Define target zones (top and bottom relative to shoulder)
    top_target = (shoulder_coords[0], shoulder_coords[1] + 50)
    bottom_target = (hip_coords[0] + 30, hip_coords[1] - 10)

    # Draw target circles
    cv2.circle(frame, top_target, 25, (0, 255, 0), 2)
    cv2.circle(frame, bottom_target, 25, (0, 0, 255), 2)

    # Draw wrist point
    cv2.circle(frame, wrist_coords, 8, (255, 255, 255), -1)

    mapping_dict = {'left_wrist': wrist_coords,
                    'right_wrist': None,
                    'left_top_zone': top_target,
                    'right_top_zone': None,
                    'left_bottom_zone': bottom_target,
                    'right_bottom_zone': None}

    return mapping_dict    # wrist_coords, top_target, bottom_target

In [3]:
def right_arm_bicep_curl(landmarks, frame, height, width):
    # Get shoulder and wrist coordinates
    right_shoulder = landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER.value]
    right_hip = landmarks[mp_pose.PoseLandmark.RIGHT_HIP.value]
    right_wrist = landmarks[mp_pose.PoseLandmark.RIGHT_WRIST.value]

    shoulder_coords = (int(right_shoulder.x * width), int(right_shoulder.y * height))
    hip_coords = (int(right_hip.x * width), int(right_hip.y * height))
    wrist_coords = (int(right_wrist.x * width), int(right_wrist.y * height))

    # Define target zones (top and bottom relative to shoulder)
    top_target = (shoulder_coords[0] - 10, shoulder_coords[1] + 30)
    bottom_target = (hip_coords[0] - 50, hip_coords[1] - 10)

    # Draw target circles
    cv2.circle(frame, top_target, 25, (0, 255, 0), 2)
    cv2.circle(frame, bottom_target, 25, (0, 0, 255), 2)

    # Draw wrist point
    cv2.circle(frame, wrist_coords, 8, (255, 255, 255), -1)

    mapping_dict = {'left_wrist': None,
                    'right_wrist': wrist_coords,
                    'left_top_zone': None,
                    'right_top_zone': top_target,
                    'left_bottom_zone': None,
                    'right_bottom_zone': bottom_target}

    return mapping_dict     # wrist_coords, top_target, bottom_target

In [4]:
def both_arms_lateral_raise(landmarks, frame, height, width):
    left_shoulder = landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value]
    left_hip = landmarks[mp_pose.PoseLandmark.LEFT_HIP.value]
    left_wrist = landmarks[mp_pose.PoseLandmark.LEFT_WRIST.value]

    right_shoulder = landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER.value]
    right_hip = landmarks[mp_pose.PoseLandmark.RIGHT_HIP.value]
    right_wrist = landmarks[mp_pose.PoseLandmark.RIGHT_WRIST.value]

    left_shoulder_coords = (int(left_shoulder.x * width), int(left_shoulder.y * height))
    left_hip_coords = (int(left_hip.x * width), int(left_hip.y * height))
    left_wrist_coords = (int(left_wrist.x * width), int(left_wrist.y * height))

    right_shoulder_coords = (int(right_shoulder.x * width), int(right_shoulder.y * height))
    right_hip_coords = (int(right_hip.x * width), int(right_hip.y * height))
    right_wrist_coords = (int(right_wrist.x * width), int(right_wrist.y * height))

    # Define target zones: lateral raise is outward to shoulder height
    left_top_zone = (left_shoulder_coords[0] + 100, left_shoulder_coords[1] - 80)       # To the left, shoulder height
    left_bottom_zone = (left_hip_coords[0] + 30, left_hip_coords[1] - 10)             # Below waist

    right_top_zone = (right_shoulder_coords[0] - 100, right_shoulder_coords[1] - 80)    # To the right, shoulder height
    right_bottom_zone = (right_hip_coords[0] - 20, right_hip_coords[1] + 30)

    # Draw zones and wrists
    cv2.circle(frame, left_top_zone, 25, (0, 255, 0), 2)
    cv2.circle(frame, left_bottom_zone, 25, (0, 0, 255), 2)
    cv2.circle(frame, right_top_zone, 25, (0, 255, 0), 2)
    cv2.circle(frame, right_bottom_zone, 25, (0, 0, 255), 2)

    cv2.circle(frame, left_wrist_coords, 8, (255, 255, 255), -1)
    cv2.circle(frame, right_wrist_coords, 8, (255, 255, 255), -1)

    return {
        'left_wrist': left_wrist_coords,
        'right_wrist': right_wrist_coords,
        'left_top_zone': left_top_zone,
        'right_top_zone': right_top_zone,
        'left_bottom_zone': left_bottom_zone,
        'right_bottom_zone': right_bottom_zone
    }

In [None]:
import cv2
import mediapipe as mp
import numpy as np

# Setup
mp_pose = mp.solutions.pose
pose = mp_pose.Pose()
mp_draw = mp.solutions.drawing_utils

# Video capture
cap = cv2.VideoCapture(0)

# Curl counter variables
counter = 0
stage = None  # Can be "up" or "down"

def calculate_distance(p1, p2):
    return np.linalg.norm(np.array(p1) - np.array(p2))

def is_inside_zone(wrist, target, radius=30):
    if wrist is None or target is None:
        return False  # Can't be inside a zone if wrist or target is missing
    return calculate_distance(wrist, target) < radius

while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break

    # Flip and convert color
    frame = cv2.flip(frame, 1)
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Pose detection
    results = pose.process(rgb_frame)

    if results.pose_landmarks:
        mp_draw.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
        landmarks = results.pose_landmarks.landmark

        h, w, _ = frame.shape

        # wrist_coords, top_target, bottom_target = both_arms_bicep_curl(landmarks, frame, h, w)

        mapping = left_arm_bicep_curl(landmarks, frame, h, w)

        left_wrist = mapping['left_wrist']
        right_wrist = mapping['right_wrist']
        left_top = mapping['left_top_zone']
        right_top = mapping['right_top_zone']
        left_bottom = mapping['left_bottom_zone']
        right_bottom = mapping['right_bottom_zone']


        # Left arm logic
        left_down = is_inside_zone(left_wrist, left_bottom)
        left_up = is_inside_zone(left_wrist, left_top)

        if left_down and stage != 'down':
            stage = 'down'

        elif stage == 'down' and left_up:
            counter += 1
            stage = 'up'

        # Right arm logic
        right_down = is_inside_zone(right_wrist, right_bottom)
        right_up = is_inside_zone(right_wrist, right_top)

        if right_down and stage != 'down':
            stage = 'down'

        elif stage == 'down' and right_up:
            counter += 1
            stage = 'up'

        # Display counter
        cv2.putText(frame, f'Reps: {counter}', (10, 40), cv2.FONT_HERSHEY_SIMPLEX,
                    1, (255, 255, 255), 2, cv2.LINE_AA)

    cv2.imshow('Bicep Curl Counter', frame)
    if cv2.waitKey(5) & 0xFF == 27:  # Esc to exit
        break

cap.release()
cv2.destroyAllWindows()


### Baseline video capture code

In [2]:
capture = cv2.VideoCapture(0)

while capture.isOpened():
    # Read current frame and display
    _, frame = capture.read()
    cv2.imshow(winname='Workout Window', mat=frame)

    # Define wait key for infinite amount of time until keystroke "q" is entered
    key = cv2.waitKey(10)
    if key == ord('q'):
        # print('Stopping live feed.')
        break

capture.release()
cv2.destroyAllWindows()

Stopping live feed.


### Making detections

In [6]:
capture = cv2.VideoCapture(0)

with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
    while capture.isOpened():
        # Read current frame and display
        _, frame = capture.read()
        
        # Convert frame from BGR to RGB
        cvt_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        # Set converted frame flags to False (save memory when pass to model)
        cvt_frame.flags.writeable = False

        # Make detection predictions
        detections = pose.process(cvt_frame)

        # Set frame memory back to True and use BGR frame again for display
        cvt_frame.flags.writeable = True

        # Extract detection landmarks from results
        pose_landmarks = detections.pose_landmarks

        mp_drawing.draw_landmarks(frame, pose_landmarks, mp_pose.POSE_CONNECTIONS,
                                mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=2),
                                mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))

        cv2.imshow(winname='Workout Window', mat=frame)


        # Define wait key for infinite amount of time until keystroke "q" is entered
        key = cv2.waitKey(10)
        if key == ord('q'):
            # print('Stopping live feed.')
            break

capture.release()
cv2.destroyAllWindows()

Stopping live feed.


### Determining joints

In [2]:
capture = cv2.VideoCapture(0)
fourcc = cv2.VideoWriter_fourcc(*'XVID')  # Or 'mp4v' for .mp4 files
out = cv2.VideoWriter('output.avi', fourcc, 20.0, (640, 480))



with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
    while capture.isOpened():
        # Read current frame and display
        _, frame = capture.read()
        
        # Convert frame from BGR to RGB
        cvt_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        # Set converted frame flags to False (save memory when pass to model)
        cvt_frame.flags.writeable = False

        # Make detection predictions
        detections = pose.process(cvt_frame)

        # Set frame memory back to True and use BGR frame again for display
        cvt_frame.flags.writeable = True

        # Define detection landmarks from results
        pose_landmarks = detections.pose_landmarks

        # Explicitly extract landmarks (sometimes landmark attribute cannot be obtained, hence try-except block)
        try:
            landmarks = pose_landmarks.landmark
        except:
            pass

        # Plot out landmarks and connections to frame
        mp_drawing.draw_landmarks(frame, pose_landmarks, mp_pose.POSE_CONNECTIONS,
                                mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=2),
                                mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))

        out.write(frame)  
        cv2.imshow(winname='Workout Window', mat=frame)


        # Define wait key for infinite amount of time until keystroke "q" is entered
        key = cv2.waitKey(10)
        if key == ord('q'):
            # print('Stopping live feed.')
            break

capture.release()
out.release()
cv2.destroyAllWindows()

Stopping live feed.


In [5]:
# To identify all 33 possible landmarks in Pose model
pose_landmarks_idx_dict = {i: landmark for i, landmark in enumerate(mp_pose.PoseLandmark)}
pose_landmarks_idx_dict

{0: <PoseLandmark.NOSE: 0>,
 1: <PoseLandmark.LEFT_EYE_INNER: 1>,
 2: <PoseLandmark.LEFT_EYE: 2>,
 3: <PoseLandmark.LEFT_EYE_OUTER: 3>,
 4: <PoseLandmark.RIGHT_EYE_INNER: 4>,
 5: <PoseLandmark.RIGHT_EYE: 5>,
 6: <PoseLandmark.RIGHT_EYE_OUTER: 6>,
 7: <PoseLandmark.LEFT_EAR: 7>,
 8: <PoseLandmark.RIGHT_EAR: 8>,
 9: <PoseLandmark.MOUTH_LEFT: 9>,
 10: <PoseLandmark.MOUTH_RIGHT: 10>,
 11: <PoseLandmark.LEFT_SHOULDER: 11>,
 12: <PoseLandmark.RIGHT_SHOULDER: 12>,
 13: <PoseLandmark.LEFT_ELBOW: 13>,
 14: <PoseLandmark.RIGHT_ELBOW: 14>,
 15: <PoseLandmark.LEFT_WRIST: 15>,
 16: <PoseLandmark.RIGHT_WRIST: 16>,
 17: <PoseLandmark.LEFT_PINKY: 17>,
 18: <PoseLandmark.RIGHT_PINKY: 18>,
 19: <PoseLandmark.LEFT_INDEX: 19>,
 20: <PoseLandmark.RIGHT_INDEX: 20>,
 21: <PoseLandmark.LEFT_THUMB: 21>,
 22: <PoseLandmark.RIGHT_THUMB: 22>,
 23: <PoseLandmark.LEFT_HIP: 23>,
 24: <PoseLandmark.RIGHT_HIP: 24>,
 25: <PoseLandmark.LEFT_KNEE: 25>,
 26: <PoseLandmark.RIGHT_KNEE: 26>,
 27: <PoseLandmark.LEFT_ANKLE: 27

In [3]:
api_key = os.getenv("GEMINI_API_KEY")  # Replace with your API key or use environment variable
genai.configure(api_key=api_key)

In [9]:
video_file_path = "output.avi"  # Replace with the path to your video file
display_name = "sample_video"  # Unique display name for the uploaded file

In [10]:
def upload_and_process_video(file_path, display_name):
    # print("Uploading video file...")
    try:
        video_file = genai.upload_file(path=file_path, display_name=display_name, mime_type="video/mp4")
        # print(f"Completed upload: {video_file.uri}")

        # Check the state of the uploaded file
        while video_file.state.name == "PROCESSING":
            # print(".", end="", flush=True)
            time.sleep(10)
            video_file = genai.get_file(video_file.name)

        if video_file.state.name == "FAILED":
            raise ValueError("Video processing failed.")

        return video_file
    except Exception as e:
        # print(f"Error uploading video: {e}")
        return None

In [None]:
def generate_content_from_video(video_file, prompt):
    try:
        model = genai.GenerativeModel(model_name="gemini-1.5-flash")
        # print("\nMaking LLM inference request...")
        response = model.generate_content([video_file, prompt], request_options={"timeout": 600})
        return response.text
    except Exception as e:
        # print(f"Error generating content: {e}")
        return None

In [None]:

# Check if the file exists
if not os.path.exists(video_file_path):
    # print(f"Video file {video_file_path} not found.")


# Load in system prompt
with open('livefeed-sys-prompt.txt', 'r') as f:
    system_prompt = f.read()

# Check if the file is already uploaded
try:
    file_list = genai.list_files(page_size=100)
    video_file = next((f for f in file_list if f.display_name == display_name), None)

    if video_file:
        # print(f"Using existing file: {video_file.uri}")
    else:
        video_file = upload_and_process_video(video_file_path, display_name)

    if video_file:
        response_text = generate_content_from_video(video_file, system_prompt)

        if response_text:
            # print("\nGenerated Response:")
            # print(response_text)
        else:
            # print("Failed to generate response.")
    else:
        # print("Failed to upload or process video.")
except Exception as e:
    # print(f"Error: {e}")

Using existing file: https://generativelanguage.googleapis.com/v1beta/files/9jljvgtbks69

Making LLM inference request...

Generated Response:
The video shows a person performing bicep curls with a dumbbell.  While the video quality is low, several aspects of their form could be improved for better results and injury prevention:

**Issues and Improvements:**

* **Range of Motion:** The person's range of motion appears somewhat limited.  They should strive to fully extend their arm at the bottom of the curl (without locking the elbow) and bring the dumbbell up towards their shoulders, feeling a full contraction in the bicep.  A more complete range of motion maximizes muscle activation.

* **Swinging:** There's a noticeable amount of body movement and swinging, particularly in the torso. This reduces the effectiveness of the exercise by using momentum instead of muscle strength.  To correct this, they need to maintain a stable core and avoid any significant body movement during the lift.

In [2]:
from elevenlabs.client import ElevenLabs
from elevenlabs import play
import os
from dotenv import load_dotenv

load_dotenv('.env')

True

In [None]:
generated_content = """The video shows a person performing bicep curls with a dumbbell.  While the video quality is low, several aspects of their form could be improved for better results and injury prevention:

**Issues and Improvements:** 

* **Range of Motion:** The person's range of motion appears somewhat limited.  They should strive to fully extend their arm at the bottom of the curl (without locking the elbow) and bring the dumbbell up towards their shoulders, feeling a full contraction in the bicep.  A more complete range of motion maximizes muscle activation.

* **Swinging:** There's a noticeable amount of body movement and swinging, particularly in the torso. This reduces the effectiveness of the exercise by using momentum instead of muscle strength.  To correct this, they need to maintain a stable core and avoid any significant body movement during the lift.  Focus should be on controlled movement, using only the bicep to lift the weight.

* **Elbow Position:**  It's difficult to assess completely from the angle, but the elbow appears to move slightly during the lift.  Keeping the elbows pinned to the sides throughout the exercise is crucial for isolating the bicep muscles and minimizing strain on other joints (shoulders, wrists).

* **Speed of Movement:** The curls seem to be performed relatively quickly.  Slower, controlled movements allow for better muscle engagement and reduce the risk of injury.  A good tempo would be to take approximately 2-3 seconds to raise the weight and 2-3 seconds to lower it.

* **Weight Selection:**  It is hard to tell from the video, but if the weight is too light, they won't gain strength and hypertrophy.  If they're swinging or using poor form to lift the weight, it is too heavy and needs to be decreased.
"""

In [6]:
elevenlabs_apikey = os.environ['ELEVENLABS_APIKEY']

In [2]:
import requests

In [None]:
import requests

api_key = elevenlabs_apikey
voice_id = '2EiwWnXFnvU5JabPnv8n'
text = "The video shows a person performing bicep curls with a dumbbell.  While the video quality is low, several aspects of their form could be improved for better results and injury prevention."

headers = {
    'xi-api-key': api_key,
    'Content-Type': 'application/json'
}

payload = {
    "text": text,
    "voice_settings": {
        "stability": 0.75,
        "similarity_boost": 0.75
    }
}

response = requests.post(
    f'https://api.elevenlabs.io/v1/text-to-speech/{voice_id}',
    headers=headers,
    json=payload,
    stream=True
)


In [14]:
with open("tts_output.wav", "wb") as f:
    for chunk in response.iter_content(chunk_size=1024):
        if chunk:
            f.write(chunk)

In [7]:
resp = requests.get('https://api.elevenlabs.io/v2/voices', headers={'xi-api-key': elevenlabs_apikey})

In [8]:
content = resp.json()

In [9]:
for voice in content['voices']:
    # print(" | ".join((voice['name'], voice['voice_id'], voice['labels']['gender'])))


    

Rachel | 21m00Tcm4TlvDq8ikWAM | female
Drew | 29vD33N1CtxCmqQRPOHJ | male
Clyde | 2EiwWnXFnvU5JabPnv8n | male
Paul | 5Q0t7uMcjvnagumLfvZi | male
Aria | 9BWtsMINqrJLrRacOk9x | female
Domi | AZnzlk1XvdvUeBnXmlld | female
Dave | CYw3kZ02Hs0563khs1Fj | male
Roger | CwhRBWXzGAHq8TQ4Fs17 | male
Fin | D38z5RcWu1voky8WS1ja | male
Sarah | EXAVITQu4vr4xnSDxMaL | female


In [10]:
elevenlabs = ElevenLabs(
  api_key=elevenlabs_apikey,
)
audio = elevenlabs.text_to_speech.convert(
    text="The video shows a person performing bicep curls with a dumbbell.  While the video quality is low, several aspects of their form could be improved for better results and injury prevention",
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    model_id="eleven_multilingual_v2",
    output_format="mp3_44100_128",
    
)

In [11]:
play(audio)

In [None]:
# Getting coordinates of specific landmarks
corresponding_idx = mp_pose.PoseLandmark.LEFT_SHOULDER.value
landmarks[corresponding_idx]

x: 1.11685908
y: 1.38789415
z: -0.800803661
visibility: 0.38747403

In [17]:
# Getting all coordinates for right arm (RIGHT_SHOULDER, RIGHT_ELBOW, RIGHT_WRIST)
right_shoulder_idx = mp_pose.PoseLandmark.RIGHT_SHOULDER.value
right_elbow_idx = mp_pose.PoseLandmark.RIGHT_ELBOW.value
right_wrist_idx = mp_pose.PoseLandmark.RIGHT_WRIST.value

for i in [right_shoulder_idx, right_elbow_idx, right_wrist_idx]:
    # print(landmarks[i])


x: 0.482961506
y: 1.42339432
z: -0.92957592
visibility: 0.492494792

x: 0.48836118
y: 1.41965485
z: -1.3281703
visibility: 0.116792232

x: 0.772435248
y: 0.90872395
z: -1.49982941
visibility: 0.037862245



### Calculating angles

In [10]:
def both_arms_bicep_curl(landmarks, frame, height, width):
    # Get left shoulder and wrist coordinates
    left_shoulder = landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value]
    left_hip = landmarks[mp_pose.PoseLandmark.LEFT_HIP.value]
    left_wrist = landmarks[mp_pose.PoseLandmark.LEFT_WRIST.value]

    # Get right shoulder and wrist coordinates
    right_shoulder = landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER.value]
    right_hip = landmarks[mp_pose.PoseLandmark.RIGHT_HIP.value]
    right_wrist = landmarks[mp_pose.PoseLandmark.RIGHT_WRIST.value]

    left_shoulder_coords = (int(left_shoulder.x * width), int(left_shoulder.y * height))
    left_hip_coords = (int(left_hip.x * width), int(left_hip.y * height))
    left_wrist_coords = (int(left_wrist.x * width), int(left_wrist.y * height))

    right_shoulder_coords = (int(right_shoulder.x * width), int(right_shoulder.y * height))
    right_hip_coords = (int(right_hip.x * width), int(right_hip.y * height))
    right_wrist_coords = (int(right_wrist.x * width), int(right_wrist.y * height))

    # Define target zones (top and bottom relative to shoulder)
    left_top_target = (left_shoulder_coords[0] - 10, left_shoulder_coords[1] + 30)
    left_bottom_target = (left_hip_coords[0] - 50, left_hip_coords[1] - 10)

    right_top_target = (right_shoulder_coords[0], right_shoulder_coords[1] + 50)
    right_bottom_target = (right_hip_coords[0] + 30, right_hip_coords[1] - 10)

    # Draw target circles
    cv2.circle(frame, left_top_target, 25, (0, 255, 0), 2)
    cv2.circle(frame, left_bottom_target, 25, (0, 0, 255), 2)
    cv2.circle(frame, right_top_target, 25, (0, 255, 0), 2)
    cv2.circle(frame, right_bottom_target, 25, (0, 0, 255), 2)

    # Draw wrist point
    cv2.circle(frame, left_wrist_coords, 8, (255, 255, 255), -1)
    cv2.circle(frame, right_wrist_coords, 8, (255, 255, 255), -1)

    mapping_dict = {'left_wrist': left_wrist_coords,
                    'right_wrist': right_wrist_coords,
                    'left_top_zone': left_top_target,
                    'right_top_zone': right_top_target,
                    'left_bottom_zone': left_bottom_target,
                    'right_bottom_zone': right_bottom_target}

    return mapping_dict     # (left_wrist_coords, right_wrist_coords), (left_top_target, right_top_target), (left_bottom_target, right_bottom_target)

Game mode code

I0000 00:00:1749954707.136996 12425583 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1749954707.242443 12438987 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1749954707.257650 12438991 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [12]:
def calculate_angle(a, b, c):
    a = np.array(a)     # First point
    b = np.array(b)     # Middle point
    c = np.array(c)     # End point

    radians = np.arctan2(c[1]-b[1], c[0]-b[0]) - np.arctan2(a[1]-b[1], a[0]-b[0])
    degrees = np.abs(radians * 180 / np.pi)

    if degrees > 180.0:
        degrees = 360 - degrees

    return degrees

In [9]:
right_shoulder = [landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER.value].x, landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER.value].y]
right_elbow = [landmarks[mp_pose.PoseLandmark.RIGHT_ELBOW.value].x, landmarks[mp_pose.PoseLandmark.RIGHT_ELBOW.value].y]
right_wrist = [landmarks[mp_pose.PoseLandmark.RIGHT_WRIST.value].x, landmarks[mp_pose.PoseLandmark.RIGHT_WRIST.value].y]
right_hip = [landmarks[mp_pose.PoseLandmark.RIGHT_HIP.value].x, landmarks[mp_pose.PoseLandmark.RIGHT_HIP.value].y]

In [10]:
# print(calculate_angle(right_shoulder, right_elbow, right_wrist))
# print(calculate_angle(right_hip, right_shoulder, right_elbow))

165.23998737497607
20.19538022568069


In [13]:
capture = cv2.VideoCapture(0)
# Optional: Set frame dimensions
# capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
# capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
    # print("Starting pose detection... Press 'q' to quit")
    
    while capture.isOpened():
        # Read current frame
        ret, frame = capture.read()
        
        if not ret:
            # print("Error: Could not read frame")
            break
        
        # Convert frame from BGR to RGB
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        # Set frame flags to False (optimization)
        rgb_frame.flags.writeable = False

        # Make detection predictions
        results = pose.process(rgb_frame)

        # Set frame memory back to True
        rgb_frame.flags.writeable = True

        try:
            landmarks = results.pose_landmarks.landmark

            # Get x,y coordinates of points (normalized 0-1)
            left_shoulder = [
                landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value].x, 
                landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value].y
            ]
            left_elbow = [
                landmarks[mp_pose.PoseLandmark.LEFT_ELBOW.value].x, 
                landmarks[mp_pose.PoseLandmark.LEFT_ELBOW.value].y
            ]
            left_wrist = [
                landmarks[mp_pose.PoseLandmark.LEFT_WRIST.value].x, 
                landmarks[mp_pose.PoseLandmark.LEFT_WRIST.value].y
            ]

            # Get actual frame size
            h, w, _ = frame.shape
            
            # Calculate angle between points
            angle = calculate_angle(left_shoulder, left_elbow, left_wrist)

            # Display angle on frame (use original BGR frame for display)
            cv2.putText(frame, 
                        f'Angle: {angle:.1f}', 
                        tuple(np.multiply(left_elbow, [w, h]).astype(int)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2, cv2.LINE_AA)
            
            # # print shoulder coordinates (this should now work)
            # print(f"Left shoulder: {left_shoulder}")
            # print(f"Angle: {angle:.1f} degrees")

        except Exception as e:
            # print(f"Error processing landmarks: {e}")

        # Draw landmarks and connections on the original BGR frame
        mp_drawing.draw_landmarks(
            frame, 
            results.pose_landmarks, 
            mp_pose.POSE_CONNECTIONS,
            mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=2),
            mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2)
        )

        # Display the frame
        cv2.imshow('Workout Window', frame)

        # Check for 'q' key press
        key = cv2.waitKey(10) & 0xFF
        if key == ord('q'):
            # print('Stopping live feed.')
            break

# Clean up
capture.release()
cv2.destroyAllWindows()
# print("Program ended.")

Starting pose detection... Press 'q' to quit
Error processing landmarks: 'NoneType' object has no attribute 'landmark'
Error processing landmarks: 'NoneType' object has no attribute 'landmark'
Error processing landmarks: 'NoneType' object has no attribute 'landmark'
Error processing landmarks: 'NoneType' object has no attribute 'landmark'
Error processing landmarks: 'NoneType' object has no attribute 'landmark'
Error processing landmarks: 'NoneType' object has no attribute 'landmark'
Error processing landmarks: 'NoneType' object has no attribute 'landmark'
Error processing landmarks: 'NoneType' object has no attribute 'landmark'
Error processing landmarks: 'NoneType' object has no attribute 'landmark'
Error processing landmarks: 'NoneType' object has no attribute 'landmark'
Error processing landmarks: 'NoneType' object has no attribute 'landmark'
Error processing landmarks: 'NoneType' object has no attribute 'landmark'
Error processing landmarks: 'NoneType' object has no attribute 'lan

In [2]:
file_list = genai.list_files(page_size=100)

In [5]:
for item in file_list:
    # print(item.display_name)

In [7]:
video_file = next((f for f in file_list if f.display_name == display_name), None)

In [8]:
video_file

In [6]:
display_name = 'workout_recording_1749897946.mp4'