### Imports & Setup

In [1]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import math
import glob
from ultralytics import YOLO
from collections import defaultdict

# Paths and models
frame_folder = "video_frames"  
output_folder = "angle_overlay_frames"
classification_model = YOLO("runs/detect/train11/weights/best.pt")
pose_model = YOLO("yolo11s-pose.pt")

print("Models loaded successfully!")


Models loaded successfully!


### Define path to video. If you want to use a different video, change the path. I provided two videos for convenience

In [14]:
video_path = "video15.mp4"

### Define function to extract frames from video if not already extracted

In [15]:
def extract_if_needed(video_path, frame_folder, frame_rate=5):
    existing_frames = sorted(glob.glob(os.path.join(frame_folder, "frame_*.jpg")))

    # Quick check: if we already have the correct number of frames, skip
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    expected_frames = math.ceil(total_frames / frame_rate)

    if len(existing_frames) == expected_frames:
        print("Frames already extracted. Skipping.")
        cap.release()
        return

    print("Mismatch detected or missing frames. Re-extracting...")

    # Delete existing frames
    for f in existing_frames:
        os.remove(f)

    # Re-extract
    frame_id = 0
    saved_id = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if frame_id % frame_rate == 0:
            cv2.imwrite(f"{frame_folder}/frame_{saved_id}.jpg", frame)
            saved_id += 1
        frame_id += 1
    cap.release()


### Extract frames from video if doesn't exist in video_frames folder

In [16]:
extract_if_needed(video_path, frame_folder)

Mismatch detected or missing frames. Re-extracting...


### Run Classification on Frames and determine max confidence

In [17]:
# Classify frames and gather confidence scores
class_confidence = defaultdict(list)

for img in os.listdir(frame_folder):
    img_path = os.path.join(frame_folder, img)
    results = classification_model(img_path)
    for r in results:
        if r.boxes is not None and len(r.boxes.cls) > 0:
            for cls_id, conf in zip(r.boxes.cls, r.boxes.conf):
                class_name = classification_model.names[int(cls_id)]
                class_confidence[class_name].append(float(conf))

# Determine max confidence per class
max_confidence = {cls: max(confs) for cls, confs in class_confidence.items() if confs}



image 1/1 f:\LSU_Fullstack_AI_Machine_Learning_Course\capstone_project\video_frames\frame_0.jpg: 384x640 1 deadlift, 1 hip thrust, 1 push up, 359.6ms
Speed: 4.5ms preprocess, 359.6ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 f:\LSU_Fullstack_AI_Machine_Learning_Course\capstone_project\video_frames\frame_1.jpg: 384x640 1 deadlift, 1 hip thrust, 1 push up, 9.2ms
Speed: 1.7ms preprocess, 9.2ms inference, 3.2ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 f:\LSU_Fullstack_AI_Machine_Learning_Course\capstone_project\video_frames\frame_10.jpg: 384x640 1 push up, 13.7ms
Speed: 2.4ms preprocess, 13.7ms inference, 2.8ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 f:\LSU_Fullstack_AI_Machine_Learning_Course\capstone_project\video_frames\frame_100.jpg: 384x640 1 push up, 9.0ms
Speed: 2.1ms preprocess, 9.0ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 f:\LSU_Fullstack_AI_Machine_Learning_Course\capstone

### Get predicted Class based on max confidence

In [18]:
if max_confidence:
    best_class = max(max_confidence, key=max_confidence.get)
    best_score = max_confidence[best_class]
    print(f"Predicted Class: {best_class} with max confidence: {best_score:.2f}")
else:
    best_class, best_score = None, None
    print("No predictions found.")


Predicted Class: push up with max confidence: 0.91


### Analysis:
- The two videos tested here are videos that have never been seen by model
- The pipeline I structured here classified both videos accurately as a push-up
- Using max confidence to classify exercises improves the prediction accuracy 

### Skip or Trigger Pose Estimation

In [19]:
if best_class.lower() == "push up":
    flag_pushup = True
    print("Push-up detected — Running pose estimation...")
else:
    print("Not a push-up or low confidence — skipping pose estimation.")


Push-up detected — Running pose estimation...


### Determine Most Visible Arm

In [20]:
if flag_pushup:
    
    left_visible_count = 0
    right_visible_count = 0

    for img_name in sorted(os.listdir(frame_folder)):
        img_path = os.path.join(frame_folder, img_name)
        results = pose_model(img_path)

        for r in results:
            if r.keypoints is None:
                continue
            conf = r.keypoints.conf[0]
            left_visible = all(conf[i] > 0.5 for i in [5, 7, 9])
            right_visible = all(conf[i] > 0.5 for i in [6, 8, 10])
            if left_visible: left_visible_count += 1
            if right_visible: right_visible_count += 1

    if right_visible_count >= left_visible_count:
        side = "right"
    else:
        side = "left"
        
    print(f"Using {side} arm for angle tracking.")



image 1/1 f:\LSU_Fullstack_AI_Machine_Learning_Course\capstone_project\video_frames\frame_0.jpg: 384x640 1 person, 240.7ms
Speed: 4.1ms preprocess, 240.7ms inference, 5.6ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 f:\LSU_Fullstack_AI_Machine_Learning_Course\capstone_project\video_frames\frame_1.jpg: 384x640 1 person, 90.0ms
Speed: 3.8ms preprocess, 90.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 f:\LSU_Fullstack_AI_Machine_Learning_Course\capstone_project\video_frames\frame_10.jpg: 384x640 1 person, 13.5ms
Speed: 1.7ms preprocess, 13.5ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 f:\LSU_Fullstack_AI_Machine_Learning_Course\capstone_project\video_frames\frame_100.jpg: 384x640 1 person, 13.1ms
Speed: 1.6ms preprocess, 13.1ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 f:\LSU_Fullstack_AI_Machine_Learning_Course\capstone_project\video_frames\frame_101.jpg: 384x640 1 perso

### Remove angle_overlay frames if they don't match ones in video_frames folder

In [21]:

input_frames = sorted(glob.glob(os.path.join(frame_folder, "frame_*.jpg")))
output_frames = sorted(glob.glob(os.path.join(output_folder, "frame_*.jpg")))

overlay_flag = False
if len(input_frames) != len(output_frames):
    overlay_flag = True
    print("Mismatch in frame count. Deleting output frames...")
    for f in output_frames:
        os.remove(f)
else:
    print("Frame counts match. No need to delete output frames.")

Mismatch in frame count. Deleting output frames...


### Define Angle Function

In [22]:
def calculate_angle(a, b, c):
    a, b, c = np.array(a), np.array(b), np.array(c)
    ba = a - b
    bc = c - b
    cosine = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
    angle = np.arccos(np.clip(cosine, -1.0, 1.0))
    return np.degrees(angle)


### Track the change in angle for each push up frame

In [23]:

# Choose which arm to track (use code from earlier to auto-select if needed)
if side == "right":
    k_idx = [6, 8, 10]
else:
    k_idx = [5, 7, 9]


angle_tracking = []

# Process each frame
if overlay_flag:
    for img_name in sorted(os.listdir(frame_folder)):
        img_path = os.path.join(frame_folder, img_name)
        frame = cv2.imread(img_path)
        results = pose_model(img_path)

        for r in results:
            if r.keypoints is None:
                continue

            conf = r.keypoints.conf[0]
            xy = r.keypoints.xy[0]

            if all(conf[i] > 0.5 for i in k_idx):
                a = xy[k_idx[0]].cpu().numpy()
                b = xy[k_idx[1]].cpu().numpy()
                c = xy[k_idx[2]].cpu().numpy()
                angle = calculate_angle(a, b, c)
                angle_tracking.append(angle)
                # Draw keypoints
                for point in [a, b, c]:
                    cv2.circle(frame, tuple(map(int, point)), 6, (0, 255, 0), -1)

                # Draw lines connecting keypoints
                cv2.line(frame, tuple(map(int, a)), tuple(map(int, b)), (255, 0, 0), 2)
                cv2.line(frame, tuple(map(int, b)), tuple(map(int, c)), (255, 0, 0), 2)

                # Annotate angle
                angle_text = f"{int(angle)} deg"
                pos = tuple(map(int, b))
                cv2.putText(frame, angle_text, (pos[0] + 10, pos[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)

        # Save annotated frame
        out_path = os.path.join(output_folder, img_name)
        cv2.imwrite(out_path, frame)

    print(f"Annotated frames with angles saved to: {output_folder}")
else:
    print("No overlay needed. Skipping angle annotation. ")





image 1/1 f:\LSU_Fullstack_AI_Machine_Learning_Course\capstone_project\video_frames\frame_0.jpg: 384x640 1 person, 169.2ms
Speed: 14.7ms preprocess, 169.2ms inference, 8.1ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 f:\LSU_Fullstack_AI_Machine_Learning_Course\capstone_project\video_frames\frame_1.jpg: 384x640 1 person, 75.6ms
Speed: 3.3ms preprocess, 75.6ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 f:\LSU_Fullstack_AI_Machine_Learning_Course\capstone_project\video_frames\frame_10.jpg: 384x640 1 person, 10.0ms
Speed: 1.9ms preprocess, 10.0ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 f:\LSU_Fullstack_AI_Machine_Learning_Course\capstone_project\video_frames\frame_100.jpg: 384x640 1 person, 12.0ms
Speed: 1.6ms preprocess, 12.0ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 f:\LSU_Fullstack_AI_Machine_Learning_Course\capstone_project\video_frames\frame_101.jpg: 384x640 1 perso

In [24]:
# Step 2: Rep counting logic
reps = 0
down = False
for angle in angle_tracking:
    if angle < 90:         # Down position
        down = True
    elif angle > 150 and down:  # Back to up from down
        reps += 1
        down = False

print(f"Total push-up reps counted: {reps}")

Total push-up reps counted: 8


### Analysis:
- The two videos that I have in project's directory have been tested
- The rep counting works on both videos but with some limitations
- The calculated number of push-ups in the istockphoto-2154152195-640_adpp_is.mp4 video is 2 which reflects reality
- The calculated number of push-ups in the video15.mp4 video is 8 when in reality is 5. This suggests that the rep counting logic needs more refinement and complexity
