In [2]:
import cv2
import mediapipe as mp
import numpy as np
import torch
import torch.nn as nn

In [3]:
# MediaPipe setup: module-level aliases for the pose solution and the
# landmark-drawing utilities referenced by the cells below.
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

In [4]:
# Positional Encoding for Transformer
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to a sequence-first tensor.

    The encoding table is computed once for ``max_len`` positions and stored
    in the non-trainable buffer ``pe`` of shape ``(max_len, d_model)``.

    Args:
        d_model: Size of the last (feature) dimension of the inputs.
        max_len: Number of positions for which encodings are precomputed.
    """

    def __init__(self, d_model, max_len=500):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-torch.log(torch.tensor(10000.0)) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # With an odd d_model there is one fewer cosine column than sine
        # column, so trim the frequencies to match the slice being filled.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` with the encoding of each position (dim 0) added.

        Args:
            x: Tensor whose first dimension indexes positions, shaped either
                ``(seq_len, d_model)`` or ``(seq_len, batch, d_model)``.

        Returns:
            Tensor of the same shape as ``x``.

        Raises:
            ValueError: If ``seq_len`` exceeds the precomputed ``max_len``.
        """
        seq_len = x.size(0)
        if seq_len > self.pe.size(0):
            raise ValueError(
                f"Sequence length {seq_len} exceeds max_len {self.pe.size(0)}"
            )
        encoding = self.pe[:seq_len, :]
        if x.dim() == 3:
            # Insert a batch axis so the table broadcasts over the batch
            # instead of being aligned against it.
            encoding = encoding.unsqueeze(1)
        return x + encoding

In [5]:
# Transformer model for pose classification
class PoseTransformer(nn.Module):
    """Transformer encoder with two linear heads for pose classification.

    Inputs are projected to a 128-wide embedding, given positional
    encodings, run through a stack of encoder layers, averaged over dim 0
    and passed through a shared 64-unit linear layer that feeds both heads.

    Args:
        input_dim: Size of the last dimension of the input features.
        num_exercises: Number of outputs of the exercise-type head.
        num_heads: Attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
        dim_feedforward: Width of each encoder layer's feed-forward block.
        dropout: Dropout rate used inside the encoder layers.
    """

    def __init__(self, input_dim, num_exercises, num_heads=8, num_layers=4, dim_feedforward=512, dropout=0.1):
        super().__init__()
        embed_dim, hidden_dim = 128, 64

        self.embedding = nn.Linear(input_dim, embed_dim)
        self.pos_encoder = PositionalEncoding(embed_dim)
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=embed_dim,
                nhead=num_heads,
                dim_feedforward=dim_feedforward,
                dropout=dropout,
            ),
            num_layers,
        )
        self.fc1 = nn.Linear(embed_dim, hidden_dim)
        # Head 1: one score per exercise type.
        self.fc2 = nn.Linear(hidden_dim, num_exercises)
        # Head 2: two scores, one per terminal position of an exercise.
        self.fc3 = nn.Linear(hidden_dim, 2)

    def forward(self, x):
        """Return the ``(exercise_type, position)`` score tensors for ``x``."""
        encoded = self.transformer_encoder(self.pos_encoder(self.embedding(x)))
        # Global average pooling over dim 0.
        pooled = encoded.mean(dim=0)
        shared = self.fc1(pooled)
        return self.fc2(shared), self.fc3(shared)

In [6]:
# Function to calculate angle between three points
def calculate_angle(a, b, c):
    """Return the angle in degrees at vertex ``b`` between rays ``b->a`` and ``b->c``.

    Only the first two components (index 0 and 1) of each point are used.
    The result is folded into the range [0, 180].
    """
    first, vertex, last = np.array(a), np.array(b), np.array(c)

    # Direction of each ray measured from the vertex.
    heading_last = np.arctan2(last[1] - vertex[1], last[0] - vertex[0])
    heading_first = np.arctan2(first[1] - vertex[1], first[0] - vertex[0])

    degrees = np.abs((heading_last - heading_first) * 180.0 / np.pi)

    # Report the interior angle rather than the reflex one.
    return 360 - degrees if degrees > 180.0 else degrees


In [7]:
# Function to flatten each landmark's (x, y, z) coordinates into one feature vector (despite the name, no angles are computed)
def get_joint_angles(landmarks):
    """Flatten landmark coordinates into a 1-D NumPy array.

    Each item of ``landmarks`` must expose ``x``, ``y`` and ``z`` attributes.
    The result lists them as ``[x0, y0, z0, x1, y1, z1, ...]``; despite the
    function's name, no angles are computed here.
    """
    coordinates = [[point.x, point.y, point.z] for point in landmarks]
    return np.array(coordinates).flatten()

In [8]:
# Exercise state class
class ExerciseState:
    """Repetition counter driven by stage transitions.

    ``count`` goes up by one each time the stage changes from ``"Start"``
    to ``"End"``. ``stage`` and ``previous_stage`` both hold the most
    recently recorded stage.
    """

    def __init__(self):
        self.count = 0
        self.stage = None
        self.previous_stage = None

    def update(self, current_stage):
        """Record ``current_stage``; count a rep on a Start -> End change."""
        # Nothing to do while the stage is unchanged.
        if current_stage == self.previous_stage:
            return

        if (self.previous_stage, current_stage) == ("Start", "End"):
            self.count += 1

        self.stage = self.previous_stage = current_stage

In [10]:
# Helpers used by main()
def _majority_vote(values):
    """Return the most frequent item of the non-empty list ``values``."""
    return max(set(values), key=values.count)


def _landmark_xy(landmarks, joint):
    """Return the ``[x, y]`` coordinates of the landmark selected by ``joint``."""
    point = landmarks[joint.value]
    return [point.x, point.y]


def _draw_form_feedback(image, landmarks, exercise_name, position_name):
    """Overlay the exercise-specific joint angle and any corrective hint on ``image``."""
    font = cv2.FONT_HERSHEY_SIMPLEX
    white, red = (255, 255, 255), (0, 0, 255)
    joints = mp_pose.PoseLandmark

    if exercise_name == 'Push-up':
        elbow_angle = calculate_angle(
            _landmark_xy(landmarks, joints.LEFT_SHOULDER),
            _landmark_xy(landmarks, joints.LEFT_ELBOW),
            _landmark_xy(landmarks, joints.LEFT_WRIST),
        )
        cv2.putText(image, f"Elbow Angle: {int(elbow_angle)}", (50, 150), font, 1, white, 2)

        if position_name == "End" and elbow_angle > 90:
            cv2.putText(image, "Go lower!", (50, 200), font, 1, red, 2)

    elif exercise_name == 'Squat':
        knee_angle = calculate_angle(
            _landmark_xy(landmarks, joints.LEFT_HIP),
            _landmark_xy(landmarks, joints.LEFT_KNEE),
            _landmark_xy(landmarks, joints.LEFT_ANKLE),
        )
        cv2.putText(image, f"Knee Angle: {int(knee_angle)}", (50, 150), font, 1, white, 2)

        # A smaller knee angle means a deeper squat, so (as with the push-up
        # check) the hint fires while the joint is still more open than 90
        # degrees. The previous `< 90` test warned on squats that were
        # already deep.
        if position_name == "End" and knee_angle > 90:
            cv2.putText(image, "Squat deeper!", (50, 200), font, 1, red, 2)

    # Add similar angle calculations and feedback for other exercises


# Main function
def main():
    """Run the webcam demo: classify the exercise, count reps, draw feedback.

    Reads frames from camera 0 until the stream ends or ``q`` is pressed.
    For each frame with a detected pose, the flattened landmarks are fed to
    a ``PoseTransformer``, the predictions are majority-voted over the last
    few frames, the matching ``ExerciseState`` is updated and the annotated
    frame is displayed. The camera, the pose estimator and the OpenCV
    windows are released even if an error interrupts the loop.
    """
    from collections import deque  # local import: only needed by this loop

    # Initialize PoseTransformer model (you'll need to load your trained weights here)
    input_dim = 99  # Adjust based on your model
    num_exercises = 5  # Push-up, Pull-up, Sit-up, Jumping Jack, Squat
    model = PoseTransformer(input_dim=input_dim, num_exercises=num_exercises)
    model.eval()  # Set the model to evaluation mode

    # Exercise names and states
    exercise_names = ['Push-up', 'Pull-up', 'Sit-up', 'Jumping Jack', 'Squat']
    exercise_states = {name: ExerciseState() for name in exercise_names}

    # For smoothing predictions: a bounded deque discards the oldest entry
    # automatically once the window is full.
    smoothing_window = 5
    prediction_history = deque(maxlen=smoothing_window)

    # Open webcam
    cap = cv2.VideoCapture(0)

    try:
        # The context manager closes the pose estimator when the loop exits.
        with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    print("Failed to grab frame")
                    break

                # Convert the BGR image to RGB
                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image.flags.writeable = False

                # Make detection
                results = pose.process(image)

                # Convert the image back to BGR
                image.flags.writeable = True
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                if results.pose_landmarks:
                    landmarks = results.pose_landmarks.landmark

                    # Flattened landmark coordinates used as model features
                    angles = get_joint_angles(landmarks)

                    # Predict exercise type and position
                    with torch.no_grad():
                        angles_tensor = torch.tensor(angles, dtype=torch.float32).unsqueeze(0)
                        exercise_type, position = model(angles_tensor)
                        predicted_exercise = torch.argmax(exercise_type, dim=0).item()
                        predicted_position = torch.argmax(position, dim=0).item()

                    # Smooth predictions with a majority vote over the window
                    prediction_history.append((predicted_exercise, predicted_position))
                    smoothed_exercise = _majority_vote([p[0] for p in prediction_history])
                    smoothed_position = _majority_vote([p[1] for p in prediction_history])

                    # Update exercise state
                    exercise_name = exercise_names[smoothed_exercise]
                    position_name = "Start" if smoothed_position == 0 else "End"
                    exercise_states[exercise_name].update(position_name)

                    # Draw skeleton
                    mp_drawing.draw_landmarks(
                        image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                        mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2),
                        mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                    )

                    # Display exercise, position, and count
                    cv2.putText(image, f"{exercise_name}: {position_name}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                    cv2.putText(image, f"Count: {exercise_states[exercise_name].count}", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                    # Exercise-specific angle calculations and feedback
                    _draw_form_feedback(image, landmarks, exercise_name, position_name)

                cv2.imshow('Exercise Pose Correction', image)

                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
    finally:
        # Always free the camera and close the preview window.
        cap.release()
        cv2.destroyAllWindows()

# Entry point: start the webcam demo when this code is run directly
# rather than imported as a module.
if __name__ == "__main__":
    main()