In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import cv2
import mediapipe as mp
import numpy as np

In [10]:
# Short alias for MediaPipe's pose solution module.
mp_pose = mp.solutions.pose
# Single pose estimator built with default options and kept at module level;
# get_joint_angles() calls pose.process() on it for every frame.
pose = mp_pose.Pose()

In [11]:
class ExerciseDataset(Dataset):
    """Feature vectors paired with integer-encoded pose labels.

    The two CSV files are inner-joined on their shared ``pose_id`` column.
    The ``pose`` column is encoded as pandas category codes; every remaining
    column except ``pose_id`` is used as a feature.

    Args:
        angles_csv: path to the CSV holding ``pose_id`` plus the feature columns.
        labels_csv: path to the CSV holding ``pose_id`` and ``pose``.

    Raises:
        ValueError: if the join produces no rows.

    Attributes set by ``__init__``:
        angles_data / labels_data: the two raw DataFrames.
        data: the merged DataFrame.
        labels: Series of integer class codes, one per merged row.
        classes: list of pose names; ``classes[code]`` is the name for ``code``.
        angles: 2-D array of feature values, one row per merged row.
    """

    def __init__(self, angles_csv, labels_csv):
        self.angles_data = pd.read_csv(angles_csv)
        self.labels_data = pd.read_csv(labels_csv)
        self.data = pd.merge(self.angles_data, self.labels_data, on="pose_id", how='inner')

        if self.data.empty:
            raise ValueError("Merged dataset is empty. Check if 'pose_id's match between the two CSV files.")

        pose_categories = self.data['pose'].astype('category')
        # Keep the code -> name mapping so predictions can be shown as pose names.
        self.classes = list(pose_categories.cat.categories)
        self.labels = pose_categories.cat.codes
        self.angles = self.data.drop(columns=['pose_id', 'pose']).values

    def __len__(self):
        """Return the number of merged rows."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(features, label)`` as a float32 tensor and a long tensor."""
        angles = self.angles[idx]
        # Positional lookup: plain ``self.labels[idx]`` is label-based on a
        # Series, so negative indices raised KeyError while the feature array
        # accepted them.
        label = self.labels.iloc[idx]
        return torch.tensor(angles, dtype=torch.float32), torch.tensor(label, dtype=torch.long)

In [12]:
class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position offsets to embeddings.

    A ``(max_len, d_model)`` table is precomputed and stored as the buffer
    ``pe``: even columns hold sines and odd columns hold cosines of the
    position scaled by a geometric frequency series.

    Args:
        d_model: embedding width (even or odd).
        max_len: number of positions in the table.

    ``forward(x)`` treats dim 0 of ``x`` as the position axis and returns
    ``x + pe[:x.size(0)]``. The slice has shape ``(seq_len, d_model)`` and is
    added with ordinary broadcasting, which is exact for 2-D
    ``(seq_len, d_model)`` input.

    NOTE(review): for 3-D input the slice is not reshaped to
    ``(seq_len, 1, d_model)``, so right-aligned broadcasting pairs the table's
    position axis with the input's dim 1. Confirm that callers passing 3-D
    tensors intend that.
    """

    def __init__(self, d_model, max_len=500):
        super().__init__()
        position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-torch.log(torch.tensor(10000.0)) / d_model))
        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(position * div_term)
        # With an odd d_model there is one fewer odd column than even columns,
        # so trim the frequency vector to match instead of failing on assignment.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x):
        seq_len = x.size(0)
        if seq_len > self.pe.size(0):
            # Slicing past the table would silently return fewer rows than
            # requested and then fail (or mis-broadcast) in the addition.
            raise ValueError(
                f"Input has {seq_len} positions but the encoding table only holds {self.pe.size(0)}"
            )
        return x + self.pe[:seq_len, :]

In [13]:
class PoseTransformer(nn.Module):
    """Transformer-encoder classifier over per-sample feature vectors.

    Pipeline: linear embedding -> positional offsets -> transformer encoder
    -> mean over the sequence axis -> linear classification head.

    Args:
        input_dim: number of input features per time step.
        num_classes: number of output classes.
        num_heads: attention heads per encoder layer (must divide ``d_model``).
        num_layers: number of stacked encoder layers.
        dim_feedforward: hidden width of each layer's feed-forward sub-block.
        dropout: dropout probability inside the encoder layers.
        d_model: embedding width used throughout the encoder.

    ``forward`` accepts either ``(batch, features)`` or
    ``(batch, seq_len, features)`` and always returns ``(batch, num_classes)``
    logits, one row per sample.
    """

    def __init__(self, input_dim, num_classes, num_heads=8, num_layers=4, dim_feedforward=512, dropout=0.1, d_model=128):
        super().__init__()
        self.embedding = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model)
        # batch_first=True: callers in this notebook put the batch on dim 0.
        # With the sequence-first default the batch axis was treated as the
        # sequence, so samples in a batch attended to each other.
        encoder_layers = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=num_heads,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        self.fc = nn.Linear(d_model, num_classes)

    def forward(self, x):
        if x.dim() == 2:
            # (batch, features) -> (batch, 1, features): one time step per sample.
            x = x.unsqueeze(1)
        elif x.dim() != 3:
            raise ValueError(
                f"Expected input of shape (batch, features) or (batch, seq_len, features), got {tuple(x.shape)}"
            )

        x = self.embedding(x)  # (batch, seq_len, d_model)

        # Ask the encoder for the (seq_len, d_model) offsets via a 2-D input and
        # broadcast them over the batch axis here, so the addition is aligned to
        # the sequence axis whatever the batch size is.
        offsets = self.pos_encoder(x.new_zeros(x.size(1), x.size(2)))
        x = x + offsets

        x = self.transformer_encoder(x)
        x = x.mean(dim=1)  # Global average pooling over the sequence axis
        return self.fc(x)

In [None]:
def calculate_angle(a,b,c):
    """Return the angle at vertex ``b`` formed by points ``a`` and ``c``, in degrees.

    Only the first two coordinates (x, y) of each point are used. The result
    is folded into the range [0, 180].
    """
    first, mid, end = (np.array(point) for point in (a, b, c))

    # Direction of each arm as seen from the vertex.
    heading_to_end = np.arctan2(end[1] - mid[1], end[0] - mid[0])
    heading_to_first = np.arctan2(first[1] - mid[1], first[0] - mid[0])

    degrees = np.abs((heading_to_end - heading_to_first) * 180.0 / np.pi)

    # Report the smaller of the two angles between the arms.
    return 360 - degrees if degrees > 180.0 else degrees

In [14]:
def get_joint_angles(frame, model, threshold=0.1):
    """Detect a pose in ``frame``, turn it into joint angles and classify it.

    Args:
        frame: BGR image (it is converted with ``cv2.COLOR_BGR2RGB`` before detection).
        model: classifier exposing ``embedding.in_features``; called on a
            ``(1, n_features)`` float32 tensor.
        threshold: currently unused; kept so existing calls keep working.

    Returns:
        ``(angles, landmarks, predicted_class)``: the 1-D array of joint angles
        in degrees, the detected landmark list and the predicted class index.
        ``(None, None, None)`` if no pose is detected or the number of angles
        differs from the model's input width.
    """
    results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    if not results.pose_landmarks:
        return None, None, None

    landmarks = results.pose_landmarks.landmark

    def xy(name):
        # [x, y] of one named landmark, the point format calculate_angle expects.
        lm = landmarks[mp_pose.PoseLandmark[name].value]
        return [lm.x, lm.y]

    left_hip, right_hip = xy('LEFT_HIP'), xy('RIGHT_HIP')
    mid_hip = [(left_hip[0] + right_hip[0]) / 2, (left_hip[1] + right_hip[1]) / 2]

    # Each entry is (first, vertex, end); the angle is measured at the vertex.
    # NOTE(review): the joint set and its order are an assumption (shoulders,
    # hip spread, knees, elbows = seven angles). They must match the feature
    # columns the model was trained on; confirm against the angles CSV header.
    joint_triplets = [
        (xy('RIGHT_ELBOW'), xy('RIGHT_SHOULDER'), right_hip),
        (xy('LEFT_ELBOW'), xy('LEFT_SHOULDER'), left_hip),
        (xy('RIGHT_KNEE'), mid_hip, xy('LEFT_KNEE')),
        (right_hip, xy('RIGHT_KNEE'), xy('RIGHT_ANKLE')),
        (left_hip, xy('LEFT_KNEE'), xy('LEFT_ANKLE')),
        (xy('RIGHT_WRIST'), xy('RIGHT_ELBOW'), xy('RIGHT_SHOULDER')),
        (xy('LEFT_WRIST'), xy('LEFT_ELBOW'), xy('LEFT_SHOULDER')),
    ]
    # Previously angles_flat was never assigned, so any detected pose raised NameError.
    angles_flat = np.array([calculate_angle(a, b, c) for a, b, c in joint_triplets])

    if len(angles_flat) != model.embedding.in_features:
        print(f"Error: Expected {model.embedding.in_features} features but got {len(angles_flat)}")
        return None, None, None

    # Convert angles to a single-sample batch and predict the class
    angles_tensor = torch.tensor(angles_flat, dtype=torch.float32).unsqueeze(0)
    with torch.no_grad():
        output = model(angles_tensor)
        # dim=-1 works whether the model returns (num_classes,) or (1, num_classes).
        predicted_class = torch.argmax(output, dim=-1).item()

    return angles_flat, landmarks, predicted_class


In [None]:
def get_joint_angles(frame, model, threshold=0.1):
    """Detect a pose in ``frame`` and classify it from raw landmark coordinates.

    NOTE(review): this redefines ``get_joint_angles`` and replaces the earlier
    definition in this notebook once the cell is run. Despite the name, the
    feature vector here is the flattened x, y, z of every landmark (three
    values per landmark), not joint angles. A model whose input width differs
    is rejected by the feature-count check below.

    Args:
        frame: BGR image (it is converted with ``cv2.COLOR_BGR2RGB`` before detection).
        model: classifier exposing ``embedding.in_features``; called on a
            ``(1, n_features)`` float32 tensor.
        threshold: currently unused; kept so existing calls keep working.

    Returns:
        ``(features, landmarks, predicted_class)``, or ``(None, None, None)``
        if no pose is detected or the feature count does not match the model.
    """
    results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    if not results.pose_landmarks:
        return None, None, None

    landmarks = results.pose_landmarks.landmark
    # One [x, y, z] row per landmark, flattened into a single feature vector.
    angles_flat = np.array([[lm.x, lm.y, lm.z] for lm in landmarks]).flatten()

    if len(angles_flat) != model.embedding.in_features:
        print(f"Error: Expected {model.embedding.in_features} features but got {len(angles_flat)}")
        return None, None, None

    # Convert the features to a single-sample batch and predict the class
    angles_tensor = torch.tensor(angles_flat, dtype=torch.float32).unsqueeze(0)
    with torch.no_grad():
        output = model(angles_tensor)
        # dim=-1 instead of dim=1: a model returning a 1-D logits vector made
        # argmax(dim=1) raise IndexError; the last axis is the class axis for
        # both (num_classes,) and (1, num_classes).
        predicted_class = torch.argmax(output, dim=-1).item()

    return angles_flat, landmarks, predicted_class


In [15]:
def train_model(model, dataloader, criterion, optimizer, num_epochs=25):
    """Run a basic supervised training loop and report the mean loss per epoch.

    Args:
        model: module called on each batch after a length-1 axis is inserted
            at dim 1, i.e. ``(batch, features) -> (batch, 1, features)``.
        dataloader: iterable of ``(features, labels)`` batches; it is iterated
            once per epoch.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer over the model's parameters.
        num_epochs: number of passes over ``dataloader``.

    Returns:
        List with the mean batch loss of every epoch, in order.

    Raises:
        ValueError: if ``dataloader`` yields no batches (previously a
            ZeroDivisionError when averaging).
    """
    # Make sure dropout and similar layers are in training mode, whatever
    # state the caller left the model in.
    model.train()
    epoch_losses = []

    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for angles, labels in dataloader:
            angles = angles.unsqueeze(1)  # Add sequence dimension
            optimizer.zero_grad()
            outputs = model(angles)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("dataloader yielded no batches; nothing to train on")

        avg_loss = running_loss / num_batches
        epoch_losses.append(avg_loss)
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss}')

    return epoch_losses

In [None]:
def main():
    """Train the pose classifier on the CSV data, then label live webcam frames.

    Steps: build the dataset and dataloader from 'angles.csv' and 'labels.csv',
    train a PoseTransformer sized from the data, then read frames from webcam 0,
    drawing the detected landmarks and the predicted pose until 'q' is pressed
    or a frame cannot be read. The camera and the display window are released
    even if an error occurs during the loop.
    """
    # Step 1: Load dataset for training
    dataset = ExerciseDataset('angles.csv', 'labels.csv')
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

    # Size the model from the data
    input_dim = dataset.angles.shape[1]  # Number of feature columns
    num_classes = len(set(dataset.labels))  # Number of unique poses (labels)
    # Same category encoding the dataset uses for its labels, so
    # class_names[code] is the pose name for a predicted code.
    class_names = list(dataset.data['pose'].astype('category').cat.categories)

    # Step 2: Initialize model, loss function, and optimizer
    model = PoseTransformer(input_dim=input_dim, num_classes=num_classes)
    model.train()  # Set the model to training mode

    criterion = nn.CrossEntropyLoss()  # Cross-entropy loss for classification tasks
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Step 3: Train the model
    train_model(model, dataloader, criterion, optimizer)

    # Step 4: Real-time pose estimation using OpenCV
    cap = cv2.VideoCapture(0)  # Open the webcam
    model.eval()  # Set the model to evaluation mode for inference
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame")
                break

            # Extract features from the frame and predict the pose
            angles, landmarks, predicted_class = get_joint_angles(frame, model)

            if angles is not None:
                for idx, lm in enumerate(landmarks):
                    # Convert normalized coordinates to pixel coordinates
                    x, y = int(lm.x * frame.shape[1]), int(lm.y * frame.shape[0])

                    # NOTE(review): placeholder "correction" rule. It compares a
                    # landmark's x with the feature at the same index, which has
                    # no obvious meaning; confirm the intended reference.
                    # The bounds check avoids an IndexError when there are fewer
                    # features than landmarks.
                    needs_correction = idx < len(angles) and abs(lm.x - angles[idx]) > 0.1
                    color = (0, 0, 255) if needs_correction else (255, 0, 0)  # Red / blue (BGR)

                    # Draw the landmark
                    cv2.circle(frame, (x, y), 5, color, -1)

                # Show the pose name when the index maps to one, else the raw index
                if 0 <= predicted_class < len(class_names):
                    pose_name = class_names[predicted_class]
                else:
                    pose_name = predicted_class
                cv2.putText(frame, f"Predicted Pose: {pose_name}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

            # Display the video feed with predictions
            cv2.imshow('Pose Estimation', frame)

            if cv2.waitKey(10) & 0xFF == ord('q'):  # Press 'q' to quit
                break
    finally:
        # Always free the camera and close the window, including on errors.
        cap.release()
        cv2.destroyAllWindows()

# Entry point: call main() when this code runs as the top-level module.
# In a notebook kernel __name__ is "__main__", so executing this cell runs main().
if __name__ == "__main__":
    main()


In [23]:
# Sanity check: load the merged dataset and derive the model dimensions from it.
dataset = ExerciseDataset('angles.csv', 'labels.csv')
input_dim = dataset.angles.shape[1]  # number of feature columns
num_classes = len(set(dataset.labels))  # number of distinct label codes
print(num_classes)

10


In [32]:
# Inspect the batches the DataLoader yields: shuffled, up to 32 rows each.
# NOTE(review): this prints the feature tensor of every batch, i.e. the whole
# dataset; consider looking at a single batch (or its shape) instead.
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
for angles,labels in dataloader:
    print(angles)

tensor([[116.0985, 153.5788,  43.7366,  62.5220,  64.5979,  83.4766,  66.8604],
        [149.2540, 151.1995,  28.9656, 166.2928, 139.8292, 100.5508,  86.9703],
        [167.3041, 154.7254,  22.6626, 146.1715, 149.8195, 160.4000, 172.1969],
        [ 33.7499,  90.6573,  19.5284, 169.7759, 162.0188, 118.5745, 135.3569],
        [158.4866, 130.6546,  94.6250, 165.5182, 161.2604, 132.8349, 148.7605],
        [ 92.4622,  93.1420,  23.9375, 140.7638, 141.8538, 125.3733, 126.4127],
        [ 89.6005,  65.0443,  17.5432, 168.3959, 165.2213, 170.0170, 136.3982],
        [ 33.1442,  25.6309,  13.7297, 175.0396, 176.7615,  97.1703,  89.7165],
        [ 42.5508,  36.1167,  23.3634, 162.1152, 150.7434, 111.6996,  95.5859],
        [ 82.2221,  71.3412,  69.9241, 168.2855, 173.1407, 151.7006, 128.9293],
        [ 32.3873,  26.1963,  50.7888, 138.2517, 112.8765, 122.2213, 107.8516],
        [169.0215, 172.6735,  46.4446, 126.6218, 128.3380, 150.5824, 152.1771],
        [  8.1705,  64.1727,  37.6705, 1