In [1]:
from ultralytics import YOLO
import cv2
import os
import pandas as pd
import random

# Load YOLOv8n-pose model
model = YOLO('yolov8n-pose.pt')

# Paths
data_root = r'C:/Users/Kassimi/OneDrive/Bureau/cv_data/pushups'
folders = ['correct sequence', 'wrong sequence']
augmented_folders = {'correct sequence': 'aug_correct', 'wrong sequence': 'aug_wrong'}

# Create folders for augmented videos
for folder in augmented_folders.values():
    os.makedirs(os.path.join(data_root, folder), exist_ok=True)

# Augmentation function
def augment_frame(frame):
    if random.random() < 0.5:
        frame = cv2.flip(frame, 1)  # Horizontal flip
    alpha = random.uniform(0.8, 1.2)  # Contrast
    beta = random.randint(-30, 30)   # Brightness
    frame = cv2.convertScaleAbs(frame, alpha=alpha, beta=beta)
    return frame

#  Create augmented videos
print("🎥 Augmenting and saving videos...")
for label in folders:
    input_folder = os.path.join(data_root, label)
    output_folder = os.path.join(data_root, augmented_folders[label])

    for video_file in os.listdir(input_folder):
        if not video_file.endswith(('.mp4', '.avi', '.mov')):
            continue

        input_path = os.path.join(input_folder, video_file)
        output_path = os.path.join(output_folder, f"aug_{video_file}")

        cap = cv2.VideoCapture(input_path)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            aug_frame = augment_frame(frame)
            out.write(aug_frame)

        cap.release()
        out.release()

print("✅ Augmented videos saved.\n")

#  Extract frames and keypoints from both original and augmented videos
print("📸 Extracting frames and pose keypoints...")

output_data = []

# Process both original and augmented videos
for label in folders + list(augmented_folders.values()):  # Including augmented folders
    folder_path = os.path.join(data_root, label)

    for video_file in os.listdir(folder_path):
        if not video_file.endswith(('.mp4', '.avi', '.mov')):
            continue

        video_path = os.path.join(folder_path, video_file)
        cap = cv2.VideoCapture(video_path)
        frame_index = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Pose estimation
            results = model.predict(frame, save=False, verbose=False)
            keypoints = results[0].keypoints.xy.cpu().numpy()

            if len(keypoints) == 0:
                frame_index += 1
                continue

            # Take only the first person detected
            person_keypoints = keypoints[0].flatten()  # shape: (34,)
            row = [video_file, frame_index] + list(person_keypoints) + [label]
            output_data.append(row)

            frame_index += 1
        cap.release()

# Save to CSV
columns = ['video_id', 'frame'] + [f'kp_{i}_{coord}' for i in range(17) for coord in ['x', 'y']] + ['label']
df = pd.DataFrame(output_data, columns=columns)

output_csv_path = os.path.join(data_root, 'pushup_pose_data.csv')
df.to_csv(output_csv_path, index=False)

print(f"✅ Keypoints from all videos (original + augmented) saved to: {output_csv_path}")


🎥 Augmenting and saving videos...
✅ Augmented videos saved.

📸 Extracting frames and pose keypoints...
✅ Keypoints from all videos (original + augmented) saved to: C:/Users/Kassimi/OneDrive/Bureau/cv_data/pushups\pushup_pose_data.csv


In [23]:
df

Unnamed: 0,video_id,frame,kp_0_x,kp_0_y,kp_1_x,kp_1_y,kp_2_x,kp_2_y,kp_3_x,kp_3_y,...,kp_12_y,kp_13_x,kp_13_y,kp_14_x,kp_14_y,kp_15_x,kp_15_y,kp_16_x,kp_16_y,label
0,Copy of push up 1.mp4,0,470.29395,210.786789,474.699005,205.294525,471.992767,199.126373,0.000000,0.000000,...,158.329926,164.350677,236.564285,167.065247,222.115448,19.154755,285.743134,24.524994,281.398132,1.0
1,Copy of push up 1.mp4,1,470.05493,210.226105,475.933716,204.226776,471.986908,195.307053,0.000000,0.000000,...,148.483047,174.185471,246.092224,174.133942,216.902008,37.519714,308.267334,40.383118,285.514893,1.0
2,Copy of push up 1.mp4,2,470.19278,212.633636,475.442169,206.140823,471.920502,198.019073,0.000000,0.000000,...,152.222107,171.839996,247.033997,172.521240,219.902557,33.059113,308.396637,36.110382,287.430023,1.0
3,Copy of push up 1.mp4,3,470.89636,212.662811,476.404419,206.303314,472.506042,198.065811,0.000000,0.000000,...,153.436523,170.993851,247.762726,170.518173,220.252960,30.347168,306.387878,33.768555,285.480804,1.0
4,Copy of push up 1.mp4,4,471.8687,210.240173,478.258026,203.996674,472.973633,194.745804,0.000000,0.000000,...,152.613342,175.539963,247.787415,173.467041,216.459595,31.727356,309.895935,34.773468,284.468719,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20635,aug_Copy of push up 81.mp4,88,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,195.980637,220.386139,231.456543,225.815903,228.070755,80.811462,200.456619,84.838776,201.392670,
20636,aug_Copy of push up 81.mp4,89,101.722916,99.781471,106.058823,86.640907,0.000000,0.000000,130.515030,79.025879,...,208.271088,421.723450,197.422852,411.472290,223.376602,535.454224,206.514099,528.744141,229.306793,
20637,aug_Copy of push up 81.mp4,90,100.9666,100.084717,105.165054,86.992126,0.000000,0.000000,128.998245,78.785080,...,208.689301,416.601562,195.951431,405.537598,218.852554,535.146179,211.927536,527.344360,230.922729,
20638,aug_Copy of push up 81.mp4,91,103.80043,100.658356,106.407501,87.300125,0.000000,0.000000,128.524185,78.168808,...,208.099899,429.433807,204.062317,418.362305,222.913101,545.697693,217.271606,531.541016,228.716782,


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

# Load CSV
df = pd.read_csv(r"C:/Users/Kassimi/OneDrive/Bureau/cv_data/pushups\pushup_pose_datav0.csv")

# Convert label to numeric
df['label'] = df['label'].map({'correct sequence': 1, 'wrong sequence': 0})

# Parameters
sequence_length = 30  # Choose fixed sequence length
min_frames_required = sequence_length

# Normalize keypoints
keypoint_cols = [col for col in df.columns if col.startswith('kp_')]
scaler = StandardScaler()
df[keypoint_cols] = scaler.fit_transform(df[keypoint_cols])

# Group by video and build sequences
X_sequences = []
y_labels = []
for video_id, group in df.groupby('video_id'):
    group = group.sort_values('frame')
    keypoints = group[keypoint_cols].values  # shape: (num_frames, 34)
    label = group['label'].iloc[0]
    
    # Skip short sequences
    if len(keypoints) < min_frames_required:
        continue
    
    # Break long videos into multiple sequences
    for start in range(0, len(keypoints) - sequence_length + 1, sequence_length):
        seq = keypoints[start:start + sequence_length]
        X_sequences.append(seq)
        y_labels.append(label)

# Convert to tensors
X_tensor = torch.tensor(np.array(X_sequences), dtype=torch.float32)  # shape: (N, seq_len, 34)
y_tensor = torch.tensor(y_labels, dtype=torch.long)

# Train/val split
X_train, X_val, y_train, y_val = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)

# PyTorch Dataset
class PushupSequenceDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Dataloaders
train_dataset = PushupSequenceDataset(X_train, y_train)
val_dataset = PushupSequenceDataset(X_val, y_val)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [9]:
class PushupLSTM(nn.Module):
    def __init__(self):
        super(PushupLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size=34, hidden_size=64, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(64, 32)  # LSTM output to hidden layer
        self.fc2 = nn.Linear(32, 2)   # Hidden layer to output (2 units for binary classification)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        # x shape: (batch_size, sequence_length, 34)
        lstm_out, _ = self.lstm(x)              # Output shape: (batch_size, sequence_length, hidden_size)
        x = lstm_out[:, -1, :]                  # Take output from the last time step
        x = self.relu(self.fc1(x))              # Pass through FC layer
        x = self.fc2(x)                         # Output layer (2 values for binary classification)
        return x                                # Return raw logits for CrossEntropyLoss

# Initialize the model
model = PushupLSTM()

In [11]:
criterion = nn.CrossEntropyLoss()  
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    correct = 0
    total = 0
    
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)  # Shape: (batch_size, 2)
        
        # Now calculate the loss - no need for shape adjustment
        loss = criterion(outputs, labels)  # labels should be of shape [batch_size]
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item() * inputs.size(0)
        
        _, predicted = torch.max(outputs, 1)  # Get the index of the maximum logit for prediction
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    
    avg_train_loss = train_loss / total
    train_acc = correct / total
    
    # Validation
    model.eval()
    val_loss = 0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            val_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    avg_val_loss = val_loss / total
    val_acc = correct / total
    
    print(f"Epoch [{epoch+1}/{num_epochs}] "
          f"Train Loss: {avg_train_loss:.4f}, Train Acc: {train_acc:.4f} | "
          f"Val Loss: {avg_val_loss:.4f}, Val Acc: {val_acc:.4f}")

# Save the trained model
torch.save(model.state_dict(), 'pushup_classifier.pth')

# Example of how to use the model for inference
def predict_sequence(model, sequence):
    model.eval()
    with torch.no_grad():
        sequence = sequence.to(device)
        output = model(sequence)
        _, predicted = torch.max(output, 1)
        return predicted.item()

Epoch [1/50] Train Loss: 0.6623, Train Acc: 0.5750 | Val Loss: 0.6382, Val Acc: 0.5833
Epoch [2/50] Train Loss: 0.6186, Train Acc: 0.6250 | Val Loss: 0.5898, Val Acc: 0.7667
Epoch [3/50] Train Loss: 0.5635, Train Acc: 0.7292 | Val Loss: 0.5213, Val Acc: 0.7167
Epoch [4/50] Train Loss: 0.4976, Train Acc: 0.7500 | Val Loss: 0.4647, Val Acc: 0.7167
Epoch [5/50] Train Loss: 0.4478, Train Acc: 0.7625 | Val Loss: 0.4417, Val Acc: 0.7167
Epoch [6/50] Train Loss: 0.4133, Train Acc: 0.7750 | Val Loss: 0.4359, Val Acc: 0.7167
Epoch [7/50] Train Loss: 0.3806, Train Acc: 0.8042 | Val Loss: 0.4172, Val Acc: 0.7333
Epoch [8/50] Train Loss: 0.3526, Train Acc: 0.8208 | Val Loss: 0.4032, Val Acc: 0.7500
Epoch [9/50] Train Loss: 0.3202, Train Acc: 0.8333 | Val Loss: 0.3803, Val Acc: 0.7500
Epoch [10/50] Train Loss: 0.2992, Train Acc: 0.8583 | Val Loss: 0.3573, Val Acc: 0.7667
Epoch [11/50] Train Loss: 0.2691, Train Acc: 0.8875 | Val Loss: 0.3349, Val Acc: 0.7667
Epoch [12/50] Train Loss: 0.2367, Train A

In [13]:
# Save the trained model
model_path = r"C:\Users\Kassimi\OneDrive\Bureau\cv_data\augmented_pushup_lstm_modelv1.pth"
torch.save(model.state_dict(), model_path)
print(f"✅ Model saved to {model_path}")

✅ Model saved to C:\Users\Kassimi\OneDrive\Bureau\cv_data\augmented_pushup_lstm_modelv1.pth


In [3]:
import torch
import cv2
import pandas as pd
import numpy as np
from ultralytics import YOLO
from sklearn.preprocessing import StandardScaler
from collections import deque

# Define LSTM model class (needs to match what you used in training)
class PushupLSTM(torch.nn.Module):
    def __init__(self):
        super(PushupLSTM, self).__init__()
        self.lstm = torch.nn.LSTM(input_size=34, hidden_size=64, num_layers=1, batch_first=True)
        self.fc1 = torch.nn.Linear(64, 32)
        self.fc2 = torch.nn.Linear(32, 2)  # 2 outputs for binary classification
        self.relu = torch.nn.ReLU()
    
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        x = lstm_out[:, -1, :]  # Take output from the last time step
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# LSTM sequence length (must match training)
sequence_length = 30

# Load trained model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_path = r'C:\Users\Kassimi\OneDrive\Bureau\cv_data\augmented_pushup_lstm_modelv1.pth'
model = PushupLSTM()
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()

# Load scaler (refit on original dataset)
try:
    df = pd.read_csv(r"C:\Users\Kassimi\OneDrive\Bureau\cv_data\pushups\pushup_pose_datav0.csv")
    # Only use keypoint columns that contain numerical data
    keypoint_cols = [col for col in df.columns if col.startswith('kp_')]
    
    # Make sure we only use numeric data for scaling
    # First check if the data is clean
    for col in keypoint_cols:
        if df[col].dtype == 'object':
            df[col] = pd.to_numeric(df[col], errors='coerce')
    
    # Drop rows with NaN values that might have been created by coercion
    df = df.dropna(subset=keypoint_cols)
    
    # Now fit the scaler
    scaler = StandardScaler()
    scaler.fit(df[keypoint_cols])
    print(f"Successfully fit scaler on {len(df)} rows with {len(keypoint_cols)} keypoint features")
    
except Exception as e:
    print(f"Error fitting scaler: {e}")
    print("Will continue with default scaling instead")
    # If loading fails, we'll use a simple normalization as fallback
    scaler = StandardScaler()

# Load YOLO pose model
try:
    yolo_model = YOLO('yolov8n-pose.pt')
    print("YOLO pose model loaded successfully")
except Exception as e:
    print(f"Error loading YOLO model: {e}")
    exit(1)

# Video path
video_path = r"D:\Kassimi\Pictures\Camera Roll\WIN_20250505_09_38_56_Pro.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    print(f"Error: Could not open video {video_path}")
    exit(1)

print(f"Video loaded successfully. Starting prediction...")

# Buffer to store sequence of keypoints
sequence_buffer = deque(maxlen=sequence_length)

# For video output
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))
out = cv2.VideoWriter('pushup_prediction3.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

frame_count = 0
while True:
    ret, frame = cap.read()
    if not ret:
        break
        
    frame_count += 1
    if frame_count % 10 == 0:
        print(f"Processing frame {frame_count}")
    
    # Get YOLO pose estimation
    results = yolo_model.predict(frame, save=False, verbose=False)
    
    # Draw the pose on the frame (optional)
    annotated_frame = results[0].plot()
    
    # Check if any person was detected
    keypoints = results[0].keypoints.xy.cpu().numpy()
    
    if len(keypoints) > 0:  # If at least one person is detected
        # Take the first person's keypoints
        person_keypoints = keypoints[0].flatten()
        
        # Make sure we have the right number of keypoints
        if len(person_keypoints) == 34:  # 17 keypoints * 2 (x,y)
            try:
                # Scale keypoints using our scaler
                if hasattr(scaler, 'mean_'):  # Check if scaler was properly fitted
                    person_keypoints_scaled = scaler.transform([person_keypoints])[0]
                else:
                    # Simple normalization as fallback
                    person_keypoints_scaled = (person_keypoints - np.mean(person_keypoints)) / (np.std(person_keypoints) + 1e-8)
                
                # Add to sequence buffer
                sequence_buffer.append(person_keypoints_scaled)
                
                # Only predict when we have a full sequence
                if len(sequence_buffer) == sequence_length:
                    input_seq = torch.tensor([list(sequence_buffer)], dtype=torch.float32).to(device)
                    
                    with torch.no_grad():
                        output = model(input_seq)
                        probabilities = torch.softmax(output, dim=1)
                        confidence, predicted_class = torch.max(probabilities, 1)
                    
                    # Get the prediction and confidence
                    label = 'Correct Form' if predicted_class.item() == 1 else 'Incorrect Form'
                    conf_value = confidence.item()
                    
                    # Set color based on prediction (green for correct, red for incorrect)
                    color = (0, 255, 0) if label == 'Correct Form' else (0, 0, 255)
                    
                    # Display prediction on frame
                    cv2.putText(annotated_frame, f"{label} ({conf_value:.2f})", 
                                (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, color, 2)
            
            except Exception as e:
                print(f"Error in prediction: {e}")
    
    # Display frame with predictions
    cv2.imshow('Pushup Form Prediction', annotated_frame)
    out.write(annotated_frame)
    
    # Break loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
cap.release()
out.release()
cv2.destroyAllWindows()
print("Prediction completed. Video saved as 'pushup_prediction2.mp4'")

Successfully fit scaler on 10320 rows with 34 keypoint features
YOLO pose model loaded successfully
Video loaded successfully. Starting prediction...




Processing frame 10




Processing frame 20




Processing frame 30




Processing frame 40




Processing frame 50




Processing frame 60




Processing frame 70




Processing frame 80




Processing frame 90




Processing frame 100




Processing frame 110




Processing frame 120




Processing frame 130




Processing frame 140
Prediction completed. Video saved as 'pushup_prediction2.mp4'




In [1]:
import torch
import cv2
import numpy as np
from ultralytics import YOLO
from sklearn.preprocessing import StandardScaler
from collections import deque
import time

# Define LSTM model class (same as your original)
class PushupLSTM(torch.nn.Module):
    def __init__(self):
        super(PushupLSTM, self).__init__()
        self.lstm = torch.nn.LSTM(input_size=34, hidden_size=64, num_layers=1, batch_first=True)
        self.fc1 = torch.nn.Linear(64, 32)
        self.fc2 = torch.nn.Linear(32, 2)  # 2 outputs for binary classification
        self.relu = torch.nn.ReLU()
    
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        x = lstm_out[:, -1, :]  # Take output from the last time step
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# LSTM sequence length (must match training)
sequence_length = 30

# Load trained model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Update the path to your model
model_path = r'C:\Users\Kassimi\OneDrive\Bureau\cv_data\augmented_pushup_lstm_modelv1.pth'
model = PushupLSTM()
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()
print("LSTM model loaded successfully")

# Create a simple scaler as fallback since we might not have access to the original dataset
scaler = StandardScaler()
print("Using default scaler (make sure your keypoint values are properly scaled)")

# Load YOLO pose model
try:
    yolo_model = YOLO('yolov8n-pose.pt')
    print("YOLO pose model loaded successfully")
except Exception as e:
    print(f"Error loading YOLO model: {e}")
    exit(1)

# Initialize webcam
cap = cv2.VideoCapture(0)  # 0 is usually the default webcam
if not cap.isOpened():
    print("Error: Could not open webcam")
    exit(1)

# Set webcam resolution (optional)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

print("Webcam initialized. Starting live prediction...")
print("Press 'q' to quit")

# Buffer to store sequence of keypoints
sequence_buffer = deque(maxlen=sequence_length)

# Variable to track prediction status
prediction_ready = False
last_prediction = "Waiting for enough frames..."
confidence = 0.0
color = (255, 255, 0)  # Yellow for waiting

# For FPS calculation
prev_time = time.time()
fps = 0

# Flag to toggle recording
is_recording = False
out = None

# Counter for pushups
pushup_count = 0
last_state = None
pushup_threshold = 0.7  # Confidence threshold for counting

frame_count = 0
while True:
    ret, frame = cap.read()
    if not ret:
        print("Failed to grab frame from webcam")
        break
    
    # Calculate FPS
    current_time = time.time()
    fps = 1 / (current_time - prev_time)
    prev_time = current_time
    
    # Process every frame
    frame_count += 1
    
    # Get YOLO pose estimation
    results = yolo_model.predict(frame, save=False, verbose=False)
    
    # Draw the pose on the frame
    annotated_frame = results[0].plot()
    
    # Check if any person was detected
    keypoints = results[0].keypoints.xy.cpu().numpy()
    
    if len(keypoints) > 0:  # If at least one person is detected
        # Take the first person's keypoints
        person_keypoints = keypoints[0].flatten()
        
        # Make sure we have the right number of keypoints
        if len(person_keypoints) == 34:  # 17 keypoints * 2 (x,y)
            try:
                # Simple normalization as we don't have the original scaler
                person_keypoints_scaled = (person_keypoints - np.mean(person_keypoints)) / (np.std(person_keypoints) + 1e-8)
                
                # Add to sequence buffer
                sequence_buffer.append(person_keypoints_scaled)
                
                # Only predict when we have a full sequence
                if len(sequence_buffer) == sequence_length:
                    input_seq = torch.tensor([list(sequence_buffer)], dtype=torch.float32).to(device)
                    
                    with torch.no_grad():
                        output = model(input_seq)
                        probabilities = torch.softmax(output, dim=1)
                        confidence_tensor, predicted_class = torch.max(probabilities, 1)
                    
                    # Get the prediction and confidence
                    prediction_ready = True
                    last_prediction = 'Correct Form' if predicted_class.item() == 1 else 'Incorrect Form'
                    confidence = confidence_tensor.item()
                    
                    # Set color based on prediction
                    color = (0, 255, 0) if last_prediction == 'Correct Form' else (0, 0, 255)
                    
                    # Simple pushup counter logic
                    current_state = 'up' if confidence > pushup_threshold and last_prediction == 'Correct Form' else 'down'
                    if last_state == 'down' and current_state == 'up':
                        pushup_count += 1
                    last_state = current_state
                    
            except Exception as e:
                print(f"Error in prediction: {e}")
    
    # Display prediction on frame
    if prediction_ready:
        cv2.putText(annotated_frame, f"{last_prediction} ({confidence:.2f})", 
                    (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, color, 2)
    else:
        cv2.putText(annotated_frame, last_prediction, 
                    (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, color, 2)
    
    # Display pushup counter
    cv2.putText(annotated_frame, f"Pushup Count: {pushup_count}", 
                (30, 90), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 165, 0), 2)
    
    # Display FPS
    cv2.putText(annotated_frame, f"FPS: {fps:.1f}", 
                (frame.shape[1] - 150, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
    
    # Display recording status if recording
    if is_recording:
        cv2.putText(annotated_frame, "REC", 
                    (frame.shape[1] - 70, 70), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        # Record frame if we're recording
        out.write(annotated_frame)
    
    # Display frame with predictions
    cv2.imshow('Pushup Form Detection (Webcam)', annotated_frame)
    
    # Check for key presses
    key = cv2.waitKey(1) & 0xFF
    
    # Quit if 'q' is pressed
    if key == ord('q'):
        break
    
    # Toggle recording if 'r' is pressed
    elif key == ord('r'):
        is_recording = not is_recording
        if is_recording:
            # Initialize video writer
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            timestamp = time.strftime("%Y%m%d-%H%M%S")
            out = cv2.VideoWriter(f'pushup_webcam_{timestamp}.mp4', 
                                 fourcc, 20.0, 
                                 (frame.shape[1], frame.shape[0]))
            print("Recording started")
        else:
            if out is not None:
                out.release()
                print("Recording stopped and saved")
    
    # Reset pushup counter if 'c' is pressed
    elif key == ord('c'):
        pushup_count = 0
        print("Pushup counter reset")

# Release resources
cap.release()
if out is not None:
    out.release()
cv2.destroyAllWindows()
print("Application closed")

Using device: cpu
LSTM model loaded successfully
Using default scaler (make sure your keypoint values are properly scaled)
YOLO pose model loaded successfully
Webcam initialized. Starting live prediction...
Press 'q' to quit


  input_seq = torch.tensor([list(sequence_buffer)], dtype=torch.float32).to(device)


Application closed
