In [3]:
import requests
import os
import pandas as pd

gym_dataset = pd.read_excel("Gymcam/Gym Exercises Dataset.xlsx")

if not os.path.exists("gym_images"):
    os.makedirs("gym_images")

def download_images(urls, prefix):
    for idx, url in enumerate(urls):      
        if not isinstance(url, str) or not url.startswith("http"):
            continue
        response = requests.get(url)
        with open(f"gym_images/{prefix}_{idx}.jpg", "wb") as f:
            f.write(response.content)

download_images(gym_dataset['Exercise_Image'].tolist(), "img")
download_images(gym_dataset['Exercise_Image1'].tolist(), "img1")



In [8]:
import torch
from pathlib import Path
import cv2
import os

def detect_human(image_path):
    # Load image
    img = cv2.imread(image_path)

    # Use YOLO for object detection
    results = model(img)

    # Save image with bounding boxes
    results.render()
    results.save(Path('./output'))

# Load YOLO
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # Load the YOLOv5 small model

# Loop through all images and detect humans
for image_path in os.listdir("gym_images"):
    detect_human(os.path.join("gym_images", image_path))


Using cache found in C:\Users\Sujit/.cache\torch\hub\ultralytics_yolov5_master
[31m[1mrequirements:[0m Ultralytics requirement ['gitpython>=3.1.30'] not found, attempting AutoUpdate...
Collecting gitpython>=3.1.30
  Obtaining dependency information for gitpython>=3.1.30 from https://files.pythonhosted.org/packages/8a/7e/20f7e45878b5aed34320fbeeae8f78acc806e7bd708d00b1c6e64b016f5b/GitPython-3.1.37-py3-none-any.whl.metadata
  Downloading GitPython-3.1.37-py3-none-any.whl.metadata (12 kB)
Collecting gitdb<5,>=4.0.1 (from gitpython>=3.1.30)
  Downloading gitdb-4.0.10-py3-none-any.whl (62 kB)
     ---------------------------------------- 62.7/62.7 kB 1.1 MB/s eta 0:00:00
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython>=3.1.30)
  Obtaining dependency information for smmap<6,>=3.0.1 from https://files.pythonhosted.org/packages/a7/a5/10f97f73544edcdef54409f1d839f6049a0d79df68adbc1ceb24d1aaca42/smmap-5.0.1-py3-none-any.whl.metadata
  Downloading smmap-5.0.1-py3-none-any.whl.metad

In [9]:
import torch
import torchvision.transforms as transforms
from torchvision.models import resnet50
from PIL import Image

# Load the pre-trained ResNet50 model without its top classification layer
resnet = resnet50(pretrained=True)
resnet = torch.nn.Sequential(*(list(resnet.children())[:-1]))  # Remove the last classification layer
resnet.eval()  # Set the model to evaluation mode

# Define a transform to preprocess the image for ResNet50
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def extract_features_from_bbox(img, bbox):
    """
    Extract features from a given bounding box in the image.
    
    Args:
    - img (PIL.Image): The input image.
    - bbox (tuple): A tuple (x1, y1, x2, y2) representing the bounding box coordinates.
    
    Returns:
    - torch.Tensor: The extracted feature vector.
    """
    x1, y1, x2, y2 = bbox
    cropped_region = img.crop((x1, y1, x2, y2))
    input_tensor = transform(cropped_region).unsqueeze(0)
    with torch.no_grad():
        features = resnet(input_tensor)
    return features.squeeze()

# Updated human detection function to extract features
def detect_and_extract_features(image_path):
    img = cv2.imread(image_path)
    img_pil = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # Convert OpenCV image to PIL Image

    # Use YOLO for object detection
    results = model(img)

    # Get bounding boxes of detected humans
    # The format for each bounding box is (x1, y1, x2, y2)
    bboxes = results.xyxy[0].numpy()
    
    # Extract features for each detected human
    all_features = []
    for bbox in bboxes:
        x1, y1, x2, y2, conf, class_id = bbox
        # Check if the detected class is a person (class_id=0 for YOLOv5 COCO)
        if int(class_id) == 0:
            features = extract_features_from_bbox(img_pil, (x1, y1, x2, y2))
            all_features.append(features)
    
    return all_features

# Load YOLO
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # Load the YOLOv5 small model

# Loop through all images, detect humans, and extract features
all_image_features = {}
for image_path in os.listdir("gym_images"):
    full_path = os.path.join("gym_images", image_path)
    features = detect_and_extract_features(full_path)
    all_image_features[image_path] = features


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\Sujit/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100.0%
Using cache found in C:\Users\Sujit/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-10-14 Python-3.10.0 torch-2.1.0+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


In [18]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

# 2.1 Data Preparation
features = []
labels = []

for filename, feature_list in all_image_features.items():
    exercise_label = filename.split('_')[0]
    for feature in feature_list:
        features.append(feature.numpy())
        labels.append(exercise_label)

# Convert labels to integer values
unique_labels = list(set(labels))
label_to_int = {label: i for i, label in enumerate(unique_labels)}
int_labels = [label_to_int[label] for label in labels]

# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(features, int_labels, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train)
y_train = torch.tensor(y_train)
X_val = torch.tensor(X_val)
y_val = torch.tensor(y_val)

# 2.2 Model Definition
class ExerciseClassifier(nn.Module):
    def __init__(self, input_size, num_classes):
        super(ExerciseClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, 1024)
        self.bn1 = nn.BatchNorm1d(1024)
        self.fc2 = nn.Linear(1024, 512)
        self.bn2 = nn.BatchNorm1d(512)
        self.fc3 = nn.Linear(512, 256)
        self.bn3 = nn.BatchNorm1d(256)
        self.fc4 = nn.Linear(256, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        x = self.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = self.relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = self.relu(self.bn3(self.fc3(x)))
        x = self.dropout(x)
        x = self.softmax(self.fc4(x))
        return x

# Initialize the classifier
model = ExerciseClassifier(2048, len(unique_labels))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 2.3 Training the Classifier
num_epochs = 30
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val)
        val_loss = criterion(val_outputs, y_val)
        _, predicted = torch.max(val_outputs, 1)
        accuracy = (predicted == y_val).sum().item() / len(y_val)

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}, Val Acc: {accuracy:.4f}")


Epoch 1/30, Loss: 0.7915, Val Loss: 0.6948, Val Acc: 0.4421
Epoch 2/30, Loss: 0.7211, Val Loss: 0.6951, Val Acc: 0.4421
Epoch 3/30, Loss: 0.6514, Val Loss: 0.6942, Val Acc: 0.4211
Epoch 4/30, Loss: 0.6549, Val Loss: 0.6929, Val Acc: 0.4737
Epoch 5/30, Loss: 0.6158, Val Loss: 0.6930, Val Acc: 0.4842
Epoch 6/30, Loss: 0.5606, Val Loss: 0.6924, Val Acc: 0.4947
Epoch 7/30, Loss: 0.5368, Val Loss: 0.6919, Val Acc: 0.4842
Epoch 8/30, Loss: 0.5137, Val Loss: 0.6938, Val Acc: 0.5053
Epoch 9/30, Loss: 0.4980, Val Loss: 0.7018, Val Acc: 0.4632
Epoch 10/30, Loss: 0.4358, Val Loss: 0.7194, Val Acc: 0.5053
Epoch 11/30, Loss: 0.4137, Val Loss: 0.7440, Val Acc: 0.5263
Epoch 12/30, Loss: 0.3802, Val Loss: 0.7680, Val Acc: 0.5053
Epoch 13/30, Loss: 0.3508, Val Loss: 0.7864, Val Acc: 0.5368
Epoch 14/30, Loss: 0.3158, Val Loss: 0.7927, Val Acc: 0.5053
Epoch 15/30, Loss: 0.2768, Val Loss: 0.8036, Val Acc: 0.4737
Epoch 16/30, Loss: 0.2458, Val Loss: 0.8472, Val Acc: 0.4737
Epoch 17/30, Loss: 0.2344, Val Lo

In [14]:
# 2.4 Evaluation and Prediction
# After training, you can use the trained model to predict exercises on new data.
def predict_exercise(features):
    model.eval()
    with torch.no_grad():
        outputs = model(torch.tensor(features))
        _, predicted = torch.max(outputs, 1)
        predicted_labels = [unique_labels[pred] for pred in predicted]
    return predicted_labels

In [20]:
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, random_split
from torchvision import transforms

# 1. Fine-tuning ResNet50
# Load the pre-trained ResNet50 model
resnet = resnet50(pretrained=True)
for param in resnet.parameters():
    param.requires_grad = True  # Make all layers trainable

# Modify the last layer to match the number of classes in our dataset
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Linear(num_ftrs, len(unique_labels))

# 2. Data Augmentation
aug_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def get_augmented_features_labels(image_path, label):
    img_pil = Image.open(image_path)
    augmented_img = aug_transform(img_pil)
    features = resnet(augmented_img.unsqueeze(0)).detach().squeeze().numpy()
    return features, label

augmented_features = []
augmented_labels = []

# Generate augmented features
for image_path, label in zip(os.listdir("gym_images"), labels):
    full_path = os.path.join("gym_images", image_path)
    for _ in range(5):  # Augment each image 5 times
        features, int_label = get_augmented_features_labels(full_path, label_to_int[label])
        augmented_features.append(features)
        augmented_labels.append(int_label)

# 3. Increasing Model Complexity
# Define a more complex classifier
class ComplexExerciseClassifier(nn.Module):
    def __init__(self, input_size, num_classes):
        super(ComplexExerciseClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.relu(self.fc3(x))
        x = self.dropout(x)
        x = self.softmax(self.fc4(x))
        return x

# Combine the original and augmented data
combined_features = features + augmented_features
combined_labels = int_labels + augmented_labels

# Split combined data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(combined_features, combined_labels, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train)
y_train = torch.tensor(y_train)
X_val = torch.tensor(X_val)
y_val = torch.tensor(y_val)

# Initialize the complex classifier
complex_model = ComplexExerciseClassifier(2048, len(unique_labels))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(complex_model.parameters(), lr=0.001)

# Training the Complex Classifier
num_epochs = 20
for epoch in range(num_epochs):
    complex_model.train()
    optimizer.zero_grad()
    outputs = complex_model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    complex_model.eval()
    with torch.no_grad():
        val_outputs = complex_model(X_val)
        val_loss = criterion(val_outputs, y_val)
        _, predicted = torch.max(val_outputs, 1)
        accuracy = (predicted == y_val).sum().item() / len(y_val)

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}, Val Acc: {accuracy:.4f}")



ValueError: Found input variables with inconsistent numbers of samples: [2355, 2826]

In [15]:
def recognize_exercise_in_image(image_path, detection_model, classifier_model):
    """
    Recognize the exercise being performed in an image.

    Args:
    - image_path (str): Path to the image.
    - detection_model (torch model): YOLOv5 model for human detection.
    - classifier_model (torch model): Trained model for exercise recognition.

    Returns:
    - List[str]: List of predicted exercises for each detected human.
    """

    # 1. Detect humans in the image
    img = cv2.imread(image_path)
    img_pil = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # Convert OpenCV image to PIL Image
    results = detection_model(img)
    bboxes = results.xyxy[0].numpy()

    # 2. Extract features from the detected human regions
    extracted_features = []
    for bbox in bboxes:
        x1, y1, x2, y2, conf, class_id = bbox
        # Check if the detected class is a person (class_id=0 for YOLOv5 COCO)
        if int(class_id) == 0:
            features = extract_features_from_bbox(img_pil, (x1, y1, x2, y2))
            extracted_features.append(features)

    # 3. Use the trained classifier to predict the exercise type
    predicted_exercises = predict_exercise(extracted_features)
    
    return predicted_exercises

# Test the function on a new image
image_path = "path_to_new_image.jpg"
predicted_exercises = recognize_exercise_in_image(image_path, model, model)
print(predicted_exercises)


FileNotFoundError: [Errno 2] No such file or directory: 'path_to_new_image.jpg'