## Imports

In [2]:
import mediapipe as mp
import os
import glob
import numpy as np
from mediapipe.framework.formats import landmark_pb2
import cv2
import matplotlib.pyplot as plt
from mediapipe import solutions
import torch
import torch.optim as optim
from torchvision import datasets, models, transforms
from torchvision.datasets import ImageFolder
from torchvision import datasets, transforms
from PIL import Image

## Pre-processing

#### Video indexes for the correct and incorrect ones 

In [2]:
video_index_corr = [1, 2, 3, 4, 5, 6, 7, 47, 78, 79, 80, 83, 85, 100, 101, 102, 113, 114, 115, 116, 127, 129, 131, 132, 133
                , 134, 135, 136, 137, 138, 140, 141, 142, 144, 146, 147, 148, 162, 163, 164, 165, 173, 174, 175, 177, 178, 186, 187, 188, 197]

video_index_incorr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 36, 37, 42, 44,
                46, 55,56,57,58,81,104,105, 107, 108, 110, 149, 150, 151, 152, 153, 155, 156, 166, 167, 168, 171, 198]


#### Create dataset with landamarks

In [32]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2

# Define the pose landmarking model
model_path = 'pose_landmarker_heavy.task'

## These paths are not correct right now
# TODO - Automatize this

# Path to the folder containing the images
image_folder = 'test_dataset/correct_seq/'#'images/wrong_seq/'

# Define the output folder to save the processed images
output_folder = 'test_dataset_with_landmarks/correct_seq'
os.makedirs(output_folder, exist_ok=True)

BaseOptions = mp.tasks.BaseOptions
PoseLandmarker = mp.tasks.vision.PoseLandmarker
PoseLandmarkerOptions = mp.tasks.vision.PoseLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

options = PoseLandmarkerOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    running_mode=VisionRunningMode.IMAGE)

with PoseLandmarker.create_from_options(options) as landmarker:
    # Iterate over the images in the folder
    for filename in os.listdir(image_folder):
        # Read the image using OpenCV
        image_path = os.path.join(image_folder, filename)
        image = cv2.imread(image_path)

        # Perform pose landmarking on the image
        image = mp.Image.create_from_file(image_path)
        pose_landmarker_result = landmarker.detect(image)

        pose_landmarks_list = pose_landmarker_result.pose_landmarks

        # Check if pose landmarks are detected
        if pose_landmarks_list is not None:
            annotated_image = np.copy(image.numpy_view())
            # Loop through the detected poses to visualize.
            for idx in range(len(pose_landmarks_list)):
                pose_landmarks = pose_landmarks_list[idx]

                # Draw the pose landmarks.
                pose_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
                pose_landmarks_proto.landmark.extend([
                    landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in pose_landmarks
                ])

                solutions.drawing_utils.draw_landmarks(
                    annotated_image,
                    pose_landmarks_proto,
                    solutions.pose.POSE_CONNECTIONS,
                    solutions.drawing_styles.get_default_pose_landmarks_style())

            # Save the processed image to the output folder
            output_path = os.path.join(output_folder, filename)
            plt.imsave(output_path, annotated_image)
        else:
            print(f'No pose landmarks detected in {filename}.')

print('Image processing completed.')

Image processing completed.


#### Mean and Std deviation

In [14]:
def get_mean_std(train_dir):
    # Create the training dataset
    train_data = datasets.ImageFolder(train_dir, transform=transforms.ToTensor())

    # Iterate over the dataset and collect pixel values
    pixel_values = []
    for image, _ in train_data:
        image = np.array(image)  # Convert the image to a NumPy array
        pixel_values.append(image.flatten())

    # Calculate mean and standard deviation
    pixel_values = np.concatenate(pixel_values, axis=0)
    mean = np.mean(pixel_values, axis=0)
    std = np.std(pixel_values, axis=0)

    print(train_dir)
    print("Mean:", mean)
    print("Standard Deviation:", std)

train_dir_up = "images_with_landmarks/train/up"  # Replace with the path to your training data
train_dir_down = "images_with_landmarks/train/down"

get_mean_std(train_dir_up)
get_mean_std(train_dir_down)



images_with_landmarks/train/up
Mean: 0.49097234
Standard Deviation: 0.29159072
images_with_landmarks/train/down
Mean: 0.49058145
Standard Deviation: 0.3018868


#### Data augmentation and normalization

In [7]:
train_transforms_up = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.49097234, 0.49097234, 0.49097234], std=[0.29159072, 0.29159072, 0.29159072])
])

train_transforms_down = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.49058145, 0.49058145, 0.49058145], std=[0.3018868, 0.3018868, 0.3018868])
])

#### Save or load model

In [12]:
# Save the state of the model
def save_model(model, name):
    torch.save(model.state_dict(), name)

#model.load_state_dict(torch.load('models/resnet18_aug.pth'))

#### Training Model - Up

In [24]:
# Load the pre-trained model (e.g., ResNet)
model_up = models.resnet18(pretrained=True)

# Freeze all the layers in the model
for param in model_up.parameters():
    param.requires_grad = False

# Replace the final layer of the model to match the number of classes in the new dataset
num_ftrs = model_up.fc.in_features
model_up.fc = torch.nn.Linear(num_ftrs, 2)  # num_classes should be the number of classes in your new dataset

# Define loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model_up.fc.parameters(), lr=0.001, momentum=0.9)

train_dir = "images_with_landmarks/train/up"  # replace train_dir with the path to your data

train_data = datasets.ImageFolder(train_dir, transform=train_transforms_up)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)  # reduce batch_size


# Training the model
num_epochs = 15
train_loss_history = []
for epoch in range(num_epochs):  # num_epochs should be the number of epochs you want to train for
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model_up(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
print('Epoch [%d/%d], Loss: %.4f' % (epoch+1, num_epochs, running_loss/len(train_loader)))

Epoch [15/15], Loss: 0.4935


#### Training Model - Down

In [25]:
# Load the pre-trained model (e.g., ResNet)
model_down = models.resnet18(pretrained=True)

# Freeze all the layers in the model
for param in model_down.parameters():
    param.requires_grad = False

# Replace the final layer of the model to match the number of classes in the new dataset
num_ftrs = model_down.fc.in_features
model_down.fc = torch.nn.Linear(num_ftrs, 2)  # num_classes should be the number of classes in your new dataset

# Define loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model_down.fc.parameters(), lr=0.001, momentum=0.9)

train_dir = "images_with_landmarks/train/down"  # replace train_dir with the path to your data

train_data = datasets.ImageFolder(train_dir, transform=train_transforms_up)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)  # reduce batch_size


# Training the model
num_epochs = 15
train_loss_history = []
for epoch in range(num_epochs):  # num_epochs should be the number of epochs you want to train for
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model_down(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
print('Epoch [%d/%d], Loss: %.4f' % (epoch+1, num_epochs, running_loss/len(train_loader)))

Epoch [15/15], Loss: 0.5800


In [13]:
# Save model
save_model(model_up, 'models/resnet18_aug_up.pth')
save_model(model_down, 'models/resnet18_aug_down.pth')

#### Test model - Up

In [26]:
from torchvision import transforms

# Define the transform
transform_test_up = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize all images to the same size
    transforms.ToTensor(),  # Convert images to PyTorch tensors
    transforms.Normalize(mean=[0.49097234, 0.49097234, 0.49097234], std=[0.29159072, 0.29159072, 0.29159072])
])

# Load the testing data
test_dir = "images_with_landmarks/train/up"  # replace test_dir with the path to your testing data
test_data = datasets.ImageFolder(test_dir, transform=transform_test_up)  # apply the defined transform

test_loader = torch.utils.data.DataLoader(test_data, batch_size=64)

# Switch the model to evaluation mode
model_up.eval()

# Create a list to store predictions
predictions = []

# Test the model
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:

        outputs = model_up(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Append the predictions to the list
        predictions.extend(predicted.tolist())

# Calculate and print the accuracy
accuracy = float(correct / total)
print('Test Accuracy: %.2f %%' % (100 * accuracy))

# Print the class predictions
print('Class predictions:', predictions)

# Create a reverse mapping from index to class name
idx_to_class = {v: k for k, v in test_data.class_to_idx.items()}

# Transform the predictions list
class_predictions = [idx_to_class.get(idx, 'Unknown') for idx in predictions]

# Print the class predictions
print('Class predictions:', class_predictions)
# Print the ground-truth labels for each image testing data
print('Ground-truth labels:', [idx_to_class.get(idx) for idx in test_data.targets])


Test Accuracy: 84.04 %
Class predictions: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1]
Class predictions: ['correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_s

#### Test model - Down

In [27]:
from torchvision import transforms

# Define the transform
transform_test_up = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize all images to the same size
    transforms.ToTensor(),  # Convert images to PyTorch tensors
    transforms.Normalize(mean=[0.49058145, 0.49058145, 0.49058145], std=[0.3018868, 0.3018868, 0.3018868])
])

# Load the testing data
test_dir = "images_with_landmarks/train/down"  # replace test_dir with the path to your testing data
test_data = datasets.ImageFolder(test_dir, transform=transform_test_up)  # apply the defined transform

test_loader = torch.utils.data.DataLoader(test_data, batch_size=64)

# Switch the model to evaluation mode
model_up.eval()

# Create a list to store predictions
predictions = []

# Test the model
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:

        outputs = model_up(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Append the predictions to the list
        predictions.extend(predicted.tolist())

# Calculate and print the accuracy
accuracy = float(correct / total)
print('Test Accuracy: %.2f %%' % (100 * accuracy))

# Print the class predictions
print('Class predictions:', predictions)

# Create a reverse mapping from index to class name
idx_to_class = {v: k for k, v in test_data.class_to_idx.items()}

# Transform the predictions list
class_predictions = [idx_to_class.get(idx, 'Unknown') for idx in predictions]

# Print the class predictions
print('Class predictions:', class_predictions)
# Print the ground-truth labels for each image testing data
print('Ground-truth labels:', [idx_to_class.get(idx) for idx in test_data.targets])


Test Accuracy: 67.07 %
Class predictions: [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1]
Class predictions: ['wrong_seq', 'wrong_seq', 'wrong_seq', 'wrong_seq', 'wrong_seq', 'wrong_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'wrong_seq', 'wrong_seq', 'wrong_seq', 'correct_seq', 'wrong_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'correct_seq', 'corre