In [1]:
import scipy.io as sio
from PIL import Image
import cv2
import numpy as np

In [3]:
# Dataset locations, relative to the notebook's working directory.
# Forward slashes are used instead of backslashes: they are accepted on
# Windows as well as on POSIX systems, whereas a backslash-separated path
# only resolves on Windows.
# IMAGE_PATH keeps its trailing separator because later cells build file
# paths with plain string concatenation (IMAGE_PATH + file_name).
IMAGE_PATH = "data/mpii/images/"
ANNOTATION_PATH = "data/mpii/raw_annotations/mpii_human_pose_v1_u12_1.mat"

# Raw contents of the MATLAB annotation file, as returned by scipy.io.loadmat.
test = sio.loadmat(ANNOTATION_PATH)

#### Collect all valid image ids (parse through dataset)

In [None]:
# Walk every entry of the annotation list and keep the images whose first
# person record has the expected number of fields. Entries that cannot be
# indexed in the expected way are reported and skipped.

# Number of fields the first person record must have for the image to be kept.
# NOTE(review): presumably this is the full set of per-person annotation
# fields; confirm against the dataset documentation.
EXPECTED_FIELD_COUNT = 7

count = 0
weird_count = 0  # not used in this cell; kept in case another cell reads it

# Maps image file name -> list of per-person annotation records.
validated_images = {}

annolist = test["RELEASE"]["annolist"][0][0][0]

for i, image in enumerate(annolist):
    try:
        img_name = image[0][0][0][0][0]
        length = len(image[1][0][0])

        if length == EXPECTED_FIELD_COUNT:
            # One list element per person annotated in this image.
            validated_images[img_name] = list(image[1][0])
            count += 1

    # Catch Exception rather than using a bare except, so that
    # KeyboardInterrupt / SystemExit still stop the cell.
    except Exception:
        print(f"number {i} failed")


# Fraction of entries that were kept. The denominator is taken from the data
# itself rather than a hard-coded total, and an empty list yields 0.0 instead
# of a ZeroDivisionError.
print(count / len(annolist) if len(annolist) else 0.0)

#### Show individual image with annotations

In [None]:
# Draw the annotated joints of one sample image and show it in a window.
SAMPLE_IMAGE = "096244729.jpg"

im = cv2.imread(IMAGE_PATH + SAMPLE_IMAGE)

# cv2.imread does not raise on a missing/unreadable file; it returns None,
# which would otherwise surface later as an obscure error inside cv2.circle.
if im is None:
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH + SAMPLE_IMAGE}")

for person in validated_images[SAMPLE_IMAGE]:

    # Raw joint annotation structure for this person, printed for inspection.
    print(person[4][0][0][0])

    for joint in person[4][0][0][0][0]:
        # Field order used here: index 0 is the label that gets drawn,
        # index 1 is x and index 2 is y.
        x = int(joint[1][0])
        y = int(joint[2][0])

        im = cv2.circle(im, (x, y), 5, (255, 0, 0), 1)
        im = cv2.putText(im, f"{joint[0][0]}", (x + 5, y + 5), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)


cv2.imshow('img', im)
cv2.waitKey(0)
# Close the preview window once a key has been pressed, so it does not linger.
cv2.destroyAllWindows()


#### Show multiple images with annotations

In [None]:

# Step through every validated image, draw its joints, and show it.
# Press any key for the next image; press 'q' or Esc to stop early.
for name, annotations in validated_images.items():

    im = cv2.imread(IMAGE_PATH + name)

    # cv2.imread returns None (it does not raise) when the file cannot be read.
    if im is None:
        print(f"could not read {IMAGE_PATH + name}, skipping")
        continue

    for person in annotations:
        joints = person[4][0][0][0][0]

        # Dataset has a random change in format for some samples. The off samples
        # have the is_visible field (a[-1]) as an int; the correct ones have it as
        # a string. The first joint whose is_visible value can be read decides
        # which layout this person uses.
        #
        # flag is reset for every person: otherwise a person with no readable
        # is_visible value would silently reuse the previous person's layout
        # (or raise NameError if it were the very first person).
        flag = None
        for a in joints:
            try:
                flag = type(a[-1][0][0])
                break
            # Narrower than a bare except, so Ctrl-C still interrupts the cell.
            except Exception:
                continue

        for a in joints:

            if flag == np.uint8:
                # Off layout: x, y, joint id.
                x = int(a[0][0])
                y = int(a[1][0])
                joint_id = int(a[2][0])
            else:
                # Regular layout (also used when no layout could be detected):
                # joint id, x, y.
                x = int(a[1][0])
                y = int(a[2][0])
                joint_id = int(a[0][0])

            im = cv2.circle(im, (x, y), 5, (255, 0, 0), 1)
            im = cv2.putText(im, f"{joint_id}", (x + 5, y + 5), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

    cv2.imshow('img', im)
    key = cv2.waitKey(0) & 0xFF
    if key in (27, ord("q")):  # 27 == Esc
        break

# Close the preview window when the loop finishes or is aborted.
cv2.destroyAllWindows()

#### Split the data into train and test sets and convert it to a format that a PyTorch Dataset can read

In [12]:
# Re-key the validated images by a running integer so the result is easy to
# wrap in a PyTorch Dataset: index -> (image_name, person_annotations).
new_validated_images = {
    index: (img_name, persons)
    for index, (img_name, persons) in enumerate(validated_images.items())
}

# Fixed seed so the train/test split is identical on every run; without it
# each execution of this cell would produce a different split.
SPLIT_SEED = 42
TRAIN_FRACTION = .8

# Shuffle keys and split into train / test index sets.
keys = list(new_validated_images.keys())
shuffled_keys = np.random.default_rng(SPLIT_SEED).permutation(keys)
split_index = int(TRAIN_FRACTION * len(shuffled_keys))
train_indices = shuffled_keys[:split_index]
test_indices = shuffled_keys[split_index:]

# Both output dicts are re-indexed from 0 so each can be addressed directly
# by a dataset position. int() turns the NumPy integer back into a plain int
# before the lookup.
train_annotations = {
    position: new_validated_images[int(key)]
    for position, key in enumerate(train_indices)
}
test_annotations = {
    position: new_validated_images[int(key)]
    for position, key in enumerate(test_indices)
}

# Every sample must land in exactly one of the two sets.
assert len(train_annotations) + len(test_annotations) == len(new_validated_images)


In [17]:
import os
import pickle

# Directory that receives the pickled splits. Built with os.path.join so the
# separator matches the current platform instead of being Windows-only.
OUTPUT_DIR = os.path.join("data", "mpii", "validated_annotations")

# Create the directory if needed; open(..., "wb") does not create missing
# parent directories and would fail otherwise.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Each dict is written to its own file inside OUTPUT_DIR.
with open(os.path.join(OUTPUT_DIR, "train_annotations.pkl"), "wb") as f:
    pickle.dump(train_annotations, f)

with open(os.path.join(OUTPUT_DIR, "test_annotations.pkl"), "wb") as f:
    pickle.dump(test_annotations, f)