In [47]:
import numpy as np
import torch
from torch.utils import data
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from matplotlib import pyplot as plt
from matplotlib import image
from matplotlib import patches
import zipfile
from pycocotools.coco import COCO
import pandas as pd
import cv2
import random as rn
from shutil import copyfile
%matplotlib inline

In [48]:
# Load the COCO person-keypoint annotations for both the training and validation splits
train_annot_path = 'data/annotations/person_keypoints_train2017.json'
val_annot_path = 'data/annotations/person_keypoints_val2017.json'
# Each COCO(...) call parses the JSON and builds its lookup index (see the log output below)
train_coco, val_coco = COCO(train_annot_path), COCO(val_annot_path)

loading annotations into memory...
Done (t=14.82s)
creating index...
index created!
loading annotations into memory...
Done (t=0.27s)
creating index...
index created!


In [70]:
# Load meta data on images: https://towardsdatascience.com/how-to-analyze-the-coco-dataset-for-pose-estimation-7296e2ffb12e
def get_meta(coco):
    """Yield one metadata record per image known to a COCO index.

    Args:
        coco: object exposing ``imgs`` (mapping image id -> dict with
            'file_name', 'width', 'height'), ``getAnnIds(imgIds=...)`` and
            ``loadAnns(ann_ids)``.

    Yields:
        list: ``[img_id, file_name, width, height, annotations]`` where
        ``annotations`` is whatever ``coco.loadAnns`` returns for that image.
    """
    # Snapshot the ids first so iteration is over a fixed list of keys
    for image_id in list(coco.imgs.keys()):
        record = coco.imgs[image_id]
        annotation_ids = coco.getAnnIds(imgIds=image_id)
        # basic parameters of the image
        file_name, width, height = record['file_name'], record['width'], record['height']
        # annotations for every person in the current image
        annotations = coco.loadAnns(annotation_ids)

        yield [image_id, file_name, width, height, annotations]
        
def convert_to_df(coco):
    """Flatten a COCO index into an images frame and a persons frame.

    Args:
        coco: COCO-style index accepted by ``get_meta``.

    Returns:
        tuple: ``(images_df, persons_df)``, both indexed by ``image_id``.
        ``images_df`` has columns 'path', 'width', 'height' (one row per
        image); ``persons_df`` has 'is_crowd', 'bbox', 'area',
        'num_keypoints', 'keypoints' (one row per annotation).
    """
    # Declaring the columns up front keeps 'image_id' present even when there
    # are no rows; otherwise set_index raises KeyError on an empty frame.
    image_columns = ['image_id', 'path', 'width', 'height']
    person_columns = ['image_id', 'is_crowd', 'bbox', 'area', 'num_keypoints', 'keypoints']

    images_data = []
    persons_data = []
    # iterate over all images
    for img_id, img_fname, w, h, meta in get_meta(coco):
        images_data.append({
            'image_id': int(img_id),
            'path': img_fname,
            'width': int(w),
            'height': int(h)
        })
        # one record per annotation attached to this image
        persons_data.extend(
            {
                'image_id': m['image_id'],
                'is_crowd': m['iscrowd'],
                'bbox': m['bbox'],
                'area': m['area'],
                'num_keypoints': m['num_keypoints'],
                'keypoints': m['keypoints'],
            }
            for m in meta
        )

    # Build each frame once and index it by image id (no in-place mutation)
    images_df = pd.DataFrame(images_data, columns=image_columns).set_index('image_id')
    persons_df = pd.DataFrame(persons_data, columns=person_columns).set_index('image_id')
    return images_df, persons_df

# Build one merged frame per split: every person annotation joined to the
# metadata of the image it belongs to (index-on-index join on image_id)
images_df, persons_df = convert_to_df(train_coco)
train_coco_df = images_df.merge(persons_df, left_index=True, right_index=True)

images_df, persons_df = convert_to_df(val_coco)
val_coco_df = images_df.merge(persons_df, left_index=True, right_index=True)

In [71]:
# Keep only annotations that are not crowd regions and have at least one keypoint.
# Rows sharing the same image path are kept here; duplicates are dropped later,
# just before the cropped images are written.
train_coco_df = train_coco_df[
    (train_coco_df['is_crowd'] == 0) & (train_coco_df['num_keypoints'] > 0)
]
val_coco_df = val_coco_df[
    (val_coco_df['is_crowd'] == 0) & (val_coco_df['num_keypoints'] > 0)
]

# Report how many annotations survive in each split
print(len(train_coco_df))
print(len(val_coco_df))

149813
6352


In [72]:
def N(y, b):
    """Normalizing transform defined in the DeepPose paper.

    Args:
        y: 2-vector ``(x, y)`` to normalize.
        b: 4-vector ``(bcx, bcy, bw, bh)``: box centre and box size.

    Returns:
        list: ``[(x - bcx) / bw, (y - bcy) / bh]``.
    """
    center_x, center_y, box_w, box_h = b[0], b[1], b[2], b[3]
    # Translate to the box centre, then scale by the box size
    return [(y[0] - center_x) / box_w, (y[1] - center_y) / box_h]

In [73]:
def convertKeyPoints(df, dimensions, source, index):
    """Map one annotation's keypoints into the normalized crop frame.

    The keypoints are shifted to the bbox's top-left corner, scaled as if the
    bbox were resized to ``dimensions`` x ``dimensions``, then normalized with
    ``N`` so that in-crop coordinates fall in [-0.5, 0.5].

    Args:
        df: frame with a 'bbox' column (x, y, w, h) and a 'keypoints' column
            holding a flat ``[x, y, v, x, y, v, ...]`` list.
        dimensions: side length of the square crop the bbox is resized to.
        source: unused here; kept so the signature matches ``convertImg``.
        index: positional row index into ``df``.

    Returns:
        list: flat ``[x, y, x, y, ...]`` list. A keypoint whose visibility
        flag is 0, or that falls outside the crop, is stored as ``-1, -1``.
    """
    row = df.iloc[index]
    # Truncated to whole pixels, the same cast convertImg applies before cropping
    bbox = np.array(row["bbox"]).astype(np.int64)
    keypoints = row["keypoints"]

    bbox_tl_x, bbox_tl_y, w_original, h_original = bbox[:4]

    # A box that truncates to zero width/height cannot be rescaled: dividing by
    # it yields inf/NaN, and NaN would slip past the range check below and be
    # stored as a "visible" coordinate. Treat every keypoint as not visible.
    if w_original <= 0 or h_original <= 0:
        return [-1] * (2 * len(range(2, len(keypoints), 3)))

    bbox_rescale = [dimensions/2, dimensions/2, dimensions, dimensions]

    # Move keypoints to new bbox locations accounting for crop, scale, and normalization
    final_keypoints = []
    for i in range(2, len(keypoints), 3):
        x = (keypoints[i-2] - bbox_tl_x) * dimensions / w_original
        y = (keypoints[i-1] - bbox_tl_y) * dimensions / h_original

        x, y = N([x, y], bbox_rescale)
        v = keypoints[i]

        # Written as a positive range test so that NaN counts as out of range
        inside_crop = (-0.5 <= x <= 0.5) and (-0.5 <= y <= 0.5)

        # Keypoints that are not visible or land outside the crop become -1
        if v == 0 or not inside_crop:
            x = -1
            y = -1

        final_keypoints.extend([x, y])

    return final_keypoints

In [74]:
def convertImg(df, dimensions, source, index):
    """Load one image, crop it to the annotation's bbox and resize it.

    Args:
        df: frame with a 'path' column (file name) and a 'bbox' column
            (x, y, w, h).
        dimensions: side length of the square output image.
        source: split name; the file is read from ``data/{source}2017/``.
        index: positional row index into ``df``.

    Returns:
        numpy.ndarray: uint8 image resized to ``dimensions`` x ``dimensions``.
    """
    record = df.iloc[index]
    img = image.imread(f'data/{source}2017/{record["path"]}')
    left, top, box_w, box_h = np.array(record["bbox"]).astype(np.int64)[:4]

    # Gray images carry no (or a single) channel axis: replicate to three channels
    if img.ndim == 2 or img.shape[2] == 1:
        if img.ndim == 2:
            img = np.expand_dims(img, -1)
        img = cv2.merge([img, img, img])

    # Crop to the bbox: rows span the box height, columns span the box width
    cropped = img[top:top + box_h, left:left + box_w]

    # Resize the crop to the square target size with bicubic interpolation
    resized = cv2.resize(cropped, dsize=(dimensions, dimensions), interpolation=cv2.INTER_CUBIC)

    # enforce int
    return resized.astype('uint8')

In [75]:
# Start from two empty frames that share the final label schema

train_df, val_df = (
    pd.DataFrame(columns=['path', 'width', 'height', 'keypoints', 'num_keypoints'])
    for _ in range(2)
)

In [77]:
!mkdir -p data-2/train2017
!mkdir -p data-2/val2017

# Compute new keypoints for train_df
for row in range(len(train_coco_df)):
    df_row = train_coco_df.iloc[row]
    kps = convertKeyPoints(train_coco_df, 224, 'train', row)
    
    train_df = train_df.append(
        {
            'path' : df_row['path'],
            'width' : 224,
            'height' : 224,
            'keypoints' : kps,
            'num_keypoints' : sum(1 for _ in filter(lambda score: score > -1, kps))
        }
        , ignore_index = True)
    if row % 1000 == 0: print(f'row {row}')
    
# Compute new keypoints for val_df
for row in range(len(val_coco_df)):
    df_row = val_coco_df.iloc[row]
    kps = convertKeyPoints(val_coco_df, 224, 'val', row)
    
    val_df = val_df.append(
        {
            'path' : df_row['path'],
            'width' : 224,
            'height' : 224,
            'keypoints' : kps,
            'num_keypoints' : sum(1 for _ in filter(lambda score: score > -1, kps))
        }
        , ignore_index = True)
    if row % 1000 == 0: print(f'row {row}')
    
# Remove all duplicate files from dataframes
train_coco_df.drop_duplicates(subset=['path'], keep='first', inplace=True, ignore_index=False)
val_coco_df.drop_duplicates(subset=['path'], keep='first', inplace=True, ignore_index=False)

# Create new cropped files in data-2
for row in range(len(train_coco_df)):
    df_row = train_coco_df.iloc[row]
    img = convertImg(train_coco_df, 224, 'train', row)
    cv2.imwrite(f'data-2/train2017/{train_coco_df.iloc[row]["path"]}', cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
    if row % 1000 == 0: print(f'row {row}')

for row in range(len(val_coco_df)):
    df_row = val_coco_df.iloc[row]
    img = convertImg(val_coco_df, 224, 'val', row)
    cv2.imwrite(f'data-2/val2017/{val_coco_df.iloc[row]["path"]}', cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
    if row % 1000 == 0: print(f'row {row}')

row 0
row 1000
row 2000
row 3000
row 4000
row 5000
row 6000
row 7000
row 8000
row 9000
row 10000
row 11000
row 12000
row 13000
row 14000
row 15000
row 16000
row 17000
row 18000
row 19000
row 20000
row 21000
row 22000
row 23000
row 24000
row 25000
row 26000
row 27000
row 28000
row 29000
row 30000
row 31000
row 32000
row 33000
row 34000
row 35000
row 36000
row 37000
row 38000
row 39000
row 40000
row 41000
row 42000
row 43000
row 44000
row 45000
row 46000
row 47000
row 48000
row 49000
row 50000
row 51000
row 52000
row 53000
row 54000
row 55000
row 56000
row 57000
row 58000
row 59000
row 60000
row 61000
row 62000
row 63000
row 64000
row 65000
row 66000
row 67000
row 68000
row 69000
row 70000
row 71000
row 72000
row 73000
row 74000
row 75000
row 76000
row 77000
row 78000
row 79000
row 80000
row 81000
row 82000
row 83000
row 84000
row 85000
row 86000
row 87000
row 88000
row 89000
row 90000
row 91000
row 92000
row 93000
row 94000
row 95000
row 96000
row 97000
row 98000
row 99000
row 100000
ro

In [78]:
# Sanity check: row count of each split, then a peek at the training labels
print(len(train_df), len(val_df), sep='\n')
print(train_df.head())

149813
6352
               path width height  \
0  000000000036.jpg   224    224   
1  000000000049.jpg   224    224   
2  000000000049.jpg   224    224   
3  000000000077.jpg   224    224   
4  000000000077.jpg   224    224   

                                           keypoints num_keypoints  
0  [-0.23225806451612901, -0.32365591397849464, -...            26  
1  [0.2384615384615385, -0.2808219178082192, 0.26...            26  
2  [0.21428571428571427, -0.3548387096774194, 0.2...            22  
3  [-1, -1, -1, -1, -1, -1, -0.10194174757281552,...            28  
4  [-1, -1, -1, -1, 0.148936170212766, -0.3350515...            28  


In [83]:
# Save pickle files and zip data
# train_df.to_pickle('data-2/train_df.pkl')
# val_df.to_pickle('data-2/val_df.pkl')

!zip -r -j -q data.zip data-2/*