In this notebook we will prototype the data importer from the output of the CPM into the Pytorch format.

In [1]:
from __future__ import division
import json
import torch
import glob
from random import randint
import os
from os.path import join,basename
import numpy as np
from math import ceil
import matplotlib.pyplot as plt
import seaborn as sns

sns.palplot(sns.color_palette("Set2", 10))

In [2]:
data_path = '/Users/zal/CMU/Spring2017/16824/FinalProject/Data/mockup/'
data_save_path = '/Users/zal/CMU/Spring2017/16824/FinalProject/Data/mockup.npz'

action_labels_path = '/Users/zal/CMU/Spring2017/16824/FinalProject/Data/mockup_labels.txt'
action_labels = []
action_labels_idx = {}

num_frames = 180

In [3]:
def get_dirs(path):
    if not os.path.exists(path):
        return []
        
    return [x for x in os.listdir(path) if os.path.isdir(join(path,x)) and not x.startswith('.')]

def get_list_inv_dict(in_list):
    inv_dict = {}
    for i,x in enumerate(in_list):
        inv_dict[x] = i
    return inv_dict

def pose_data_to_numpy(video_pose_path):
    frame_pose_paths = glob.glob(os.path.join(video_pose_path, '*.json'))
    frame_feat_list = []
    for fp_path in frame_pose_paths:
        fp_data = json.load(open(fp_path))
        frame_feat_list.append(fp_data['people']['body_parts'][0])
    return numpy.array(frame_feat_list)

1. Obtain the classes and build structure
2. Select the target classes
3. Make the numpy structure
4. Go through each of the classes
    - Go through each of the videos in the class
        - Go through each of the frames in the file
            - Build the numpy array from the frame info
            - Append to data samples struct
5. Save on disk

In [5]:
action_labels = get_dirs(data_path)
action_labels_idx = get_list_inv_dict(action_labels)
with open(action_labels_path, 'w') as alf:
    alf.write('\n'.join(action_labels))

In [6]:
sel_classes = ['BabyCrawling', 'BlowingCandles', 'BodyWeightSquats',
               'HandstandPushups', 'HandstandWalking', 'PullUps', 
               'PushUps', 'RockClimbingIndoor', 'RopeClimbing',
               'Swing', 'TaiChi', 'TrampolineJumping']
print len(sel_classes),'human pose classes'

12 human pose classes


In [46]:
def which_dirs_in_path(path, dirs):
    subdirs = os.listdir(path)
    return [d for d in dirs if d in subdirs and os.path.isdir(join(path,d))]

def extract_data(data_path, sel_classes, num_frames, actions_dict):
    '''
    Reads the data from the folders and loads into numpy
    @returns: a tuple with the data samples and datalabels
    '''
    
    #Verify that all sel_classes are in path and notify missing classes
    found_classes = which_dirs_in_path(data_path, sel_classes)
    if set(found_classes) != set(sel_classes):
        print 'Not all the classes were found. The missing classes are:'
        print ', '.join(list(set(sel_classes) - set(found_classes)))
        
    REQ_FRAMES = num_frames
    data_samples = []
    data_labels = []
    # For each selected class
    for sel_class in found_classes:
        print 'Processing class {}'.format(sel_class) 
        # Get its full path and the videos which are dirs
        sel_class_path = join(data_path, sel_class)
        sel_class_videos = get_dirs(sel_class_path)
        print 'Found {} videos for the class {}'.format(len(sel_class_videos), sel_class)

        # For each video for the selected class
        for video_name in sel_class_videos:
            # Each of the json files correspond to each frame
            vf_jsons = glob.glob(join(sel_class_path, video_name, '*.json'))
            print 'Found {} frames for {}'.format(len(vf_jsons), video_name)

            # Collect all the frames for  the video

            last_pose = np.zeros(54) 
            video_poses = np.array([])
            missing_count = 0
            for vf_json_path in vf_jsons:
                vf_json = json.load(open(vf_json_path))
                if 'people' in vf_json and len(vf_json['people'])>0 and 'body_parts' in vf_json['people'][0]:
                        frame_pose = vf_json['people'][0]['body_parts']
                        if len(video_poses) < 1:
                            video_poses = np.array([frame_pose], dtype=np.float32)
                        else:
                            video_poses = np.concatenate((video_poses, [frame_pose]))
                        last_pose = frame_pose
                else:
                    if len(video_poses) < 1: #When the first frame did not find any pose
                        continue
                    missing_count += 1
                    video_poses = np.concatenate((video_poses, [last_pose]))
            if missing_count > 0:
                print 'Missing frames: {}/{}'.format(missing_count, len(video_poses))
            
            if len(video_poses) < REQ_FRAMES:
                # Loop through the video until we get the required length
                loops = int(ceil(REQ_FRAMES/len(video_poses)))
                video_poses = np.tile(video_poses, (loops, 1))[:REQ_FRAMES]
            elif len(video_poses) > REQ_FRAMES:
                # Select a segment at random from the video
                start_idx = randint(0, len(video_poses)-REQ_FRAMES) # randint is inclusive
                video_poses = video_poses[start_idx:start_idx+REQ_FRAMES]

            # Append pose matrix to data tensor
            if len(data_samples) < 1:
                data_samples = np.array([video_poses], dtype=np.float32)
            else:
                data_samples = np.concatenate((data_samples, [video_poses]))

            # Append label to label tensor
            data_labels.append(actions_dict[sel_class])

        assert len(data_samples) == len(data_labels)
    
    data_labels = np.array(data_labels)
    
    return data_samples, data_labels
    
def store_npdata(save_path, data, labels):
    '''
    Saves data into a compressed numpy binary file
    @returns: None
    '''
    np.savez_compressed(save_path, data=data, labels=labels)
    
def load_npdata(save_path):
    '''
    Loades the stored binary data
    @returns: tuple with data and labels as numpy arrays
    '''
    loaded_data = np.load(save_path)
    data = loaded_data['data']
    labels = loaded_data['labels']
    return data, labels

In [45]:
data, labels = extract_data(data_path, sel_classes, 180, action_labels_idx)
store_npdata(data_save_path, data, labels)

Not all the classes were found. The missing classes are:
TrampolineJumping, TaiChi, RopeClimbing, BabyCrawling, PushUps, RockClimbingIndoor, HandstandWalking, BlowingCandles, BodyWeightSquats, Swing, HandstandPushups
Processing class PullUps
Found 100 videos for the class PullUps
Found 167 frames for v_PullUps_g01_c01
Missing frames: 25/166
Found 138 frames for v_PullUps_g01_c02
Missing frames: 30/138
Found 181 frames for v_PullUps_g01_c03
Missing frames: 79/180
Found 157 frames for v_PullUps_g01_c04
Missing frames: 58/151
Found 122 frames for v_PullUps_g02_c01
Found 176 frames for v_PullUps_g02_c02
Found 131 frames for v_PullUps_g02_c03
Found 178 frames for v_PullUps_g02_c04
Found 178 frames for v_PullUps_g03_c01
Missing frames: 1/178
Found 208 frames for v_PullUps_g03_c02
Found 203 frames for v_PullUps_g03_c03
Found 249 frames for v_PullUps_g03_c04
Found 141 frames for v_PullUps_g04_c01
Found 142 frames for v_PullUps_g04_c02
Found 154 frames for v_PullUps_g04_c03
Found 138 frames for

In [10]:
REQ_FRAMES = 180
data_samples = []
data_labels = []
# For each selected class
for sel_class in sel_classes:
    print 'Processing class {}'.format(sel_class) 
    # Get its full path and the videos which are dirs
    sel_class_path = join(data_path, sel_class)
    sel_class_videos = get_dirs(sel_class_path)
    print 'Found {} videos for the class {}'.format(len(sel_class_videos), sel_class)
    
    # For each video for the selected class
    for video_name in sel_class_videos:
        # Each of the json files correspond to each frame
        vf_jsons = glob.glob(join(sel_class_path, video_name, '*.json'))
        print 'Found {} frames for {}'.format(len(vf_jsons), video_name)
        
        # Collect all the frames for  the video
        
        last_pose = np.zeros(54) 
        video_poses = np.array([])
        for vf_json_path in vf_jsons:
            vf_json = json.load(open(vf_json_path))
            if 'people' in vf_json and len(vf_json['people'])>0 and 'body_parts' in vf_json['people'][0]:
                    frame_pose = vf_json['people'][0]['body_parts']
                    if len(video_poses) < 1:
                        print 'appended first elem'
                        video_poses = np.array([frame_pose], dtype=np.float32)
                    else:
                        video_poses = np.concatenate((video_poses, [frame_pose]))
                    last_pose = frame_pose
            else:
                if len(video_poses) < 1:
                    continue
                video_poses = np.concatenate((video_poses, [last_pose]))
        
        video_poses = video_poses[1:]
        print len(video_poses)
        if len(video_poses) < REQ_FRAMES:
            # Loop through the video until we get the required length
            loops = int(ceil(REQ_FRAMES/len(video_poses)))
            video_poses = np.tile(video_poses, (loops, 1))[:REQ_FRAMES]
            print 'Loops',loops
            print 'Video poses len:', len(video_poses)
        elif len(video_poses) > REQ_FRAMES:
            # Select a segment at random from the video
            start_idx = randint(0, len(video_poses)-REQ_FRAMES) # randint is inclusive
            video_poses = video_poses[start_idx:start_idx+REQ_FRAMES]
        print 'Video poses shape:', video_poses.shape
        
        # Append pose matrix to data tensor
        if len(data_samples) < 1:
            print 'appended first element to data_samples'
            data_samples = np.array([video_poses], dtype=np.float32)
        else:
            print 'appended element to data samples', video_poses.shape
            data_samples = np.concatenate((data_samples, [video_poses]))

        # Append label to label tensor
        data_labels.append(action_labels_idx[sel_class])
        
    assert len(data_samples) == len(data_labels)

data_labels = np.array(data_labels)
print 'Data tensor ndim: {}, nsamples: {}, shape: {}'.format(data_samples.ndim, len(data_samples), data_samples.shape)
print 'Labels tensor ndim: {}, nsamples: {}'.format(data_labels.ndim, len(data_labels))
print type(data_samples), data_samples.ndim, data_samples.shape
np.savez_compressed(data_save_path, data=data_samples, labels=data_labels)
print type(data_samples), data_samples.ndim, data_samples.shape

Processing class BabyCrawling
Found 0 videos for the class BabyCrawling
Processing class BlowingCandles
Found 0 videos for the class BlowingCandles
Processing class BodyWeightSquats
Found 0 videos for the class BodyWeightSquats
Processing class HandstandPushups
Found 0 videos for the class HandstandPushups
Processing class HandstandWalking
Found 0 videos for the class HandstandWalking
Processing class PullUps
Found 100 videos for the class PullUps
Found 167 frames for v_PullUps_g01_c01
appended first elem
165
Loops 2
Video poses len: 180
Video poses shape: (180, 54)
appended first element to data_samples
Found 138 frames for v_PullUps_g01_c02
appended first elem
137
Loops 2
Video poses len: 180
Video poses shape: (180, 54)
appended element to data samples (180, 54)
Found 181 frames for v_PullUps_g01_c03
appended first elem
179
Loops 2
Video poses len: 180
Video poses shape: (180, 54)
appended element to data samples (180, 54)
Found 157 frames for v_PullUps_g01_c04
appended first elem
1

### Import into Torch

In [16]:
data_load = np.load(data_save_path)
data = data_load['data']
labels = data_load['labels']

In [17]:
print type(data_samples), data_samples.ndim, data_samples.shape
print type(data), data.ndim, data.shape
print type(labels), labels.ndim, labels.shape

<type 'numpy.ndarray'> 3 (100, 180, 54)
<type 'numpy.ndarray'> 3 (100, 180, 54)
<type 'numpy.ndarray'> 1 (100,)


In [21]:
data_tensor = torch.from_numpy(data)
data_tensor = data_tensor.float()
print type(data_tensor)
print data_tensor.size()

<class 'torch.FloatTensor'>
torch.Size([100, 180, 54])


### Get statistics from selected classes

In [None]:
# First get the number of videos per class
def get_numvideos_for_classes(data_path, classes):
    num_videos = []
    for sel_class in classes:
        sel_class_path = join(data_path, sel_class)
        sel_class_videos = get_dirs(sel_class_path)
        print '{},{}'.format(len(sel_class_videos), sel_class)
        num_videos.append(len(sel_class_videos))
    return num_videos