This program loads the per-frame keypoint JSON files of each video, extracts the data of the person on the left side of the
screen (the videos are split-screen), and saves that data as one CSV file per video
(note: the np.savetxt call below does not write a header row). To save space, it finds the last row that has at least one non-null point and drops all rows after it.

In [None]:
import numpy as np
import pandas as pd
import os
import json

In [None]:
# Root directory of the per-video keypoint JSON folders; passed to
# convert_json2csv as json_dir. Keep the trailing slash so the value also
# works when it is prefixed directly onto a folder name.
JSON_DIR = './data/output-feet/'

In [None]:
# Load the processed annotation table from disk.
alldata_processed = pd.read_csv("./data/annotations/alldata_processed.csv")
# Distinct values of the 'videoid' column; a set drops repeated ids, so
# iterating over it yields each video id once (in no particular order).
video_ids = set(alldata_processed['videoid'])

In [None]:
def convert_json2csv(video_id, json_dir, max_frames=1500, split_x=320):
    """Collect the left-half person's keypoints of one video, one row per frame.

    Frame files are looked up as
    ``<json_dir>/<video_id>-processed/<video_id>-processed_<frame>_keypoints.json``
    with ``<frame>`` zero-padded to 12 digits and numbered from 1. Reading
    stops at the first frame number whose file is missing.

    Parameters
    ----------
    video_id : identifier used to build the folder and file names.
    json_dir : directory that contains the ``<video_id>-processed`` folders
        (with or without a trailing slash).
    max_frames : maximum number of frames to read (rows allocated).
    split_x : a person counts as "left half" when every x coordinate is
        below this value (presumably half the frame width -- confirm for
        videos that are not 640 px wide).

    Returns
    -------
    (resL, resR) : two float arrays with 75 columns each. ``resL[k]`` holds
        the 75 ``pose_keypoints_2d`` values of the left-half person in frame
        ``k + 1`` (NaN when there was none). Rows after the last frame that
        has any data are dropped; if no frame has data, a single all-NaN row
        is kept. ``resR`` is an all-NaN placeholder of the same length: the
        right half is not extracted, but callers unpack two values.
    """
    n_values = 75  # values per person: triples whose first element is x
    resL = np.full((max_frames, n_values), np.nan)
    # The original returned an undefined ``resR`` (NameError on every call);
    # keep the two-value return contract with an explicit empty placeholder.
    resR = np.full((max_frames, n_values), np.nan)

    videoid_str = '%s-processed' % (video_id)
    video_dir = os.path.join(json_dir, videoid_str)

    # Inclusive upper bound so that the last allocated row can be filled too.
    for frame in range(1, max_frames + 1):
        frame_json = os.path.join(
            video_dir,
            '%s_%s_keypoints.json' % (videoid_str, str(frame).zfill(12)))
        if not os.path.isfile(frame_json):
            break
        with open(frame_json) as data_file:
            data = json.load(data_file)

        # Counted per frame (not per person) so that a second left-half
        # person in the same frame is actually detected.
        left_count = 0
        for person in data['people']:
            keypoints = person['pose_keypoints_2d']
            xcoords = keypoints[0::3]
            if np.max(xcoords) < split_x:
                if left_count > 0:
                    print("duplicate person on left half of video %s (frame %d)"
                          % (video_id, frame))
                left_count += 1
                # When several people qualify, the last one listed wins.
                resL[frame - 1, :] = keypoints

    # We can save space by dropping rows after the last row that isn't all NaN.
    rows_with_data = np.flatnonzero(~np.isnan(resL).all(axis=1))
    last = rows_with_data[-1] if rows_with_data.size else 0
    return resL[:last + 1], resR[:last + 1]

In [None]:
# Ids of the videos that already have a CSV in the output folder.
# The file extension is stripped before splitting on '_' so that both
# "<id>.csv" and "<id>_<suffix>.csv" reduce to "<id>"; without that step a file
# named "<id>.csv" yielded "<id>.csv" and never matched a video id.
# NOTE(review): ids that themselves contain '_' are still truncated at the
# first underscore -- confirm the id format.
existing_csvs = os.listdir("./data/video_csvs/")
existing_csvs = [os.path.splitext(e)[0].split('_')[0] for e in existing_csvs]

In [None]:
# Convert every video that does not have a CSV yet and save its left-half
# keypoint array as ./data/video_csvs/<video_id>.csv (comma separated).
for i, video_id in enumerate(video_ids):
    if i % 100 == 0:
        # Format inside the call: ``print(...) % x`` fails on Python 3 because
        # print() returns None.
        print("%.2f percent done" % (i * 100. / len(video_ids)))
    if str(video_id) in existing_csvs:
        continue
    try:
        resL, resR = convert_json2csv(video_id, JSON_DIR)
    except Exception as err:
        # Still best-effort (one bad video must not stop the batch), but report
        # the reason instead of hiding it, and let Ctrl-C interrupt the loop.
        print("skipping video %s: %r" % (video_id, err))
        continue
    save_string_L = '%s/video_csvs/%s.csv' % ('./data', video_id)
    np.savetxt(save_string_L, resL, delimiter=',')