# In [53]:
import json
import logging
import os
from io import StringIO

import boto3
import pandas as pd

# In [65]:
def s3_json2df(folder, bucket_name='alignedstorage'):
    '''
    Create Pandas dataframe of keypoints from an s3 directory
    of json files, with each row corresponding to a json.

    Every object whose key starts with `folder` is downloaded, parsed as
    JSON, and the 'pose_keypoints_2d' list of the first entry in 'people'
    becomes one row. Objects that cannot be fetched or parsed, that contain
    no people, or whose keypoint list does not have exactly 75 values are
    skipped with a logged warning.

    The row label is the position of the object in the bucket listing, so
    labels have gaps wherever an object was skipped.

    NOTE(review): 75 presumably corresponds to 25 keypoints x (x, y,
    confidence) -- confirm against the tool that produced the json files.

    Parameters:
        folder: key prefix of the json files inside the bucket.
        bucket_name: name of the s3 bucket to read from.

    Returns Pandas dataframe with columns 0..74.
    '''
    n_values = 75
    log = logging.getLogger(__name__)
    bucket = boto3.resource('s3').Bucket(bucket_name)

    # Collect rows first and build the frame once; growing a DataFrame with
    # df.loc[i] = ... re-allocates on every insertion.
    labels = []
    rows = []
    for i, obj in enumerate(bucket.objects.filter(Prefix=folder)):
        try:
            file_content = obj.get()['Body'].read().decode('utf-8')
            json_content = json.loads(file_content)
            data = json_content['people'][0]['pose_keypoints_2d']
            if not isinstance(data, list) or len(data) != n_values:
                raise ValueError(
                    'expected a list of %d keypoint values' % n_values)
        except Exception as err:
            # Best-effort: one bad object must not abort the whole folder,
            # but unlike a bare except this lets KeyboardInterrupt and
            # SystemExit through and leaves a trace of what was skipped.
            log.warning('Skipping %s: %r', obj.key, err)
            continue
        labels.append(i)
        rows.append(data)

    return pd.DataFrame(rows, index=labels, columns=list(range(n_values)))

def df2csv_s3(df, file_path, bucket_name='alignedstorage'):
    '''
    Serialise a Pandas dataframe as csv text and store it in s3.

    The csv is produced in memory by df.to_csv with its default settings
    and written to the key `file_path` of the bucket `bucket_name`. The
    object is uploaded with the 'public-read' ACL.
    '''
    target_bucket = boto3.resource('s3').Bucket(bucket_name)

    # Render the csv into an in-memory text buffer rather than a local file.
    text_sink = StringIO()
    df.to_csv(text_sink)
    csv_text = text_sink.getvalue()

    target_bucket.put_object(ACL='public-read', Body=csv_text, Key=file_path)

# In [60]:
bucket_name = 'alignedstorage'
s3 = boto3.client("s3")

# Get the directories of json files for all poses in training data.
# With Delimiter='/', keys below 'training_data/' are rolled up into one
# 'CommonPrefixes' entry per sub-directory, each ending in '/'.
# NOTE(review): list_objects returns a single page of results; if there can
# be more sub-directories than one page holds, switch to a paginator.
result = s3.list_objects(Bucket=bucket_name, Prefix='training_data/', Delimiter='/')

# 'CommonPrefixes' is missing from the response when there are no
# sub-directories, so default to an empty list instead of iterating None.
folders = [entry['Prefix'] for entry in result.get('CommonPrefixes', [])]

# In [73]:
# Loop through directories and save a csv of keypoints for each pose.
# bucket_name is passed explicitly so the folders are read from, and the
# csv files written to, the same bucket the folder list was taken from.
for folder in folders:
    df = s3_json2df(folder=folder, bucket_name=bucket_name)
    # Drop the trailing '/' of the prefix: 'training_data/pose/' becomes
    # 'training_data/pose.csv'.
    file_path = folder[:-1] + '.csv'
    df2csv_s3(df=df, file_path=file_path, bucket_name=bucket_name)