In [22]:
from os import listdir as ls
from os.path import join, isfile, expanduser
from glob import glob
from toolz import pipe, curry
from toolz.curried import reduce, map
from funcy import compose, rpartial
from operator import add
import pandas as pd
import json

def json_from(*path_parts):
    """Join ``path_parts`` into a single path and return the JSON parsed from it.

    Args:
        *path_parts: Path components, combined with ``os.path.join``
            (for example a folder path followed by a file name).

    Returns:
        Whatever ``json.load`` produces for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Open inside a ``with`` block so the handle is closed as soon as parsing
    # finishes; this helper is called once per JSON file per folder.
    with open(join(*path_parts)) as json_file:
        return json.load(json_file)
def frame_paths(*path_parts):
    """Return the sorted, shortened paths of the ``frames/*.jpg`` files under a folder.

    ``path_parts`` are joined together with ``'frames/*.jpg'`` to form a glob
    pattern. Each match is cut down to its last three ``/``-separated
    components before the resulting strings are sorted.
    """
    pattern = join(*path_parts, 'frames/*.jpg')
    return sorted('/'.join(hit.split('/')[-3:]) for hit in glob(pattern))

# Path of the combined CSV: default target of write_folder_df, and the file
# that is read back with pd.read_csv once collection has finished.
OUT_CSV = './eye-gaze-capture.csv'

# Per-data-type loaders that override the plain json_from default used by
# data_type_from_folder.
# NOTE: 'motion' is not listed in FRAME_TYPES below, so folder_df never
# reaches this entry.
READ_FUNCS = {
    'motion': compose(pd.DataFrame.from_dict, json_from)
}

# Data types loaded for every folder; each name is read from '<name>.json'
# inside that folder (see data_type_from_folder).
FRAME_TYPES = [
    'appleRightEye', 'appleLeftEye', 'appleFace', 'frames',
    'screen', 'faceGrid', 'dotInfo'
]

@curry
def data_type_from_folder(folder_path, data_type):
    """Load one data type of a folder as a data frame keyed by frame path.

    Reads ``<data_type>.json`` from ``folder_path`` (using the loader
    registered in ``READ_FUNCS`` when there is one, ``json_from`` otherwise),
    prefixes every column with ``<data_type>.`` and adds a ``Frame`` column
    holding the result of ``frame_paths(folder_path)``.

    The function is curried, so it may be called with ``folder_path`` alone to
    obtain a loader that only needs ``data_type``.
    """
    loader = READ_FUNCS.get(data_type, json_from)
    loaded = loader(folder_path, data_type + '.json')

    # Custom loaders may already hand back a data frame; wrap anything else.
    if isinstance(loaded, pd.DataFrame):
        table = loaded
    else:
        table = pd.DataFrame(data = loaded)

    # Namespace the columns by data type so different types cannot collide.
    table = table.add_prefix(data_type + '.')

    # Attach the frame paths that the per-type tables are later joined on.
    table['Frame'] = frame_paths(folder_path)
    return table

@curry
def join_on(key, df1, df2):
    """Join ``df2`` onto ``df1`` by matching the ``key`` column of both frames.

    ``df2`` is re-indexed by ``key`` and then joined against ``df1``'s ``key``
    column. Curried so that ``join_on(key)`` can serve as a two-argument
    reducer.
    """
    right_indexed = df2.set_index(key)
    return df1.join(right_indexed, on = key)

@curry
def append_info(folder_path, df):
    """Add the ``Dataset`` and ``DeviceName`` values from ``info.json`` to ``df``.

    The values are read from ``info.json`` inside ``folder_path`` and assigned
    as columns of ``df``. ``df`` is modified in place and also returned.
    Curried so that ``append_info(folder_path)`` yields a one-argument step.
    """
    meta = json_from(folder_path, 'info.json')
    for column in ('Dataset', 'DeviceName'):
        df[column] = meta[column]
    return df

def folder_df(folder_path):
    """Build the combined data frame for a single folder.

    Loads every data type listed in ``FRAME_TYPES``, joins the resulting
    tables on their ``Frame`` column from left to right, and finally appends
    the folder's ``info.json`` fields.
    """
    load_type = data_type_from_folder(folder_path)
    per_type_tables = map(load_type, FRAME_TYPES)
    joined = reduce(join_on('Frame'), per_type_tables)
    return append_info(folder_path, joined)

def write_folder_df(df, path = OUT_CSV):
    """Write ``df`` to the CSV at ``path`` and return the number of rows written.

    When ``path`` already exists the rows are appended without a header line;
    otherwise the file is created and the header is written first. The index
    is never written.
    """
    already_there = isfile(path)
    if already_there:
        df.to_csv(path, mode = 'a', header = False, index = False)
    else:
        df.to_csv(path, mode = 'w', header = True, index = False)
    return len(df)

def collect_data(*path_parts):
    """Write the data frame of every sub-folder to the CSV and count the rows.

    ``path_parts`` are joined into a root directory (``~`` is expanded). Each
    immediate sub-folder of that root is turned into a data frame with
    ``folder_df`` and written out with ``write_folder_df``.

    Args:
        *path_parts: Components of the root directory path.

    Returns:
        int: Total number of rows written; ``0`` when the root has no
        sub-folders.
    """
    pattern = expanduser(join(*path_parts, '*/'))

    # glob gives no ordering guarantee; sort so that the row order of the CSV
    # is reproducible from run to run.
    folders = sorted(glob(pattern))

    # sum() starts from 0, so a root without sub-folders yields 0 rather than
    # failing the way a reduce without an initial value does on empty input.
    return sum(write_folder_df(folder_df(folder)) for folder in folders)

In [23]:
# Collect everything into single CSV (~ 600 MB)
# write_folder_df appends to OUT_CSV whenever the file already exists, so
# running the collection again over an existing CSV would append every row a
# second time. Only build the file when it is missing; delete OUT_CSV to force
# a rebuild.
if not isfile(OUT_CSV):
    collect_data('~/Documents/data/gazecapture')

# Read data back in from CSV
capture_df = pd.read_csv(OUT_CSV)

In [26]:
# Shape of the table read back from OUT_CSV, as (rows, columns)
capture_df.shape

(2445504, 33)