In [2]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.patches as patches
import pandas as pd
import pprint
from mpl_toolkits.mplot3d import Axes3D
import matplotlib as mpl
import csv
import json
import seaborn as sns
from scipy.ndimage.filters import gaussian_filter
from scipy.stats import entropy
from os import listdir
from os.path import isfile, join

mpl.rcParams['figure.dpi'] = 144
mpl.rcParams['figure.figsize'] = (8, 5)

pp = pprint.PrettyPrinter(indent=4)

In [3]:
DATA_DIR = "/Users/jeremygordon/Google Drive/Academic/# UC Berkeley ISchool PhD/Research/# Current/Covert Embodied Choice/Data"

In [8]:
def load_json(file):
    data = None
    try:
        with open(file) as f:
            data = json.load(f)
    except:
        pass
        # print("Can't find %s" % file)
    return data

def get_session_info(ppid):
    systime, synctime = None, None
    with open('../TrialData/PupilRecordings/%s/info.csv' % ppid, 'r') as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            if row[0] == "Start Time (System)":
                systime = float(row[1])
            elif row[0] == "Start Time (Synced)":
                synctime = float(row[1])
    return (systime, synctime)

def df_to_tuples(df):
    return [tuple(x) for x in df.values]    

def roll_median(session, plot=False):
    trials = session.get("trials", [])
    all_rolls = []
    for trial in trials:
        hits = trial.get('hits', [])
        rolls = [h['roll'] for h in hits if not h.get('post_response')]
        all_rolls.extend(rolls)
    med = np.median(all_rolls)
    if plot:
        n_rolls = len(all_rolls)
        plt.plot(range(n_rolls), all_rolls)
        plt.plot([0, n_rolls], [med, med], color='red')
    return med

def generate_subject_df():
    """
    Load all meta data files and create subject_df
    """
    onlyfiles = [f for f in listdir(DATA_DIR) if isfile(join(DATA_DIR, f))]

    subject_df = pd.DataFrame(columns=[
        'hand_order', 'condition', 'left_handed', 
        'total_points', 'total_points_possible', 'total_matches',
        'ts_session_start', 'ts_adversary', 'ts_session_end', 'session_duration'])
    
    # Loop through all meta data files
    for file in onlyfiles:
        if '_meta' in file:
            data = load_json(DATA_DIR + '/' + file)
            key = data['session_id']
            del data['hand_specs']
            del data['session_id']
            data['session_duration'] = int(data['ts_session_end'] - data['ts_session_start'])
            subject_df = subject_df.append(pd.Series(data, name=key))
    
    print("Generated subject dataframe with %d subjects" % len(subject_df))
    return subject_df
        
def generate_trial_and_tracking_dfs(subject_df):
    """
    For each subject in passed df, load all subject's trial data and generate trial_df
    """
    trial_df = pd.DataFrame(columns=['subject_choice', 'trial_id', 'points', 'correct', 
                                     'avoided_prediction', 'ts_start', 'ts_selection', 
                                     'ts_end', 'hand', 'practice',
                                     'with_adversary', 'duration'])
    tracking_df = pd.DataFrame(columns=[
        'hmd_yaw', 'hmd_roll', 'hmd_pitch', 'hmd_x', 'hmd_y', 'hmd_z', 
        'ctr_yaw', 'ctr_roll', 'ctr_pitch', 'ctr_x', 'ctr_y', 'ctr_z', 
        'gaze_or_x', 'gaze_or_y', 'gaze_or_z', 'gaze_dir_x', 'gaze_dir_y', 
        'gaze_dir_z', 'gaze_tgt_x', 'gaze_tgt_y', 'gaze_tgt_z', 'gaze_conv_dist', 
        'blinking', 'ts'])
    fixation_df = pd.DataFrame(columns=['timestamp'])
    
    for session_id, row in subject_df.iterrows():
        done = False
        trial = 1
        while not done:
            data = load_json(DATA_DIR + "/session_%s_trial_%d.json" % (session_id, trial))
            if data is not None:
                key = "%s_%d" % (session_id, trial)
                n_records = len(data['records'])
                n_fixations = len(data['fixations'])                
                for record in data['records']:
                    record_key = "%s_%.3f" % (session_id, record['ts'])
                    tracking_df = tracking_df.append(pd.Series(record, name=record_key))
                del data['records']
                for fixation in data['fixations']:
                    fix_key = "%s_%.3f" % (session_id, fixation['ts'])                    
                    fixation_df = fixation_df.append(pd.Series(fixation, name=fix_key))
                del data['fixations']
                data['duration'] = int(data.get('ts_end', 0) - data['ts_start'])
                trial_df = trial_df.append(pd.Series(data, name=key))
                trial += 1
            else:
                done = True
                print("Loaded %d trials, %d records, %d fixations for subject %s" % (trial - 1, n_records, n_fixations, session_id))
    return trial_df, tracking_df, fixation_df

In [9]:
subject_df = generate_subject_df()
trial_df, tracking_df, fixation_df = generate_trial_and_tracking_dfs(subject_df)

Generated subject dataframe with 1 subjects
Loaded 5 trials, 899 records, 0 fixations for subject DEBUG


In [10]:
trial_df.to_pickle(DATA_DIR + '/frames/trial_df.pickle')
tracking_df.to_pickle(DATA_DIR + '/frames/tracking_df.pickle')
fixation_df.to_pickle(DATA_DIR + '/frames/fixation_df.pickle')
subject_df.to_pickle(DATA_DIR + '/frames/subject_df.pickle')
print("Saved")

Saved
