In [None]:
import json
import numpy as np
import pandas as pd
from glob import glob

# Load set of participants who reported writing notes.
# Each non-blank line of the file contributes one whitespace-stripped entry.
# Blank lines are skipped so the empty string never becomes a member of the
# set (which would make an empty/invalid subject ID look like a note-writer).
with open('../Data/WROTE_NOTES.txt', 'r') as f:
    wn = {line.strip() for line in f if line.strip()}

# Create recall data table

In [None]:
# Build the recall data table and write it to ../Data/recall_data.csv.
# Every session file contributes N_LISTS rows (one per list); each row holds
# session-level info, list-level info and up to MAX_RECALLS recall columns.
N_LISTS = 18      # rows produced per session file
MAX_RECALLS = 24  # number of rec1..recN columns taken from d['recalled']

columns = ['subject', 'experiment', 'wrote_notes', 'list_num', 'modality',
           'list_length', 'pres_rate', 'start_position']
columns += ['rec' + str(i) for i in range(1, MAX_RECALLS + 1)]

# Load data from each session
datafiles = glob('../Data/data/MTK[0-9][0-9][0-9][0-9].json')
session_frames = []
for path in datafiles:

    with open(path, 'r') as f:
        d = json.load(f)

    subj_df = dict(
        # Session level info
        # NOTE(review): d['subject'] is used both as a full column and indexed
        # with [0] below, so it is presumably a per-list sequence of subject
        # IDs rather than a single string -- confirm against the data files.
        subject = d['subject'],
        # Two distinct presentation modalities in a session -> experiment 1,
        # otherwise experiment 2.
        experiment = [1 if len(np.unique(d['pres_mod'])) == 2 else 2] * N_LISTS,
        wrote_notes = [int(d['subject'][0] in wn)] * N_LISTS,

        # List level info
        list_num = np.arange(1, N_LISTS + 1),
        modality = d['pres_mod'],
        list_length = d['list_len'],
        pres_rate = d['pres_rate'],

        # Recall info: first column of d['serialpos'] for every list
        start_position = np.array(d['serialpos'])[:, 0]
    )

    # One column per output position: rec1 .. rec<MAX_RECALLS>
    recalled = np.array(d['recalled'])
    for i in range(MAX_RECALLS):
        subj_df['rec%i' % (i+1)] = recalled[:, i]

    # Convert data into a data frame and queue it for the full table
    session_frames.append(pd.DataFrame(subj_df))

# DataFrame.append was removed in pandas 2.0 and re-copied the whole table on
# every iteration; concatenate all sessions once instead. With no input files
# we still produce an empty table so that a header-only CSV is written.
if session_frames:
    df = pd.concat(session_frames)[columns]
else:
    df = pd.DataFrame(columns=columns)

# Write CSVs
df.to_csv('../Data/recall_data.csv', index=False, header=True)

# Create intrusion data table

In [None]:
# Build the intrusion data table and write it to ../Data/intrusion_data.csv.
# Experiment 1 sessions contribute four rows (2 x 2 encoding/retrieval
# modality); experiment 2 sessions contribute a single row.
columns = ['subject', 'experiment', 'wrote_notes',
           'enc_modality', 'ret_modality', 'plis', 'pli_recency']

# Load stats from each session
statfiles = glob('../Data/stats/MTK[0-9][0-9][0-9][0-9].json')
session_frames = []
for path in statfiles:

    with open(path, 'r') as f:
        d = json.load(f)

    # Determine subject ID and experiment number.
    # The glob pattern guarantees the file name is 'MTK' + 4 digits + '.json',
    # so the 7 characters before the 5-character extension are the subject ID.
    # (Indexing path[-1] first, as before, selected only the final character
    # and always yielded an empty subject string.)
    subj = path[-12:-5]
    # Stats files containing 'plis_2factor' are experiment 1, all others 2.
    exp = 1 + ('plis_2factor' not in d)

    # Experiment 1 has a 2 x 2 design for intrusion modality
    if exp == 1:
        subj_df = dict(
            # Session level info
            subject = [subj] * 4,
            experiment = [exp] * 4,
            wrote_notes = [int(subj in wn)] * 4,

            # Condition level info
            # NOTE(review): the flattened order of the 2-factor arrays is
            # assumed to match the (enc, ret) pairs listed here -- confirm.
            enc_modality = ['v', 'v', 'a', 'a'],
            ret_modality = ['v', 'a', 'v', 'a'],
            plis = np.array(d['plis_2factor']).flatten(),
            # Only the first entry along axis 0 of pli_recency_2factor is used
            pli_recency = np.array(d['pli_recency_2factor'])[0].flatten()
        )

    # Experiment 2 only has one modality per participant
    else:
        modality = 'v' if 'v' in d['plis'] else 'a'
        subj_df = dict(
            # Session level info
            subject = [subj],
            experiment = [exp],
            wrote_notes = [int(subj in wn)],

            # Condition level info
            enc_modality = [modality],
            ret_modality = [modality],
            plis = [d['plis'][modality]],
            pli_recency = [d['pli_recency'][modality][0]]
        )

    # Convert data into a data frame and queue it for the full table
    session_frames.append(pd.DataFrame(subj_df))

# DataFrame.append was removed in pandas 2.0 and re-copied the whole table on
# every iteration; concatenate all sessions once instead. With no input files
# we still produce an empty table so that a header-only CSV is written.
if session_frames:
    df = pd.concat(session_frames)[columns]
else:
    df = pd.DataFrame(columns=columns)

# Write CSVs
df.to_csv('../Data/intrusion_data.csv', index=False, header=True)