# JSON to CSV Converter

This code combines the data from all participants into tables that can easily be read into R and used for statistical tests.

In [1]:
import json
import numpy as np
import pandas as pd
from glob import glob

# Read the IDs of participants who reported writing notes (one entry per line,
# surrounding whitespace removed) into a set for fast membership tests
with open('../Data/WROTE_NOTES.txt', 'r') as f:
    wn = set(map(str.strip, f.readlines()))

# Read the PEERS word pool and lowercase every entry
wp = [
    s.lower()
    for s in np.loadtxt('../Data/wasnorm_wordpool.txt', dtype='U32')
]

# Create recall data table

In [44]:
# Build one row per list for every participant's session file and write the
# combined table to a single CSV.

# Design constants that the table layout is built around
N_LISTS = 18       # rows (lists) contributed by each session file
MAX_LIST_LEN = 24  # number of per-item columns (pres_itemno / rec / ffr_rec)
MAX_RECALLS = 48   # number of rec_serialpos columns written per list

# Columns that lead the output table, in this order; every other column
# follows in the order in which it is created below
columns = ['subject', 'experiment', 'wrote_notes', 'list_num', 'modality',
           'list_length', 'pres_rate', 'start_position']
columns += ['rec' + str(i) for i in range(1, MAX_LIST_LEN + 1)]

# Load data from each session. All matching files are processed (the earlier
# version only read the first two); sorting makes the row order deterministic,
# since glob() returns paths in arbitrary order.
datafiles = glob('../Data/data/MTK[0-9][0-9][0-9][0-9].json')
subj_frames = []
for path in sorted(datafiles):

    with open(path, 'r') as f:
        d = json.load(f)

    # Convert presented words to word ID numbers by finding their position in
    # the pool. With side='right', a word sitting at 0-based index k of the
    # pool maps to k + 1.
    # NOTE(review): this relies on wp being sorted and on every presented word
    # being in the pool -- searchsorted does not check either; confirm.
    pres_itemnos = np.searchsorted(wp, d['pres_words'], side='right')

    subj_df = dict(
        # Session level info
        # NOTE(review): d['subject'] is presumably a per-list sequence of IDs
        # (its first element is looked up in wn) -- confirm against the JSON.
        subject = d['subject'],
        experiment = [1 if len(np.unique(d['pres_mod'])) == 2 else 2] * N_LISTS,
        wrote_notes = [int(d['subject'][0] in wn)] * N_LISTS,

        # List level info
        list_num = np.arange(1, N_LISTS + 1),
        modality = d['pres_mod'],
        list_length = d['list_len'],
        pres_rate = d['pres_rate'],

        # Recall info: serial position of the first recall on each list
        start_position = np.array(d['serialpos'])[:, 0]
    )

    # Create columns for presented items and whether they were recalled
    recalled = np.array(d['recalled'])
    ffr_recalled = np.array(d['ffr_recalled'])
    for i in range(MAX_LIST_LEN):
        subj_df['pres_itemno%i' % (i+1)] = pres_itemnos[:, i]
    for i in range(MAX_LIST_LEN):
        subj_df['rec%i' % (i+1)] = recalled[:, i]
    for i in range(MAX_LIST_LEN):
        subj_df['ffr_rec%i' % (i+1)] = ffr_recalled[:, i]

    # Create columns for recalled items' serial positions. Exactly MAX_RECALLS
    # columns are written: real data where available, zeros beyond that.
    # (The earlier version restarted its padding loop from the last real
    # column and so overwrote that column with zeros.)
    rec_serialpos = np.array(d['serialpos'])
    n_real = min(rec_serialpos.shape[1], MAX_RECALLS)
    # Padding uses the data's own dtype so that combining participants does
    # not upcast integer serial positions to floats
    padding = np.zeros(rec_serialpos.shape[0], dtype=rec_serialpos.dtype)
    for i in range(MAX_RECALLS):
        if i < n_real:
            subj_df['rec_serialpos%i' % (i+1)] = rec_serialpos[:, i]
        else:
            subj_df['rec_serialpos%i' % (i+1)] = padding

    # Convert data into a data frame and keep it for the final concatenation
    subj_frames.append(pd.DataFrame(subj_df))

# Combine all participants in one concat (concatenating inside the loop
# re-copies the whole table on every iteration)
if subj_frames:
    df = pd.concat(subj_frames)
    leading = set(columns)
    df = df[columns + [c for c in df.columns if c not in leading]]
else:
    # No session files found: still produce a table with the leading columns
    df = pd.DataFrame(columns=columns)

# Write CSVs
df.to_csv('../Data/recall_data_expanded.csv', index=False, header=True)