In [33]:
import glob
import json
from io import StringIO
from os import path as path

import imageio
import numpy as np
import numpy.ma as ma
import pandas as pd

def to_csv(filename):
    """Convert a line-delimited JSON results file into a single CSV file.

    Reads ``filename + '.txt'`` line by line, parses each data line with
    ``pd.read_json``, concatenates the resulting frames and writes them to
    ``filename + '.csv'`` (index not written).

    Blank lines and lines starting with ``'Consent given.'`` are skipped.

    Parameters
    ----------
    filename : str
        Path of the results file *without* its ``.txt`` extension.

    Returns
    -------
    pandas.DataFrame
        The concatenated data of all parsed lines.

    Raises
    ------
    ValueError
        If the file contains no data lines at all.
    """
    frames = []
    with open(filename + '.txt') as results_file:
        for line in results_file:
            if not line.strip() or line.startswith('Consent given.'):
                continue
            # Wrap the text in a file-like object: handing a literal JSON
            # string to read_json is deprecated in recent pandas releases.
            frames.append(pd.read_json(StringIO(line)))
    if not frames:
        raise ValueError('No data lines found in {!r}'.format(filename + '.txt'))
    group_df = pd.concat(frames)
    group_df.to_csv(filename + '.csv', index=False)
    return group_df

# group_df = to_csv(path.join('..','experiments','pilots','letters','data','jatos_results_batch3'))

In [61]:
# Load the target-letter images, keeping only channel index 1 of each
# (the green channel, assuming the PNGs decode to RGB/RGBA - TODO confirm).
# `targets` maps the letter name used in the data to its 2-D pixel array.
targets = {
    letter: imageio.imread(image_path)[:, :, 1]
    for letter, image_path in (('A', 'stimuli/A.png'), ('S', 'stimuli/S.png'))
}
A, S = targets['A'], targets['S']

def get_correlation_with_target_letter(row):
    """Correlate every presented frame of a trial with its target-letter image.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide ``'presented_pixel_data'`` and, when pixel data is
        present, ``'target'`` (a key of the module-level ``targets`` dict).

    Returns
    -------
    int or numpy.ndarray
        ``0`` when the row carries no pixel data (missing value); otherwise
        an array with one Pearson correlation coefficient per frame.
        Non-finite pixels (NaN after float conversion, e.g. ``None``
        entries) are masked out before correlating.
    """
    pixel_data = row['presented_pixel_data']
    # Rows without stimulus data hold a scalar missing value instead of a
    # nested list. Test for that directly rather than stringifying the
    # whole pixel structure, which is slow and does not recognise None.
    if not isinstance(pixel_data, (list, tuple, np.ndarray)) and pd.isna(pixel_data):
        return 0

    frames = np.array(pixel_data, dtype=float)
    # The target is the same for every frame, so flatten it only once.
    target_pixels = targets[row['target']].flatten()
    return np.array([
        ma.corrcoef(ma.masked_invalid(frame.flatten()),
                    ma.masked_invalid(target_pixels))[0, 1]
        for frame in frames
    ])
    
def get_correlation_with_mask(row):
    """Correlate the first frame's hidden-pixel pattern with the target letter.

    A 0/1 indicator is built from the first presented frame (1 where the
    pixel is NaN after float conversion, i.e. hidden; 0 otherwise) and
    correlated with the flattened target-letter image.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide ``'presented_pixel_data'`` and, when pixel data is
        present, ``'target'`` (a key of the module-level ``targets`` dict).

    Returns
    -------
    int or float-like
        ``0`` when the row carries no pixel data (missing value); otherwise
        the correlation coefficient. The value may come back masked
        (rendered as ``--``) when the indicator is constant, e.g. when no
        pixel is hidden.
    """
    pixel_data = row['presented_pixel_data']
    # Rows without stimulus data hold a scalar missing value instead of a
    # nested list. Test for that directly rather than stringifying the
    # whole pixel structure, which is slow and does not recognise None.
    if not isinstance(pixel_data, (list, tuple, np.ndarray)) and pd.isna(pixel_data):
        return 0

    # Only the first frame is inspected, so convert just that frame.
    first_frame = np.array(pixel_data[0], dtype=float)
    hidden_indicator = np.isnan(first_frame).astype(int).flatten()
    target = targets[row['target']]
    return ma.corrcoef(hidden_indicator, target.flatten())[0, 1]

In [65]:
def get_num_pixels_hidden(row):
    """Count the hidden pixels in the first presented frame of a trial.

    A pixel counts as hidden when its entry in the nested pixel list is
    ``None``.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide ``'presented_pixel_data'``: either a missing value or
        a nested sequence indexed as ``[frame][pixel_row][pixel]``.

    Returns
    -------
    int
        ``0`` when the row carries no pixel data; otherwise the number of
        ``None`` entries in the first frame.
    """
    pixel_data = row['presented_pixel_data']
    # Rows without stimulus data hold a scalar missing value instead of a
    # nested list. Test for that directly rather than stringifying the
    # whole pixel structure, which is slow and does not recognise None.
    if not isinstance(pixel_data, (list, tuple, np.ndarray)) and pd.isna(pixel_data):
        return 0

    first_frame = pixel_data[0]
    return sum(1 for pixel_row in first_frame for pixel in pixel_row if pixel is None)

def to_csv_multiple_files(dirname):
    """Merge per-participant JSON result files into one frame and one CSV.

    Every file matching ``<dirname>/*/*.json`` is read with ``pd.read_json``
    and augmented with three derived columns (``num_pixels_hidden``,
    ``correlation_with_target_letter``, ``correlation_with_mask``). The
    per-frame correlations are then exploded into one row per frame,
    numbered within each trial by ``frame_index``. The raw
    ``presented_pixel_data`` column is dropped. All files are concatenated
    and written to ``<dirname>/all_data.csv`` (index not written).

    Parameters
    ----------
    dirname : str
        Directory whose immediate sub-directories contain the JSON files.

    Returns
    -------
    pandas.DataFrame
        The concatenated, exploded data of all files.

    Raises
    ------
    ValueError
        If no JSON file is found under ``dirname``.
    """
    # glob returns files in arbitrary order; sort so that the row order of
    # the output is reproducible between runs and machines.
    json_files = sorted(glob.glob(path.join(dirname, '*', '*.json')))
    if not json_files:
        raise ValueError('No JSON result files found under {!r}'.format(dirname))

    frames = []
    for json_file in json_files:
        subject_df = pd.read_json(json_file)
        subject_df['num_pixels_hidden'] = subject_df.apply(get_num_pixels_hidden, axis=1)
        subject_df['correlation_with_target_letter'] = subject_df.apply(
            get_correlation_with_target_letter, axis=1)
        subject_df['correlation_with_mask'] = subject_df.apply(get_correlation_with_mask, axis=1)
        # Drop the bulky raw pixels before exploding so they are not
        # repeated for every frame row.
        subject_df = (subject_df
                      .drop(columns='presented_pixel_data')
                      .explode('correlation_with_target_letter'))
        # Position of each exploded row within its trial (0 for rows that
        # were not expanded).
        subject_df['frame_index'] = subject_df.groupby('trial_index').cumcount()
        frames.append(subject_df)

    group_df = pd.concat(frames)
    group_df.to_csv(path.join(dirname, 'all_data.csv'), index=False)
    return group_df


In [68]:
# Aggregate all result files of the `letters` experiment (batch 1); the
# merged table is also written to all_data.csv inside that data folder.
group_df = to_csv_multiple_files(
    path.join('..', 'experiments', 'letters', 'data', 'jatos_resultfiles_batch1')
)

In [69]:
# Aggregate all result files of the `letters2` experiment (batch 1); the
# merged table is also written to all_data.csv inside that data folder.
# Note: this rebinds `group_df`.
group_df = to_csv_multiple_files(
    path.join('..', 'experiments', 'letters2', 'data', 'jatos_resultfiles_batch1')
)

In [67]:
# Aggregate all result files of the `letters3occluded` experiment
# (version2, batch 1); the merged table is also written to all_data.csv
# inside that data folder. Note: this rebinds `group_df`.
group_df = to_csv_multiple_files(
    path.join('..', 'experiments', 'letters3occluded', 'version2', 'data',
              'jatos_resultfiles_batch1')
)

Unnamed: 0,success,timeout,failed_images,failed_audio,failed_video,trial_type,trial_index,time_elapsed,internal_node_id,subject_identifier,...,hide_proportion,test_part,present,target,correct_response,correct,num_pixels_hidden,correlation_with_target_letter,correlation_with_mask,frame_index
0,1.0,0.0,[],[],[],preload,0,65,0.0-0.0,250693,...,,,,,,,0,0,0,0
1,1.0,,,,,fullscreen,1,4505,0.0-1.0,250693,...,,,,,,,0,0,0,0
2,,,,,,instructions,2,142104,0.0-2.0,250693,...,,,,,,,0,0,0,0
3,,,,,,html-keyboard-response,3,143026,0.0-3.0-1.0-0.0,250693,...,,,,,,,0,0,0,0
4,,,,,,noisyLetter,4,149515,0.0-3.0-1.0-1.0,250693,...,0.00,practice,1.0,S,f,0.0,0,0.366862,--,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
222,,,,,,noisyLetter,222,473056,0.0-13.0-1.15,251056,...,0.15,test2,0.0,A,f,1.0,32,-0.093629,0.081932,16
222,,,,,,noisyLetter,222,473056,0.0-13.0-1.15,251056,...,0.15,test2,0.0,A,f,1.0,32,-0.132171,0.081932,17
223,,,,,,html-keyboard-response,223,474062,0.0-13.0-2.15,251056,...,,,,,,,0,0,0,0
224,,,,,,survey-text,224,482050,0.0-14.0,251056,...,,,,,,,0,0,0,0


In [64]:
# Display the aggregated frame currently bound to `group_df`, i.e. the result
# of whichever to_csv_multiple_files call was executed last.
group_df

Unnamed: 0,success,timeout,failed_images,failed_audio,failed_video,trial_type,trial_index,time_elapsed,internal_node_id,subject_identifier,...,hide_proportion,test_part,present,target,correct_response,correct,num_pixels_hidden,correlation_with_target_letter,correlation_with_mask,frame_index
0,1.0,0.0,[],[],[],preload,0,65,0.0-0.0,250693,...,,,,,,,0,0,0,0
1,1.0,,,,,fullscreen,1,4505,0.0-1.0,250693,...,,,,,,,0,0,0,0
2,,,,,,instructions,2,142104,0.0-2.0,250693,...,,,,,,,0,0,0,0
3,,,,,,html-keyboard-response,3,143026,0.0-3.0-1.0-0.0,250693,...,,,,,,,0,0,0,0
4,,,,,,noisyLetter,4,149515,0.0-3.0-1.0-1.0,250693,...,0.00,practice,1.0,S,f,0.0,0,0.366862,--,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
222,,,,,,noisyLetter,222,458110,0.0-13.0-1.15,250699,...,0.05,test2,1.0,S,g,1.0,11,0.258142,-0.045784,12
222,,,,,,noisyLetter,222,458110,0.0-13.0-1.15,250699,...,0.05,test2,1.0,S,g,1.0,11,0.278444,-0.045784,13
223,,,,,,html-keyboard-response,223,459114,0.0-13.0-2.15,250699,...,,,,,,,0,0,0,0
224,,,,,,survey-text,224,472200,0.0-14.0,250699,...,,,,,,,0,0,0,0
