In [1]:
import glob
import io
import json
import pathlib
from os import path as path

import imageio
import numpy as np
import numpy.ma as ma
import pandas as pd

def to_csv(filename):
    """Convert a line-delimited JSON result file into a single CSV file.

    Reads ``filename + '.txt'``, parses every non-blank line that does not
    start with ``'Consent given.'`` as one JSON document, concatenates the
    parsed frames, writes them to ``filename + '.csv'`` (index omitted) and
    returns the combined frame.

    Parameters
    ----------
    filename : str
        Path of the result file *without* its ``.txt`` extension.

    Returns
    -------
    pandas.DataFrame
        All parsed lines stacked in file order.

    Raises
    ------
    ValueError
        If the file contains no JSON lines at all.
    """
    frames = []
    with open(filename + '.txt') as results_file:
        for line in results_file:
            # Blank lines (e.g. a trailing newline) carry no data and would
            # otherwise make the JSON parser fail.
            if not line.strip() or line.startswith('Consent given.'):
                continue
            # read_json expects a path or file-like object; passing a literal
            # JSON string is deprecated, so wrap the line in a StringIO.
            frames.append(pd.read_json(io.StringIO(line)))

    if not frames:
        raise ValueError('No JSON result lines found in {!r}'.format(filename + '.txt'))

    group_df = pd.concat(frames)
    group_df.to_csv(filename + '.csv', index=False)
    return group_df

# group_df = to_csv(path.join('..','experiments','pilots','letters','data','jatos_results_batch3'))

In [2]:
# Reference letter images used by the correlation helpers below.
# Only channel index 1 of each PNG is kept, giving one 2-D array per letter.
targets = {
    'A': imageio.imread('stimuli/A.png')[:, :, 1],
    'S': imageio.imread('stimuli/S.png')[:, :, 1],
}
A, S = targets['A'], targets['S']

def get_correlation_with_S(row):
    """Correlate every presented frame with the reference image ``S``.

    Parameters
    ----------
    row : mapping-like
        Must provide ``row['presented_pixel_data']``.

    Returns
    -------
    int or numpy.ndarray
        ``0`` when the string form of the pixel data is ``'nan'``; otherwise
        one correlation coefficient per entry along the first axis of the
        pixel data. Non-finite pixel values are masked before correlating.
    """
    pixel_data = row['presented_pixel_data']
    if str(pixel_data) == 'nan':
        return 0

    frames = np.array(pixel_data, dtype=float)
    coefficients = []
    for frame in frames:
        frame_pixels = ma.masked_invalid(frame.flatten())
        template_pixels = ma.masked_invalid(S.flatten())
        # [0, 1] picks the frame-vs-template entry of the 2x2 correlation matrix.
        coefficients.append(ma.corrcoef(frame_pixels, template_pixels)[0, 1])
    return np.array(coefficients)
    
def get_correlation_with_A(row):
    """Correlate every presented frame with the reference image ``A``.

    Parameters
    ----------
    row : mapping-like
        Must provide ``row['presented_pixel_data']``.

    Returns
    -------
    int or numpy.ndarray
        ``0`` when the string form of the pixel data is ``'nan'``; otherwise
        one correlation coefficient per entry along the first axis of the
        pixel data. Non-finite pixel values are masked before correlating.
    """
    pixel_data = row['presented_pixel_data']
    if str(pixel_data) == 'nan':
        return 0

    frames = np.array(pixel_data, dtype=float)
    coefficients = []
    for frame in frames:
        frame_pixels = ma.masked_invalid(frame.flatten())
        template_pixels = ma.masked_invalid(A.flatten())
        # [0, 1] picks the frame-vs-template entry of the 2x2 correlation matrix.
        coefficients.append(ma.corrcoef(frame_pixels, template_pixels)[0, 1])
    return np.array(coefficients)
    
def get_correlation_with_target_letter(row):
    """Dispatch to the correlation helper matching ``row.target``.

    Returns the result of ``get_correlation_with_A`` for target ``'A'``, of
    ``get_correlation_with_S`` for target ``'S'``, and ``None`` for any other
    target value.
    """
    letter = row.target
    if letter == 'A':
        return get_correlation_with_A(row)
    if letter == 'S':
        return get_correlation_with_S(row)
    return None
    
def get_frame_data(row):
    """Serialise every presented frame as a comma-separated string.

    Parameters
    ----------
    row : mapping-like
        Must provide ``row['presented_pixel_data']``.

    Returns
    -------
    int or list of str
        ``0`` when the string form of the pixel data is ``'nan'``; otherwise
        one string per entry along the first axis of the pixel data, holding
        that frame's flattened float values joined by commas (values that
        cannot be represented as numbers, such as ``None``, appear as
        ``'nan'``).
    """
    pixel_data = row['presented_pixel_data']
    if str(pixel_data) == 'nan':
        return 0

    frames = np.array(pixel_data, dtype=float)
    # Iterate over the frames directly; the previous version looked up the
    # frame count through a misspelled name and raised NameError.
    return [','.join(map(str, frame.flatten())) for frame in frames]
    
def get_correlation_with_mask(row):
    """Correlate the first frame's missing-value pattern with the target image.

    Parameters
    ----------
    row : mapping-like
        Must provide ``row['presented_pixel_data']`` and ``row['target']``
        (a key of the module-level ``targets`` dict).

    Returns
    -------
    int or float
        ``0`` when the string form of the pixel data is ``'nan'``; otherwise
        the correlation between a 0/1 indicator of NaN pixels in the first
        frame and the flattened target image.
    """
    raw_pixels = row['presented_pixel_data']
    if str(raw_pixels) == 'nan':
        return 0

    frames = np.array(raw_pixels, dtype=float)
    letter_image = targets[row['target']]
    first_frame = frames[0, :, :].flatten()
    # 1 where the first frame holds NaN, 0 everywhere else.
    missing_indicator = np.where(np.isnan(first_frame), 1, 0)
    return ma.corrcoef(missing_indicator, letter_image.flatten())[0, 1]

In [4]:
def get_num_pixels_hidden(row):
    """Count the entries of the first frame whose string form is ``'None'``.

    Parameters
    ----------
    row : mapping-like
        Must provide ``row['presented_pixel_data']``; when present it is
        indexed as ``data[0]`` and iterated as a sequence of sequences.

    Returns
    -------
    int
        ``0`` when the string form of the pixel data is ``'nan'``; otherwise
        the number of matching entries in the first frame.
    """
    pixel_data = row['presented_pixel_data']
    if str(pixel_data) == 'nan':
        return 0

    hidden_count = 0
    for pixel_row in pixel_data[0]:
        for pixel in pixel_row:
            if str(pixel) == 'None':
                hidden_count += 1
    return hidden_count

def to_csv_multiple_files(dirname):
    """Combine the per-participant JSON files under ``dirname`` into one CSV.

    Every file matching ``<dirname>/*/*.json`` is loaded with
    ``pd.read_json`` and augmented with the columns ``num_pixels_hidden``,
    ``correlation_with_target_letter`` and ``correlation_with_mask``. The
    per-frame correlations are then exploded to one row each, numbered within
    a trial via ``frame_index``, and the bulky ``presented_pixel_data`` column
    is dropped. All files are concatenated, written to
    ``<dirname>/all_data.csv`` (index omitted) and returned.

    Parameters
    ----------
    dirname : str or path-like
        Directory whose immediate sub-directories hold the ``.json`` files.

    Returns
    -------
    pandas.DataFrame
        One row per (trial, frame) across all files.

    Raises
    ------
    ValueError
        If no JSON file matches the pattern.
    """
    # glob returns matches in a filesystem-dependent order; sort so the row
    # order of the output CSV is reproducible across machines and runs.
    json_files = sorted(glob.glob(path.join(dirname, '*', '*.json')))
    if not json_files:
        raise ValueError("No JSON result files found under {!r} (pattern '*/*.json')".format(dirname))

    frames = []
    for json_file in json_files:
        trials = pd.read_json(json_file)
        trials['num_pixels_hidden'] = trials.apply(get_num_pixels_hidden, axis=1)
        # Per-letter correlation columns and a 'frame_data' column were
        # disabled in the original cell and are not computed here.
        trials['correlation_with_target_letter'] = trials.apply(get_correlation_with_target_letter, axis=1)
        trials['correlation_with_mask'] = trials.apply(get_correlation_with_mask, axis=1)
        # One output row per frame; cumcount restarts at 0 for every trial.
        trials = trials.explode('correlation_with_target_letter')
        trials['frame_index'] = trials.groupby('trial_index').cumcount()
        frames.append(trials.drop('presented_pixel_data', axis=1))

    group_df = pd.concat(frames)
    group_df.to_csv(path.join(dirname, 'all_data.csv'), index=False)
    return group_df

# 12.03.2023: JATOS changed its export data format (the directory tree is
# more deeply nested now), so JSON files are searched for recursively.
def to_csv_multiple_files_new_format(dirname):
    """Combine all JSON files found anywhere under ``dirname`` into one CSV.

    Every file matching ``**/*.json`` below ``dirname`` is loaded with
    ``pd.read_json`` and augmented with the columns ``num_pixels_hidden``,
    ``correlation_with_target_letter`` and ``correlation_with_mask``. The
    per-frame correlations are then exploded to one row each, numbered within
    a trial via ``frame_index``, and the ``presented_pixel_data`` column is
    dropped. All files are concatenated, written to
    ``<dirname>/all_data.csv`` (index omitted) and returned.

    Parameters
    ----------
    dirname : str or path-like
        Root directory that is searched recursively for ``.json`` files.

    Returns
    -------
    pandas.DataFrame
        One row per (trial, frame) across all files.

    Raises
    ------
    ValueError
        If no JSON file is found.
    """
    # Path.glob yields matches in a filesystem-dependent order; sort so the
    # row order of the output CSV is reproducible.
    json_files = sorted(pathlib.Path(dirname).glob('**/*.json'))
    if not json_files:
        raise ValueError("No JSON result files found under {!r} (pattern '**/*.json')".format(str(dirname)))

    frames = []
    for json_file in json_files:
        trials = pd.read_json(json_file)
        trials['num_pixels_hidden'] = trials.apply(get_num_pixels_hidden, axis=1)
        trials['correlation_with_target_letter'] = trials.apply(get_correlation_with_target_letter, axis=1)
        trials['correlation_with_mask'] = trials.apply(get_correlation_with_mask, axis=1)
        # One output row per frame; cumcount restarts at 0 for every trial.
        trials = trials.explode('correlation_with_target_letter')
        trials['frame_index'] = trials.groupby('trial_index').cumcount()
        frames.append(trials.drop('presented_pixel_data', axis=1))

    group_df = pd.concat(frames)
    group_df.to_csv(path.join(dirname, 'all_data.csv'), index=False)
    return group_df

In [5]:
# Process the Exp1pixels (version2) batch-1 result folder: writes all_data.csv
# into that folder and keeps the combined frame in group_df.
# NOTE(review): group_df is reassigned by the other experiment cells in this
# notebook, so it holds whichever of them ran last.
group_df=to_csv_multiple_files(path.join('..','experiments','Exp1pixels','version2','data','jatos_resultfiles_batch1'))

  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame.flatten()),


In [23]:
# Process the Exp2rows batch-1 result folder with the recursive (new export
# format) loader: writes all_data.csv into that folder and keeps the combined
# frame in group_df.
# NOTE(review): group_df is reassigned by the other experiment cells in this
# notebook, so it holds whichever of them ran last.
group_df=to_csv_multiple_files_new_format(path.join('..','experiments','Exp2rows','data','jatos_resultfiles_batch1'))

  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame.flatten()),


In [6]:
def get_correlation_with_S_3stim(row):
    """Correlate element 1 of every presented frame with the reference image ``S``.

    Parameters
    ----------
    row : mapping-like
        Must provide ``row['presented_pixel_data']``. Each frame is indexed
        with ``[1]`` before flattening (presumably the second of three
        stimuli shown per frame; TODO confirm against the experiment code).

    Returns
    -------
    int or numpy.ndarray
        ``0`` when the string form of the pixel data is ``'nan'``; otherwise
        one correlation coefficient per frame. Non-finite pixel values are
        masked before correlating.
    """
    pixel_data = row['presented_pixel_data']
    if str(pixel_data) == 'nan':
        return 0

    frames = np.array(pixel_data, dtype=float)
    coefficients = []
    for frame in frames:
        stimulus_pixels = ma.masked_invalid(frame[1].flatten())
        template_pixels = ma.masked_invalid(S.flatten())
        # [0, 1] picks the stimulus-vs-template entry of the 2x2 correlation matrix.
        coefficients.append(ma.corrcoef(stimulus_pixels, template_pixels)[0, 1])
    return np.array(coefficients)
    
def get_correlation_with_A_3stim(row):
    """Correlate element 1 of every presented frame with the reference image ``A``.

    Parameters
    ----------
    row : mapping-like
        Must provide ``row['presented_pixel_data']``. Each frame is indexed
        with ``[1]`` before flattening (presumably the second of three
        stimuli shown per frame; TODO confirm against the experiment code).

    Returns
    -------
    int or numpy.ndarray
        ``0`` when the string form of the pixel data is ``'nan'``; otherwise
        one correlation coefficient per frame. Non-finite pixel values are
        masked before correlating.
    """
    pixel_data = row['presented_pixel_data']
    if str(pixel_data) == 'nan':
        return 0

    frames = np.array(pixel_data, dtype=float)
    coefficients = []
    for frame in frames:
        stimulus_pixels = ma.masked_invalid(frame[1].flatten())
        template_pixels = ma.masked_invalid(A.flatten())
        # [0, 1] picks the stimulus-vs-template entry of the 2x2 correlation matrix.
        coefficients.append(ma.corrcoef(stimulus_pixels, template_pixels)[0, 1])
    return np.array(coefficients)
    
def get_correlation_with_target_letter_3stim(row):
    """Dispatch to the 3-stimulus correlation helper matching ``row.target``.

    Returns the result of ``get_correlation_with_A_3stim`` for target ``'A'``,
    of ``get_correlation_with_S_3stim`` for target ``'S'``, and ``None`` for
    any other target value.
    """
    letter = row.target
    if letter == 'A':
        return get_correlation_with_A_3stim(row)
    if letter == 'S':
        return get_correlation_with_S_3stim(row)
    return None
    
    

def to_csv_multiple_files_new_3stim(dirname):
    """Combine all 3-stimulus JSON files found under ``dirname`` into one CSV.

    Every file matching ``**/*.json`` below ``dirname`` is loaded with
    ``pd.read_json`` and given a ``correlation_with_target_letter`` column
    computed by ``get_correlation_with_target_letter_3stim``. The per-frame
    correlations are exploded to one row each, numbered within a trial via
    ``frame_index``, and the ``presented_pixel_data`` column is dropped. All
    files are concatenated, written to ``<dirname>/all_data.csv`` (index
    omitted) and returned.

    Parameters
    ----------
    dirname : str or path-like
        Root directory that is searched recursively for ``.json`` files.

    Returns
    -------
    pandas.DataFrame
        One row per (trial, frame) across all files.

    Raises
    ------
    ValueError
        If no JSON file is found.
    """
    # Path.glob yields matches in a filesystem-dependent order; sort so the
    # row order of the output CSV is reproducible.
    json_files = sorted(pathlib.Path(dirname).glob('**/*.json'))
    if not json_files:
        raise ValueError("No JSON result files found under {!r} (pattern '**/*.json')".format(str(dirname)))

    frames = []
    for json_file in json_files:
        trials = pd.read_json(json_file)
        trials['correlation_with_target_letter'] = trials.apply(get_correlation_with_target_letter_3stim, axis=1)
        # One output row per frame; cumcount restarts at 0 for every trial.
        trials = trials.explode('correlation_with_target_letter')
        trials['frame_index'] = trials.groupby('trial_index').cumcount()
        frames.append(trials.drop('presented_pixel_data', axis=1))

    group_df = pd.concat(frames)
    group_df.to_csv(path.join(dirname, 'all_data.csv'), index=False)
    return group_df

# Process the Exp3reference batch-1 result folder with the 3-stimulus loader:
# writes all_data.csv into that folder and keeps the combined frame in group_df.
# NOTE(review): group_df is reassigned by the other experiment cells in this
# notebook, so it holds whichever of them ran last.
group_df=to_csv_multiple_files_new_3stim(path.join('..','experiments','Exp3reference','data','jatos_results_files_batch1'))

  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corr

  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corr

  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corr

  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corr

  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corr

  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corr

  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corr

  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corr

  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corr

  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corr

  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corr

  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corrcoef(ma.masked_invalid(frame[1].flatten()),
  correlation_per_frame = np.array([ma.corr