In [1]:
import os
import sys
import scipy
from numba import njit, prange
import numpy as np
import scipy.stats as stats
import bayesflow as bf
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pickle

import ctypes
from numba.extending import get_cython_function_address
from simulators import levy
from sklearn.covariance import EmpiricalCovariance


# Resolve the address of the compiled `levy_trial` routine exported by the
# Cython module `levy` and wrap it in a ctypes callable.
addr_levy = get_cython_function_address("levy", "levy_trial")

# Prototype: returns a C double, takes ten C doubles followed by one C int.
functype = ctypes.CFUNCTYPE(
    ctypes.c_double,
    *([ctypes.c_double] * 10),
    ctypes.c_int,
)
levy_trial = functype(addr_levy)

# Print floats in plain decimal notation instead of scientific notation.
np.set_printoptions(suppress=True)

# Seeded random generator for the notebook.
RNG = np.random.default_rng(2023)

2024-06-12 09:26:37.631158: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-12 09:26:37.651610: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from tqdm.autonotebook import tqdm


In [2]:
#Settings
# Both directories default to the original locations but can be redirected
# without editing the notebook by setting the environment variables
# LEVY_DATA_PATH / LEVY_SAVE_PATH. `os.path.join(..., '')` guarantees a trailing
# separator, because later cells append file names directly to these strings.

# Path to data
PATH = os.path.join(
    os.environ.get('LEVY_DATA_PATH',
                   '/home/mischa/Documents/bayesflow/prj_real_life_ddm/data/prepared_data/'),
    '')

# Where to save files
PATH_TO_SAVE = os.path.join(
    os.environ.get('LEVY_SAVE_PATH',
                   '/home/mischa/Documents/bayesflow/prj_real_life_ddm/data/pickle_levy/'),
    '')

# Load neural networks from checkpoint

In [3]:
# NOTE(review): `ps` and `context_gen` are used below but are not imported or
# defined in any cell visible here -- presumably they come from a cell that was
# removed (execution counts jump from 3 to 8). Confirm before Restart & Run All.

# Display labels for the seven model parameters (LaTeX), in network output order.
PARAM_NAMES = [
    r"$v_{1}$",
    r"$v_{2}$",
    r"$a_{1}$",
    r"$a_{2}$",
    r"$\tau_{correct}$",
    r"$\tau_{error}$",
    r"$\alpha$"
]

# Prior wrapper around the project's prior sampling function.
prior = bf.simulation.Prior(
    prior_fun=ps.levy_prior_fun,
    param_names=PARAM_NAMES,
)

# Monte-Carlo estimates of the prior means / stds, rounded to one decimal.
# They are used later to map network outputs back to the parameter scale.
prior_means, prior_stds = (
    np.round(prior_stat, decimals=1)
    for prior_stat in prior.estimate_means_and_stds(n_draws=100_000)
)

# Forward model: simulator plus prior bundled into a generative model.
simulator = bf.simulation.Simulator(
    simulator_fun=ps.levy_simulator_fun,
    context_generator=context_gen,
)
model = bf.simulation.GenerativeModel(
    prior=prior,
    simulator=simulator,
    name="Levy",
)

# Summary network and invertible inference network.
summary_net = bf.networks.SetTransformer(
    input_dim=4,
    summary_dim=30,
    name="ddm_summary",
)
inference_net = bf.networks.InvertibleNetwork(
    num_params=len(prior.param_names),
    coupling_settings={
        "dense_args": {"kernel_regularizer": None},
        "dropout": False,
    },
    name="ddm_inference",
)

# Amortized posterior with an additional MMD loss on the summary outputs.
amortizer = bf.amortizers.AmortizedPosterior(
    inference_net,
    summary_net,
    name="levy_amortizer",
    summary_loss_fun='MMD',
)

# The trainer is pointed at the "levy_model_net2" checkpoint directory.
trainer = bf.trainers.Trainer(
    generative_model=model,
    amortizer=amortizer,
    configurator=ps.configurator,
    checkpoint_path="levy_model_net2",
)

INFO:root:Performing 2 pilot runs with the Levy model...
INFO:root:Shape of parameter batch after 2 pilot simulations: (batch_size = 2, 7)
INFO:root:Shape of simulation batch after 2 pilot simulations: (batch_size = 2, 120, 1)
INFO:root:No optional prior non-batchable context provided.
INFO:root:No optional prior batchable context provided.
INFO:root:No optional simulation non-batchable context provided.
INFO:root:Could not determine shape of simulation batchable context. Type appears to be non-array: <class 'list'>,                                    so make sure your input configurator takes cares of that!
2024-06-12 09:26:44.005560: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-06-12 09:26:44.019235: I tensorflow/com

In [8]:
#This is where the magic happens

# Number of posterior draws requested per person.
N_POSTERIOR_SAMPLES = 3000
# A person is kept only if EVERY parameter retains at least this many in-prior draws.
MIN_VALID_SAMPLES = 1000
# Upper bound on the number of mini-batches a chunk is split into for network passes.
N_SPLITS = 50

# 1. Store all data-set chunk names in a list
datasets = sorted(os.listdir(PATH))

# 2. For each chunk
for dataset_name in datasets:

    # 2.1 Load chunk
    # NOTE: pickle can execute arbitrary code on load -- only read files that
    # were produced by the project's own preparation step.
    with open(os.path.join(PATH, str(dataset_name)), "rb") as chunk_file:
        loaded_pickle = pickle.load(chunk_file)

    X_test = loaded_pickle['data_array']
    y_test = loaded_pickle['outcome_array']
    rt_summaries = loaded_pickle['rt_summaries']

    print(str(dataset_name)+ " loaded")

    # Nothing to estimate for an empty chunk; the network passes below need data.
    if len(X_test) == 0:
        print(str(dataset_name)+ " is empty, skipped")
        continue

    # Never create more batches than there are people, otherwise
    # np.array_split would hand empty batches to the networks.
    batches = np.array_split(X_test, min(N_SPLITS, len(X_test)))

    # 2.2 Estimate chunk
    # The indexing below treats the result as (people, draws, parameters).
    samples_dm = np.concatenate(
        [amortizer.sample(input_dict={"summary_conditions": batch},
                          n_samples=N_POSTERIOR_SAMPLES,
                          to_numpy=True)
         for batch in batches],
        axis=0)

    # Undo the standardization: back to the original parameter scale.
    samples_dm = samples_dm * prior_stds + prior_means

    # Discard negative samples for positively bounded parameters
    # (basic slices are views, so the assignment writes into samples_dm).
    bounded = samples_dm[:, :, 2:7]
    bounded[bounded < 0] = np.nan

    # Discard samples > 2 for alpha
    alpha = samples_dm[:, :, 6]
    alpha[alpha > 2] = np.nan

    nan_mask = np.isnan(samples_dm)
    rejected_fraction = nan_mask.sum() / nan_mask.size
    print(str(np.round(rejected_fraction, 5)*100) +"% out of prior samples rejected")
    print(str(dataset_name)+ " inference done")

    # 2.3 Compute summaries of parameter posteriors: means, medians, stds, Q0.025, Q0.975
    estimates = ps.compute_summaries(samples_dm)

    # Exclude people with less than MIN_VALID_SAMPLES proper prior samples for at
    # least one parameter. The check counts valid draws directly, so it stays
    # correct for any number of draws (a fixed NaN-count threshold of 4000 can
    # never be exceeded when only 3000 draws exist).
    valid_per_param = (~nan_mask).sum(axis=1)                       # (people, parameters)
    too_few_valid = (valid_per_param < MIN_VALID_SAMPLES).any(axis=1)  # (people,)
    estimates[too_few_valid, :] = np.nan

    # Count people (not person-parameter pairs) so the number matches the message.
    print(str(np.sum(too_few_valid)) +
          " people with <" + str(MIN_VALID_SAMPLES) +
          " proper samples (for at least one parameter) excluded.")

    # 2.4 Get empirical Mahalanobis distances for summary statistics provided by network
    summary_statistics_empirical = np.concatenate(
        [trainer.amortizer.summary_net(batch) for batch in batches], axis=0)

    cov = EmpiricalCovariance().fit(summary_statistics_empirical)

    mahalanobis_empirical = cov.mahalanobis(summary_statistics_empirical)

    print(str(dataset_name)+ " Mahalanobis check done")

    # 2.5 Store everything together (serialized, pickle.dump) as a dict with keys
    dict_to_store = {'data_array': X_test, 'est_array': estimates, "outcome_array": y_test,
                     'mahalanobis': mahalanobis_empirical, 'rt_summaries': rt_summaries}
    # The context manager guarantees the file is flushed and closed before moving on.
    with open(os.path.join(PATH_TO_SAVE, "estimates_" + str(dataset_name)), "wb") as out_file:
        pickle.dump(dict_to_store, out_file)
    print(str(dataset_name)+ " saving done")

# 3. Celebrate

Initial shape: 5604
After removing no age IDs: 4550
After removing invalid IDs: 4550
After excluding zero latency IDs: 4546
After excluding less than 120 trials IDs: 3367
After excluding more than 50% error IDs: 3363
After excluding zero error IDs: 3276
Successfully read datafile 2003iat.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (3276, 120, 4)
Final shape of outcomes chunk:  (3276, 2)
IDs with less than 12 trials >300 ms: (3249, 120, 2)
In prior-cases: (3249,)
2003iat.txt done
Initial shape: 25354
After removing no age IDs: 4225
After removing invalid IDs: 4225
After excluding zero latency IDs: 4220
After excluding less than 120 trials IDs: 2050
After excluding more than 50% error IDs: 2047
After excluding zero error IDs: 1994
Successfully read datafile 2004iat.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (1994, 120, 4)
Final s

Initial shape: 0
After removing no age IDs: 0
After removing invalid IDs: 0
After excluding zero latency IDs: 0
After excluding less than 120 trials IDs: 0
After excluding more than 50% error IDs: 0
After excluding zero error IDs: 0
Successfully read datafile 2007iat4.txt .
Converting to X and y arrays...
Initial shape: 0
After removing no age IDs: 0
After removing invalid IDs: 0
After excluding zero latency IDs: 0
After excluding less than 120 trials IDs: 0
After excluding more than 50% error IDs: 0
After excluding zero error IDs: 0
Successfully read datafile 2007iat5.txt .
Converting to X and y arrays...
Initial shape: 0
After removing no age IDs: 0
After removing invalid IDs: 0
After excluding zero latency IDs: 0
After excluding less than 120 trials IDs: 0
After excluding more than 50% error IDs: 0
After excluding zero error IDs: 0
Successfully read datafile 2007iat6.txt .
Converting to X and y arrays...
Initial shape: 0
After removing no age IDs: 0
After removing invalid IDs: 0
Aft

Initial shape: 0
After removing no age IDs: 0
After removing invalid IDs: 0
After excluding zero latency IDs: 0
After excluding less than 120 trials IDs: 0
After excluding more than 50% error IDs: 0
After excluding zero error IDs: 0
Successfully read datafile 2011iat3.txt .
Converting to X and y arrays...
Initial shape: 0
After removing no age IDs: 0
After removing invalid IDs: 0
After excluding zero latency IDs: 0
After excluding less than 120 trials IDs: 0
After excluding more than 50% error IDs: 0
After excluding zero error IDs: 0
Successfully read datafile 2011iat4.txt .
Converting to X and y arrays...
Initial shape: 0
After removing no age IDs: 0
After removing invalid IDs: 0
After excluding zero latency IDs: 0
After excluding less than 120 trials IDs: 0
After excluding more than 50% error IDs: 0
After excluding zero error IDs: 0
Successfully read datafile 2011iat5.txt .
Converting to X and y arrays...
Initial shape: 0
After removing no age IDs: 0
After removing invalid IDs: 0
Aft

Initial shape: 0
After removing no age IDs: 0
After removing invalid IDs: 0
After excluding zero latency IDs: 0
After excluding less than 120 trials IDs: 0
After excluding more than 50% error IDs: 0
After excluding zero error IDs: 0
Successfully read datafile 2015MarDeciat5.txt .
Converting to X and y arrays...
Initial shape: 0
After removing no age IDs: 0
After removing invalid IDs: 0
After excluding zero latency IDs: 0
After excluding less than 120 trials IDs: 0
After excluding more than 50% error IDs: 0
After excluding zero error IDs: 0
Successfully read datafile 2015MarDeciat6.txt .
Converting to X and y arrays...
Initial shape: 0
After removing no age IDs: 0
After removing invalid IDs: 0
After excluding zero latency IDs: 0
After excluding less than 120 trials IDs: 0
After excluding more than 50% error IDs: 0
After excluding zero error IDs: 0
Successfully read datafile 2015MarDeciat7.txt .
Converting to X and y arrays...
Initial shape: 0
After removing no age IDs: 0
After removing 

After excluding zero latency IDs: 15840
After excluding less than 120 trials IDs: 15800
After excluding more than 50% error IDs: 15789
After excluding zero error IDs: 14776
Successfully read datafile 2017iat10.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (14776, 120, 4)
Final shape of outcomes chunk:  (14776, 2)
IDs with less than 12 trials >300 ms: (14659, 120, 2)
In prior-cases: (14659,)
2017iat10.txt done
Initial shape: 26049
After removing no age IDs: 16072
After removing invalid IDs: 16072
After excluding zero latency IDs: 15941
After excluding less than 120 trials IDs: 15887
After excluding more than 50% error IDs: 15875
After excluding zero error IDs: 14833
Successfully read datafile 2017iat11.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (14833, 120, 4)
Final shape of outcomes chunk:  (14833, 2)
IDs with less than 12 trials

After excluding zero error IDs: 14587
Successfully read datafile 2018iat10.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (14587, 120, 4)
Final shape of outcomes chunk:  (14587, 2)
IDs with less than 12 trials >300 ms: (14457, 120, 2)
In prior-cases: (14457,)
2018iat10.txt done
Initial shape: 25994
After removing no age IDs: 15741
After removing invalid IDs: 15741
After excluding zero latency IDs: 15635
After excluding less than 120 trials IDs: 15613
After excluding more than 50% error IDs: 15597
After excluding zero error IDs: 14571
Successfully read datafile 2018iat11.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (14571, 120, 4)
Final shape of outcomes chunk:  (14571, 2)
IDs with less than 12 trials >300 ms: (14453, 120, 2)
In prior-cases: (14453,)
2018iat11.txt done
Initial shape: 26006
After removing no age IDs: 14931
After remov

In prior-cases: (15069,)
2019iat11.txt done
Initial shape: 25999
After removing no age IDs: 16224
After removing invalid IDs: 16224
After excluding zero latency IDs: 16047
After excluding less than 120 trials IDs: 16028
After excluding more than 50% error IDs: 15998
After excluding zero error IDs: 14944
Successfully read datafile 2019iat12.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (14944, 120, 4)
Final shape of outcomes chunk:  (14944, 2)
IDs with less than 12 trials >300 ms: (14791, 120, 2)
In prior-cases: (14791,)
2019iat12.txt done
Initial shape: 19932
After removing no age IDs: 12112
After removing invalid IDs: 12112
After excluding zero latency IDs: 12011
After excluding less than 120 trials IDs: 11992
After excluding more than 50% error IDs: 11976
After excluding zero error IDs: 11130
Successfully read datafile 2019iat13.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more

After excluding zero latency IDs: 17669
After excluding less than 120 trials IDs: 17646
After excluding more than 50% error IDs: 17641
After excluding zero error IDs: 15477
Successfully read datafile 2020iat13.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (15477, 120, 4)
Final shape of outcomes chunk:  (15477, 2)
IDs with less than 12 trials >300 ms: (15439, 120, 2)
In prior-cases: (15439,)
2020iat13.txt done
Initial shape: 25739
After removing no age IDs: 17627
After removing invalid IDs: 17627
After excluding zero latency IDs: 17557
After excluding less than 120 trials IDs: 17528
After excluding more than 50% error IDs: 17524
After excluding zero error IDs: 15500
Successfully read datafile 2020iat14.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (15500, 120, 4)
Final shape of outcomes chunk:  (15500, 2)
IDs with less than 12 trials

After excluding zero error IDs: 14745
Successfully read datafile 2020iat3.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (14745, 120, 4)
Final shape of outcomes chunk:  (14745, 2)
IDs with less than 12 trials >300 ms: (14569, 120, 2)
In prior-cases: (14569,)
2020iat3.txt done
Initial shape: 25869
After removing no age IDs: 16441
After removing invalid IDs: 16441
After excluding zero latency IDs: 16276
After excluding less than 120 trials IDs: 16244
After excluding more than 50% error IDs: 16233
After excluding zero error IDs: 15100
Successfully read datafile 2020iat4.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (15100, 120, 4)
Final shape of outcomes chunk:  (15100, 2)
IDs with less than 12 trials >300 ms: (14981, 120, 2)
In prior-cases: (14981,)
2020iat4.txt done
Initial shape: 25740
After removing no age IDs: 16093
After removing 

In prior-cases: (10492,)
2021iat16.txt done
Initial shape: 25802
After removing no age IDs: 11554
After removing invalid IDs: 11554
After excluding zero latency IDs: 11434
After excluding less than 120 trials IDs: 11411
After excluding more than 50% error IDs: 11400
After excluding zero error IDs: 10338
Successfully read datafile 2021iat17.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (10338, 120, 4)
Final shape of outcomes chunk:  (10338, 2)
IDs with less than 12 trials >300 ms: (10272, 120, 2)
In prior-cases: (10272,)
2021iat17.txt done
Initial shape: 25816
After removing no age IDs: 11654
After removing invalid IDs: 11654
After excluding zero latency IDs: 11526
After excluding less than 120 trials IDs: 11516
After excluding more than 50% error IDs: 11509
After excluding zero error IDs: 10472
Successfully read datafile 2021iat18.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more

After excluding zero latency IDs: 21709
After excluding less than 120 trials IDs: 21366
After excluding more than 50% error IDs: 21336
After excluding zero error IDs: 19559
Successfully read datafile 20221iat3.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (19559, 120, 4)
Final shape of outcomes chunk:  (19559, 2)
IDs with less than 12 trials >300 ms: (19402, 120, 2)
In prior-cases: (19402,)
20221iat3.txt done
Initial shape: 26010
After removing no age IDs: 12980
After removing invalid IDs: 12980
After excluding zero latency IDs: 12815
After excluding less than 120 trials IDs: 12597
After excluding more than 50% error IDs: 12583
After excluding zero error IDs: 11581
Successfully read datafile 20221iat4.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (11581, 120, 4)
Final shape of outcomes chunk:  (11581, 2)
IDs with less than 12 trials

After excluding zero error IDs: 22831
Successfully read datafile 20231iat3.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (22831, 120, 4)
Final shape of outcomes chunk:  (22831, 2)
IDs with less than 12 trials >300 ms: (22387, 120, 2)
In prior-cases: (22387,)
20231iat3.txt done
Initial shape: 25662
After removing no age IDs: 25653
After removing invalid IDs: 25653
After excluding zero latency IDs: 25645
After excluding less than 120 trials IDs: 24714
After excluding more than 50% error IDs: 24644
After excluding zero error IDs: 22925
Successfully read datafile 20231iat4.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (22925, 120, 4)
Final shape of outcomes chunk:  (22925, 2)
IDs with less than 12 trials >300 ms: (22492, 120, 2)
In prior-cases: (22492,)
20231iat4.txt done
Initial shape: 25531
After removing no age IDs: 25520
After remov

In prior-cases: (19774,)
2005iat4.txt done
Initial shape: 25255
After removing no age IDs: 19896
After removing invalid IDs: 19896
After excluding zero latency IDs: 19885
After excluding less than 120 trials IDs: 19810
After excluding more than 50% error IDs: 19778
After excluding zero error IDs: 19187
Successfully read datafile 2005iat5.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (19187, 120, 4)
Final shape of outcomes chunk:  (19187, 2)
IDs with less than 12 trials >300 ms: (18974, 120, 2)
In prior-cases: (18974,)
2005iat5.txt done
Initial shape: 903
After removing no age IDs: 753
After removing invalid IDs: 753
After excluding zero latency IDs: 753
After excluding less than 120 trials IDs: 750
After excluding more than 50% error IDs: 750
After excluding zero error IDs: 719
Successfully read datafile 2005iat6.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w

After excluding less than 120 trials IDs: 30500
After excluding more than 50% error IDs: 30476
After excluding zero error IDs: 29577
Successfully read datafile 2007iat7.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (29577, 120, 4)
Final shape of outcomes chunk:  (29577, 2)
IDs with less than 12 trials >300 ms: (29322, 120, 2)
In prior-cases: (29322,)
2007iat7.txt done
Initial shape: 4861
After removing no age IDs: 2553
After removing invalid IDs: 2553
After excluding zero latency IDs: 2552
After excluding less than 120 trials IDs: 2526
After excluding more than 50% error IDs: 2524
After excluding zero error IDs: 2430
Successfully read datafile 2007iat8.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (2430, 120, 4)
Final shape of outcomes chunk:  (2430, 2)
IDs with less than 12 trials >300 ms: (2410, 120, 2)
In prior-cases: (2410,)
200

Successfully read datafile 2009iat7.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (25461, 120, 4)
Final shape of outcomes chunk:  (25461, 2)
IDs with less than 12 trials >300 ms: (25404, 120, 2)
In prior-cases: (25404,)
2009iat7.txt done
Initial shape: 43978
After removing no age IDs: 26850
After removing invalid IDs: 26850
After excluding zero latency IDs: 26719
After excluding less than 120 trials IDs: 26559
After excluding more than 50% error IDs: 26541
After excluding zero error IDs: 25759
Successfully read datafile 2009iat8.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (25759, 120, 4)
Final shape of outcomes chunk:  (25759, 2)
IDs with less than 12 trials >300 ms: (25564, 120, 2)
In prior-cases: (25564,)
2009iat8.txt done
Initial shape: 50663
After removing no age IDs: 32308
After removing invalid IDs: 32308
After excluding zer

In prior-cases: (21215,)
2012iat.txt done
Initial shape: 51088
After removing no age IDs: 29128
After removing invalid IDs: 29128
After excluding zero latency IDs: 29088
After excluding less than 120 trials IDs: 28807
After excluding more than 50% error IDs: 28786
After excluding zero error IDs: 27774
Successfully read datafile 2012iat2.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (27774, 120, 4)
Final shape of outcomes chunk:  (27774, 2)
IDs with less than 12 trials >300 ms: (27471, 120, 2)
In prior-cases: (27471,)
2012iat2.txt done
Initial shape: 50939
After removing no age IDs: 4737
After removing invalid IDs: 4737
After excluding zero latency IDs: 4731
After excluding less than 120 trials IDs: 4696
After excluding more than 50% error IDs: 4686
After excluding zero error IDs: 4529
Successfully read datafile 2012iat3.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding 

After excluding zero latency IDs: 32495
After excluding less than 120 trials IDs: 32225
After excluding more than 50% error IDs: 32195
After excluding zero error IDs: 31189
Successfully read datafile 2014JanOctiat4.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (31189, 120, 4)
Final shape of outcomes chunk:  (31189, 2)
IDs with less than 12 trials >300 ms: (30914, 120, 2)
In prior-cases: (30914,)
2014JanOctiat4.txt done
Initial shape: 8787
After removing no age IDs: 5660
After removing invalid IDs: 5660
After excluding zero latency IDs: 5654
After excluding less than 120 trials IDs: 5617
After excluding more than 50% error IDs: 5612
After excluding zero error IDs: 5431
Successfully read datafile 2014JanOctiat5.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (5431, 120, 4)
Final shape of outcomes chunk:  (5431, 2)
IDs with less than 12 

After removing invalid IDs: 27143
After excluding zero latency IDs: 27088
After excluding less than 120 trials IDs: 26896
After excluding more than 50% error IDs: 26873
After excluding zero error IDs: 25836
Successfully read datafile 2016JanSepiat.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (25836, 120, 4)
Final shape of outcomes chunk:  (25836, 2)
IDs with less than 12 trials >300 ms: (25627, 120, 2)
In prior-cases: (25627,)
2016JanSepiat.txt done
Initial shape: 50741
After removing no age IDs: 27562
After removing invalid IDs: 27562
After excluding zero latency IDs: 27480
After excluding less than 120 trials IDs: 27268
After excluding more than 50% error IDs: 27249
After excluding zero error IDs: 26324
Successfully read datafile 2016JanSepiat2.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (26324, 120, 4)
Final shape of outcomes 

After excluding less than 120 trials IDs: 15613
After excluding more than 50% error IDs: 15607
After excluding zero error IDs: 14534
Successfully read datafile 2017iat.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (14534, 120, 4)
Final shape of outcomes chunk:  (14534, 2)
IDs with less than 12 trials >300 ms: (14412, 120, 2)
In prior-cases: (14412,)
2017iat.txt done
Initial shape: 25478
After removing no age IDs: 15816
After removing invalid IDs: 15816
After excluding zero latency IDs: 15717
After excluding less than 120 trials IDs: 15686
After excluding more than 50% error IDs: 15676
After excluding zero error IDs: 14671
Successfully read datafile 2017iat10.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (14671, 120, 4)
Final shape of outcomes chunk:  (14671, 2)
IDs with less than 12 trials >300 ms: (14548, 120, 2)
In prior-cases: (1

Successfully read datafile 2018iat.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (14469, 120, 4)
Final shape of outcomes chunk:  (14469, 2)
IDs with less than 12 trials >300 ms: (14352, 120, 2)
In prior-cases: (14352,)
2018iat.txt done
Initial shape: 25681
After removing no age IDs: 15580
After removing invalid IDs: 15580
After excluding zero latency IDs: 15452
After excluding less than 120 trials IDs: 15418
After excluding more than 50% error IDs: 15409
After excluding zero error IDs: 14465
Successfully read datafile 2018iat10.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (14465, 120, 4)
Final shape of outcomes chunk:  (14465, 2)
IDs with less than 12 trials >300 ms: (14323, 120, 2)
In prior-cases: (14323,)
2018iat10.txt done
Initial shape: 25513
After removing no age IDs: 15582
After removing invalid IDs: 15582
After excluding zer

In prior-cases: (15162,)
2019iat10.txt done
Initial shape: 25545
After removing no age IDs: 16183
After removing invalid IDs: 16183
After excluding zero latency IDs: 16055
After excluding less than 120 trials IDs: 16029
After excluding more than 50% error IDs: 16008
After excluding zero error IDs: 14973
Successfully read datafile 2019iat11.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (14973, 120, 4)
Final shape of outcomes chunk:  (14973, 2)
IDs with less than 12 trials >300 ms: (14828, 120, 2)
In prior-cases: (14828,)
2019iat11.txt done
Initial shape: 25572
After removing no age IDs: 15893
After removing invalid IDs: 15893
After excluding zero latency IDs: 15735
After excluding less than 120 trials IDs: 15706
After excluding more than 50% error IDs: 15678
After excluding zero error IDs: 14726
Successfully read datafile 2019iat12.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more

After excluding zero latency IDs: 17540
After excluding less than 120 trials IDs: 17514
After excluding more than 50% error IDs: 17509
After excluding zero error IDs: 15504
Successfully read datafile 2020iat12.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (15504, 120, 4)
Final shape of outcomes chunk:  (15504, 2)
IDs with less than 12 trials >300 ms: (15481, 120, 2)
In prior-cases: (15481,)
2020iat12.txt done
Initial shape: 25719
After removing no age IDs: 17748
After removing invalid IDs: 17748
After excluding zero latency IDs: 17684
After excluding less than 120 trials IDs: 17658
After excluding more than 50% error IDs: 17653
After excluding zero error IDs: 15642
Successfully read datafile 2020iat13.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (15642, 120, 4)
Final shape of outcomes chunk:  (15642, 2)
IDs with less than 12 trials

Successfully read datafile 2020iat25.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (7513, 120, 4)
Final shape of outcomes chunk:  (7513, 2)
IDs with less than 12 trials >300 ms: (7476, 120, 2)
In prior-cases: (7476,)
2020iat25.txt done
Initial shape: 25578
After removing no age IDs: 15673
After removing invalid IDs: 15673
After excluding zero latency IDs: 15522
After excluding less than 120 trials IDs: 15491
After excluding more than 50% error IDs: 15473
After excluding zero error IDs: 14511
Successfully read datafile 2020iat3.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (14511, 120, 4)
Final shape of outcomes chunk:  (14511, 2)
IDs with less than 12 trials >300 ms: (14299, 120, 2)
In prior-cases: (14299,)
2020iat3.txt done
Initial shape: 25571
After removing no age IDs: 16529
After removing invalid IDs: 16529
After excluding zero 

In prior-cases: (10458,)
2021iat15.txt done
Initial shape: 25648
After removing no age IDs: 11647
After removing invalid IDs: 11647
After excluding zero latency IDs: 11519
After excluding less than 120 trials IDs: 11494
After excluding more than 50% error IDs: 11487
After excluding zero error IDs: 10556
Successfully read datafile 2021iat16.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (10556, 120, 4)
Final shape of outcomes chunk:  (10556, 2)
IDs with less than 12 trials >300 ms: (10472, 120, 2)
In prior-cases: (10472,)
2021iat16.txt done
Initial shape: 25754
After removing no age IDs: 11603
After removing invalid IDs: 11603
After excluding zero latency IDs: 11466
After excluding less than 120 trials IDs: 11462
After excluding more than 50% error IDs: 11452
After excluding zero error IDs: 10514
Successfully read datafile 2021iat17.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more

After excluding zero latency IDs: 11610
After excluding less than 120 trials IDs: 11593
After excluding more than 50% error IDs: 11579
After excluding zero error IDs: 10632
Successfully read datafile 20221iat2.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (10632, 120, 4)
Final shape of outcomes chunk:  (10632, 2)
IDs with less than 12 trials >300 ms: (10559, 120, 2)
In prior-cases: (10559,)
20221iat2.txt done
Initial shape: 25567
After removing no age IDs: 21546
After removing invalid IDs: 21546
After excluding zero latency IDs: 21282
After excluding less than 120 trials IDs: 20981
After excluding more than 50% error IDs: 20956
After excluding zero error IDs: 19309
Successfully read datafile 20221iat3.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (19309, 120, 4)
Final shape of outcomes chunk:  (19309, 2)
IDs with less than 12 trials

After excluding zero error IDs: 22750
Successfully read datafile 20231iat2.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (22750, 120, 4)
Final shape of outcomes chunk:  (22750, 2)
IDs with less than 12 trials >300 ms: (22300, 120, 2)
In prior-cases: (22300,)
20231iat2.txt done
Initial shape: 25795
After removing no age IDs: 25785
After removing invalid IDs: 25785
After excluding zero latency IDs: 25768
After excluding less than 120 trials IDs: 24855
After excluding more than 50% error IDs: 24763
After excluding zero error IDs: 23034
Successfully read datafile 20231iat3.txt .
Converting to X and y arrays...
Final shape of RTs chunk (after once more excluding cases w/ missing age or trials):  (23034, 120, 4)
Final shape of outcomes chunk:  (23034, 2)
IDs with less than 12 trials >300 ms: (22540, 120, 2)
In prior-cases: (22540,)
20231iat3.txt done
Initial shape: 25795
After removing no age IDs: 25789
After remov

In [None]:
# Column layout of the assembled table: 5 posterior summaries x 7 parameters,
# the two outcome columns, the twelve RT summaries, the Mahalanobis distance
# and finally the name of the file each row came from.
column_names = ["v_congruent", "v_incongruent", "a_congruent", "a_incongruent",
                "tplus", "tminus", "alpha",
                "v_congruent_median", "v_incongruent_median", "a_congruent_median", "a_incongruent_median",
                "tplus_median", "tminus_median", "alpha_median",
                "v_congruent_std", "v_incongruent_std", "a_congruent_std", "a_incongruent_std",
                "tplus_std", "tminus_std", "alpha_std",
                "v_congruent_q025", "v_incongruent_q025", "a_congruent_q025", "a_incongruent_q025",
                "tplus_q025", "tminus_q025", "alpha_q025",
                "v_congruent_q975", "v_incongruent_q975", "a_congruent_q975", "a_incongruent_q975",
                "tplus_q975", "tminus_q975", "alpha_q975",

                "session_id", "age",

                "congruent_rt_correct", "congruent_rt_error", "congruent_accuracy",
                "incongruent_rt_correct", "incongruent_rt_error", "incongruent_accuracy",
                "word_rt_correct", "word_rt_error", "word_accuracy",
                "picture_rt_correct", "picture_rt_error", "picture_accuracy",

                "mahalanobis_distance", "dataset"]

# Sorted so the row order of the CSV is reproducible across runs / file systems.
datasets = sorted(os.listdir(PATH_TO_SAVE))

# Collect one frame per file and concatenate once at the end (linear instead of
# quadratic copying compared to growing a DataFrame inside the loop).
frames = []
for dataset in datasets:
    # NOTE: pickle can execute arbitrary code on load -- only read files written
    # by the estimation step of this project.
    with open(os.path.join(PATH_TO_SAVE, str(dataset)), "rb") as estimates_file:
        pickles = pickle.load(estimates_file)

    df_oneset = np.concatenate((pickles['est_array'], pickles['outcome_array'], pickles['rt_summaries'],
                                np.expand_dims(pickles['mahalanobis'], axis=1)), axis=1)

    # Label the rows of THIS file only; assigning on the accumulated frame would
    # relabel every earlier row with the current file name.
    frame = pd.DataFrame(data=df_oneset, columns=column_names[:-1])
    frame["dataset"] = str(dataset)
    frames.append(frame)

    print(str(dataset)+" done")

# pd.concat raises on an empty list, so fall back to an empty, correctly labelled frame.
if frames:
    df = pd.concat(frames, ignore_index=True)
else:
    df = pd.DataFrame(columns=column_names)

df.to_csv("df_levy.csv", index=False)