In [1]:
import os
import pickle 
import numpy as np
import matplotlib.pyplot as plt
import tensorflow_probability as tfp
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tqdm.notebook import tqdm
import seaborn as sns
import pandas as pd
from numba import njit
from scipy.stats import skew
from scipy.stats import binom
from functools import partial
np.set_printoptions(suppress=True)

# Import our bayesflow lib
from deep_bayes.models import BayesFlow, InvariantNetwork
from deep_bayes.training import train_online
from deep_bayes.losses import maximum_likelihood_loss
from deep_bayes.viz import plot_true_est_scatter, plot_true_est_posterior
import deep_bayes.diagnostics as diag

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Import TensorFlow and, when running under a 1.x release, switch on eager
# execution before any tensors are created.
import tensorflow as tf
if tf.__version__.startswith('1'):
    tf.enable_eager_execution()

In [3]:
%matplotlib inline

In [4]:
@njit
def diffusion_trial(v, a, zr, ndt_c, ndt_w, dt, max_steps):
    """
    Simulates a single trial of the diffusion model and returns a signed response time.
    ----------

    Arguments:
    v         : float -- drift rate
    a         : float -- boundary separation (the process stops outside the open interval (0, a))
    zr        : float -- relative starting point; the walk starts at a * zr
    ndt_c     : float -- non-decision time added when the walk ends above 0
    ndt_w     : float -- non-decision time added (with negative sign) when the walk ends at or below 0
    dt        : float -- integration step size
    max_steps : number -- maximum number of integration steps before the walk is stopped
    ----------

    Returns:
    float -- rt + ndt_c if the final position is > 0, otherwise -(rt + ndt_w)
    """

    position = a * zr
    steps_taken = 0.
    noise_scale = np.sqrt(dt)

    # Euler-Maruyama random walk until a boundary is crossed or the step budget is spent
    while 0 < position < a and steps_taken < max_steps:
        position += v * dt + noise_scale * np.random.normal()
        steps_taken += 1.0

    rt = steps_taken * dt

    # The sign of the returned value encodes which side the walk ended on
    if position > 0.:
        return rt + ndt_c
    return -rt - ndt_w

@njit
def simulate_diffusion_condition(n_trials, v, a, zr, ndt_c, ndt_w, dt=0.005, max_steps=10000):
    """
    Simulates n_trials independent diffusion trials sharing the same parameters.
    ----------

    Returns:
    np.ndarray of shape (n_trials, ) -- signed response times, one per trial
    """

    signed_rts = np.empty(n_trials)
    for trial_idx in range(n_trials):
        signed_rts[trial_idx] = diffusion_trial(v, a, zr, ndt_c, ndt_w, dt, max_steps)
    return signed_rts


@njit
def simulate_diffusion_2_conds(theta, n_trials, dt=0.005, max_steps=10000):
    """
    Simulates a diffusion process for 2 conditions from 6 free parameters
    (v1, v2, a1, a2, ndt_c, ndt_w); the relative starting point is fixed to 0.5.
    ----------

    Arguments:
    theta    : sequence of 6 floats -- (v1, v2, a1, a2, ndt_c, ndt_w)
    n_trials : sequence of 2 ints   -- number of trials in condition 1 and condition 2
    ----------

    Returns:
    np.ndarray of shape (n_trials[0] + n_trials[1], ) -- signed response times,
    condition 1 first, then condition 2
    """

    v1, v2, a1, a2, ndt_c, ndt_w = theta

    # Both conditions share the non-decision times and differ in drift and boundary
    first_cond = simulate_diffusion_condition(n_trials[0], v1, a1, 0.5, ndt_c, ndt_w, dt, max_steps)
    second_cond = simulate_diffusion_condition(n_trials[1], v2, a2, 0.5, ndt_c, ndt_w, dt, max_steps)
    return np.concatenate((first_cond, second_cond))

def data_generator(batch_size, n_obs=None, to_tensor=True, n_obs_min=25, n_obs_max=120):
    """
    Runs the forward model 'batch_size' times: first samples theta ~ p(theta) from
    the prior, then simulates x ~ p(x|theta) with the requested number of trials.
    If n_obs is None, a random number of trials is drawn for each of the two
    conditions (the same pair is used for the whole batch).
    ----------

    Arguments:
    batch_size : int -- the number of samples to draw from the prior
    n_obs      : tuple (int, int) or None -- the number of observations to draw from p(x|theta)
                                  for each condition
    n_obs_min  : int -- the minimum number of observations per condition
    n_obs_max  : int -- the maximum number of observations per condition (inclusive)
    to_tensor  : boolean -- converts theta and x to tensors if True
    ----------

    Output:
    dict with keys
    'theta' : tf.Tensor or np.ndarray of shape (batch_size, theta_dim) -- the data generating parameters
    'x'     : tf.Tensor or np.ndarray of shape (batch_size, n_obs[0] + n_obs[1], 2) -- signed response
              times and the condition indicator (0 for condition 1, 1 for condition 2)
    """

    # NOTE(review): `prior` is not defined in the visible part of this notebook -- it must
    # already exist in the kernel namespace; confirm where it comes from.
    theta = prior(batch_size)

    # Draw the trial counts only when the caller did not fix them
    if n_obs is None:
        n_obs = np.random.randint(n_obs_min, n_obs_max+1, 2)

    # One simulated data set (row) per parameter vector
    signed_rts = np.apply_along_axis(simulate_diffusion_2_conds, axis=1, arr=theta, n_trials=n_obs)

    # Condition indicator, identical for every data set in the batch
    cond_labels = np.concatenate((np.zeros(n_obs[0]), np.ones(n_obs[1])))
    cond_arr = np.tile(cond_labels, (batch_size, 1))
    x = np.stack((signed_rts, cond_arr), axis=-1)

    if not to_tensor:
        return {'theta': theta, 'x': x}

    return {
        'theta': tf.convert_to_tensor(theta, dtype=tf.float32),
        'x': tf.convert_to_tensor(x, dtype=tf.float32),
    }

# Summary Network Structure
Here, we define the basic outline of a permutation-invariant neural network which maps raw reaction-time data to outcomes.
<br>
See https://arxiv.org/pdf/1901.06082.pdf (p.28) for more details.

In [5]:
class InvariantModule(tf.keras.Model):
    """Implements an invariant nn module as proposed by Bloem-Reddy and Teh (2019)."""

    def __init__(self, meta, pooler=tf.reduce_mean):
        """
        Creates an invariant function: a dense embedding, a pooling step over
        axis 1, and a second dense stack applied to the pooled representation.
        ----------

        Arguments:
        meta   : dict -- a dictionary with hyperparameter name - values; the keys read here
                 are 'dense_inv_args' (kwargs for each Dense layer) and 'n_dense_inv'
                 (number of Dense layers per stack)
        pooler : callable -- reduction called as pooler(tensor, axis=1); defaults to tf.reduce_mean
        """

        super(InvariantModule, self).__init__()


        # Pre-pooling embedding applied to every element of the set
        self.module = tf.keras.Sequential([
            tf.keras.layers.Dense(**meta['dense_inv_args'])
            for _ in range(meta['n_dense_inv'])
        ])
        
        self.pooler = pooler
            

        # NOTE(review): the post-pooling stack is also built from 'dense_inv_args' /
        # 'n_dense_inv'; a 'dense_post_args' entry in meta is not read here.
        self.post_pooling_dense = tf.keras.Sequential([
            tf.keras.layers.Dense(**meta['dense_inv_args'])
            for _ in range(meta['n_dense_inv'])
        ])

    def call(self, x):
        """
        Transforms the input into an invariant representation.
        ----------

        Arguments:
        x : tf.Tensor of shape (batch_size, n, m) - the input where n is the 'time' or 'samples' dimensions
            over which pooling is performed and m is the input dimensionality
        ----------

        Returns:
        out : tf.Tensor of shape (batch_size, h_dim) -- the pooled and invariant representation of the input
        """

        # Embed
        x_emb = self.module(x)

        # Pool representation
        pooled = self.pooler(x_emb, axis=1)
    
        # Increase representational power
        out = self.post_pooling_dense(pooled)
        return out


class EquivariantModule(tf.keras.Model):
    """Implements an equivariant nn module as proposed by Bloem-Reddy and Teh (2019)."""

    def __init__(self, meta):
        """
        Creates an equivariant neural network consisting of a FC network with
        equal number of hidden units in each layer and an invariant module
        with the same FC structure.
        ----------

        Arguments:
        meta : dict -- a dictionary with hyperparameter name - values; the keys read here are
               'dense_equiv_args' and 'n_dense_equiv' (plus those read by InvariantModule)
        """

        super(EquivariantModule, self).__init__()

        self.module = tf.keras.Sequential([
            tf.keras.layers.Dense(**meta['dense_equiv_args'])
            for _ in range(meta['n_dense_equiv'])
        ])

        self.invariant_module = InvariantModule(meta)

    def call(self, x):
        """
        Transforms the input into an equivariant representation.
        ----------

        Arguments:
        x : tf.Tensor of shape (batch_size, n, m) - the input where n is the 'time' or 'samples' dimensions
            and m is the input dimensionality
        ----------

        Returns:
        out : tf.Tensor of shape (batch_size, n, equiv_dim) -- one output vector per set element;
              the set dimension n is preserved (no pooling on the output)
        """

        # Invariant summary of the whole set, shape (batch_size, h_dim)
        x_inv = self.invariant_module(x)

        # Repeat the summary for each of the n set elements. The dynamic shape is used
        # so that this also works when n is not statically known (x.shape[1] is None).
        n_elements = tf.shape(x)[1]
        x_inv = tf.tile(tf.expand_dims(x_inv, axis=1), [1, n_elements, 1])

        # Each element sees its own features plus the set-level summary
        x = tf.concat((x_inv, x), axis=-1)
        out = self.module(x)
        return out


class InvariantNetwork(tf.keras.Model):
    """
    Implements a network which parameterizes a
    permutationally invariant function according to Bloem-Reddy and Teh (2019).

    NOTE(review): this class definition rebinds the name `InvariantNetwork` that the
    import cell also brings in from `deep_bayes.models`; from here on the local
    definition is the one in use.
    """

    def __init__(self, meta):
        """
        Creates a permutationally invariant network
        consisting of meta['n_equiv'] equivariant modules followed by one invariant module.
        ----------

        Arguments:
        meta : dict -- hyperparameter settings for the equivariant and invariant modules
        """

        super(InvariantNetwork, self).__init__()

        self.equiv = tf.keras.Sequential([
            EquivariantModule(meta)
            for _ in range(meta['n_equiv'])
        ])
        self.inv = InvariantModule(meta)

    def call(self, x, **kwargs):
        """
        Transforms the input into a permutationally invariant
        representation by first passing it through multiple equivariant
        modules in order to increase representational power.
        ----------

        Arguments:
        x : tf.Tensor of shape (batch_size, n, m) - the input where n is the
        'samples' dimensions over which pooling is performed and m is the input dimensionality
        **kwargs : accepted but not used in this method
        ----------

        Returns:
        out : tf.Tensor of shape (batch_size, h_dim) -- the pooled and invariant representation of the input
        """

        x = self.equiv(x)
        out = self.inv(x)
        return out

# Hyperparameter settings and model definition

In [6]:
# Neural network structure (summary network)
# NOTE(review): 'dense_post_args' is not read by the InvariantModule defined in this
# notebook, which builds its post-pooling layers from 'dense_inv_args'.
summary_meta = {
    'dense_inv_args'   :  dict(units=64, activation='elu', kernel_initializer='glorot_normal'),
    'dense_equiv_args' :  dict(units=64, activation='elu', kernel_initializer='glorot_normal'),
    'dense_post_args'  :  dict(units=64, activation='elu', kernel_initializer='glorot_normal'),
    'n_equiv'          :  2,
    'n_dense_inv'      :  3,
    'n_dense_equiv'    :  3,
}

# Network hyperparameters (passed to BayesFlow together with n_inv_blocks)
inv_meta = {
    'n_units': [128, 128, 128],
    'activation': 'elu',
    'w_decay': 0.00000,
    'initializer': 'glorot_uniform'
}
n_inv_blocks = 4

# Forward model hyperparameters
param_names = [r'$v_1$', r'$v_2$', r'$a_1$', r'$a_2$', r'$\tau_{c}$', r'$\tau_{w}$']
theta_dim = len(param_names)
n_test = 300
# min == max, so the generator always simulates exactly 60 trials per condition
n_obs_max = 60
n_obs_min = 60
n_obs_test = (60, 60)

# Utility for online learning
data_gen = partial(data_generator, n_obs_min=n_obs_min, n_obs_max=n_obs_max)


# Training and optimizer hyperparameters
ckpt_file = "iat_bayesflow"
batch_size = 64
epochs = 50
iterations_per_epoch = 1000
n_samples_posterior = 2000
clip_value = 5.

learning_rate = 0.001
if tf.__version__.startswith('1'):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
else:
    # `learning_rate` is the documented argument name; `lr` is only a deprecated alias
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

In [7]:
# Build the summary network from the class defined in this notebook and hand it to
# BayesFlow together with the invertible-block settings and the number of parameters.
summary_net = InvariantNetwork(summary_meta)
model = BayesFlow(inv_meta, n_inv_blocks, theta_dim, summary_net=summary_net, permute=True)

# Checkpoint manager
Used for saving/loading the model.

In [8]:
# Track optimizer and model state in one checkpoint object and restore the most
# recent checkpoint from the given directory, if one exists.
# NOTE(review): the checkpoint directory is an absolute, machine-specific path, and
# `ckpt_file` from the hyperparameter cell is not used here -- consider making this configurable.
checkpoint = tf.train.Checkpoint(optimizer=optimizer, net=model)
manager = tf.train.CheckpointManager(checkpoint, 'C:/Users/User1/Downloads/DataSizeMatters-main/DataSizeMatters-main/iat_bayesflow', max_to_keep=5)
checkpoint.restore(manager.latest_checkpoint)
# manager.latest_checkpoint is falsy when no checkpoint was found in the directory
if manager.latest_checkpoint:
    print("Restored from {}".format(manager.latest_checkpoint))
else:
    print("Initializing from scratch.")

Restored from C:/Users/User1/Downloads/DataSizeMatters-main/DataSizeMatters-main/iat_bayesflow\ckpt-final


# Inference on all real data

In [9]:
def read_iat_rt_file(file_to_read, black_not_african):
    """
    Reads and preprocesses a single IAT file.
    ----------

    Arguments:
    file_to_read      : str -- file name, appended to the global PATH
    black_not_african : bool -- if True, keep the 'Black people'/'White people' pairings,
                        otherwise the 'African Americans'/'European Americans' pairings
    ----------

    Returns:
    rts_data : dict -- session_id -> np.ndarray of shape (n_trials, 4) with the columns
               (latency in seconds, error, compatible, block number); trials keep
               the order in which they appear in the file
    """

    if black_not_african:
        pairs_to_retain = ['Black people/Bad,White people/Good',
                           'White people/Bad,Black people/Good',
                           'White people/Good,Black people/Bad',
                           'Black people/Good,White people/Bad']
        compatible_pairs = ['Black people/Bad,White people/Good',
                            'White people/Good,Black people/Bad']
    else:
        pairs_to_retain = ['African Americans/Bad,European Americans/Good',
                           'European Americans/Bad,African Americans/Good',
                           'European Americans/Good,African Americans/Bad',
                           'African Americans/Good,European Americans/Bad']
        compatible_pairs = ['African Americans/Bad,European Americans/Good',
                            'European Americans/Good,African Americans/Bad']

    # We only need certain columns from the data, so we tell pandas to read only a
    # subset of them and save memory. The header contains stray whitespace, so the
    # columns are selected by position and the names are verified afterwards.
    cols_to_use_str = ['block_number', 'block_pairing_definition', 'trial_latency', 'trial_error', 'session_id']
    cols_to_use_idx = [0, 3, 10, 11, 12]
    valid_values_error = [0, 1, '1', '0'] # Use for some datasets containing invalid values

    df = pd.read_csv(PATH + file_to_read, delimiter='\t', usecols=cols_to_use_idx)

    # Strip the whitespace from the column names and verify the assumed order
    df.columns = [c.strip() for c in df.columns]
    assert all([c1 == c2 for c1, c2 in zip(df.columns, cols_to_use_str)]), 'Expected column order does not match!'

    # Retain only relevant pairs
    df = df[df['block_pairing_definition'].isin(pairs_to_retain)]

    # Remove rows whose error flag is not a valid 0/1 value
    df = df[df['trial_error'].isin(valid_values_error)]

    # Ensure correct types
    df = df.astype({"block_number": int,
                    "block_pairing_definition": str,
                    "trial_latency": int,
                    "trial_error": int})

    # Remove sessions containing a trial with 0 latency
    zero_latency_id = df.loc[df['trial_latency'] == 0, 'session_id'].unique()
    df = df[~df['session_id'].isin(zero_latency_id)]

    # Remove sessions with 0 errors. Only the error column is summed; summing the
    # whole frame would also try to aggregate the string column.
    errors_per_session = df.groupby('session_id')['trial_error'].sum()
    zero_errors_id = errors_per_session[errors_per_session == 0].index
    # Explicit copy: the columns assigned below must not write into a view of a filtered frame
    df = df[~df['session_id'].isin(zero_errors_id)].copy()

    # Convert RTs from ms to seconds (replaces the int column with a float column)
    df['trial_latency'] = df['trial_latency'] / 1000

    # Code compatible (1) vs incompatible (0) blocks
    df['trial_compatible'] = df['block_pairing_definition'].isin(compatible_pairs).astype(int)

    # Build session_id -> (n_trials, 4) array in a single pass. groupby orders the
    # sessions by id and keeps the rows of each session in file order, so no separate
    # (unstable) sort that could shuffle trials within a session is needed.
    value_cols = ['trial_latency', 'trial_error', 'trial_compatible', 'block_number']
    rts_data = {}
    for session_id, session_df in df.groupby('session_id'):
        rts_data[session_id] = session_df[value_cols].to_numpy()
    return rts_data

In [10]:
def prepare_data(dataset_name, black_not_african):
    """
    Preprocesses a single IAT file and pairs every retained session with its outcomes.
    ----------

    Returns:
    X_arr : np.ndarray of shape (N_datasets, 120, 4) -- trial data of the retained sessions
    y_arr : np.ndarray of shape (N_datasets, 2)      -- the matching entries of the global outcomes_dict
    """

    # Read in file
    data_dict = read_iat_rt_file(dataset_name, black_not_african)
    print('Successfully read datafile', dataset_name, '.')
    print('Converting to X and y arrays...')

    # The number of surviving sessions is unknown in advance, so collect in
    # lists first and stack once at the end
    X_arr, y_arr = [], []

    for session_id, trials in data_dict.items():
        outcome = outcomes_dict.get(session_id)

        # Skip sessions without an outcome entry
        if outcome is None:
            continue

        # Skip sessions that do not have exactly 120 trials and 2 outcomes
        if trials.shape != (120, 4) or outcome.shape != (2,):
            continue

        X_arr.append(trials)
        y_arr.append(outcome)

    X_arr = np.stack(X_arr)
    y_arr = np.stack(y_arr)

    print('Final shape of RTs chunk: ', X_arr.shape)
    print('Final shape of outcomes chunk: ', y_arr.shape)

    return X_arr, y_arr


In [11]:
def data_cleaning_300(X_test, y_test, min_rt=0.3, max_rt=10., max_fast_trials=12):
    """
    Brings the data into the network input format and applies the latency-based
    exclusion criteria. The inputs are not modified.
    ----------

    Arguments:
    X_test          : np.ndarray of shape (N, n_trials, >=3) -- column 0: latency in seconds,
                      column 1: error flag, column 2: compatible flag
    y_test          : np.ndarray of shape (N, k) -- outcomes, one row per data set
    min_rt          : float -- latencies below this value count as 'too fast' (default 0.3 s)
    max_rt          : float -- latencies above this value count as 'too slow' (default 10 s)
    max_fast_trials : int -- data sets with more than this many too-fast trials are dropped
                      (default 12)
    ----------

    Returns:
    X_clean : np.ndarray (float32) of shape (N_kept, n_trials, 2) -- signed latency (negative
              for error trials) and compatible flag; too-fast and too-slow latencies are set to 0
    y_clean : np.ndarray (int64) of shape (N_kept, k) -- outcomes of the retained data sets
    """
    # Negative coding: error trials get a negative latency
    rts = np.where(X_test[:, :, 1], -X_test[:, :, 0], X_test[:, :, 0])
    comps = X_test[:, :, 2]

    abs_rts = np.abs(rts)
    too_fast = abs_rts < min_rt

    # Exclusion criterion: keep data sets with at most `max_fast_trials` too-fast trials
    keep = too_fast.sum(axis=1) <= max_fast_trials

    # Mask out-of-range latencies with 0 instead of dropping the trial,
    # so every data set keeps the same number of trials
    rts = np.where(too_fast | (abs_rts > max_rt), 0, rts)

    X_clean = np.stack((rts, comps), axis=2)[keep].astype(np.float32)
    y_clean = y_test[keep, :].astype(np.int64)

    return X_clean, y_clean


In [12]:
def compute_summaries(samples_dm):
    """
    Compute posterior summary statistics and posterior correlations.
    ----------

    Arguments:
    samples_dm : np.ndarray of shape (n_samples, n_datasets, n_params) -- posterior draws
    ----------

    Returns:
    estimates : np.ndarray of shape (n_datasets, 5 * n_params + n_params * (n_params - 1) / 2)
                -- per data set: means, medians, stds, 2.5% quantiles, 97.5% quantiles and the
                upper triangle (without diagonal) of the posterior correlation matrix, row by row
    """
    param_means = samples_dm.mean(axis=0)
    param_medians = np.median(samples_dm, axis=0)
    param_stds = np.std(samples_dm, axis=0)
    param_q025 = np.quantile(samples_dm, .025, axis=0)
    param_q975 = np.quantile(samples_dm, .975, axis=0)

    # corr[i] is the (n_params x n_params) posterior correlation matrix of data set i
    corr = tfp.stats.correlation(samples_dm, sample_axis=0, event_axis=2).numpy()

    # Keep every parameter pair once: (0,1), (0,2), ..., (n_params-2, n_params-1).
    # For 6 parameters this is the same column order as slicing each row by hand.
    row_idx, col_idx = np.triu_indices(corr.shape[-1], k=1)
    corr_array = corr[:, row_idx, col_idx]

    estimates = np.concatenate((param_means, param_medians, param_stds,
                                param_q025, param_q975, corr_array), axis=1)

    return estimates

In [13]:
def post_processing(X_test, estimates, param_means, y_test):
    """
    Drops data sets whose posterior means fall outside the accepted parameter ranges.
    The same rows are removed from the data, the estimates and the outcomes.
    ----------

    Returns:
    (data_chunk_clean, estimates_clean, y_clean) -- the three inputs restricted to the retained rows
    """
    # Exclusive upper bound per parameter column; the exclusive lower bound is .1 for all
    upper_bounds = (7.,   # v1
                    7.,   # v2
                    4.,   # a1
                    4.,   # a2
                    3.,   # tplus
                    7.)   # tminus

    good_cases = np.ones(param_means.shape[0], dtype=bool)
    for col, upper in enumerate(upper_bounds):
        column = param_means[:, col]
        good_cases &= (column > .1) & (column < upper)

    return X_test[good_cases,:,:], estimates[good_cases,:], y_test[good_cases,:]


In [14]:
#Settings
# NOTE: the paths are joined to file names by plain string concatenation
# (e.g. PATH + file_to_read), so each one must end with a path separator.
# Path to data
PATH = 'D:/iat/data/'
# Path to outcomes
PATH_OUTCOME = 'D:/iat/outcomes/'

# where to save files
PATH_TO_SAVE = 'D:/iat/pickle/'

# Choose type of stimulus to process (Black/African American)
# True -> 'Black people'/'White people' pairings, False -> 'African Americans'/'European Americans'
black_not_african = True

In [15]:
# Read in outcomes, drop rows with missing values, sort by session id
outcomes = pd.read_csv(PATH_OUTCOME + 'outcomes_with_id_161718.csv', header=0, na_values='NA')
outcomes = outcomes.dropna().sort_values('session_id')

# Each session must appear exactly once; state this explicitly instead of relying on
# int() failing on a multi-row group.
assert outcomes['session_id'].is_unique, 'Duplicate session ids in the outcomes file!'

# Convert outcomes to dictionaries for instant lookup (session_id -> int)
session_ids = outcomes['session_id'].tolist()
outcomes_id2_dict = dict(zip(session_ids, outcomes['id2'].astype(int).tolist()))
outcomes_age_dict = dict(zip(session_ids, outcomes['age'].astype(int).tolist()))

# session_id -> np.array([id2, age])
outcomes_dict = {
    session_id: np.array([outcomes_id2_dict[session_id], outcomes_age_dict[session_id]])
    for session_id in session_ids
}

# Note, that outcomes dict will be a global dictionary

In [16]:
#This is where the magic happens

# 1. Store all data-set chunk names in a list
datasets = os.listdir(PATH)

# 2. For each chunk
for dataset_name in datasets:

    # 2.1 Load chunk
    X_test, y_test = prepare_data(dataset_name, black_not_african)

    # 2.2 Preprocess chunk
    # 2.2.1 Apply IAT exclusion criteria (conservative)
    # 2.2.2 Format data for NN, negative coding
    # 2.2.3 Add 0s for <0.3, and >10
    X_test, y_test = data_cleaning_300(X_test, y_test)

    # 2.3 Estimate chunk
    # The chunk is processed in up to 20 pieces; array_split yields empty pieces when
    # fewer than 20 data sets remain, and those are skipped.
    sub_chunks = [x for x in np.array_split(X_test, 20) if len(x) > 0]
    samples_dm = np.concatenate([model.sample(x, n_samples=n_samples_posterior,
                                              to_numpy=True) for x in sub_chunks], axis=1)

    # 2.4 Compute summaries: means, medians, stds, Q0.025, Q0.975, post_corr
    estimates = compute_summaries(samples_dm)
    param_means = estimates[:,0:6]

    # 2.5 Post-processing
        # 2.5.1 find indices of implausible (out of prior) parameter means
        # 2.5.2 remove from estimates
        # 2.5.3 remove from datasets
    data_chunk_clean, estimates_clean, y_clean = post_processing(X_test, estimates, param_means, y_test)

    # 2.6 Store everything together (serialized, pickle.dump) as a dict with keys
    dict_to_store = {'data_array': data_chunk_clean, 'est_array': estimates_clean, "outcome_array": y_clean}
    out_file = PATH_TO_SAVE + str(black_not_african) + str(dataset_name.replace('.txt', '.p'))
    # Context manager makes sure the file is flushed and closed after each chunk
    with open(out_file, "wb") as f:
        pickle.dump(dict_to_store, f)
    print(str(dataset_name)+ " done")

# 3. Celebrate

  mask |= (ar1 == a)


Successfully read datafile 2018iat10.txt .
Converting to X and y arrays...
Final shape of RTs chunk:  (20553, 120, 4)
Final shape of outcomes chunk:  (20553, 2)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:
This op will be removed after the deprecation date. Please switch to tf.sets.difference().
2018iat10.txt done


  mask |= (ar1 == a)


Successfully read datafile 2018iat11.txt .
Converting to X and y arrays...
Final shape of RTs chunk:  (20863, 120, 4)
Final shape of outcomes chunk:  (20863, 2)
2018iat11.txt done


  mask |= (ar1 == a)


Successfully read datafile 2018iat12.txt .
Converting to X and y arrays...
Final shape of RTs chunk:  (20313, 120, 4)
Final shape of outcomes chunk:  (20313, 2)
2018iat12.txt done
Successfully read datafile 2018iat13.txt .
Converting to X and y arrays...
Final shape of RTs chunk:  (6171, 120, 4)
Final shape of outcomes chunk:  (6171, 2)
2018iat13.txt done


  mask |= (ar1 == a)


Successfully read datafile 2018iat2.txt .
Converting to X and y arrays...
Final shape of RTs chunk:  (20688, 120, 4)
Final shape of outcomes chunk:  (20688, 2)
2018iat2.txt done


  mask |= (ar1 == a)


Successfully read datafile 2018iat3.txt .
Converting to X and y arrays...
Final shape of RTs chunk:  (20063, 120, 4)
Final shape of outcomes chunk:  (20063, 2)
2018iat3.txt done


  mask |= (ar1 == a)


Successfully read datafile 2018iat4.txt .
Converting to X and y arrays...
Final shape of RTs chunk:  (20128, 120, 4)
Final shape of outcomes chunk:  (20128, 2)
2018iat4.txt done


  mask |= (ar1 == a)


Successfully read datafile 2018iat5.txt .
Converting to X and y arrays...
Final shape of RTs chunk:  (20085, 120, 4)
Final shape of outcomes chunk:  (20085, 2)
2018iat5.txt done


  mask |= (ar1 == a)


Successfully read datafile 2018iat6.txt .
Converting to X and y arrays...
Final shape of RTs chunk:  (19729, 120, 4)
Final shape of outcomes chunk:  (19729, 2)
2018iat6.txt done


  mask |= (ar1 == a)


Successfully read datafile 2018iat7.txt .
Converting to X and y arrays...
Final shape of RTs chunk:  (20168, 120, 4)
Final shape of outcomes chunk:  (20168, 2)
2018iat7.txt done


  mask |= (ar1 == a)


Successfully read datafile 2018iat8.txt .
Converting to X and y arrays...
Final shape of RTs chunk:  (20558, 120, 4)
Final shape of outcomes chunk:  (20558, 2)
2018iat8.txt done


  mask |= (ar1 == a)


Successfully read datafile 2018iat9.txt .
Converting to X and y arrays...
Final shape of RTs chunk:  (20347, 120, 4)
Final shape of outcomes chunk:  (20347, 2)
2018iat9.txt done
