# Simulations

* Runs simulations
* Creates dataframes that support the figures.

In [1]:
import os
import os.path as op
import numpy as np
from numpy.random import SeedSequence, default_rng
import pandas as pd
import utils
import ipyparallel as ipp

____________
## Set up

Parallelization

In [2]:
# Launch a local ipyparallel cluster with 4 engines, using the synchronous
# variant of the start call.
cluster = ipp.Cluster(n=4)
cluster.start_cluster_sync()

Using existing profile dir: '/Users/sfavila/.ipython/profile_default'
Starting 4 engines with <class 'ipyparallel.cluster.launcher.LocalEngineSetLauncher'>


In [3]:
# Connect a client to the running cluster, wait until 4 engines are available,
# then display the engine ids as the cell output.
rc = cluster.connect_client_sync()
rc.wait_for_engines(4); rc.ids

100%|█████████████████████████████████████████████████████████████████████████████| 4/4 [00:07<00:00,  1.76s/engine]


[0, 1, 2, 3]

In [4]:
# dv: direct view over all engines; used below to push shared variables.
# v:  load-balanced view; used below to distribute the simulation tasks.
dv = rc[:]
v = rc.load_balanced_view()

Assign project paths and variables

In [5]:
# Paths are relative to the notebook's working directory.
data_dir = op.join('..', 'data')
# Input and output CSV dataframes are read from / written to this directory.
df_dir = op.join(data_dir, 'dataframes')

In [6]:
# Project-level defaults from the local `utils` module.
# NOTE(review): presumably lists of subject ids, ROI names and task names -- confirm in utils.
subjects, rois, tasks = utils.default_data()

Assign random seed

In [7]:
# Fixed entropy so that every random stream spawned below is reproducible.
entropy = 157245829812966997872450835235695796168 
# Root seed sequence; n_children_spawned=500 means the first spawn() starts at child index 500.
# NOTE: ss.spawn() is stateful (each call advances the child counter), so the
# simulation cells only reproduce the same streams when run once, in order,
# from a fresh kernel.
ss = SeedSequence(entropy, n_children_spawned=500)

------------------------
## Data Loading 

#### Load stim assignment

In [8]:
# One stimulus table per subject (same order as `subjects`), sorted by and
# indexed on 'stim_id'.
stim_info = [
    pd.read_csv(op.join(data_dir,  subj, 'behav', 'stim_info.csv'))
    .sort_values(by='stim_id')
    .set_index('stim_id')
    for subj in subjects
]

#### Load vertex data 

In [9]:
# Essential columns used by the simulations and the downstream averaging/fitting
core_cols = ['subj', 'hemi', 'roi', 'vert', 'task', 'stim_id', 
             'stim_angle_brain', 'full-angle', 'ang_dist_bin', 'eccen_bin', 'beta', 'se']

# Parse only the essential columns instead of loading the whole file and then
# discarding most of it. read_csv returns `usecols` columns in file order, so
# re-select with core_cols to keep the column order defined above.
vert_data = pd.read_csv(op.join(df_dir, 'vertex_data.csv.gz'), usecols=core_cols)[core_cols]

____________________________
## Simulate spatial response profiles under different noise assumptions

In [10]:
# Copy shared variables into the global namespace of every engine.
# `n_sims` and `stim_ids` are read as globals by the simulation functions
# defined below; `stim_ids` comes from the first subject's stimulus table.
dv.push({
    "vert_data": vert_data,
    "subjects": subjects,
    "n_sims": 100,
    "stim_ids": stim_info[0].index.values,
})

<AsyncResult: _push>

### 1. SNR Simulation

In [11]:
def snr_simulation(stim_group):
    """Simulate perception betas for one group after scaling up the noise.

    Runs on an ipyparallel engine. It reads two globals from the engine
    namespace: ``noise_factor`` (noise multiplier; 0 keeps the measured
    perception standard errors) and ``n_sims`` (number of simulated datasets).

    Parameters
    ----------
    stim_group : tuple
        ``(rng, ((subj, roi, stim), stim_df))`` where ``rng`` is a numpy random
        Generator and ``stim_df`` holds the rows of one (subj, roi, stim_id)
        group with at least the columns 'task', 'beta' and 'se'.
        Perception and memory rows are combined element-wise, so they are
        assumed to list the same voxels in the same order -- TODO confirm.

    Returns
    -------
    pandas.DataFrame
        Long format: one row per perception row per simulation. Contains the
        perception columns ('beta' renamed to 'beta_true'), the SNR summaries
        ('snr_per', 'snr_mem', 'snr_ratio', 'snr_ratio_new'), 'simulation',
        the simulated 'beta', and 'noise_factor'.
    """
    import numpy
    import pandas

    (rng, ((subj, roi, stim), stim_df)) = stim_group

    # Rename beta in our df; 'beta' is reused below for the simulated values
    stim_df = stim_df.rename(columns={'beta': 'beta_true'})

    # Get the beta and standard error for each voxel during perception and memory
    per_df = stim_df.query("task=='perception'")
    mem_df = stim_df.query("task=='memory'")
    med_p = per_df['beta_true'].values
    med_m = mem_df['beta_true'].values
    se_p = per_df['se'].values
    se_m = mem_df['se'].values

    # Calculate snr
    snr_p = numpy.abs(med_p) / se_p
    snr_m = numpy.abs(med_m) / se_m

    # Calculate the ratio of perception to memory snr
    snr_ratio = snr_p / snr_m

    # Scale up each voxel's noise
    if noise_factor == 0:
        se_new = se_p * 1
    else:
        # Choose se_new so that |med_p| / se_new == snr_m / noise_factor.
        # Magnitudes are used so the new standard error stays non-negative
        # when the perception and memory betas of a voxel have opposite signs;
        # a negative value would flip that voxel's noise relative to the
        # others and produce a negative snr_ratio_new.
        se_new = noise_factor * ((numpy.abs(med_p) * se_m) / numpy.abs(med_m))

    # New snr ratio
    snr_p_new = numpy.abs(med_p) / se_new
    snr_ratio_new = snr_p_new / snr_m

    # Generate datasets with new parameter estimates drawn from the noisier distribution
    sim_betas = []
    for _ in range(n_sims):
        # One shared draw per simulation scales every voxel's se, so the
        # noise is correlated across voxels
        c = rng.normal(0, 1)
        sim_betas.append(med_p + se_new * c)
    sim_betas = numpy.vstack(sim_betas).T  # rows: voxels, columns: simulations

    # Create new dataframe with all simulations
    sim_df = pandas.DataFrame(sim_betas, columns=numpy.arange(n_sims))
    info_df = per_df.reset_index(drop=True)
    info_df = info_df.assign(snr_per=snr_p, snr_mem=snr_m,
                             snr_ratio=snr_ratio, snr_ratio_new=snr_ratio_new)
    sim_data = pandas.concat([info_df, sim_df], axis=1)
    # The last n_sims columns hold the simulations; melt them into long format
    sim_data = sim_data.melt(id_vars=list(sim_data.columns[:-n_sims]),
                             var_name='simulation', value_name='beta')
    sim_data = sim_data.assign(noise_factor=noise_factor)

    return sim_data

In [12]:
noise_range = [0, 1, 2, 4, 8]

# One independent child seed per noise level.
# NOTE: ss.spawn() advances the state of `ss`, so re-running this cell
# yields different seeds than the first run.
child_seeds = ss.spawn(len(noise_range))
snr_norm, snr_params = [], []

# The grouping does not depend on the noise level, so build it once
d = vert_data.dropna(subset=['ang_dist_bin']).groupby(['subj', 'roi', 'stim_id'])

# Loop over each snr level
for seed, nf in zip(child_seeds, noise_range):
    
    # Handle parallelization.
    # push() is asynchronous by default, and tasks are submitted through a
    # different (load-balanced) view, so wait for the push to finish before
    # submitting tasks that read `noise_factor` on the engines.
    dv.push({"noise_factor":nf}, block=True)
    
    # One independent random stream per (subj, roi, stim_id) group
    grandchildren = seed.spawn(len(d))
    grand_streams = [default_rng(s) for s in grandchildren]
    
    # Generate simulated data for this snr level
    snr_data = v.map(snr_simulation, zip(grand_streams, d), ordered=False)
    snr_data = pd.concat(list(snr_data))
    
    # Do averaging and von mises fitting
    n = utils.norm_group(snr_data, group_cols=['noise_factor', 'simulation'])
    p = utils.fit_diff_vonmises(n, 'beta_adj', group_cols=['noise_factor', 'simulation'])
    
    snr_norm.append(n)
    snr_params.append(p)
    
snr_norm = pd.concat(snr_norm).reset_index(drop=True)
snr_params = pd.concat(snr_params).reset_index(drop=True)

  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  d = Y - (max(Y) / 2)
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))


In [13]:
# Save the SNR-simulation group data and fitted parameters as CSV files in
# df_dir (index=False leaves out the row index).
snr_norm.to_csv(op.join(df_dir, 'sim_snr_group_ang_data.csv'), index=False)
snr_params.to_csv(op.join(df_dir, 'sim_snr_group_ang_fits.csv'), index=False)

In [14]:
# Drop the last simulated dataset from the notebook namespace.
del snr_data 
# NOTE(review): presumably clears stored task results/history on the client
# and hub to release memory -- see the ipyparallel Client docs.
rc.purge_everything()

### 2. Lapse Simulation

In [15]:
def lapse_simulation(stim_group):
    """Simulate perception betas for one group with random memory lapses.

    Runs on an ipyparallel engine and reads three globals from the engine
    namespace: ``n_sims`` (number of simulated datasets), ``stim_ids`` (array
    of all stimulus ids) and ``nrem`` (number of stimuli treated as
    remembered in each simulation).

    Parameters
    ----------
    stim_group : tuple
        ``(rng, ((subj, roi, stim), stim_df))`` where ``rng`` is a numpy random
        Generator and ``stim_df`` holds the rows of one (subj, roi, stim_id)
        group with at least the columns 'task', 'beta' and 'se'.

    Returns
    -------
    pandas.DataFrame
        Long format: one row per perception row per simulation, with the
        perception columns ('beta' renamed to 'beta_true'), 'simulation',
        the simulated 'beta', 'nrem' and 'nlapse'.
    """
    import numpy
    import pandas

    rng, (group_key, stim_df) = stim_group
    _subj, _roi, stim = group_key

    # Keep the measured beta under a separate name; 'beta' will hold simulated values
    stim_df = stim_df.rename(columns={'beta': 'beta_true'})

    # Measured perception beta and standard error for each voxel
    perception = stim_df.query("task=='perception'")
    med_p = perception['beta_true'].values
    se_p = perception['se'].values

    draws = []
    for _ in range(n_sims):
        # A single normal draw scales every voxel's se, correlating noise across voxels
        noise = se_p * rng.normal(0, 1)

        # The first `nrem` entries of a random ordering count as remembered
        remembered = stim in rng.permutation(stim_ids)[:nrem]

        # Remembered: perception mean plus noise; lapse: zero mean plus noise
        baseline = med_p if remembered else 0
        draws.append(baseline + noise)

    sim_betas = numpy.vstack(draws).T  # rows: voxels, columns: simulations

    # Attach the simulations to the perception rows and reshape to long format
    sim_df = pandas.DataFrame(sim_betas, columns=numpy.arange(n_sims))
    info_df = perception.copy().reset_index(drop=True)
    wide = pandas.concat([info_df, sim_df], axis=1)
    sim_data = wide.melt(id_vars=list(wide.columns[:-n_sims]),
                         var_name='simulation', value_name='beta')

    return sim_data.assign(nrem=nrem, nlapse=len(stim_ids) - nrem)

In [16]:
nrem_range = np.arange(0, 5)

# One independent child seed per lapse rate.
# NOTE: ss.spawn() advances the state of `ss`, so re-running this cell
# yields different seeds than the first run.
child_seeds = ss.spawn(len(nrem_range))
lapse_norm, lapse_params = [], []

# The grouping does not depend on the lapse rate, so build it once
d = vert_data.dropna(subset=['ang_dist_bin']).groupby(['subj', 'roi', 'stim_id'])

# Loop over the inverse of lapse rate (number of stim remembered)
for seed, nrem in zip(child_seeds, nrem_range):
    
    # Handle parallelization.
    # push() is asynchronous by default, and tasks are submitted through a
    # different (load-balanced) view, so wait for the push to finish before
    # submitting tasks that read `nrem` on the engines.
    dv.push({"nrem":nrem}, block=True)
    
    # One independent random stream per (subj, roi, stim_id) group
    grandchildren = seed.spawn(len(d))
    grand_streams = [default_rng(s) for s in grandchildren]
    
    # Generate simulated data for this rate
    lapse_data = v.map(lapse_simulation, zip(grand_streams, d), ordered=False)
    lapse_data = pd.concat(list(lapse_data))
    
    # Do averaging and von mises fitting
    n = utils.norm_group(lapse_data, group_cols=['nrem', 'nlapse', 'simulation'])
    p = utils.fit_diff_vonmises(n, 'beta_adj', group_cols=['nrem', 'nlapse', 'simulation'])
    
    lapse_norm.append(n)
    lapse_params.append(p)
    
lapse_norm = pd.concat(lapse_norm).reset_index(drop=True)
lapse_params = pd.concat(lapse_params).reset_index(drop=True)

  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  d = Y - (max(Y) / 2)
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  d = Y - (max(Y) / 2)
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  

  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))


Save the group-average data and fitted parameters computed above for each simulated data set

In [17]:
# Save the lapse-simulation group data and fitted parameters as CSV files in
# df_dir (index=False leaves out the row index).
lapse_norm.to_csv(op.join(df_dir, 'sim_lapse_group_ang_data.csv'), index=False)
lapse_params.to_csv(op.join(df_dir, 'sim_lapse_group_ang_fits.csv'), index=False)

In [18]:
# Drop the last simulated dataset from the notebook namespace.
del lapse_data
# NOTE(review): presumably clears stored task results/history on the client
# and hub to release memory -- see the ipyparallel Client docs.
rc.purge_everything()

### 3. Associative Error Simulation

In [19]:
def assoc_simulation(stim_group):
    """Simulate perception betas for one (subject, ROI) with associative errors.

    Runs on an ipyparallel engine and reads three globals from the engine
    namespace: ``n_sims`` (number of simulated datasets), ``stim_ids`` (numpy
    array of all stimulus ids) and ``nrem`` (number of stimuli drawn from
    their own perception distribution in each simulation).

    Parameters
    ----------
    stim_group : tuple
        ``(rng, ((subj, roi), stim_df))`` where ``rng`` is a numpy random
        Generator and ``stim_df`` holds the rows of one (subj, roi) group
        with at least the columns 'task', 'stim_id', 'beta' and 'se'.

    Returns
    -------
    pandas.DataFrame
        Long format, stacked over stimuli: one row per perception row per
        simulation, with 'beta' renamed to 'beta_true', plus 'simulation',
        the simulated 'beta', 'nrem' and 'n_assoc_err'.
    """
    import numpy
    import pandas

    rng, ((_subj, _roi), stim_df) = stim_group

    # Keep the measured beta under a separate name; 'beta' will hold simulated values
    stim_df = stim_df.rename(columns={'beta': 'beta_true'})

    # Measured perception beta and standard error per voxel, keyed by stimulus
    med_p, se_p = {}, {}
    for stim, stim_rows in stim_df.groupby('stim_id'):
        perception = stim_rows.query("task=='perception'")
        med_p[stim] = perception['beta_true'].values
        se_p[stim] = perception['se'].values

    sim_betas = {s: [] for s in stim_ids}
    for _ in range(n_sims):

        # The first `nrem` entries of a random ordering are remembered correctly
        remembered = rng.permutation(stim_ids)[:nrem]

        for stim in stim_ids:
            # One normal draw per stimulus scales every voxel's se
            c = rng.normal(0, 1)
            if stim in remembered:
                # drawn from the perception distribution of the correct stimulus
                source = stim
            else:
                # drawn from the perception distribution of a random *wrong* stimulus
                source = rng.permutation(stim_ids[stim_ids != stim])[0]
            sim_betas[stim].append(med_p[source] + se_p[source] * c)

    frames = []
    for stim in stim_ids:
        betas = numpy.vstack(sim_betas[stim]).T  # rows: voxels, columns: simulations

        # Attach the simulations to this stimulus' perception rows, then go long
        sim_df = pandas.DataFrame(betas, columns=numpy.arange(n_sims))
        info_df = stim_df.query("task=='perception' & stim_id==@stim").copy().reset_index(drop=True)
        wide = pandas.concat([info_df, sim_df], axis=1)
        long = wide.melt(id_vars=list(wide.columns[:-n_sims]),
                         var_name='simulation', value_name='beta')
        frames.append(long.assign(nrem=nrem, n_assoc_err=len(stim_ids) - nrem))

    return pandas.concat(frames)

In [20]:
nrem_range = np.arange(0, 5)

# One independent child seed per associative error rate.
# NOTE: ss.spawn() advances the state of `ss`, so re-running this cell
# yields different seeds than the first run.
child_seeds = ss.spawn(len(nrem_range))
assoc_norm, assoc_params = [], []

# The grouping does not depend on the error rate, so build it once.
# Groups are (subj, roi) here because each task simulates all stimuli together.
d = vert_data.dropna(subset=['ang_dist_bin']).groupby(['subj', 'roi'])

# Loop over the inverse of associative error rate (number of stim remembered)
for seed, nrem in zip(child_seeds, nrem_range):
    
    # Handle parallelization.
    # push() is asynchronous by default, and tasks are submitted through a
    # different (load-balanced) view, so wait for the push to finish before
    # submitting tasks that read `nrem` on the engines.
    dv.push({"nrem":nrem}, block=True)
    
    # One independent random stream per (subj, roi) group
    grandchildren = seed.spawn(len(d))
    grand_streams = [default_rng(s) for s in grandchildren]
    
    # Generate simulated data for this rate
    # (imap rather than map, unlike the other simulation loops)
    assoc_data = v.imap(assoc_simulation, zip(grand_streams, d), ordered=False)
    assoc_data = pd.concat(list(assoc_data))
    
    # Do averaging and von mises fitting
    n = utils.norm_group(assoc_data, group_cols=['nrem', 'n_assoc_err', 'simulation'])
    p = utils.fit_diff_vonmises(n, 'beta_adj', group_cols=['nrem', 'n_assoc_err', 'simulation'])
    
    assoc_norm.append(n)
    assoc_params.append(p)
    
assoc_norm = pd.concat(assoc_norm).reset_index(drop=True)
assoc_params = pd.concat(assoc_params).reset_index(drop=True)

  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  d = Y - (max(Y) / 2)
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  d = Y - (max(Y) / 2)
  

  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2

In [21]:
# Save the associative-error group data and fitted parameters as CSV files in
# df_dir (index=False leaves out the row index).
assoc_norm.to_csv(op.join(df_dir, 'sim_assoc_group_ang_data.csv'), index=False)
assoc_params.to_csv(op.join(df_dir, 'sim_assoc_group_ang_fits.csv'), index=False)

In [22]:
# Drop the last simulated dataset from the notebook namespace.
del assoc_data
# NOTE(review): presumably clears stored task results/history on the client
# and hub to release memory -- see the ipyparallel Client docs.
rc.purge_everything()

### 4. Angular Error Simulation

In [23]:
def ang_simulation(stim_group):
    """Simulate perception betas for one group plus a noisy remembered angle.

    Runs on an ipyparallel engine and reads two globals from the engine
    namespace: ``n_sims`` (number of simulated datasets) and ``err_std``
    (standard deviation of the normally distributed angular error).

    Parameters
    ----------
    stim_group : tuple
        ``(rng, ((subj, roi, stim), stim_df))`` where ``rng`` is a numpy random
        Generator and ``stim_df`` holds the rows of one (subj, roi, stim_id)
        group with at least the columns 'task', 'beta', 'se' and
        'stim_angle_brain'.

    Returns
    -------
    pandas.DataFrame
        Long format: one row per perception row per simulation, with the
        perception columns ('beta' renamed to 'beta_true'), 'simulation',
        the simulated 'beta', 'err_std', and 'mem_angle' (one remembered
        angle per simulation, repeated for every row of that simulation).
    """
    import numpy
    import pandas

    rng, ((_subj, _roi, _stim), stim_df) = stim_group

    # Keep the measured beta under a separate name; 'beta' will hold simulated values
    stim_df = stim_df.rename(columns={'beta': 'beta_true'})

    # Measured perception beta and standard error for each voxel
    perception = stim_df.query("task=='perception'")
    med_p = perception['beta_true'].values
    se_p = perception['se'].values

    # Simulate perception data and a wrong position at which this stimulus is remembered
    draws, mem_angles = [], []
    for _ in range(n_sims):

        # A single normal draw scales every voxel's se, correlating noise across voxels
        shared = rng.normal(0, 1)
        draws.append(med_p + se_p * shared)

        # Remembered angle: the group's stimulus angle (first row) plus normal error
        true_angle = stim_df['stim_angle_brain'].iloc[0]
        mem_angles.append(rng.normal(true_angle, err_std))

    sim_betas = numpy.vstack(draws).T  # rows: voxels, columns: simulations

    # Attach the simulations to the perception rows and reshape to long format
    sim_df = pandas.DataFrame(sim_betas, columns=numpy.arange(n_sims))
    info_df = perception.copy().reset_index(drop=True)
    wide = pandas.concat([info_df, sim_df], axis=1)
    sim_data = wide.melt(id_vars=list(wide.columns[:-n_sims]),
                         var_name='simulation', value_name='beta')

    # melt stacks simulation 0 first, then 1, ...; repeating each angle once
    # per perception row lines the angles up with their simulation
    per_row_angles = numpy.repeat(mem_angles, len(info_df))
    return sim_data.assign(err_std=err_std, mem_angle=per_row_angles)

In [24]:
err_range = [0, 30, 60, 90, 180]

# One independent child seed per error level.
# NOTE: ss.spawn() advances the state of `ss`, so re-running this cell
# yields different seeds than the first run.
child_seeds = ss.spawn(len(err_range))
ang_norm, ang_params = [], []

# The grouping does not depend on the error level, so build it once
d = vert_data.dropna(subset=['ang_dist_bin']).groupby(['subj', 'roi', 'stim_id'])

# Loop over angular error standard deviation
for seed, err_std in zip(child_seeds, err_range):
    
    # Handle parallelization.
    # push() is asynchronous by default, and tasks are submitted through a
    # different (load-balanced) view, so wait for the push to finish before
    # submitting tasks that read `err_std` on the engines.
    dv.push({"err_std":err_std}, block=True)
    
    # One independent random stream per (subj, roi, stim_id) group
    grandchildren = seed.spawn(len(d))
    grand_streams = [default_rng(s) for s in grandchildren]
    
    # Generate simulated data for this std deviation
    ang_data = v.map(ang_simulation, zip(grand_streams, d), ordered=False)
    ang_data = pd.concat(list(ang_data))
    # Recompute angular distance using the simulated remembered angle
    ang_data = utils.calc_ang_distance(ang_data, rotate_by="mem_angle", exclude_eccen=False)
    
    # Do averaging and von mises fitting
    n = utils.norm_group(ang_data, group_cols=['err_std', 'simulation'])
    p = utils.fit_diff_vonmises(n, 'beta_adj', group_cols=['err_std', 'simulation'])
    
    ang_norm.append(n)
    ang_params.append(p)
    
ang_norm = pd.concat(ang_norm).reset_index(drop=True)
ang_params = pd.concat(ang_params).reset_index(drop=True)

  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2*np.pi*iv(0,kappa))
  p = scale * np.exp(kappa*np.cos(theta-loc))/(2

In [25]:
# Save the angular-error group data and fitted parameters as CSV files in
# df_dir (index=False leaves out the row index).
ang_norm.to_csv(op.join(df_dir, 'sim_ang_group_ang_data.csv'), index=False)
ang_params.to_csv(op.join(df_dir, 'sim_ang_group_ang_fits.csv'), index=False)

In [26]:
# Drop the last simulated dataset from the notebook namespace.
del ang_data
# NOTE(review): presumably clears stored task results/history on the client
# and hub to release memory -- see the ipyparallel Client docs.
rc.purge_everything()