# Replicate Buccino model

author: laquitainesteeve@gmail.com

Simulate Buccino's model without filtering

Tested on Ubuntu 24.04.1 LTS (32 cores, 188 GB RAM, Intel(R) Core(TM) i9-14900K @ 3.2 GHz/5.8 GHz) with RTX 5090 GPU with 40GB VRAM (GPU not needed)

Required:
- Storage: 14 GB for 1,500 templates (13GB) and Recording (~500 MB)

tips to free up storage

0. check storage with df -h
1. Restarting the notebook kernel frees up about 100 GB of storage: some large deleted files remain open and are only removed from disk once the kernel is restarted.
2. Delete the waveform extractor files (100 GB saved). Run `import os, glob` and then, on its own line, `for f in glob.glob('temp/pros_of_details/waveformextractor'): os.remove(f)`
3. free up RAM: clear cache with sudo sh -c 'echo 1 >  /proc/sys/vm/drop_caches'

## Setup 

Set up the "_mearec_si_103_0_.yml" virtual environment

In [None]:
%%time
%load_ext autoreload
%autoreload 2

# import packages
import os
import MEArec as mr
import numpy as np
import spikeinterface.extractors as se
import spikeinterface.sorters as sorters 
import spikeinterface.comparison as sc  
from matplotlib import pyplot as plt;
from numpy.linalg import norm as lalgnorm
import warnings
from scipy.spatial.distance import cdist
import json 
import spikeinterface as si
from spikeinterface.curation import remove_excess_spikes
from spikeinterface import extract_waveforms
from spikeinterface.core.sparsity import compute_sparsity
import spikeinterface.qualitymetrics as sqm
import spikeinterface.core.template_tools as ttools
print(si.__version__)

# silence library warnings for the rest of the session
warnings.filterwarnings('ignore')

# PROJECT ROOT
# the working directory is moved here, so the relative paths below start from it

PROJ_PATH = "/home/steeve/steeve/epfl/code/spikebias/"
os.chdir(PROJ_PATH)

# duration of the recording period considered, in seconds
DURATION_S = 600

# inputs shared by the simulations: default cell models and scratch folder
CELLS_PATH = mr.get_default_cell_models_folder()
DATA_PATH = './temp/pros_of_details/'

# Buccino replicate: template files
# templates used for all simulations
TEMPLATE_PARAMS_PATH = './assets/mearec/global_templates.json'
# warning: initially this held far fewer templates (200, not 1500) - check if that has an effect
TEMPLATES_PATH = './temp/pros_of_details/templates.h5'

# Buccino replicate: saved parameters and recording locations
INPUT_PARAMS_REP_PATH =  './assets/mearec/buccino_rep/input_params.json'
OUTPUT_PARAMS_REP_PATH = './assets/mearec/buccino_rep/output_params.json'
RECORDING_REP_H5_PATH = './temp/pros_of_details/recordings.h5'
RECORDING_REP_PATH = 'dataset/00_raw/recording_buccino_rep/'

# parallel processing parameters used when saving the recording
save_recording_params = dict(
    n_jobs=20,
    verbose=True,
    progress_bar=True,
    overwrite=True,
    chunk_size=50000,
)

# general parallel processing parameters
job_kwargs = {"n_jobs": -1, "chunk_duration": "1s", "progress_bar": True}

# figure export settings
savefig_cfg = dict(transparent=True, dpi=400)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
0.103.0
CPU times: user 964 µs, sys: 0 ns, total: 964 µs
Wall time: 1.11 ms


## Functions

In [8]:

def standardize_site_location_property(Recording, zero_coord=0):
    """Store zero-padded channel locations under the 'location3D' property.

    A column of zeros is inserted into the array returned by
    ``Recording.get_channel_locations()`` at column index ``zero_coord``,
    and the result is attached to ``Recording`` as 'location3D'.

    Args:
        Recording: object exposing ``get_channel_locations()`` and
            ``set_property()``; it is modified in place.
        zero_coord: column index at which the zero-valued coordinate
            is inserted.

    Returns:
        The same ``Recording`` object, now carrying 'location3D'.
    """
    padded_locations = np.insert(
        Recording.get_channel_locations(), zero_coord, 0, axis=1
    )
    Recording.set_property('location3D', padded_locations)
    return Recording


def set_metadata_to_mearec_simulations(recording_generator, Recording, Sorting, layer='L5'):
    """Copy cell annotations onto the sorting and tag every channel with a layer.

    For each feature listed below, the value found in the ``annotations``
    of every entry of ``recording_generator.spiketrains`` is collected and
    stored on ``Sorting`` as a property of the same name. The ``Recording``
    receives a 'layer' property holding one ``layer`` label per channel.

    Args:
        recording_generator: object whose ``spiketrains`` entries carry an
            ``annotations`` mapping containing every listed feature.
        Recording: recording extractor; modified in place.
        Sorting: sorting extractor; modified in place.
        layer: label written for every channel (defaults to 'L5', the
            value that was previously hard-coded).

    Returns:
        The ``(Recording, Sorting)`` pair that was passed in.

    Raises:
        KeyError: if a spike train lacks one of the listed annotations.
    """
    # cell features copied from the spike train annotations
    features = ['cell_type', 'snr', 'mtype', 'soma_position', 'bursting', 'exp_decay', 'max_spikes_per_burst', 'max_burst_duration']
    spiketrains = recording_generator.spiketrains

    # set cell properties in sorting extractor (one value per cell)
    for feature in features:
        feature_data = [train.annotations[feature] for train in spiketrains]
        Sorting.set_property(feature, np.array(feature_data))

    # set electrode properties in recording extractor
    # by default, channel_locations are attached to Recording extractor
    # the label vector is sized from the recording itself, not a fixed 384,
    # so probes with a different channel count get a vector of the right length
    n_channels = Recording.get_num_channels()
    Recording.set_property('layer', np.array([layer] * n_channels))
    return Recording, Sorting

def simulate_synthetic_recording_with_mearec(rec_prms: dict, layer:str, templates_path:str, recording_clone_h5_path:str):
    """Generate a MEArec recording and return its recording and ground-truth extractors.

    The recording is generated from ``rec_prms`` and the templates at
    ``templates_path``, written to ``recording_clone_h5_path``, loaded back
    as a recording extractor and a sorting extractor, and the h5 file is
    then removed.

    Args:
        rec_prms: recording parameters passed to ``mr.gen_recordings``.
        layer: label stored for every channel under the 'layers' property.
        templates_path: templates passed to ``mr.gen_recordings``.
        recording_clone_h5_path: path of the intermediate h5 file; it is
            deleted before the function returns.

    Returns:
        Tuple ``(RecordingClone, SortingClone, rec_gen)``: the recording
        extractor, the ground-truth sorting extractor, and the MEArec
        recording generator.
    """
    # generate simulated recording (takes 22 min)
    rec_gen = mr.gen_recordings(params=rec_prms, templates=templates_path)

    # save simulated recording
    mr.save_recording_generator(rec_gen, recording_clone_h5_path)

    # load recording h5 file
    RecordingClone = se.MEArecRecordingExtractor(recording_clone_h5_path)

    # add layer metadata, one label per channel; the count is read from the
    # recording instead of assuming a 384-channel probe
    # NOTE(review): the key here is 'layers' whereas
    # set_metadata_to_mearec_simulations writes 'layer' - confirm which one
    # downstream code reads
    n_channels = RecordingClone.get_num_channels()
    RecordingClone.set_property("layers", np.array([layer] * n_channels))

    # load ground truth
    SortingClone = se.MEArecSortingExtractor(recording_clone_h5_path)

    # delete intermediate h5 file
    # NOTE(review): both extractors above were created from this file;
    # presumably they keep it open so later reads still work - confirm
    os.remove(recording_clone_h5_path)
    return RecordingClone, SortingClone, rec_gen


def _write_params_json(params, path:str):
    """Write ``params`` as indented JSON to ``path``, creating its folder if needed."""
    folder_path = os.path.dirname(path)
    # a bare filename has an empty dirname, and os.makedirs('') would raise
    if folder_path:
        os.makedirs(folder_path, exist_ok=True)
    with open(path, 'w') as f:
        json.dump(params, f, indent=4)


def save_simulation_parameters_to_assets(rec_prms:dict, rec_gen, input_params_path:str, output_params_path:str):
    """Save the input and the generator-recorded simulation parameters as JSON.

    Args:
        rec_prms: input simulation parameters given to MEArec.
        rec_gen: recording generator; its ``params`` attribute is saved.
        input_params_path: destination file for ``rec_prms``.
        output_params_path: destination file for ``rec_gen.params``.

    Missing parent folders are created. Paths without a folder component
    are written to the current working directory.

    Raises:
        TypeError: if a parameter value cannot be serialized by ``json``.
    """
    # save input simulation parameters to MEAREC
    _write_params_json(rec_prms, input_params_path)

    # save parameters recorded in recording generator
    _write_params_json(rec_gen.params, output_params_path)


## Parametrize cell templates

In [9]:
# common cell templates, probe, and seed parameters (takes 4,531 secs)
# start from MEArec's defaults, then override the entries below in one pass
tpl_prms = mr.get_default_templates_params()
tpl_prms.update({
    'probe': 'Neuropixels-384',  # probe
    'n': 1500,                   # (default = 50) number of templates per cell model (minimum that worked to match npx_spont cell counts), I use 1500.
    'seed': 0,                   # setup reproducibility
    'overhang': 50,              # (default = 30) extension in um beyond MEA boundaries for neuron locations
    'ncontacts': 1,              # (default = 10) number of contacts per recording site, spatial extent of the electrode over which to average potential
    'offset': 0,                 # (default = 0) x offset; MEA is in the yz plane with x 0 by default. One can center the MEA in cell population by setting x offset.
    'xlim': [-50, 50],           # (default = [10,80]) limits ( low high ) for neuron locations in the x-axis (depth)
    'ylim': None,                # (default = None) limits ( low high ) for neuron locations in the y-axis
    'zlim': [-1001, 1001],       # (default = None) limits ( low high ) for neuron locations in the z-axis
})

# # create and save waveform templates (10 min for 200 templates per cell, 15 MB) - uncomment to run (1h20min)
# tpl_gen = mr.gen_templates(cell_models_folder = CELLS_PATH, params = tpl_prms)
# mr.save_template_generator(tpl_gen, TEMPLATES_PATH)

# display the resulting parameter set as the cell output
tpl_prms

{'sim_time': 1,
 'target_spikes': [3, 50],
 'cut_out': [2, 5],
 'dt': 0.03125,
 'delay': 10,
 'weights': [0.25, 1.75],
 'rot': 'physrot',
 'probe': 'Neuropixels-384',
 'ncontacts': 1,
 'overhang': 50,
 'offset': 0,
 'xlim': [-50, 50],
 'ylim': None,
 'zlim': [-1001, 1001],
 'x_distr': 'uniform',
 'beta_distr_params': [1.5, 5],
 'min_amp': 30,
 'check_eap_shape': True,
 'n': 1500,
 'seed': 0,
 'drifting': False,
 'max_drift': 100,
 'min_drift': 30,
 'drift_steps': 31,
 'drift_xlim': [-10, 10],
 'drift_ylim': [-10, 10],
 'drift_zlim': [30, 80],
 'check_for_drift_amp': False,
 'drift_within_bounds': False,
 'timeout': None,
 'max_iterations': 1000}

## Parametrize recording

In [10]:
%%time

# common recording parameters, starting from MEArec's defaults
rec_prms = mr.get_default_recordings_params()

# no filtering
rec_prms['recordings']['filter'] = False

# spike train settings
rec_prms['spiketrains'].update({
    'duration': 600,   # (default=600) recording duration
    'min_rate': 0.5,   # default value
})

# seeds for reproducibility
rec_prms['seeds'].update({
    'spiketrains': 0,
    'templates': 1,
    'noise': 2,
    'convolution': 3,
})

# save common template parameters
with open(TEMPLATE_PARAMS_PATH, 'w') as params_file:
    json.dump(tpl_prms, params_file, indent=4)

CPU times: user 5.36 ms, sys: 0 ns, total: 5.36 ms
Wall time: 5.25 ms



- This replicates the Buccino et al. simulation with MEArec, as described in the SpikeInterface paper.
- Execution time: 23 min


## Simulate

- execution time: 23 min

In [11]:
%%time

# setup parameters
# presumably the excitatory / inhibitory cell counts - confirm against MEArec docs
rec_prms['spiketrains']['n_exc'] = 200
rec_prms['spiketrains']['n_inh'] = 50

# simulate recording (23 min)
# returns the recording extractor, the ground-truth sorting extractor and the
# MEArec recording generator; the intermediate h5 file is removed by the helper
RecordingRep, GtRep, RecGen = simulate_synthetic_recording_with_mearec(rec_prms, 'L5', TEMPLATES_PATH, RECORDING_REP_H5_PATH)

# setup metadata
# cell annotations go on the sorting, a 'layer' property on the recording,
# then a 'location3D' property is derived from the channel locations
RecordingRep, GtRep = set_metadata_to_mearec_simulations(RecGen, RecordingRep, GtRep)
RecordingRep = standardize_site_location_property(RecordingRep)

# save extractors
# only the recording is written here; GtRep is not saved in this cell
RecordingRep.save(folder=RECORDING_REP_PATH, **save_recording_params)

# save parameters
# input parameters and the parameters held by the generator, as two JSON files
save_simulation_parameters_to_assets(rec_prms, RecGen, INPUT_PARAMS_REP_PATH, OUTPUT_PARAMS_REP_PATH)

# report parameters
# report_simulation_parameters(RecGen)
# reload the saved recording from disk and list the properties it carries
print('Recording metadata:', si.load_extractor(RECORDING_REP_PATH).get_property_keys())

Loading templates...
Done loading templates...
Spiketrains seed:  0
dtype float32
Noise Level  10
Templates selection seed:  1
Selecting cells
Padding template edges
Elapsed pad time: 1.4876055717468262
Creating time jittering
Elapsed jitter time: 1.3652050495147705
Smoothing templates
Computing spike train SNR
Adding spiketrain annotations
Convolution seed:  3
Electrode modulaton
Adding noise
Noise seed:  2
Elapsed time:  999.3307459810167
Deleted /tmp/tmpgeq447ki
write_binary_recording 
engine=process - n_jobs=20 - samples_per_chunk=50,000 - chunk_memory=73.24 MiB - total_memory=1.43 GiB - chunk_duration=1.56s


write_binary_recording (workers: 20 processes):   0%|          | 0/384 [00:00<?, ?it/s]

Recording metadata: ['gain_to_uV', 'offset_to_uV', 'contact_vector', 'location', 'group', 'gain_to_physical_unit', 'location3D', 'layer', 'offset_to_physical_unit', 'channel_name', 'physical_unit', 'layers']
CPU times: user 15min 23s, sys: 1min 32s, total: 16min 55s
Wall time: 17min 41s
