# Convert input spikes of a simulation campaign from .dat to .h5 format

- Also patches the `simulation_config.json` files to point to the new .h5 files (this step is skipped for converted BlueConfig simulations)

In [1]:
import json
import subprocess
from pathlib import Path
from shutil import copyfile, move

In [2]:
# ---------------------------------------------------------------------------
# Names used inside the simulation configs / by the converter
# ---------------------------------------------------------------------------
SIM_CONFIG_NAME = "simulation_config.json"  # config file expected in every simulation folder
STIM_NAME = "Stimulus spikeReplay"          # key under "inputs" holding the spike replay entry
STIM_POPULATION = "VPM"                     # population name handed to the converter tool

# ---------------------------------------------------------------------------
# Data locations
# ---------------------------------------------------------------------------
# Root on another machine:
#   data_root = Path("/Users/pokorny/Data")
data_root = Path("/Users/james/Documents/obi/Data")

# Alternative campaign (SM-whisker-scan):
#   CAMPAIGN_PATH = data_root / "Simulations" / "BBP-curated" / "SM-whisker-scan"
#   SPIKE_PATH = data_root / "Simulations" / "BBP-raw" / "SM-whisker-scan" / "518fbeaf-f0ba-4f99-a0ff-22cb7e89eed7"
#   blue_config_converted = False

# Curated campaign (receives the .h5 files) and raw campaign (holds the .dat files)
CAMPAIGN_PATH = data_root / "Simulations" / "BBP-curated" / "assemblies"
SPIKE_PATH = data_root / "Simulations" / "BBP-raw" / "assemblies" / "d21efd45-7740-4268-b06f-e1de35f2b6cf"

# True: spike files use the fixed names input.dat / input.h5 and the configs
# are not patched. False: the spike file name is read from each config.
blue_config_converted = True

# ---------------------------------------------------------------------------
# External converter executable
# ---------------------------------------------------------------------------
# Location on another machine:
#   CONVERTER_TOOL = "/Users/pokorny/JupyterLab/git/libsonatareport/build/tools/converter/spikes_converter"
CONVERTER_TOOL = "/Users/james/Documents/obi/code/libsonatareport/build/tools/converter/spikes_converter"

In [3]:
# Check sub-folders
# Candidates are all entries of the campaign folder whose name ends in a digit.
found_folder_names = [f.name for f in CAMPAIGN_PATH.glob("*[0-9]")]
num_sims = len(found_folder_names)
print(f"Found {num_sims} simulation(s) in campaign")
assert all(str(idx) in found_folder_names for idx in range(num_sims)), "ERROR: Subfolder names do not match simulation indices!"

# glob() gives no ordering guarantee, but the following cells pair position
# `idx` in these lists with the folder named str(idx). Build the lists in index
# order so that sim_folders[idx] / sim_configs[idx] always belong to simulation idx.
# (The assertion above guarantees the found names are exactly "0" .. str(num_sims - 1).)
sim_folders = [CAMPAIGN_PATH / str(idx) for idx in range(num_sims)]
sim_folder_names = [f.name for f in sim_folders]
sim_configs = [_sim / SIM_CONFIG_NAME for _sim in sim_folders]
assert all(_cfg.is_file() for _cfg in sim_configs), "ERROR: Simulation config(s) not found!"

Found 5 simulation(s) in campaign


In [5]:
# Check spike files
# For every simulation, work out which .dat file is converted and which .h5
# file is created, and verify the former exists and the latter does not yet.
# NOTE(review): an earlier note here said this cell would not work for converted
# BlueConfig sims; the `blue_config_converted` branch below uses fixed file
# names for them - confirm whether anything else is still pending.
input_spike_files = []
output_spike_files = []
for idx, cfg in enumerate(sim_configs):
    with cfg.open("r") as f:
        cfg_dict = json.load(f)

    sim_id = str(idx)
    if not blue_config_converted:
        # File name comes from the spike replay entry of the simulation config
        inp_file = cfg_dict["inputs"][STIM_NAME]["spike_file"]
        assert Path(inp_file).suffix.lower() == ".dat", "ERROR: .dat input expected!"
        inp_path = SPIKE_PATH / sim_id / inp_file
        out_file = Path(inp_file).stem + ".h5"
    else:
        # Converted BlueConfig sims: fixed file names
        inp_path = SPIKE_PATH / sim_id / "input.dat"
        out_file = "input.h5"

    assert inp_path.is_file(), f"ERROR: Input spike file '{inp_path}' not found!"

    # Never overwrite an already converted file
    out_path = CAMPAIGN_PATH / sim_id / out_file
    assert not out_path.is_file(), f"ERROR: Output spike file '{out_path}' already exists!"

    input_spike_files.append(inp_path)
    output_spike_files.append(out_path)
    print(f"Simulation {idx}: Spike file '{inp_path}'")

Simulation 0: Spike file '/Users/james/Documents/obi/Data/Simulations/BBP-raw/assemblies/d21efd45-7740-4268-b06f-e1de35f2b6cf/0/input.dat'
Simulation 1: Spike file '/Users/james/Documents/obi/Data/Simulations/BBP-raw/assemblies/d21efd45-7740-4268-b06f-e1de35f2b6cf/1/input.dat'
Simulation 2: Spike file '/Users/james/Documents/obi/Data/Simulations/BBP-raw/assemblies/d21efd45-7740-4268-b06f-e1de35f2b6cf/2/input.dat'
Simulation 3: Spike file '/Users/james/Documents/obi/Data/Simulations/BBP-raw/assemblies/d21efd45-7740-4268-b06f-e1de35f2b6cf/3/input.dat'
Simulation 4: Spike file '/Users/james/Documents/obi/Data/Simulations/BBP-raw/assemblies/d21efd45-7740-4268-b06f-e1de35f2b6cf/4/input.dat'


In [9]:
# Run conversion
# Each input .dat file is passed to the external converter together with the
# stimulus population name; the resulting .h5 file is then moved to its
# destination in the curated campaign.
for idx, (inp, out) in enumerate(zip(input_spike_files, output_spike_files)):
    print(f"Simulation {idx}: Converting '{inp}' -> '{out}'...")

    # Pass the arguments as a list and without a shell, so that paths containing
    # spaces or shell metacharacters reach the tool unchanged. A non-zero exit
    # status raises subprocess.CalledProcessError.
    convert_out = subprocess.check_output([CONVERTER_TOOL, str(inp), STIM_POPULATION], text=True)
    print(convert_out)

    # Move converted file from current directory to destination folder
    # (the tool names its result "<input file name>.h5")
    converted_file = inp.name + ".h5"
    assert Path(converted_file).is_file(), "ERROR: Converted file not found!"

    move(src=converted_file, dst=out)


Simulation 0: Converting '/Users/james/Documents/obi/Data/Simulations/BBP-raw/assemblies/d21efd45-7740-4268-b06f-e1de35f2b6cf/0/input.dat' -> '/Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/0/input.h5'...
[REPORTS] [info] :: Trying to convert '/Users/james/Documents/obi/Data/Simulations/BBP-raw/assemblies/d21efd45-7740-4268-b06f-e1de35f2b6cf/0/input.dat' binary report...'
[REPORTS] [info] :: File '/Users/james/Documents/obi/Data/Simulations/BBP-raw/assemblies/d21efd45-7740-4268-b06f-e1de35f2b6cf/0/input.dat' successfully converted to 'input.dat.h5'

Simulation 1: Converting '/Users/james/Documents/obi/Data/Simulations/BBP-raw/assemblies/d21efd45-7740-4268-b06f-e1de35f2b6cf/1/input.dat' -> '/Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/1/input.h5'...
[REPORTS] [info] :: Trying to convert '/Users/james/Documents/obi/Data/Simulations/BBP-raw/assemblies/d21efd45-7740-4268-b06f-e1de35f2b6cf/1/input.dat' binary report...'
[REPORTS] [info] :: File '/Us

In [7]:
# Patch simulation configs so that the spike replay entry references the new
# .h5 file. Nothing is done for converted BlueConfig sims.
if not blue_config_converted:
    for idx, cfg in enumerate(sim_configs):
        # One-time backup next to the config: an existing .BAK is left alone
        # so that the pristine original is never overwritten.
        bak_file = cfg.with_name(cfg.stem + ".BAK")
        if not bak_file.is_file():
            copyfile(src=cfg, dst=bak_file)

        # Load the current config
        with open(cfg, "r") as f:
            cfg_dict = json.load(f)

        # The config must still reference the .dat file that was converted
        stim_cfg = cfg_dict["inputs"][STIM_NAME]
        spk_file = stim_cfg["spike_file"]
        assert spk_file == input_spike_files[idx].name, "ERROR: Spike file name mismatch!"

        # Swap in the name of the converted file and write the config back
        spk_file_new = output_spike_files[idx].name
        stim_cfg["spike_file"] = spk_file_new

        with open(cfg, "w") as f:
            json.dump(cfg_dict, f, indent=2)
        print(f"Simulation {idx}: Config file '{cfg}' written")

# Optionally split nbS1 inputs into two files (VPM and POm)
For old BlueConfig-based simulations in which the VPM and POm inputs were stored together in a single file.

In [10]:
import h5py
import numpy as np

thalamic_input_files = output_spike_files

# Spikes whose node id lies in [VPM_LO, VPM_HI) are written to the VPM file;
# spikes with any other node id are written to the POm file.
VPM_LO = 5_000_000
VPM_HI = 6_000_000  # exclusive

# One pass per simulation found in the campaign (not a fixed number of folders)
for i in range(num_sims):
    output_spike_root = CAMPAIGN_PATH / f"{i}"
    spike_file = output_spike_root / "input.h5"

    assert spike_file.is_file(), f"ERROR: Output spike file '{spike_file}' not found!"

    vpm_out = output_spike_root / "input_VPM.h5"
    pom_out = output_spike_root / "input_POm.h5"

    # The combined file stores all spikes under the "VPM" population group
    with h5py.File(spike_file, "r") as fin:
        node_ids = fin["spikes/VPM/node_ids"][:]
        timestamps = fin["spikes/VPM/timestamps"][:]  # indexed with the same masks as node_ids

    # Masks
    vpm_mask = (node_ids >= VPM_LO) & (node_ids < VPM_HI)
    pom_mask = ~vpm_mask

    # -------- VPM FILE --------
    with h5py.File(vpm_out, "w") as fout:
        g_spikes = fout.create_group("spikes")
        g_vpm = g_spikes.create_group("VPM")

        g_vpm.create_dataset("node_ids", data=node_ids[vpm_mask])
        g_vpm.create_dataset("timestamps", data=timestamps[vpm_mask])

    # -------- POm FILE --------
    with h5py.File(pom_out, "w") as fout:
        g_spikes = fout.create_group("spikes")
        g_pom = g_spikes.create_group("POm")

        g_pom.create_dataset("node_ids", data=node_ids[pom_mask])
        g_pom.create_dataset("timestamps", data=timestamps[pom_mask])

    print(f"Input file:   {spike_file}")
    print(f"VPM file:     {vpm_out} ({vpm_mask.sum()} spikes)")
    print(f"POm file:     {pom_out} ({pom_mask.sum()} spikes)")
    print("-" * 50)


Input file:   /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/0/input.h5
VPM file:     /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/0/input_VPM.h5 (125148 spikes)
POm file:     /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/0/input_POm.h5 (109604 spikes)
--------------------------------------------------
Input file:   /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/1/input.h5
VPM file:     /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/1/input_VPM.h5 (125148 spikes)
POm file:     /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/1/input_POm.h5 (109604 spikes)
--------------------------------------------------
Input file:   /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/2/input.h5
VPM file:     /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/2/input_VPM.h5 (125148 spikes)
POm file:     /Users/james/Documents/obi/Data/Simulations/BBP-

# Optionally create a new output spike file that uses the "S1nonbarrel_neurons" population name
(For old BlueConfig-based simulations.)

In [None]:
import h5py

# Copy the output spikes of every simulation from the "All" population group
# of the raw out.h5 into a new file whose population group is named
# "S1nonbarrel_neurons". Node ids and timestamps are copied unchanged.
if blue_config_converted:
    for idx in range(len(sim_configs)):
        sim_id = str(idx)
        inp_path = SPIKE_PATH / sim_id / "out.h5"

        # Read the raw output spikes
        with h5py.File(inp_path, "r") as fin:
            all_spikes = fin["spikes/All"]
            node_ids = all_spikes["node_ids"][:]
            timestamps = all_spikes["timestamps"][:]

        # Destination folder inside the curated campaign
        reporting_dir = CAMPAIGN_PATH / sim_id / "reporting"
        reporting_dir.mkdir(exist_ok=True)

        # Write the same data under the new population name
        with h5py.File(reporting_dir / "spikes_corrected_population.h5", "w") as f:
            g_population = f.create_group("spikes").create_group("S1nonbarrel_neurons")
            g_population.create_dataset("node_ids", data=node_ids)
            g_population.create_dataset("timestamps", data=timestamps)


# Remap output spike ids

In [23]:
import h5py
import json
import numpy as np
from pathlib import Path
from time import perf_counter

# Parsed content of the mapping file; None means "not loaded yet".
_ID_MAPPING_CACHE = None
_ID_MAPPING_FILE = Path("/Users/james/Documents/obi/Data/Circuits/nbS1-O1/id_mapping.json")

def _load_id_mapping():
    """Return the parsed id-mapping JSON, reading the file at most once.

    The result is kept in the module-level ``_ID_MAPPING_CACHE`` so that
    repeated calls do not parse the file again.

    Returns:
        The object decoded from ``_ID_MAPPING_FILE``. If the file does not
        exist, a message is printed and an empty dict is returned (and cached),
        so callers must treat an empty result as "no mapping available".
    """
    global _ID_MAPPING_CACHE

    if _ID_MAPPING_CACHE is None:
        if _ID_MAPPING_FILE.exists():
            with _ID_MAPPING_FILE.open("r", encoding="utf-8") as f:
                _ID_MAPPING_CACHE = json.load(f)
        else:
            print(f"Mapping file not found: {_ID_MAPPING_FILE}")
            _ID_MAPPING_CACHE = {}

    return _ID_MAPPING_CACHE

def adjust_spike_file(spike_path, output_spike_path, population, map_spikes_to_new_node_ids,
                      output_population="S1nonbarrel_neurons"):
    """Copy the spikes of one population into a new file, optionally remapping node ids.

    Args:
        spike_path: HDF5 spike file to read; the datasets
            ``spikes/<population>/node_ids`` and ``spikes/<population>/timestamps``
            are loaded from it.
        output_spike_path: HDF5 file to create (an existing file is replaced).
            Missing parent folders are created.
        population: Name of the population group to read. It is also the key
            used to look up the population's entry in the id mapping.
        map_spikes_to_new_node_ids: If true, every node id is replaced by the
            position of that id in the ``"old_id"`` list of the population's
            mapping entry. If false, the node ids are written unchanged.
        output_population: Name of the population group in the output file.

    Raises:
        RuntimeError: If remapping is requested but no id mapping is available
            (previously this surfaced as an unbound-variable error at write time).
        KeyError: If a node id has no entry in the population's mapping.
    """
    print(f"Adjusting spike file: {spike_path}")

    with h5py.File(spike_path, "r") as fin:
        print(fin["spikes"].keys())
        node_ids = fin[f"spikes/{population}/node_ids"][:]
        timestamps = fin[f"spikes/{population}/timestamps"][:]

    if map_spikes_to_new_node_ids:
        mapping_dict = _load_id_mapping()
        if not mapping_dict:
            # Writing unmapped ids here would silently produce a wrong file
            raise RuntimeError(
                "Node id remapping was requested but no id mapping could be loaded"
            )

        # New id = position of the old id within the population's "old_id" list
        pop_old_ids = mapping_dict.get(population, {}).get("old_id", {})
        old_to_new = {old_id: new_id for new_id, old_id in enumerate(pop_old_ids)}
        try:
            remapped = [old_to_new[node_id] for node_id in node_ids.tolist()]
        except KeyError as err:
            raise KeyError(
                f"Node id {err.args[0]} of population '{population}' has no entry in the id mapping"
            ) from err

        # Keep the dtype of the ids that were read; without it an empty spike
        # list would be turned into a float array.
        new_node_ids = np.asarray(remapped, dtype=node_ids.dtype)
        changed = int(np.count_nonzero(new_node_ids != node_ids))
        print(f"Remapped {changed:,}/{len(node_ids):,} spike node ids")
    else:
        # No remapping requested: write the ids exactly as they were read
        new_node_ids = node_ids

    # The output is written in both cases
    output_spike_path = Path(output_spike_path)
    output_spike_path.parent.mkdir(parents=True, exist_ok=True)
    with h5py.File(output_spike_path, "w") as f:
        g_spikes = f.create_group("spikes")
        g_population = g_spikes.create_group(output_population)

        g_population.create_dataset("node_ids", data=new_node_ids)
        g_population.create_dataset("timestamps", data=timestamps)

# For every simulation, remap the node ids of the population-corrected spike
# file and store the result as reporting/spikes.h5.
if blue_config_converted:
    for idx in range(len(sim_configs)):
        reporting_dir = CAMPAIGN_PATH / str(idx) / "reporting"
        adjust_spike_file(
            reporting_dir / "spikes_corrected_population.h5",
            reporting_dir / "spikes.h5",
            "S1nonbarrel_neurons",
            True,
        )

Adjusting spike file: /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/0/reporting/spikes_corrected_population.h5
<KeysViewHDF5 ['S1nonbarrel_neurons']>
Remapped 14,436,988/14,436,988 spike node ids
Adjusting spike file: /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/1/reporting/spikes_corrected_population.h5
<KeysViewHDF5 ['S1nonbarrel_neurons']>
Remapped 14,441,051/14,441,051 spike node ids
Adjusting spike file: /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/2/reporting/spikes_corrected_population.h5
<KeysViewHDF5 ['S1nonbarrel_neurons']>
Remapped 14,417,765/14,417,765 spike node ids
Adjusting spike file: /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/3/reporting/spikes_corrected_population.h5
<KeysViewHDF5 ['S1nonbarrel_neurons']>
Remapped 14,436,096/14,436,096 spike node ids
Adjusting spike file: /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/4/reporting/spikes_corrected_population.h5
