# Convert input spikes of a simulation campaign from .dat to .h5 format

- Includes patching the `simulation_config.json` files to point to the new .h5 files

In [29]:
import json
import subprocess
from pathlib import Path
from shutil import copyfile, move

In [30]:
# Root folder of all simulation data (machine-specific; enable the line matching your setup)
# data_root = Path("/Users/pokorny/Data")
data_root = Path("/Users/james/Documents/obi/Data")

# CAMPAIGN_PATH: campaign folder containing one numbered sub-folder per simulation
# SPIKE_PATH:    folder with the raw per-simulation spike files (same numbered sub-folders)
# CAMPAIGN_PATH = data_root.joinpath("Simulations/BBP-curated/SM-whisker-scan")
# SPIKE_PATH = data_root.joinpath("Simulations/BBP-raw/SM-whisker-scan/518fbeaf-f0ba-4f99-a0ff-22cb7e89eed7")
CAMPAIGN_PATH = data_root.joinpath("Simulations/BBP-curated/assemblies")
SPIKE_PATH = data_root.joinpath("Simulations/BBP-raw/assemblies/d21efd45-7740-4268-b06f-e1de35f2b6cf")

# File name of the config inside each simulation folder
SIM_CONFIG_NAME = "simulation_config.json"
# Key of the spike replay entry under "inputs" in the simulation config
STIM_NAME = "Stimulus spikeReplay"
# Population name handed to the converter tool
STIM_POPULATION = "VPM"

# Location of the `spikes_converter` executable (machine-specific)
# CONVERTER_TOOL = "/Users/pokorny/JupyterLab/git/libsonatareport/build/tools/converter/spikes_converter"
CONVERTER_TOOL = "/Users/james/Documents/obi/code/libsonatareport/build/tools/converter/spikes_converter"

In [31]:
# Check sub-folders
# Collects the numbered simulation folders of the campaign and the config file of each one.
sim_folders = list(CAMPAIGN_PATH.glob("*[0-9]"))
sim_folder_names = [f.name for f in sim_folders]
num_sims = len(sim_folders)
print(f"Found {num_sims} simulation(s) in campaign")
assert all(str(idx) in sim_folder_names for idx in range(num_sims)), "ERROR: Subfolder names do not match simulation indices!"

# Path.glob() does not guarantee any particular order (e.g. a lexicographic listing yields
# "0", "1", "10", "2", ...), but the following cells treat the list position as the
# simulation index. The folder names were just verified to be exactly "0" ... "<num_sims - 1>",
# so the lists are rebuilt in index order to make position and index agree.
sim_folders = [CAMPAIGN_PATH / str(idx) for idx in range(num_sims)]
sim_folder_names = [f.name for f in sim_folders]
sim_configs = [_sim / SIM_CONFIG_NAME for _sim in sim_folders]
assert all(_cfg.is_file() for _cfg in sim_configs), "ERROR: Simulation config(s) not found!"

Found 5 simulation(s) in campaign


In [32]:
# Check spike files
# For every simulation config, resolves the .dat spike file to convert and the .h5 file to
# create, and verifies that the input exists and the output does not exist yet.
# Results are stored in `input_spike_files` / `output_spike_files`, in the order of `sim_configs`.
input_spike_files = []
output_spike_files = []
for cfg in sim_configs:
    # The simulation index is the name of the folder the config lives in. Taking it from
    # there (instead of from the list position) keeps each config paired with the spike
    # files of its own simulation even if `sim_configs` is not ordered by index.
    idx = cfg.parent.name
    with open(cfg, "r") as f:
        cfg_dict = json.load(f)
    inp_file = cfg_dict["inputs"][STIM_NAME]["spike_file"]
    assert Path(inp_file).suffix.lower() == ".dat", "ERROR: .dat input expected!"
    inp_path = SPIKE_PATH / idx / inp_file
    assert inp_path.is_file(), f"ERROR: Input spike file '{inp_path}' not found!"
    # Output keeps the base name of the input and is placed in the campaign's simulation folder
    out_file = Path(inp_file).stem + ".h5"
    out_path = CAMPAIGN_PATH / idx / out_file
    assert not out_path.is_file(), f"ERROR: Output spike file '{out_path}' already exists!"
    input_spike_files.append(inp_path)
    output_spike_files.append(out_path)
    print(f"Simulation {idx}: Spike file '{inp_file}'")

Simulation 0: Spike file 'input.dat'
Simulation 1: Spike file 'input.dat'
Simulation 2: Spike file 'input.dat'
Simulation 3: Spike file 'input.dat'
Simulation 4: Spike file 'input.dat'


In [33]:
# Run conversion
# Converts every input .dat spike file with the external converter tool and moves the
# resulting .h5 file to its destination in the campaign folder.
for idx, (inp, out) in enumerate(zip(input_spike_files, output_spike_files)):
    print(f"Simulation {idx}: Converting '{inp}' -> '{out}'...")

    # Run conversion
    # The command is given as an argument list (no shell involved), so paths containing
    # spaces or shell metacharacters are passed to the converter unmodified.
    # check_output() raises CalledProcessError if the tool exits with a non-zero status.
    convert_out = subprocess.check_output([CONVERTER_TOOL, str(inp), STIM_POPULATION], text=True)
    print(convert_out)

    # Move converted file from current directory to destination folder
    # (the file is expected under the input file name with ".h5" appended)
    converted_file = inp.name + ".h5"
    assert Path(converted_file).is_file(), "ERROR: Converted file not found!"
    move(src=converted_file, dst=out)


Simulation 0: Converting '/Users/james/Documents/obi/Data/Simulations/BBP-raw/assemblies/d21efd45-7740-4268-b06f-e1de35f2b6cf/0/input.dat' -> '/Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/0/input.h5'...
[REPORTS] [info] :: Trying to convert '/Users/james/Documents/obi/Data/Simulations/BBP-raw/assemblies/d21efd45-7740-4268-b06f-e1de35f2b6cf/0/input.dat' binary report...'
[REPORTS] [info] :: File '/Users/james/Documents/obi/Data/Simulations/BBP-raw/assemblies/d21efd45-7740-4268-b06f-e1de35f2b6cf/0/input.dat' successfully converted to 'input.dat.h5'

Simulation 1: Converting '/Users/james/Documents/obi/Data/Simulations/BBP-raw/assemblies/d21efd45-7740-4268-b06f-e1de35f2b6cf/1/input.dat' -> '/Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/1/input.h5'...
[REPORTS] [info] :: Trying to convert '/Users/james/Documents/obi/Data/Simulations/BBP-raw/assemblies/d21efd45-7740-4268-b06f-e1de35f2b6cf/1/input.dat' binary report...'
[REPORTS] [info] :: File '/Us

In [None]:
# Patch simulation configs
# Rewrites the "spike_file" entry of the spike replay input in every simulation config so
# that it refers to the converted .h5 file; the original config is backed up once as .BAK.
for idx, cfg in enumerate(sim_configs):
    # One-time backup: an already existing .BAK is left untouched
    backup = cfg.with_name(cfg.stem + ".BAK")
    if not backup.is_file():
        copyfile(src=cfg, dst=backup)

    # Read the current config
    with open(cfg, "r") as f:
        cfg_dict = json.load(f)

    # The config must still reference the original input file; then swap in the new name
    replay_input = cfg_dict["inputs"][STIM_NAME]
    assert replay_input["spike_file"] == input_spike_files[idx].name, "ERROR: Spike file name mismatch!"
    replay_input["spike_file"] = output_spike_files[idx].name

    # Write the patched config back in place
    with open(cfg, "w") as f:
        json.dump(cfg_dict, f, indent=2)
    print(f"Simulation {idx}: Config file '{cfg}' written")

# Optionally split nbS1 inputs into two files (VPM and POm)
For old BlueConfig-based simulations in which the inputs of both populations were stored in a single file

In [34]:
# Imports and constants are loop-invariant and therefore set up once, ahead of the loop
import h5py
import numpy as np  # not used in this cell; kept in case other cells rely on `np` being defined

# Spikes with VPM_LO <= node ID < VPM_HI go to the VPM file, all other spikes to the POm file
VPM_LO = 5_000_000
VPM_HI = 6_000_000  # exclusive


def _write_spike_file(path, population, node_ids, timestamps):
    """Create the .h5 file `path` with datasets `spikes/<population>/node_ids` and `.../timestamps`."""
    # NOTE(review): only the two datasets are written; attributes of the source file
    # (if any) are not carried over — confirm that downstream readers do not need them.
    with h5py.File(path, "w") as fout:
        g_pop = fout.create_group("spikes").create_group(population)
        g_pop.create_dataset("node_ids", data=node_ids)
        g_pop.create_dataset("timestamps", data=timestamps)


thalamic_input_files = output_spike_files

# Split every converted spike file into "<stem>_VPM.h5" and "<stem>_POm.h5" next to it
for spike_file in thalamic_input_files:
    assert spike_file.is_file(), f"ERROR: Output spike file '{spike_file}' not found!"

    vpm_out = spike_file.with_name(spike_file.stem + "_VPM.h5")
    pom_out = spike_file.with_name(spike_file.stem + "_POm.h5")

    # All spikes of the source file are read from the "VPM" population group
    with h5py.File(spike_file, "r") as fin:
        node_ids = fin["spikes/VPM/node_ids"][:]       # (N,)
        timestamps = fin["spikes/VPM/timestamps"][:]   # (N,)

    # Masks
    vpm_mask = (node_ids >= VPM_LO) & (node_ids < VPM_HI)
    pom_mask = ~vpm_mask

    # -------- VPM FILE --------
    _write_spike_file(vpm_out, "VPM", node_ids[vpm_mask], timestamps[vpm_mask])

    # -------- POm FILE --------
    _write_spike_file(pom_out, "POm", node_ids[pom_mask], timestamps[pom_mask])

    print(f"Input file:   {spike_file}")
    print(f"VPM file:     {vpm_out} ({vpm_mask.sum()} spikes)")
    print(f"POm file:     {pom_out} ({pom_mask.sum()} spikes)")
    print("-" * 50)


Input file:   /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/0/input.h5
VPM file:     /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/0/input_VPM.h5 (125148 spikes)
POm file:     /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/0/input_POm.h5 (109604 spikes)
--------------------------------------------------
Input file:   /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/1/input.h5
VPM file:     /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/1/input_VPM.h5 (125148 spikes)
POm file:     /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/1/input_POm.h5 (109604 spikes)
--------------------------------------------------
Input file:   /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/2/input.h5
VPM file:     /Users/james/Documents/obi/Data/Simulations/BBP-curated/assemblies/2/input_VPM.h5 (125148 spikes)
POm file:     /Users/james/Documents/obi/Data/Simulations/BBP-

# Optionally create a new output spike file using the "S1nonbarrel_neurons" population name
(For old BlueConfig-based simulations)

In [None]:
import h5py

# For every simulation, copy the spikes stored under population "All" in the raw "out.h5"
# into a new "spikes.h5" in the campaign folder, under population "S1nonbarrel_neurons".
for idx, _cfg in enumerate(sim_configs):
    src_file = SPIKE_PATH / str(idx) / "out.h5"
    dst_file = CAMPAIGN_PATH / str(idx) / "spikes.h5"

    # Load both datasets fully into memory before the source file is closed
    with h5py.File(src_file, "r") as fin:
        src_group = fin["spikes/All"]
        node_ids = src_group["node_ids"][:]       # (N,)
        timestamps = src_group["timestamps"][:]   # (N,)

    # Create (or overwrite) the destination file with the new population name
    with h5py.File(dst_file, "w") as fout:
        g_pop = fout.create_group("spikes").create_group("S1nonbarrel_neurons")
        g_pop.create_dataset("node_ids", data=node_ids)
        g_pop.create_dataset("timestamps", data=timestamps)
