# Check and register existing (BBP) simulation campaigns to entitycore

In [92]:
import json
import os
from pathlib import Path

import h5py
import numpy as np
from bluepysnap import Circuit, Simulation
from libsonata import SimulationConfig

In [2]:
# Root folders holding the local copies of simulation campaigns and circuits.
# The defaults below can be overridden without editing the notebook by setting
# the BBP_SIM_ROOT / BBP_CIRCUIT_ROOT environment variables before starting the kernel.
SIM_ROOT = Path(os.environ.get("BBP_SIM_ROOT", "/Users/pokorny/Data/Simulations/BBP-curated"))
CIRCUIT_ROOT = Path(os.environ.get("BBP_CIRCUIT_ROOT", "/Users/pokorny/Data/Circuits"))

# Config file names expected inside each simulation sub-folder / circuit folder
SIM_CONFIG_NAME = "simulation_config.json"
CIRCUIT_CONFIG_NAME = "circuit_config.json"

### Campaign specification
- Campaign path assumed to be `<SIM_ROOT>/<name>`
- Circuit path assumed to be `<CIRCUIT_ROOT>/<circuit>`

In [4]:
# Known simulation campaigns, keyed by campaign name (= folder name below SIM_ROOT).
# "circuit" is the folder name of the corresponding circuit below CIRCUIT_ROOT;
# "gpfs_path" is the campaign's path on GPFS.
SIM_CAMPAIGNS = {
    "SM-whisker-soma": {
        "gpfs_path": "/gpfs/bbp.cscs.ch/project/proj83/home/isbister/simulations/elife_sscx_physiology_2024/9-SchneiderMizell/2_whisker_deflections_soma/2_whisker_deflections_soma_0__19_11_24",
        # "circuit": "nbS1-O1-SM"  # FIXME
        "circuit": "nbS1-O1",
    },
    "beta-spont": {
        "gpfs_path": "/gpfs/bbp.cscs.ch/project/proj83/home/pokorny/SimplifiedConnectomeModels/simulations_v2/SSCx-HexO1-Release-TC__Spont",
        "circuit": "nbS1-O1-beta",
    },
    "beta-evoked": {
        "gpfs_path": "/gpfs/bbp.cscs.ch/project/proj83/home/pokorny/SimplifiedConnectomeModels/simulations_v2/SSCx-HexO1-Release-TC__Evoked",
        "circuit": "nbS1-O1-beta",
    },
}

# Select the campaign to check/register here (instead of (un)commenting dict literals)
SELECTED_CAMPAIGN = "SM-whisker-soma"
assert SELECTED_CAMPAIGN in SIM_CAMPAIGNS, f"ERROR: Unknown campaign '{SELECTED_CAMPAIGN}'!"

# Same structure as before: {"name": ..., "gpfs_path": ..., "circuit": ...}
sim_campaign = {"name": SELECTED_CAMPAIGN, **SIM_CAMPAIGNS[SELECTED_CAMPAIGN]}

---
### 1. Check input paths
- Assuming individual simulation subfolders `/0`, `/1`, ... (= index)
- Assuming `"simulation_config.json"` within each subfolder

In [5]:
# Check folders: both the campaign and its circuit must exist locally
campaign_path = SIM_ROOT / sim_campaign["name"]
circuit_path = CIRCUIT_ROOT / sim_campaign["circuit"]

# Report the offending path (consistent with the config-file checks further below),
# so a failure can be resolved without additional debugging
assert campaign_path.is_dir(), f"ERROR: Simulation path '{campaign_path}' not found!"
assert circuit_path.is_dir(), f"ERROR: Circuit path '{circuit_path}' not found!"

In [6]:
# Check sub-folders: entries ending in a digit are taken as simulation folders
sim_folders = list(campaign_path.glob("*[0-9]"))
sim_folder_names = [f.name for f in sim_folders]
num_sims = len(sim_folders)
print(f"Found {num_sims} simulation(s) in campaign '{sim_campaign['name']}'")
assert all(str(idx) in sim_folder_names for idx in range(num_sims)), "ERROR: Subfolder names do not match simulation indices!"

# glob() yields entries in arbitrary (file-system dependent) order, e.g. "0", "1", "10", "2".
# Sort numerically so that the list position equals the simulation index, which is
# what later cells rely on when enumerating the simulations. int() is safe here because
# the assertion above guarantees that the names are exactly "0" ... "<num_sims - 1>".
sim_folders = sorted(sim_folders, key=lambda f: int(f.name))
sim_folder_names = [f.name for f in sim_folders]

Found 1 simulation(s) in campaign 'SM-whisker-soma'


In [7]:
# Check simulation configs: each simulation folder must contain a config file,
# and all configs must reference the same (original) circuit under "network"
sim_configs = [_dir / SIM_CONFIG_NAME for _dir in sim_folders]
orig_circuit = None
for cfg in sim_configs:
    # The message must reference `cfg`; the previously used name `sim_config` is not
    # defined, which turned a missing file into a NameError instead of this assertion
    assert cfg.is_file(), f"ERROR: Simulation config '{cfg}' not found!"
    with open(cfg, "r") as f:
        cfg_dict = json.load(f)
    if orig_circuit is None:
        # First config defines the reference circuit
        orig_circuit = cfg_dict["network"]
    else:
        assert cfg_dict["network"] == orig_circuit, "ERROR: Circuit mismatch!"

In [8]:
# Locate the config of the local campaign circuit and show it next to the
# circuit referenced by the original simulation configs
circuit_config = circuit_path.joinpath(CIRCUIT_CONFIG_NAME)
assert circuit_config.is_file(), f"ERROR: Circuit config '{circuit_config}' not found!"
print(
    f"Original circuit:     '{orig_circuit}'",
    f"New campaign circuit: '{circuit_config}'",
    sep="\n",
)

Original circuit:     '/gpfs/bbp.cscs.ch/project/proj83/home/pokorny/InterneuronRewiringO1/circuits/SSCx-HexO1-Release__O1_Schneider_Mizell_v2/circuit_config_reparam_tc.json'
New campaign circuit: '/Users/pokorny/Data/Circuits/nbS1-O1/circuit_config.json'


### 2. Check simulation contents

In [64]:
def read_spikes(h5_file, population_name):
    """Load the spikes of one node population from an HDF5 spike file.

    Reads the datasets `node_ids` and `timestamps` from the group
    `spikes/<population_name>`.

    Args:
        h5_file: Path of the HDF5 file to open (read-only).
        population_name: Name of the node population group below `spikes/`.

    Returns:
        Tuple `(node_ids, timestamps, unique_node_ids)`, where the first two are
        the datasets as stored in the file and the last one is the sorted array
        of distinct node IDs (`np.unique`).
    """
    with h5py.File(h5_file, "r") as spike_h5:
        population_group = spike_h5[f"spikes/{population_name}"]
        node_ids = population_group["node_ids"][:]
        timestamps = population_group["timestamps"][:]
    spiking_ids = np.unique(node_ids)
    return node_ids, timestamps, spiking_ids

In [112]:
# For every simulation of the campaign, check that its outputs (spikes, reports) and
# spike inputs are consistent with the new campaign circuit, and print a short summary.
c = Circuit(circuit_config)
for idx, cfg in enumerate(sim_configs):
    sim = Simulation(cfg)
    output_dir = Path(sim.config["output"]["output_dir"])
    node_set = sim.config["node_set"]
    print(f"SIMULATION {idx}")
    print(f"  Simulation length: {sim.time_start}-{sim.time_stop}ms")

    # Check populations
    pop_names = sim.spikes.population_names
    assert len(pop_names) == 1, "ERROR: Only one node population expected!"
    npop = pop_names[0]
    assert npop in c.nodes.population_names, f"ERROR: Node population '{npop}' not found in circuit!"
    print(f"  Population name: '{npop}'")

    # Check output spikes: all spiking neurons must belong to the simulated node set
    spike_file = Path(sim.spikes.config.output_dir) / sim.spikes.config.spikes_file
    nids, ts, unique_nids = read_spikes(spike_file, npop)
    node_set_ids = c.nodes[npop].ids(node_set)  # Resolve the node set only once
    assert np.all(np.isin(unique_nids, node_set_ids)), f"ERROR: Node ID mismatch in '{spike_file}'!"
    print(f"  Node set: {node_set} ({len(node_set_ids)} neurons)")
    if len(nids) > 0:
        # NumPy reductions instead of builtin min()/max(), which iterate element-wise in Python
        print(f"  Node IDs: {np.min(nids)}-{np.max(nids)} ({len(unique_nids)} spiking neurons)")
        print(f"  Time stamps {np.min(ts):.1f}-{np.max(ts):.1f}ms")
    else:
        # min()/max() of an empty array would raise; a simulation without spikes is still valid
        print("  Node IDs: n/a (0 spiking neurons)")
        print("  Time stamps n/a")
    print(f"  #Spikes: {len(nids)}")

    # Check voltage reports (if any): recorded neurons must belong to the report's node set
    for report in sim.reports.values():
        report_file = output_dir / (report.name + ".h5")
        report_node_set = report.node_set
        with h5py.File(report_file, "r") as h5:
            report_nids = h5[f"report/{npop}/mapping"]["node_ids"][:]
        report_node_set_ids = c.nodes[npop].ids(report_node_set)
        assert np.all(np.isin(report_nids, report_node_set_ids)), f"ERROR: Node ID mismatch in '{report_file}'!"
        print(f"  Report '{report.name}':")
        print(f"    Node set: {report_node_set} ({len(report_node_set_ids)} neurons)")
        print(f"    Node IDs: {np.min(report_nids)}-{np.max(report_nids)} ({len(np.unique(report_nids))} recorded neurons)")
        print(f"    Length: {report.time_start}-{report.time_stop}ms")

    # Check input spikes (if any): source population and target node set must exist in the circuit
    for inp_name, inp in sim.inputs.items():
        if inp.input_type != SimulationConfig.InputBase.InputType.spikes:
            continue  # Only spike inputs are checked here
        inp_spike_file = inp.spike_file
        print(f"  Spike input '{inp_name}':")
        inp_pop_names = inp.reader.get_population_names()
        assert len(inp_pop_names) == 1, "ERROR: Only one input node population expected!"
        inp_pop = inp_pop_names[0]
        inp_node_set = inp.node_set
        assert inp_pop in c.nodes.population_names, f"ERROR: Input node population '{inp_pop}' not found in circuit!"
        print(f"    Source population: '{inp_pop}'")
        assert inp_node_set in c.node_sets.content, f"ERROR: Input node set '{inp_node_set}' not found in circuit!"
        print(f"    Target node set: '{inp_node_set}'")
        inp_nids, inp_ts, unique_inp_nids = read_spikes(inp_spike_file, inp_pop)
        assert np.all(np.isin(unique_inp_nids, c.nodes[inp_pop].ids())), f"ERROR: Input node ID mismatch in '{inp_spike_file}'!"
        if len(inp_nids) > 0:
            print(f"    Node IDs: {np.min(inp_nids)}-{np.max(inp_nids)} ({len(unique_inp_nids)} spiking neurons)")
            print(f"    Time stamps {np.min(inp_ts):.1f}-{np.max(inp_ts):.1f}ms")
        else:
            # Empty input spike file: nothing to summarize, but not an error
            print("    Node IDs: n/a (0 spiking neurons)")
            print("    Time stamps n/a")
        print(f"    #Spikes: {len(inp_nids)}")

    print()


SIMULATION 0
  Simulation length: 0-12000.0ms
  Population name: 'S1nonbarrel_neurons'
  Node set: hex_O1 (211712 neurons)
  Node IDs: 11-211711 (58922 spiking neurons)
  Time stamps 44.0-12000.0ms
  #Spikes: 514379
  Report 'soma':
    Node set: hex0 (30190 neurons)
    Node IDs: 7-211708 (30190 recorded neurons)
    Length: 1400.0-12000.0ms
  Spike input 'Stimulus spikeReplay':
    Source population: 'VPM'
    Target node set: 'hex_O1'
    Node IDs: 2609-12416 (59 spiking neurons)
    Time stamps 1503.3-11509.4ms
    #Spikes: 1974



### 3. Create and register `Simulation`, `SimulationCampaign`, and `SimulationResult` entities