# Check and register existing (BBP) simulation campaigns to entitycore

In [162]:
import json
import os
import subprocess
from datetime import datetime, UTC
from itertools import product
from pathlib import Path

import h5py
import numpy as np
from bluepysnap import Circuit, Simulation
from entitysdk import Client, ProjectContext, models, types
from libsonata import SimulationConfig
from obi_auth import get_token

In [2]:
# Local root folders. The defaults are machine-specific, so they can be overridden through
# the environment variables BBP_SIM_ROOT / BBP_CIRCUIT_ROOT without editing this cell.
SIM_ROOT = Path(os.environ.get("BBP_SIM_ROOT", "/Users/pokorny/Data/Simulations/BBP-curated"))
CIRCUIT_ROOT = Path(os.environ.get("BBP_CIRCUIT_ROOT", "/Users/pokorny/Data/Circuits"))

# File names expected inside each simulation subfolder / circuit folder
SIM_CONFIG_NAME = "simulation_config.json"
CIRCUIT_CONFIG_NAME = "circuit_config.json"

# Bucket prefix that the campaign's "aws_path" is appended to
AWS_S3 = "s3://openbluebrain/"

# Options to upload assets directly from AWS S3
# (only effective if the campaign specifies an "aws_path"; otherwise local files are used)
use_aws_spike_assets = True
use_aws_voltage_assets = True
use_aws_replay_assets = True

### Campaign specification
- Campaign path assumed to be `<SIM_ROOT>/<folder_name>`
- Circuit path assumed to be `<CIRCUIT_ROOT>/<circuit>`

In [3]:
# Campaign info & metadata
# Keys as they are read by the cells of this notebook:
# - "campaign_name", "campaign_description": name/description of the registered SimulationCampaign
# - "folder_name": campaign folder below SIM_ROOT
# - "campaign_config": campaign config file (JSON) inside the campaign folder
# - "gpfs_path": not read by any cell visible here — presumably the original location, kept for provenance
# - "aws_path" (optional): campaign location below AWS_S3; enables the AWS listing check
# - "aws_inputs" (optional): see inline comment
# - "circuit": circuit folder below CIRCUIT_ROOT, also used as name to look up the Circuit entity
sim_campaign = {
    "campaign_name": "BBP-SM-whisker-soma",
    "campaign_description": "BBP campaign consisting of a single simulation of the nbS1-O1-SM (Schneider-Mizell) circuit with whisker flick stimulation on VPM fibers, incl. soma recording.",
    "folder_name": "SM-whisker-soma",
    "campaign_config": "config.json",
    "gpfs_path": "/gpfs/bbp.cscs.ch/project/proj83/home/isbister/simulations/elife_sscx_physiology_2024/9-SchneiderMizell/2_whisker_deflections_soma/2_whisker_deflections_soma_0__19_11_24",
    "aws_path": "Simulation_data/Simulation_Campaigns/Rat/SSCx/f132b97aa0",
    "aws_inputs": "e6efa007-e7fa-42ae-ae3d-53385ec57154",  # Subfolder on AWS with spike replay inputs
    "circuit": "nbS1-O1-SM"
}

# NOTE(review): The alternative specifications below are incomplete; they lack
# "campaign_name", "campaign_description" and "campaign_config", which later cells
# access unconditionally. Add these keys before enabling one of them.

# sim_campaign = {
#     "folder_name": "beta-spont",
#     "gpfs_path": "/gpfs/bbp.cscs.ch/project/proj83/home/pokorny/SimplifiedConnectomeModels/simulations_v2/SSCx-HexO1-Release-TC__Spont",
#     "circuit": "nbS1-O1-beta"
# }

# sim_campaign = {
#     "folder_name": "beta-evoked",
#     "gpfs_path": "/gpfs/bbp.cscs.ch/project/proj83/home/pokorny/SimplifiedConnectomeModels/simulations_v2/SSCx-HexO1-Release-TC__Evoked",
#     "circuit": "nbS1-O1-beta"
# }

---
### 1. Check input paths
- Assuming individual simulation subfolders `/0`, `/1`, ... (= index)
- Assuming `"simulation_config.json"` within each subfolder

In [4]:
# Resolve the local campaign and circuit folders, then make sure both exist
campaign_path = SIM_ROOT.joinpath(sim_campaign["folder_name"])
circuit_path = CIRCUIT_ROOT.joinpath(sim_campaign["circuit"])

assert campaign_path.is_dir(), "ERROR: Simulation path not found!"
assert circuit_path.is_dir(), "ERROR: Circuit path not found!"

In [5]:
# Check sub-folders
# Only directories with a purely numeric name count as simulations. They are sorted
# numerically (not lexicographically, so "10" comes after "2") because later cells
# assume that position i in `sim_folders` / `sim_configs` belongs to simulation index i.
sim_folders = sorted(
    (p for p in campaign_path.iterdir() if p.is_dir() and p.name.isdecimal()),
    key=lambda p: int(p.name),
)
sim_folder_names = [f.name for f in sim_folders]
num_sims = len(sim_folders)
print(f"Found {num_sims} simulation(s) in campaign '{sim_campaign['folder_name']}'")
# After numeric sorting, the names must be exactly "0", "1", ..., "<num_sims - 1>"
assert sim_folder_names == [str(idx) for idx in range(num_sims)], "ERROR: Subfolder names do not match simulation indices!"

Found 1 simulation(s) in campaign 'SM-whisker-soma'


In [6]:
# Check simulation configs
# Every simulation subfolder must contain a simulation config, and all configs must
# reference the same (original) circuit in their "network" entry.
sim_configs = [sim_dir / SIM_CONFIG_NAME for sim_dir in sim_folders]
orig_circuit = None
for cfg in sim_configs:
    # The message must reference `cfg`; an undefined name here would raise a NameError
    # instead of the intended AssertionError when a config is missing.
    assert cfg.is_file(), f"ERROR: Simulation config '{cfg}' not found!"
    with open(cfg, "r") as f:
        cfg_dict = json.load(f)
    if orig_circuit is None:
        # First config defines the reference circuit
        orig_circuit = cfg_dict["network"]
    else:
        assert cfg_dict["network"] == orig_circuit, "ERROR: Circuit mismatch!"

In [7]:
# Locate the local circuit config and show it next to the circuit the simulations were run with
circuit_config = circuit_path.joinpath(CIRCUIT_CONFIG_NAME)
assert circuit_config.is_file(), f"ERROR: Circuit config '{circuit_config}' not found!"
for line in (
    f"Original circuit:     '{orig_circuit}'",
    f"New campaign circuit: '{circuit_config}'",
):
    print(line)

Original circuit:     '/gpfs/bbp.cscs.ch/project/proj83/home/pokorny/InterneuronRewiringO1/circuits/SSCx-HexO1-Release__O1_Schneider_Mizell_v2/circuit_config_reparam_tc.json'
New campaign circuit: '/Users/pokorny/Data/Circuits/nbS1-O1-SM/circuit_config.json'


In [8]:
# Check AWS path (optional)
# Recursively list the campaign folder on S3; the listing text is later searched for the
# expected asset paths. `aws_out` stays None if the campaign has no "aws_path".
if sim_campaign.get("aws_path"):
    # Argument list instead of a shell string: no shell is spawned, so the path cannot be
    # mangled by (or abused for) shell interpretation.
    aws_out = subprocess.check_output(
        [
            "aws",
            "s3",
            "ls",
            f"{AWS_S3}{sim_campaign['aws_path']}/",
            "--no-sign-request",
            "--human-readable",
            "--recursive",
        ],
        text=True,
    )
else:
    aws_out = None

### 2. Check simulation contents

In [9]:
def read_spikes(h5_file, population_name):
    """Load the spikes of one population from an HDF5 spike file.

    Reads the `node_ids` and `timestamps` datasets found under
    `spikes/<population_name>`.

    Args:
        h5_file: Path of the HDF5 file to open (read-only).
        population_name: Name of the population group below `spikes/`.

    Returns:
        Tuple `(node_ids, timestamps, unique_node_ids)`, where the first two are the
        datasets as read from the file and the last is `np.unique(node_ids)`.
    """
    with h5py.File(h5_file, "r") as h5:
        spike_group = h5[f"spikes/{population_name}"]
        node_ids = spike_group["node_ids"][:]
        timestamps = spike_group["timestamps"][:]
    unique_node_ids = np.unique(node_ids)
    return node_ids, timestamps, unique_node_ids

In [10]:
# For every simulation: check populations, output spikes, voltage reports and spike replay
# inputs against the (new) circuit, and collect the file references to register later.
# Each collected list has one entry per simulation (in `sim_configs` order). A file
# reference is an S3 path (str, relative to AWS_S3) if the respective `use_aws_*` option is
# set AND an AWS listing is available; otherwise it is the local path.
c = Circuit(circuit_config)
spike_files = []
voltage_reports = []
replay_files = []
for idx, cfg in enumerate(sim_configs):
    sim = Simulation(cfg)
    output_dir = Path(sim.config["output"]["output_dir"])
    node_set = sim.config["node_set"]
    print(f"SIMULATION {idx}")
    print(f"  Simulation length: {sim.time_start}-{sim.time_stop}ms")

    # Check populations
    npop = sim.spikes.population_names
    assert len(npop) == 1, "ERROR: Only one node population expected!"
    npop = npop[0]
    assert npop in c.nodes.population_names, f"ERROR: Node population '{npop}' not found in circuit!"
    print(f"  Population name: '{npop}'")

    # Check output spikes
    spike_file = Path(sim.spikes.config.output_dir) / sim.spikes.config.spikes_file
    nids, ts, unique_nids = read_spikes(spike_file, npop)
    # Resolve the node set once; it is needed for both the check and the summary
    node_set_ids = c.nodes[npop].ids(node_set)
    assert np.all(np.isin(unique_nids, node_set_ids)), f"ERROR: Node ID mismatch in '{spike_file}'!"
    print(f"  Node set: {node_set} ({len(node_set_ids)} neurons)")
    # Array reductions instead of builtin min()/max(), which iterate element by element
    print(f"  Node IDs: {nids.min()}-{nids.max()} ({len(unique_nids)} spiking neurons)")
    print(f"  Time stamps {ts.min():.1f}-{ts.max():.1f}ms")
    print(f"  #Spikes: {len(nids)}")
    if use_aws_spike_assets and aws_out:
        # Same relative location as in the local campaign folder, but below the AWS path
        spike_file = str(Path(sim_campaign["aws_path"]) / spike_file.relative_to(campaign_path))
        assert spike_file in aws_out, f"ERROR: Spike file '{spike_file}' not found on {AWS_S3}!"
        print(f"  AWS: {spike_file}")
    spike_files.append(spike_file)

    # Check voltage reports (if any)
    report_files = []
    for report in sim.reports.values():
        report_file = output_dir / (report.name + ".h5")
        report_node_set = report.node_set
        with h5py.File(report_file, "r") as h5:
            report_nids = h5[f"report/{npop}/mapping"]["node_ids"][:]
        report_node_set_ids = c.nodes[npop].ids(report_node_set)
        assert np.all(np.isin(report_nids, report_node_set_ids)), f"ERROR: Node ID mismatch in '{report_file}'!"
        print(f"  Report '{report.name}':")
        print(f"    Node set: {report_node_set} ({len(report_node_set_ids)} neurons)")
        print(f"    Node IDs: {report_nids.min()}-{report_nids.max()} ({len(np.unique(report_nids))} recorded neurons)")
        print(f"    Length: {report.time_start}-{report.time_stop}ms")
        if use_aws_voltage_assets and aws_out:
            report_file = str(Path(sim_campaign["aws_path"]) / report_file.relative_to(campaign_path))
            assert report_file in aws_out, f"ERROR: Report file '{report_file}' not found on {AWS_S3}!"
            print(f"    AWS: {report_file}")
        report_files.append(report_file)
    voltage_reports.append(report_files)

    # Check input spikes (if any)
    inp_files = []
    for inp_name, inp in sim.inputs.items():
        # Only spike (replay) inputs reference a spike file; skip all other input types
        if not inp.input_type == SimulationConfig.InputBase.InputType.spikes:
            continue
        inp_spike_file = inp.spike_file
        print(f"  Spike input '{inp_name}':")
        inp_pop = inp.reader.get_population_names()
        assert len(inp_pop) == 1, "ERROR: Only one input node population expected!"
        inp_pop = inp_pop[0]
        inp_node_set = inp.node_set
        assert inp_pop in c.nodes.population_names, f"ERROR: Input node population '{inp_pop}' not found in circuit!"
        print(f"    Source population: '{inp_pop}'")
        assert inp_node_set in c.node_sets.content, f"ERROR: Input node set '{inp_node_set}' not found in circuit!"
        print(f"    Target node set: '{inp_node_set}'")
        inp_nids, inp_ts, unique_inp_nids = read_spikes(inp_spike_file, inp_pop)
        assert np.all(np.isin(unique_inp_nids, c.nodes[inp_pop].ids())), f"ERROR: Input node ID mismatch in '{inp_spike_file}'!"
        print(f"    Node IDs: {inp_nids.min()}-{inp_nids.max()} ({len(unique_inp_nids)} spiking neurons)")
        print(f"    Time stamps {inp_ts.min():.1f}-{inp_ts.max():.1f}ms")
        print(f"    #Spikes: {len(inp_nids)}")
        if use_aws_replay_assets and aws_out:
            # Replay inputs may live in a dedicated subfolder ("aws_inputs") below the AWS path
            inp_sub = sim_campaign.get("aws_inputs", "")
            inp_spike_file = str(Path(sim_campaign["aws_path"]) / inp_sub / Path(inp_spike_file).relative_to(campaign_path))
            assert inp_spike_file in aws_out, f"ERROR: Replay file '{inp_spike_file}' not found on {AWS_S3}!"
            print(f"    AWS: {inp_spike_file}")
        inp_files.append(inp_spike_file)
    replay_files.append(inp_files)

    print()


SIMULATION 0
  Simulation length: 0-12000.0ms
  Population name: 'S1nonbarrel_neurons'
  Node set: hex_O1 (211712 neurons)
  Node IDs: 11-211711 (58922 spiking neurons)
  Time stamps 44.0-12000.0ms
  #Spikes: 514379
  AWS: Simulation_data/Simulation_Campaigns/Rat/SSCx/f132b97aa0/0/reporting/spikes.h5
  Report 'soma':
    Node set: hex0 (30190 neurons)
    Node IDs: 7-211708 (30190 recorded neurons)
    Length: 1400.0-12000.0ms
    AWS: Simulation_data/Simulation_Campaigns/Rat/SSCx/f132b97aa0/0/reporting/soma.h5
  Spike input 'Stimulus spikeReplay':
    Source population: 'VPM'
    Target node set: 'hex_O1'
    Node IDs: 2609-12416 (59 spiking neurons)
    Time stamps 1503.3-11509.4ms
    #Spikes: 1974
    AWS: Simulation_data/Simulation_Campaigns/Rat/SSCx/f132b97aa0/e6efa007-e7fa-42ae-ae3d-53385ec57154/0/vpm_input.h5



### 3. Check campaign config

In [11]:
# Load the campaign config (JSON) from the campaign folder
campaign_config_file = campaign_path.joinpath(sim_campaign["campaign_config"])
campaign_config_dict = json.loads(campaign_config_file.read_text())

In [12]:
# Per coordinate of the campaign config: its scanned values and the matching positional indices
scan_params = {}
scan_indices = {}
for coord_name, coord in campaign_config_dict["coords"].items():
    coord_values = coord["data"]
    scan_params[coord_name] = coord_values
    scan_indices[coord_name] = list(range(len(coord_values)))
print("Campaign param dict:")
print(scan_params)

Campaign param dict:
{'ca': [1.05], 'depol_stdev_mean_ratio': [0.4], 'desired_connected_proportion_of_invivo_frs': [0.3], 'vpm_pct': [10.0]}


In [13]:
# Cartesian product over all coordinates: one value tuple / index tuple per single simulation
single_params = [*product(*scan_params.values())]
single_indices = [*product(*scan_indices.values())]

In [14]:
# The campaign config's nested "data" array, indexed by a coordinate index tuple, must end
# in a path whose last component equals the running simulation index
for idx, scan_idx in enumerate(single_indices):
    entry = campaign_config_dict["data"]
    for coord_pos in scan_idx:
        entry = entry[coord_pos]
    folder_name = Path(entry).name
    assert folder_name == str(idx), f"ERROR: Index {idx} mismatch ({scan_idx})!"

In [15]:
# Single scan param dicts
# One {dimension name: value} dict per single simulation, in the same order as `single_params`.
dims = campaign_config_dict["dims"]
# The value tuples follow the key order of `scan_params`, so it must equal the declared dims
assert list(scan_params.keys()) == dims, "ERROR: Dims mismatch!"
single_scan_params = [dict(zip(dims, values)) for values in single_params]
# Print only the first and last entry (with an ellipsis in between) to keep the output short
print("Single param dict(s):")
print(single_scan_params[0])
if len(single_scan_params) > 2:
    print("...")
if len(single_scan_params) > 1:
    print(single_scan_params[-1])

Single param dict(s):
{'ca': 1.05, 'depol_stdev_mean_ratio': 0.4, 'desired_connected_proportion_of_invivo_frs': 0.3, 'vpm_pct': 10.0}


### 4. Create and register `Simulation`, `SimulationCampaign`, and `SimulationResult` entities

In [16]:
# Passed as `authorized_public` to every entity model registered below
# (presumably controls whether the entities are publicly accessible — TODO confirm)
authorized_public = False

In [86]:
# Authenticate and create an entitycore client bound to one virtual-lab project
environment = "staging"
# Alternative project:
# project_context = ProjectContext.from_vlab_url("https://staging.openbraininstitute.org/app/virtual-lab/lab/1f91f30e-1489-4e2a-8eb7-1217257c8e19/project/7a411785-6895-4839-aaa2-d9f76e09875a/home")
project_url = "https://staging.openbraininstitute.org/app/virtual-lab/lab/e6030ed8-a589-4be2-80a6-f975406eb1f6/project/2720f785-a3a2-4472-969d-19a53891c817"

token = get_token(environment=environment)
project_context = ProjectContext.from_vlab_url(project_url)
client = Client(environment=environment, project_context=project_context, token_manager=token)

In [18]:
# Look up the registered Circuit entity by name; `.one()` is expected to yield a single match
circuit_query = {"name": sim_campaign["circuit"]}
circuit_entity = client.search_entity(entity_type=models.Circuit, query=circuit_query).one()
print(f"Circuit '{circuit_entity.name}' (ID {circuit_entity.id})")

Circuit 'nbS1-O1-SM' (ID 28049bf1-2082-4fa5-a6d6-540497da6d54)


In [19]:
# Register the SimulationCampaign entity (linked to the circuit entity), then attach the
# campaign config file (config.json) to it as "campaign_summary" asset
campaign_fields = {
    "name": sim_campaign["campaign_name"],
    "description": sim_campaign["campaign_description"],
    "entity_id": circuit_entity.id,
    "scan_parameters": scan_params,
    "authorized_public": authorized_public,
}
campaign_model = models.SimulationCampaign(**campaign_fields)
campaign_entity = client.register_entity(campaign_model)
print(f"Registered SimulationCampaign '{campaign_entity.name}' (ID {campaign_entity.id})")

campaign_asset = client.upload_file(
    entity_type=models.SimulationCampaign,
    entity_id=campaign_entity.id,
    asset_label="campaign_summary",
    file_content_type="application/json",
    file_path=campaign_config_file,
)
print(f"Uploaded '{campaign_asset.label}' asset for campaign '{campaign_entity.name}' (ID {campaign_asset.id})")

Registered campaign 'BBP-SM-whisker-soma' (ID 431763dd-b629-4b64-80d2-0f09002b6d57)
Uploaded 'campaign_summary' asset for campaign 'BBP-SM-whisker-soma' (ID 4c7ded9a-9553-44c7-b992-95ec33dd412a)


In [20]:
# Create and register Simulation entities + asset (SONATA config) and SimulationResult + assets
HDF5_CONTENT_TYPE = "application/x-hdf5"

# The collected file lists only contain S3 storage paths if the respective option is enabled
# AND an AWS listing was available when the files were checked; otherwise they contain local
# paths, which must be uploaded instead of being registered as S3 objects.
aws_listing_available = bool(aws_out)
replay_from_aws = use_aws_replay_assets and aws_listing_available
spikes_from_aws = use_aws_spike_assets and aws_listing_available
voltage_from_aws = use_aws_voltage_assets and aws_listing_available

# Fail before anything is registered if there are fewer checked simulations than scan
# parameter sets; otherwise the loop would abort half-way with partially registered entities.
num_checked = min(len(sim_configs), len(spike_files), len(voltage_reports), len(replay_files))
assert len(single_scan_params) <= num_checked, "ERROR: Fewer checked simulations than scan parameter sets!"


def _attach_hdf5_asset(entity, entity_type, file_ref, asset_label, from_aws):
    """Attach one HDF5 file as an asset to a registered entity and print a summary line.

    Args:
        entity: Registered entity the asset belongs to (its `id` is used).
        entity_type: Entity model class passed on to the client.
        file_ref: S3 storage path (if `from_aws`) or local file path (otherwise).
        asset_label: Asset label, e.g. "spike_report".
        from_aws: If true, the file is registered in place as an "aws_s3_open" asset;
            otherwise the local file is uploaded.

    Returns:
        The asset object returned by the client.
    """
    if from_aws:
        asset = client.register_asset(
            entity_id=entity.id,
            entity_type=entity_type,
            name=Path(file_ref).name,
            storage_path=file_ref,
            storage_type="aws_s3_open",
            is_directory=False,
            content_type=HDF5_CONTENT_TYPE,
            asset_label=asset_label,
        )
        msg = "Registered from AWS-S3"
    else:
        asset = client.upload_file(
            entity_id=entity.id,
            entity_type=entity_type,
            file_path=file_ref,
            file_content_type=HDF5_CONTENT_TYPE,
            asset_label=asset_label,
        )
        msg = "Uploaded"
    print(f"  > {msg} '{asset.path}' as '{asset.label}' asset (ID {asset.id})")
    return asset


for idx, param_dict in enumerate(single_scan_params):
    ###############################################################################################
    # Simulation
    ###############################################################################################
    sim_model = models.Simulation(
        name=f"Simulation {idx}",
        description=f"Simulation {idx}",
        scan_parameters=param_dict,
        entity_id=circuit_entity.id,
        simulation_campaign_id=campaign_entity.id,
        authorized_public=authorized_public,
    )

    sim_entity = client.register_entity(sim_model)
    print(f"Registered Simulation '{sim_entity.name}' (ID {sim_entity.id})")

    # The SONATA simulation config is always uploaded from the local campaign folder
    sim_cfg_asset = client.upload_file(
        entity_id=sim_entity.id,
        entity_type=models.Simulation,
        file_path=sim_configs[idx],
        file_content_type="application/json",
        asset_label="sonata_simulation_config",
    )
    print(f"Uploaded '{sim_cfg_asset.path}' as '{sim_cfg_asset.label}' asset (ID {sim_cfg_asset.id})")

    # REPLAY ASSETS
    for repl in replay_files[idx]:
        _attach_hdf5_asset(sim_entity, models.Simulation, repl, "replay_spikes", replay_from_aws)

    #
    # TODO: Add custom node sets, if any
    #

    ###############################################################################################
    # SimulationResult
    ###############################################################################################
    sim_res_model = models.SimulationResult(
        name="simulation_result",
        description="Simulation result",
        simulation_id=sim_entity.id,
        authorized_public=authorized_public,
    )
    sim_res_entity = client.register_entity(sim_res_model)
    print(f"Registered SimulationResult '{sim_res_entity.name}' (ID {sim_res_entity.id})")

    # SPIKE ASSET
    _attach_hdf5_asset(sim_res_entity, models.SimulationResult, spike_files[idx], "spike_report", spikes_from_aws)

    # VOLTAGE REPORT ASSETS
    for vrep in voltage_reports[idx]:
        _attach_hdf5_asset(sim_res_entity, models.SimulationResult, vrep, "voltage_report", voltage_from_aws)

    ###############################################################################################
    # SimulationExecution(Activity): Required to retrieve a SimulationResult for a given Simulation
    ###############################################################################################

    # The simulations were run beforehand, so the registration time is used as a placeholder
    # for both start and end time
    now = datetime.now(UTC)
    sim_exec_model = models.SimulationExecution(
        start_time=now,
        end_time=now,
        used=[sim_entity],
        generated=[sim_res_entity],
        status=types.SimulationExecutionStatus.done,
    )
    sim_exec_entity = client.register_entity(sim_exec_model)
    print(f"Registered SimulationExecution (ID {sim_exec_entity.id})")
    print()

Registered 'Simulation 0' (ID 27a05ebf-9ef0-4c4c-af6f-25742118acd9)
Uploaded 'simulation_config.json' as 'sonata_simulation_config' asset (ID fd61f9c2-2681-4901-bb6d-09d3ed96d0fe)
Registered from AWS-S3 'vpm_input.h5' as 'replay_spikes' asset (ID 9b5c2c7e-f4f2-42dc-813f-434c41e7a20d)
Registered 'simulation_result' (ID 324c9d48-ffa0-45fa-b3ba-9456bdff6638)
Registered from AWS-S3 'spikes.h5' as 'spike_report' asset (ID 8d52a08c-de51-4a20-b227-1a3385518ab5)
Registered from AWS-S3 'soma.h5' as 'voltage_report' asset (ID 5d00bd7e-9869-4f2c-8a91-0698df385667)


### 5. Check registered campaign

In [172]:
# Fetch the campaign back from entitycore by its ID to confirm the registration
registered_campaign = client.get_entity(
    entity_type=models.SimulationCampaign,
    entity_id=campaign_entity.id,
)
# OR
# registered_campaign = client.search_entity(entity_type=models.SimulationCampaign, query={"name": sim_campaign["campaign_name"]}).one()
print(f"Found campaign '{registered_campaign.name}' (ID {registered_campaign.id})")

Found campaign 'BBP-SM-whisker-soma' (ID 431763dd-b629-4b64-80d2-0f09002b6d57)
