# Manually patching edge files to add missing HDF5 datasets

The following datasets (size N = number of edges) will be added:
- `edge_type_id`: All values -1
- `edge_group_id`: All values 0
- `edge_group_index`: Values 0..N-1


In [1]:
import h5py
import numpy as np
import os
import shutil

from bluepysnap import Circuit

In [2]:
# Circuit config (JSON) of the circuit whose edge files are to be patched; it is
# passed to bluepysnap's `Circuit` in the cells below. Only one assignment should
# be active: un-comment one of the alternatives to patch a different circuit.
# NOTE(review): these are absolute, machine-specific paths; adjust before running elsewhere.
circuit_config = "/Users/pokorny/JupyterLab/git/obi-output/containerized_EM_circuits/MICrONS__debug/circuit_config.json"
# circuit_config = "/Users/pokorny/JupyterLab/git/obi-output/containerized_EM_circuits/MICrONS/circuit_config.json"
# circuit_config = "/Users/pokorny/JupyterLab/git/obi-output/containerized_EM_circuits/H01/circuit_config.json"

In [5]:
# Collect all edge populations whose HDF5 group has no "edge_type_id" dataset;
# these are the files that the patching cell below will modify.
c = Circuit(circuit_config)
edge_populations = c.edges.population_names


def _needs_patching(pop_name, h5_path):
    """Return True if the population's group in `h5_path` lacks "edge_type_id"."""
    with h5py.File(h5_path, "r") as h5_file:
        return "edge_type_id" not in h5_file["edges"][pop_name]


# Maps population name -> path of the edge file to patch
edge_files = {}
for pop_name in edge_populations:
    h5_path = c.edges[pop_name].h5_filepath
    if _needs_patching(pop_name, h5_path):
        edge_files[pop_name] = h5_path

print(f"{len(edge_files)} edge files to patch:")
for h5_path in edge_files.values():
    print(f"  {h5_path}")

3 edge files to patch:
  /Users/pokorny/JupyterLab/git/obi-output/containerized_EM_circuits/MICrONS__debug/microns_to_sonata_edges/0/extrinsic_edges.h5
  /Users/pokorny/JupyterLab/git/obi-output/containerized_EM_circuits/MICrONS__debug/microns_to_sonata_edges/0/intrinsic_edges.h5
  /Users/pokorny/JupyterLab/git/obi-output/containerized_EM_circuits/MICrONS__debug/microns_to_sonata_edges/0/virtual_edges.h5


In [6]:
# Patch edge files: for every population collected in `edge_files`, add the
# datasets `edge_type_id` (all -1), `edge_group_id` (all 0) and
# `edge_group_index` (0..N-1) to the population's HDF5 group, then re-open the
# file read-only and verify what was written.
for edge_pop, edge_file in edge_files.items():
    print(f"Patching population '{edge_pop}'...")

    # Make a backup copy of the edge file. An existing backup is never
    # overwritten; this is raised explicitly (rather than via `assert`) so the
    # guard stays active even when assertions are disabled (`python -O`).
    fn, fext = os.path.splitext(edge_file)
    edge_file_bak = fn + "__BAK__" + fext
    if os.path.exists(edge_file_bak):
        raise FileExistsError(f"ERROR: Backup file '{edge_file_bak}' already exists!")
    shutil.copy(edge_file, edge_file_bak)

    # Open the edge file for in-place modification. The context manager makes
    # sure the handle is closed even if creating a dataset fails.
    with h5py.File(edge_file, "r+") as h5:
        root = h5["edges"][edge_pop]

        # Take the edge count from the dataset's shape instead of loading the
        # whole `source_node_id` array into memory.
        num_edges = root["source_node_id"].shape[0]
        print(f"{num_edges} edges")

        # Create the arrays with the exact dtypes they are stored with, so no
        # implicit float->int or signed->unsigned cast happens on write.
        edge_type_id = np.full(num_edges, -1, dtype=np.int64)
        edge_group_id = np.zeros(num_edges, dtype=np.int64)
        edge_group_index = np.arange(num_edges, dtype=np.uint64)

        # Add datasets to h5 file (create_dataset raises if a name already exists)
        root.create_dataset("edge_type_id", data=edge_type_id, dtype='i8')
        root.create_dataset("edge_group_id", data=edge_group_id, dtype='i8')
        root.create_dataset("edge_group_index", data=edge_group_index, dtype='u8')

    # Check again by re-loading h5 file
    with h5py.File(edge_file, "r") as h5:
        _edge_type_id = np.array(h5["edges"][edge_pop]["edge_type_id"])
        _edge_group_id = np.array(h5["edges"][edge_pop]["edge_group_id"])
        _edge_group_index = np.array(h5["edges"][edge_pop]["edge_group_index"])

    print("edge_type_id: " + str(_edge_type_id))
    print("edge_group_id:" + str(_edge_group_id))
    print("edge_group_index:" + str(_edge_group_index))

    assert np.array_equal(_edge_type_id, edge_type_id), "ERROR: edge_type_id mismatch!"
    assert np.array_equal(_edge_group_id, edge_group_id), "ERROR: edge_group_id mismatch!"
    assert np.array_equal(_edge_group_index, edge_group_index), "ERROR: edge_group_index mismatch!"

    print("...DONE")
    print()

Patching population 'em_extrinsic__microns_intrinsic__chemical'...
231537 edges
edge_type_id: [-1 -1 -1 ... -1 -1 -1]
edge_group_id:[0 0 0 ... 0 0 0]
edge_group_index:[     0      1      2 ... 231534 231535 231536]
...DONE

Patching population 'microns_intrinsic__microns_intrinsic__chemical'...
11556253 edges
edge_type_id: [-1 -1 -1 ... -1 -1 -1]
edge_group_id:[0 0 0 ... 0 0 0]
edge_group_index:[       0        1        2 ... 11556250 11556251 11556252]
...DONE

Patching population 'virtual_microns_intrinsic__microns_intrinsic__chemical'...
655840 edges
edge_type_id: [-1 -1 -1 ... -1 -1 -1]
edge_group_id:[0 0 0 ... 0 0 0]
edge_group_index:[     0      1      2 ... 655837 655838 655839]
...DONE



In [7]:
# Sanity check: re-open the circuit with SNAP and report its total sizes.
# If the edge files cannot be loaded, accessing `.size` will raise an error.
c = Circuit(circuit_config)
print(circuit_config)

node_count = c.nodes.size
print(f"{node_count} nodes in total")

edge_count = c.edges.size
print(f"{edge_count} edges in total")

/Users/pokorny/JupyterLab/git/obi-output/containerized_EM_circuits/MICrONS__debug/circuit_config.json
246972 nodes in total
12443630 edges in total
