In [1]:
"""Get all imports."""
import csv
import json
import time

import httpx
from bmt import Toolkit

from gandalf import (
    CSRGraph,
    build_graph_from_jsonl,
    compare_trapi_messages,
    debug_missing_edge,
    enrich_knowledge_graph,
    lookup,
    validate_trapi_response,
)
from gandalf.search import (
    do_one_hop,
)

t0 = time.perf_counter()
bmt = Toolkit()
t1 = time.perf_counter()
print(f"BMT Initialization took: {t1 - t0} seconds.")


BMT Initialization took: 1.4295143750059651 seconds.


In [None]:
"""Build a new graph from jsonl files. Saves to pickle."""

# Build graph from edges and nodes
graph = build_graph_from_jsonl(
    edge_jsonl_path="../02_04_2026/edges.jsonl",
    node_jsonl_path="../02_04_2026/nodes.jsonl",
)

# Save for fast reloading
graph.save_mmap("../02_04_2026/gandalf_mmap")
print("Graph saved!")

Pass 1: Collecting vocabularies from ../02_04_2026/edges.jsonl...
  1,000,000 edges scanned...
  2,000,000 edges scanned...
  3,000,000 edges scanned...
  4,000,000 edges scanned...
  5,000,000 edges scanned...
  6,000,000 edges scanned...
  7,000,000 edges scanned...
  8,000,000 edges scanned...
  9,000,000 edges scanned...
  10,000,000 edges scanned...
  11,000,000 edges scanned...
  12,000,000 edges scanned...
  13,000,000 edges scanned...
  14,000,000 edges scanned...
  15,000,000 edges scanned...
  16,000,000 edges scanned...
  17,000,000 edges scanned...
  18,000,000 edges scanned...
  19,000,000 edges scanned...
  20,000,000 edges scanned...
  21,000,000 edges scanned...
  22,000,000 edges scanned...
  23,000,000 edges scanned...
  24,000,000 edges scanned...
  25,000,000 edges scanned...
  26,000,000 edges scanned...
  27,000,000 edges scanned...
  28,000,000 edges scanned...
  29,000,000 edges scanned...
  30,000,000 edges scanned...
  31,000,000 edges scanned...
  32,000,000 

In [2]:
"""Load in a pickled graph."""

# Load saved graph (takes ~1-2 seconds instead of 280!)
graph = CSRGraph.load_mmap("../02_04_2026/gandalf_mmap")

Loading graph from ../02_04_2026/gandalf_mmap (mmap_mode=r)...
  Edge property dedup: 38,037,368 edges -> 574 unique source configs, 3,639 unique qualifier combos
  LMDB detail store: ../02_04_2026/gandalf_mmap/edge_properties.lmdb
Graph loaded in 3.39s (edge_properties: 0.02s)
  1,661,857 nodes, 38,037,368 edges, 52 predicates


In [None]:
CSRGraph.save_mmap(directory="../12_17_2025/gandalf_mmap")

TypeError: CSRGraph.save_mmap() missing 1 required positional argument: 'directory'

In [None]:
"""Just set the start and end nodes you're looking for."""
start_node = "CHEBI:45783"  # Imatinib
end_node = "MONDO:0004979"  # Asthma
start_idx = graph.get_node_idx(start_node)
end_idx = graph.get_node_idx(end_node)

start_idx = graph.get_node_idx(start_node)
end_idx = graph.get_node_idx(end_node)

print(f"Start degree: {graph.degree(start_idx):,}")
print(f"End degree: {len(graph.incoming_neighbors(end_idx)):,}")

In [7]:
valid_predicates = []
for predicate in ["biolink:treats",
        "biolink:affects",
        "biolink:regulates",
        "biolink:increases_expression_of",
        "biolink:decreases_expression_of",
        "biolink:gene_associated_with_condition",
        "biolink:has_metabolite",
        "biolink:metabolized_by",
        "biolink:applied_to_treat",
        "biolink:contraindicated_for",
        "biolink:directly_physically_interacts_with",
        "biolink:has_contraindication",
        "biolink:subject_of_treatment_application_or_study_for_treatment_by",
        "biolink:contribution_from"]:
    if bmt.get_element(predicate) is not None:
        valid_predicates.append(predicate)

print(valid_predicates)

['biolink:treats', 'biolink:affects', 'biolink:regulates', 'biolink:gene_associated_with_condition', 'biolink:has_metabolite', 'biolink:applied_to_treat', 'biolink:directly_physically_interacts_with', 'biolink:has_contraindication', 'biolink:subject_of_treatment_application_or_study_for_treatment_by', 'biolink:contribution_from']


In [9]:
"""Do Pathfinder Imatinib to Asthma."""

t0 = time.perf_counter()

pathfinder_results = lookup(graph, {
  "message": {
    "query_graph": {
      "nodes": {
        "n0": {
          "ids": ["CHEBI:45783"]
        },
        "n1": {},
        "n2": {},
        "n3": {
          "ids": ["MONDO:0004979"]
        }
      },
      "edges": {
        "e0": {
          "subject": "n0",
          "object": "n1",
          "predicates": ["biolink:related_to"]
        },
        "e1": {
          "subject": "n1",
          "object": "n2",
          "predicates": ["biolink:related_to"]
        },
        "e2": {
          "subject": "n2",
          "object": "n3",
          "predicates": ["biolink:related_to"]
        }
      }
    }
  }
}, bmt)

t1 = time.perf_counter()
print(f"Pathfinder returned {len(pathfinder_results['message']['results'])} results in {t1 - t0}s")

# result = validate_trapi_response(graph, pathfinder_results)
# print(result.summary())

Using provided BMT instance
Rewriting query graph for subclass expansion (depth=1)
Query graph: 6 nodes, 5 edges

Processing edge 'n0_subclass_edge': n0 -> n0_superclass
  Subclass traversal: found 1 matches (depth=1)
  Found 1 matching edges
  Remaining edges: 4

Processing edge 'e0': n0 -> n1
  Query predicates: ['biolink:related_to']
  Expanded to 0 forward, 0 inverse predicates
  Forward search from 1 pinned nodes
  [GC] Gen 1: 2.07s, collected 0 objects
  [GC] Gen 2: 4.15s, collected 0 objects
  Traversed 6,776 total neighbors
  Slow nodes (>0.1s): 1
    Node 151747: 6,776 neighbors, 6.22s
  Found 6,776 matches in 6.221s
  Found 6,776 matching edges
  Remaining edges: 3

Processing edge 'n3_subclass_edge': n3 -> n3_superclass
  Subclass traversal: found 20 matches (depth=1)
  Found 20 matching edges
  Remaining edges: 2

Processing edge 'e2': n2 -> n3
  Query predicates: ['biolink:related_to']
  Expanded to 0 forward, 0 inverse predicates
  Backward search from 20 pinned nodes
  T

In [31]:
result = debug_missing_edge(graph, "NCBIGene:596", "biolink:affects", "CHEBI:30742")
print(result)

# print(diagnose_graph_edge_storage(graph, "GO:0022408"))

Debug report for missing edge:
  Expected: NCBIGene:596 --[biolink:affects]--> CHEBI:30742

Predicate info:
  Predicate: biolink:affects
  Is symmetric: False
  Inverse: (none)

Node existence:
  Subject 'NCBIGene:596': EXISTS (idx=431713)
  Object 'CHEBI:30742': EXISTS (idx=141868)

Edges found between these nodes:
  [forward] NCBIGene:596 --[biolink:affects]--> CHEBI:30742
  [forward] NCBIGene:596 --[biolink:affects]--> CHEBI:30742
  [reverse] CHEBI:30742 --[biolink:affects]--> NCBIGene:596
  [reverse] CHEBI:30742 --[biolink:affects]--> NCBIGene:596
  [reverse] CHEBI:30742 --[biolink:affects]--> NCBIGene:596

Expected matches (any of these would validate):
  1. Forward: NCBIGene:596 --[biolink:affects]--> CHEBI:30742

Subject 'NCBIGene:596' neighbors (first 10):
  -> CHEBI:133021 via ['biolink:interacts_with']
  -> CHEBI:16991 via ['biolink:coexists_with', 'biolink:interacts_with']
  -> CHEBI:16991 via ['biolink:coexists_with', 'biolink:interacts_with']
  -> CHEBI:18070 via ['biolink

In [29]:
import json
start_node = "CHEBI:45783"
paths = do_one_hop(graph, start_node)

print(len(paths))

path_obj = []
for path in paths:
    o = graph.get_node_id(path)
    path_obj.append((start_node, o))

with open("imatinib.json", "w", encoding="utf-8") as f:
    json.dump(path_obj, f, indent=2)


6395


In [5]:
"""Check all edges for number of neighbors."""

line_count = 0
edges = []

with open("../02_04_2026/edges.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        if line_count % 1_000_000 == 0:
            print(f"  Processed {line_count:,} edges...")

        # if line_count < 26_000_000:
        #     line_count += 1
        #     continue

        data = json.loads(line)
        # if (
        #     data["predicate"] == "biolink:affects" and
        #     (
        #         data["subject"] == "NCBIGene:2277" or
        #         data["object"] == "NCBIGene:2277"
        #     ) and
        #     (
        #         data["object"] == "CHEBI:30742" or
        #         data["subject"] == "CHEBI:30742"
        #     )
        # ):
        #     edges.append(data)
        # if line_count % 1000 == 0:
        #     edges.append(data)

        if (
            "qualifiers" in data
        ):
            edges.append(data)

        # if data["predicate"] == "biolink:genetic_association":
        #     if (
        #         "biolink:Gene" in nodes[data["subject"]]["category"] and
        #         "biolink:DiseaseOrPhenotypicFeature" in nodes[data["object"]]["category"]
        #     ):
        #         print(data)
        #         break
        # if line_count % 10_000 == 0:
        #     edges.append(data)
        # if data["subject"] == "MONDO:0007186" and data["object"] == "MONDO:0007186":
        #     neighbors += 1
        #     edges.append(data)
        # if data["subject"] == "MONDO:0004979" or data["object"] == "MONDO:0004979":
        #     neighbors += 1
        line_count += 1
        if len(edges) > 1000:
            break

with open("./sample_edges.json", "w", encoding="utf-8") as f:
    json.dump(edges, f, indent=2)


  Processed 0 edges...
  Processed 1,000,000 edges...
  Processed 2,000,000 edges...
  Processed 3,000,000 edges...
  Processed 4,000,000 edges...
  Processed 5,000,000 edges...
  Processed 6,000,000 edges...
  Processed 7,000,000 edges...
  Processed 8,000,000 edges...
  Processed 9,000,000 edges...
  Processed 10,000,000 edges...
  Processed 11,000,000 edges...
  Processed 12,000,000 edges...
  Processed 13,000,000 edges...
  Processed 14,000,000 edges...
  Processed 15,000,000 edges...
  Processed 16,000,000 edges...
  Processed 17,000,000 edges...
  Processed 18,000,000 edges...
  Processed 19,000,000 edges...
  Processed 20,000,000 edges...
  Processed 21,000,000 edges...
  Processed 22,000,000 edges...
  Processed 23,000,000 edges...
  Processed 24,000,000 edges...
  Processed 25,000,000 edges...
  Processed 26,000,000 edges...
  Processed 27,000,000 edges...
  Processed 28,000,000 edges...
  Processed 29,000,000 edges...
  Processed 30,000,000 edges...
  Processed 31,000,000 edg

In [None]:
lookup_paths = lookup(graph, {
    "message": {
      "query_graph": {
        "nodes": {
          "SN": {
            "categories": [
              "biolink:ChemicalEntity"
            ]
          },
          "ON": {
            "ids": [
              "MONDO:0007186"
            ],
            "categories": [
              "biolink:DiseaseOrPhenotypicFeature"
            ]
          },
          "e": {
            "categories": [
              "biolink:ChemicalEntity"
            ]
          },
          "i": {
            "categories": [
              "biolink:BiologicalEntity"
            ]
          }
        },
        "edges": {
          "edge_0": {
            "subject": "e",
            "object": "ON",
            "predicates": [
              "biolink:treats"
            ]
          },
          "edge_1": {
            "subject": "i",
            "object": "SN",
            "predicates": [
              "biolink:affects"
            ],
            "qualifier_constraints": [
              {
                "qualifier_set": [
                  {
                    "qualifier_type_id": "biolink:object_aspect_qualifier",
                    "qualifier_value": "activity_or_abundance"
                  },
                  {
                    "qualifier_type_id": "biolink:object_direction_qualifier",
                    "qualifier_value": "decreased"
                  }
                ]
              }
            ]
          },
          "edge_2": {
            "subject": "i",
            "object": "e",
            "predicates": [
              "biolink:affects"
            ],
            "qualifier_constraints": [
              {
                "qualifier_set": [
                  {
                    "qualifier_type_id": "biolink:object_aspect_qualifier",
                    "qualifier_value": "activity_or_abundance"
                  },
                  {
                    "qualifier_type_id": "biolink:object_direction_qualifier",
                    "qualifier_value": "decreased"
                  }
                ]
              }
            ]
          }
        }
      }
    }
  }, bmt)

with open("../12_17_2025/heartburn_response.json", "w") as f:
    json.dump(lookup_paths, f, indent=2)

Using provided BMT instance
Rewriting query graph for subclass expansion (depth=1)
Query graph: 3 nodes, 2 edges

Processing edge 'ON_subclass_edge': ON -> ON_superclass
  Subclass traversal: found 2 matches (depth=1)
  Found 2 matching edges
  Remaining edges: 1

Processing edge 'edge_0': e -> ON
  Query predicates: ['biolink:treats']
  Expanded to 3 forward, 0 inverse predicates
  Backward search from 2 pinned nodes
  Traversed 4,954 total incoming neighbors
  Found 200 matches in 0.011s
  Found 200 matching edges
  Remaining edges: 0

Reconstructing complete paths...
  Join order: ['ON_subclass_edge', 'edge_0']
  Starting with 2 paths from edge 'ON_subclass_edge'
  Join 1/1: Adding edge 'edge_0' (2 paths)... -> 200 paths (0.00s)
  Path reconstruction took 0.00s
  Enriching 200 paths...
  Cached properties for 151 unique nodes (0.01s)
  Enrichment took 0.07s
Found 200 complete paths
  Grouped into 150 unique node paths (0.00s)
  Built 150 results (0.00s)
  Post-processing total: 0.00

In [7]:
import httpx

with open("./expanded_messages.json", "r", encoding="utf-8") as f:
    messages = json.load(f)

all_results = 0
for indx, message in enumerate(messages):
    message["parameters"]["tiers"] = [1]
    t0 = time.perf_counter()
    try:
        with httpx.Client(timeout=60) as client:
            print(message)
            response = client.post(
                "https://automat.renci.org/translatorkg/query",
                json=message,
            )
            response.raise_for_status()
            response = response.json()
            with open(f"heartburn_responses/response_neo4j_{indx}.json", "w", encoding="utf-8") as f:
                json.dump(response, f, indent=2)
            num_results = len((response.get("message") or {}).get("results") or [])
            result = validate_trapi_response(graph, response)
            print(result.summary())
    except httpx.ReadTimeout:
        print("Timed out")
        num_results = 0
    except httpx.HTTPError as e:
        print("Got bad response:", response.content)
        num_results = 0
    print(f"Returned {num_results} results")
    all_results += num_results
    t1 = time.perf_counter()
    print(f"Query took {t1 - t0} seconds")

print("Total results:", all_results)

{'message': {'query_graph': {'nodes': {'ON': {'categories': ['biolink:Disease'], 'ids': ['MONDO:0007186']}, 'SN': {'categories': ['biolink:ChemicalEntity']}}, 'edges': {'t_edge': {'subject': 'SN', 'object': 'ON', 'predicates': ['biolink:treats']}}}}, 'parameters': {'tiers': [1], 'timeout': 210}}
Validation PASSED
  Total paths: 150
  Valid paths: 150
  Invalid paths: 0
Returned 150 results
Query took 0.7852157919999172 seconds
{'message': {'query_graph': {'nodes': {'SN': {'categories': ['biolink:ChemicalEntity']}, 'ON': {'ids': ['MONDO:0007186'], 'categories': ['biolink:DiseaseOrPhenotypicFeature']}, 'g': {'categories': ['biolink:Gene']}}, 'edges': {'edge_0': {'subject': 'g', 'object': 'SN', 'predicates': ['biolink:affects_response_to']}, 'edge_1': {'subject': 'SN', 'object': 'ON', 'predicates': ['biolink:treats_or_applied_or_studied_to_treat']}, 'edge_2': {'subject': 'g', 'object': 'ON', 'predicates': ['biolink:genetic_association']}}}}, 'parameters': {'tiers': [1], 'timeout': 210}, '

In [6]:
with open("./expanded_messages.json", "r", encoding="utf-8") as f:
    messages = json.load(f)

all_results = 0
for indx, message in enumerate(messages):
    t0 = time.perf_counter()
    print(message)
    response = lookup(graph, message, bmt, subclass=False)
    with open(f"heartburn_responses/response_gandalf_{indx}.json", "w", encoding="utf-8") as f:
        json.dump(response, f, indent=2)
    all_results += len(response["message"]["results"])
    # if len(response["message"]["results"]) > 0:
    #     with open("lookup_paths.json", "w", encoding="utf-8") as f:
    #         json.dump(response, f, indent=2)
    result = validate_trapi_response(graph, response)
    print(result.summary())
    t1 = time.perf_counter()
    print(f"Query took {t1 - t0} seconds")

print("Total results:", all_results)

{'message': {'query_graph': {'nodes': {'ON': {'categories': ['biolink:Disease'], 'ids': ['MONDO:0007186']}, 'SN': {'categories': ['biolink:ChemicalEntity']}}, 'edges': {'t_edge': {'subject': 'SN', 'object': 'ON', 'predicates': ['biolink:treats']}}}}, 'parameters': {'tiers': [0], 'timeout': 210}}
Using provided BMT instance
Query graph: 2 nodes, 1 edges

Processing edge 't_edge': SN -> ON
  Query predicates: ['biolink:treats']
  Expanded to 3 forward, 0 inverse predicates
  Backward search from 1 pinned nodes
  Traversed 4,935 total incoming neighbors
  Found 200 matches in 0.015s
  Found 200 matching edges
  Remaining edges: 0

Reconstructing complete paths...
  Join order: ['t_edge']
  Starting with 200 paths from edge 't_edge'
  Path reconstruction took 0.00s
  Enriching 200 paths...
  Cached properties for 151 unique nodes (0.00s)
  Enrichment took 0.00s
Found 200 complete paths
  Grouped into 150 unique node paths (0.00s)
  Built 150 results (0.00s)
  Post-processing total: 0.00s
V

In [42]:
lookup_paths = lookup(graph, {"message": {"query_graph": {
      "nodes": {
        "SN": {
          "categories": [
            "biolink:ChemicalEntity"
          ]
        },
        "ON": {
          "ids": [
            "MONDO:0007186"
          ],
          "categories": [
            "biolink:DiseaseOrPhenotypicFeature"
          ]
        },
        "e": {
          "categories": [
            "biolink:ChemicalEntity"
          ]
        },
        "i": {
          "categories": [
            "biolink:BiologicalEntity"
          ]
        }
      },
      "edges": {
        "edge_0": {
          "subject": "e",
          "object": "ON",
          "predicates": [
            "biolink:treats"
          ]
        },
        "edge_1": {
          "subject": "i",
          "object": "SN",
          "predicates": [
            "biolink:affects"
          ],
          "qualifier_constraints": [
            {
              "qualifier_set": [
                {
                  "qualifier_type_id": "biolink:object_aspect_qualifier",
                  "qualifier_value": "activity_or_abundance"
                },
                {
                  "qualifier_type_id": "biolink:object_direction_qualifier",
                  "qualifier_value": "decreased"
                }
              ]
            }
          ]
        },
        "edge_2": {
          "subject": "i",
          "object": "e",
          "predicates": [
            "biolink:affects"
          ],
          "qualifier_constraints": [
            {
              "qualifier_set": [
                {
                  "qualifier_type_id": "biolink:object_aspect_qualifier",
                  "qualifier_value": "activity_or_abundance"
                },
                {
                  "qualifier_type_id": "biolink:object_direction_qualifier",
                  "qualifier_value": "decreased"
                }
              ]
            }
          ]
        }
      }
    }}}, bmt)

with open("lookup_paths.json", "w", encoding="utf-8") as f:
    json.dump(lookup_paths, f, indent=2)

result = validate_trapi_response(graph, lookup_paths)
print(result.summary())

Using provided BMT instance
Rewriting query graph for subclass expansion (depth=1)
Query graph: 5 nodes, 4 edges

Processing edge 'ON_subclass_edge': ON -> ON_superclass
  Subclass traversal: found 2 matches (depth=1)
  Found 2 matching edges
  Remaining edges: 3

Processing edge 'edge_0': e -> ON
  Query predicates: ['biolink:treats']
  Expanded to 3 forward, 0 inverse predicates
  Backward search from 2 pinned nodes
  Traversed 4,954 total incoming neighbors
  Found 152 matches in 0.028s
  Found 152 matching edges
  Remaining edges: 2

Processing edge 'edge_2': i -> e
  Query predicates: ['biolink:affects']
  Expanded to 10 forward, 0 inverse predicates
  Backward search from 150 pinned nodes
  Traversed 11,591 total incoming neighbors
  Found 21 matches in 0.067s
  Found 21 matching edges
  Remaining edges: 1

Processing edge 'edge_1': i -> SN
  Query predicates: ['biolink:affects']
  Expanded to 10 forward, 0 inverse predicates
  Forward search from 20 pinned nodes
  Traversed 26,4

In [1]:
asprin_to_colorectral_cancer = lookup(graph, {
  "message": {
    "query_graph": {
      "nodes": {
        "n0": {
          "ids": ["CHEBI:15365"]
        },
        "n1": {},
        "n2": {},
        "n3": {
          "ids": ["MONDO:0005575"]
        }
      },
      "edges": {
        "e0": {
          "subject": "n0",
          "object": "n1",
          "predicates": ["biolink:related_to"]
        },
        "e1": {
          "subject": "n1",
          "object": "n2",
          "predicates": ["biolink:treats", "biolink:affects", "biolink:regulates", "biolink:gene_associated_with_condition", "biolink:has_metabolite", "biolink:applied_to_treat", "biolink:directly_physically_interacts_with", "biolink:has_contraindication", "biolink:subject_of_treatment_application_or_study_for_treatment_by", "biolink:contribution_from"]
        },
        "e2": {
          "subject": "n2",
          "object": "n3",
          "predicates": ["biolink:related_to"]
        }
      }
    }
  }
}, bmt)

NameError: name 'lookup' is not defined

In [None]:
"""Do small query."""
message = {
    "message": {
      "query_graph": {
        "nodes": {
        #   "SN": {
        #     "categories": [
        #       "biolink:ChemicalEntity"
        #     ]
        #   },
          "ON": {
            "ids": [
              "MONDO:0007186"
            ],
            "categories": [
              "biolink:DiseaseOrPhenotypicFeature"
            ]
          },
          "e": {
            "categories": [
              "biolink:ChemicalEntity"
            ]
          },
          "i": {
            # "ids": ["NCBIGene:29072"],
            "categories": [
              "biolink:BiologicalEntity"
            ]
          }
        },
        "edges": {
          "edge_0": {
            "subject": "e",
            "object": "ON",
            "predicates": [
              "biolink:treats"
            ]
          },
        #   "edge_1": {
        #     "subject": "i",
        #     "object": "SN",
        #     "predicates": [
        #       "biolink:affects"
        #     ],
        #     "qualifier_constraints": [
        #       {
        #         "qualifier_set": [
        #           {
        #             "qualifier_type_id": "biolink:object_aspect_qualifier",
        #             "qualifier_value": "activity_or_abundance"
        #           },
        #           {
        #             "qualifier_type_id": "biolink:object_direction_qualifier",
        #             "qualifier_value": "decreased"
        #           }
        #         ]
        #       }
        #     ]
        #   },
          "edge_2": {
            "subject": "i",
            "object": "e",
            "predicates": [
              "biolink:affects"
            ],
            "qualifier_constraints": [
              {
                "qualifier_set": [
                  {
                    "qualifier_type_id": "biolink:object_aspect_qualifier",
                    "qualifier_value": "activity_or_abundance"
                  },
                  {
                    "qualifier_type_id": "biolink:object_direction_qualifier",
                    "qualifier_value": "decreased"
                  }
                ]
              }
            ]
          }
        }
      }
    }
  }

t0 = time.perf_counter()

automat_results = {}
try:
    with httpx.Client(timeout=60) as client:
        response = client.post(
            "https://automat.renci.org/translatorkg/query",
            json=message,
        )
        response.raise_for_status()
        automat_results = response.json()
        num_results = len((automat_results.get("message") or {}).get("results") or [])
        result = validate_trapi_response(graph, automat_results)
        print(result.summary())
except httpx.ReadTimeout:
    print("Timed out")
    num_results = 0
except httpx.HTTPError as e:
    print("Got bad response:", response.content)
    num_results = 0

t1 = time.perf_counter()
print(f"Automat returned {len(automat_results['message']['results'])} results in {t1 - t0}s")

with open("automat_heartburn_response.json", "w", encoding="utf-8") as f:
    json.dump(automat_results, f, indent=2)

t0 = time.perf_counter()
gandalf_results = lookup(graph, message, bmt, subclass=True)

t1 = time.perf_counter()
print(f"Gandalf returned {len(gandalf_results['message']['results'])} results in {t1 - t0}s")

with open("gandalf_heartburn_response.json", "w", encoding="utf-8") as f:
    json.dump(gandalf_results, f, indent=2)

missing_results = compare_trapi_messages(automat_results["message"], gandalf_results["message"])

for missing_result in missing_results:
    print(missing_result)

# result = validate_trapi_response(graph, pathfinder_results)
# print(result.summary())

Validation PASSED
  Total paths: 2
  Valid paths: 2
  Invalid paths: 0
Automat returned 2 results in 0.6051752500061411s
Using provided BMT instance
Rewriting query graph for subclass expansion (depth=1)
Query graph: 5 nodes, 4 edges

Processing edge 'ON_subclass_edge': ON -> ON_superclass
  Subclass traversal: found 2 matches (depth=1)
  Found 2 matching edges
  Remaining edges: 3

Processing edge 'i_subclass_edge': i -> i_superclass
  Subclass traversal: found 10 matches (depth=1)
  Found 10 matching edges
  Remaining edges: 2

Processing edge 'edge_0': e -> ON
  Query predicates: ['biolink:treats']
  Expanded to 3 forward, 0 inverse predicates
  Backward search from 2 pinned nodes
  Traversed 4,954 total incoming neighbors
  Found 200 matches in 0.007s
  Found 200 matching edges
  Remaining edges: 1

Processing edge 'edge_2': i -> e
  Query predicates: ['biolink:affects']
  Expanded to 10 forward, 0 inverse predicates
  Both ends pinned: 10 start, 150 end
    Neighbor traversal: 0.0

In [16]:
with open("/Users/mwang/CoVar/translator/biopack/shepherd/scripts/aragorn/CHEBI_45783_MONDO_0004979_response.json", "r", encoding="utf-8") as f:
    message = json.load(f)

hydrated_response = enrich_knowledge_graph(message, graph)

with open("./hydrated_response.json", "w", encoding="utf-8") as f:
    json.dump(hydrated_response, f, indent=2)
