In [None]:
"""Get all imports."""
import csv
import time

from gandalf import (
    CSRGraph,
    analyze_node_types,
    analyze_predicates,
    build_graph_from_jsonl,
    diagnose_path_explosion,
    find_meaningful_paths,
    find_mechanistic_paths,
)
from gandalf.search import find_3hop_paths_with_properties


In [None]:
"""Build a new graph from jsonl files. Saves to pickle."""

# Assemble the graph from the JSONL edge list. The build options are
# gathered in one mapping so the call itself stays short.
build_options = {
    "undirected": True,
    "remove_duplicates": True,
    "remove_self_loops": True,
    # Optional: only needed when node data lives in a separate file.
    "node_jsonl_path": "nodes.jsonl",
}
graph = build_graph_from_jsonl("edges.jsonl", **build_options)

# Persist the built graph so later sessions can reload it instead of rebuilding.
graph.save("my_graph.pkl")
print("Graph saved!")

In [None]:
"""Load in a pickled graph."""

# Reloading the saved graph takes roughly 1-2 seconds, versus about 280 for
# a full rebuild from the JSONL files.
# NOTE(review): the .pkl extension suggests pickle is used underneath —
# only load files from a trusted source; confirm against CSRGraph.load.
saved_graph_path = "my_graph.pkl"
graph = CSRGraph.load(saved_graph_path)

In [None]:
"""Just set the start and end nodes you're looking for."""
# Endpoints of the path search, given as CURIE identifiers.
start_node, end_node = (
    "CHEBI:45783",  # Imatinib
    "MONDO:0004979",  # Asthma
)

In [None]:
"""Get initial node degrees."""

# Resolve both endpoint identifiers to graph indices (start first, then end).
start_idx, end_idx = [
    graph.get_node_idx(node_id) for node_id in (start_node, end_node)
]

# Report how many connections each endpoint has, with thousands separators.
for label, node_idx in (("Start", start_idx), ("End", end_idx)):
    print(f"{label} degree: {graph.degree(node_idx):,}")

In [None]:
"""Generate paths."""

# Time the 3-hop path search between the two endpoints.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be skewed by system clock adjustments the way time.time()
# can be.
start_time = time.perf_counter()
print(f"\nFinding 3-hop paths from {start_node} to {end_node}...")
paths = find_3hop_paths_with_properties(graph, start_node, end_node)
elapsed_seconds = time.perf_counter() - start_time
# Report the duration with an explicit unit instead of a bare float.
print(f"Finding paths with properties took {elapsed_seconds:.2f} seconds")

In [None]:
"""Get meaningful or mechanistic paths."""

# Alternative search, currently disabled; uncomment to run it as well.
# print("\n=== EXCLUDING ONTOLOGY HIERARCHIES ===")
# paths_meaningful = find_meaningful_paths(graph, start_node, end_node)
# print(f"Meaningful paths: {len(paths_meaningful):,}")

# Time the mechanistic path search with a monotonic clock; time.time() is
# wall-clock time and can jump if the system clock is adjusted mid-run.
start_time = time.perf_counter()
print("\n=== MECHANISTIC ONLY ===")
paths_mechanistic = find_mechanistic_paths(graph, start_node, end_node)
elapsed_seconds = time.perf_counter() - start_time
print(f"Mechanistic paths: {len(paths_mechanistic):,}")
# Report the duration with an explicit unit instead of a bare float.
print(f"Mechanistic paths took: {elapsed_seconds:.2f} seconds")

In [None]:
"""Run all the diagnostics."""

# All three diagnostics take the same (graph, start, end) arguments, so pack
# them once and unpack at each call.
query_args = (graph, start_node, end_node)

# Diagnose the path explosion between the two endpoints and keep the result.
results = diagnose_path_explosion(*query_args)

# Break down what the paths contain: node types first, then predicates.
analyze_node_types(*query_args)
analyze_predicates(*query_args)

In [None]:
"""Save output of the current paths."""

# Write every path in `paths` to a tab-separated file, one row per path.
#
# newline="" is what the csv module requires of files it writes to; without
# it, platforms that translate "\n" end up with "\r\r\n" line endings.
# An explicit UTF-8 encoding keeps non-ASCII node names from depending on the
# machine's locale.
node_keys = ("n0", "n1", "n2", "n3")
edge_keys = ("e0", "e1", "e2")

with open(
    "imatinib_asthma_maxgraph_output_full_6M.tsv",
    "w",
    newline="",
    encoding="utf-8",
) as f:
    tsv_writer = csv.writer(f, delimiter="\t")

    tsv_header = [
        "path",
        "categories",
        "first_hop_predicates",
        "second_hop_predicates",
        "third_hop_predicates",
        "path_curies",
    ]
    tsv_writer.writerow(tsv_header)

    for path in paths:
        # The node-based columns are the four nodes' values for one field,
        # joined with "->" in path order.
        names, categories, curies = (
            "->".join(f"{path[key][field]}" for key in node_keys)
            for field in ("name", "category", "id")
        )
        # One predicate column per hop, in hop order.
        hop_predicates = [path[key]["predicate"] for key in edge_keys]
        tsv_writer.writerow([names, categories, *hop_predicates, curies])