In [21]:
import sys
sys.path.insert(0, r"C:\Users\PC\Desktop\graphglue - Copie - Copie")
from graphglue.core.incgraph import IncidenceGraph


In [23]:
# ---------- Setup ----------
G = IncidenceGraph(directed=True)
conditions = ["Healthy", "Stressed", "Disease"]
for c in conditions:
    G.add_layer(c, condition=c)

# Entities
proteins = [f"P{i}" for i in range(1, 151)]   # P1..P150
transcripts = [f"T{i}" for i in range(1, 61)]  # T1..T60 (treat as nodes)
enz_edge_entities = [f"edge_rxn_{i}" for i in range(1, 11)]  # edge-entities for reactions

# Seed some node attributes
for p in proteins[:10]:
    G.add_node(p, layer="Healthy", family="kinase")
for p in proteins[10:]:
    G.add_node(p, layer="Healthy")
for t in transcripts:
    G.add_node(t, layer="Healthy", kind="transcript")
for ee in enz_edge_entities:
    G.add_edge_entity(ee, layer="Healthy", role="enzyme")

# Propagate initial nodes to all layers (cheaply)
for lid in ["Stressed", "Disease"]:
    G._layers[lid]["nodes"].update(G._layers["Healthy"]["nodes"])

In [25]:
# ---------- Build PPI edges in all layers ----------
import random

def rand_weight(base=1.0, jitter=0.5):
    return max(0.05, base + (random.random() - 0.5) * 2 * jitter)

ppis = []
for _ in range(320):
    u, v = random.sample(proteins, 2)
    w = rand_weight(1.2, 0.6)
    e = G.add_edge(u, v, layer="Healthy", weight=w, edge_directed=False)
    ppis.append(e)

# Stress/disease layer variants (override per-layer weights)
for eid in ppis:
    # Stressed: mostly +10% with jitter
    G.add_edge_to_layer("Stressed", eid)
    G.set_edge_layer_attrs("Stressed", eid, weight=G.edge_weights[eid] * rand_weight(1.10, 0.1))
    # Disease: some edges get weaker; others stronger
    G.add_edge_to_layer("Disease", eid)
    factor = 0.7 if random.random() < 0.4 else 1.3
    G.set_edge_layer_attrs("Disease", eid, weight=G.edge_weights[eid] * rand_weight(factor, 0.15))

In [27]:
# ---------- Complexes as undirected hyperedges ----------
complexes = []
for _ in range(12):
    members = set(random.sample(proteins, random.choice([3, 4, 5])))
    hid = G.add_hyperedge(members=members, layer="Healthy", weight=rand_weight(1.0, 0.2))
    complexes.append(hid)
    # complex exists in all layers (same membership)
    for lid in ["Stressed", "Disease"]:
        G.add_edge_to_layer(lid, hid)

In [29]:
# ---------- Directed signaling cascades as hyperedges ----------
cascades = []
while len(cascades) < 8:
    head = set(random.sample(proteins, random.choice([1, 2])))
    tail = set(random.sample(proteins, random.choice([2, 3, 4])))
    if head & tail:
        continue  # resample until disjoint
    hid = G.add_hyperedge(head=head, tail=tail, layer="Healthy", weight=rand_weight(1.0, 0.4))
    cascades.append(hid)
    for lid in ["Stressed", "Disease"]:
        G.add_edge_to_layer(lid, hid)

In [31]:
# ---------- Reactions connecting nodes to edge-entities ----------
for ee in enz_edge_entities:
    s, t = random.sample(proteins, 2)
    G.add_edge(s, ee, layer="Healthy", edge_type="node_edge", weight=1.0 + random.random())
    G.add_edge(ee, t, layer="Healthy", edge_type="node_edge", weight=1.0 + random.random())
    # propagate across layers
    for lid in ["Stressed", "Disease"]:
        G._layers[lid]["edges"].update(G._layers["Healthy"]["edges"])

In [33]:
# ---------- Basic sanity ----------
print("Nodes:", G.number_of_nodes(), "Edges:", G.number_of_edges())

# Only true "nodes" are counted by number_of_nodes() (proteins + transcripts)
expected_nodes = len(set(proteins)) + len(set(transcripts))  # 150 + 60 = 210
assert G.number_of_nodes() >= expected_nodes, f"Expected ≥{expected_nodes}, got {G.number_of_nodes()}"

# Edge-entities are tracked as entity_type == 'edge' (not included in number_of_nodes)
edge_entity_ids = set(enz_edge_entities)
edge_entity_count = sum(1 for _id, et in G.entity_types.items() if et == "edge" and _id in edge_entity_ids)
assert edge_entity_count == len(edge_entity_ids), f"Expected {len(edge_entity_ids)} edge-entities, got {edge_entity_count}"

# Edges: PPIs (320) + complexes (12) + cascades (8) + reaction links (10*2) = 360 minimum
assert G.number_of_edges() >= 320 + 12 + 8 + (10 * 2)


Nodes: 210 Edges: 360


In [35]:
# ---------- Views & top edges by condition ----------
import polars as pl

for cond in conditions:
    EV = G.edges_view(layer=cond, resolved_weight=True)
    print(f"[{cond}] edges_view rows =", EV.height)
    top = (
        EV
        .filter(pl.col("kind") == "binary")
        .sort("effective_weight", descending=True)
        .select(["edge_id", "source", "target", "effective_weight"])
        .head(5)
    )
    print(f"\nTop 5 binary edges by effective_weight in {cond}:")
    print(top)

[Healthy] edges_view rows = 360

Top 5 binary edges by effective_weight in Healthy:
shape: (5, 4)
┌──────────┬─────────────┬────────────┬──────────────────┐
│ edge_id  ┆ source      ┆ target     ┆ effective_weight │
│ ---      ┆ ---         ┆ ---        ┆ ---              │
│ str      ┆ str         ┆ str        ┆ f64              │
╞══════════╪═════════════╪════════════╪══════════════════╡
│ edge_349 ┆ edge_rxn_5  ┆ P16        ┆ 1.976784         │
│ edge_357 ┆ edge_rxn_9  ┆ P5         ┆ 1.967949         │
│ edge_342 ┆ P95         ┆ edge_rxn_2 ┆ 1.934452         │
│ edge_344 ┆ P133        ┆ edge_rxn_3 ┆ 1.869871         │
│ edge_359 ┆ edge_rxn_10 ┆ P5         ┆ 1.863954         │
└──────────┴─────────────┴────────────┴──────────────────┘
[Stressed] edges_view rows = 360

Top 5 binary edges by effective_weight in Stressed:
shape: (5, 4)
┌──────────┬─────────────┬────────────┬──────────────────┐
│ edge_id  ┆ source      ┆ target     ┆ effective_weight │
│ ---      ┆ ---         ┆ ---     

In [37]:
# ---------- Layer analytics ----------
stats = G.layer_statistics()
print("\nLayer stats:", stats)

conserved = G.conserved_edges(min_layers=3)  # present in all 3 conditions
print("\nConserved edges (in all conditions):", len(conserved))

disease_specific = G.layer_specific_edges("Disease")
print("Disease-specific edges:", len(disease_specific))

changes = G.temporal_dynamics(["Healthy", "Stressed", "Disease"], metric="edge_change")
print("\nTemporal edge changes (Healthy→Stressed→Disease):", changes)
assert len(changes) == 2


Layer stats: {'Healthy': {'nodes': 220, 'edges': 360, 'attributes': {'condition': 'Healthy'}}, 'Stressed': {'nodes': 220, 'edges': 360, 'attributes': {'condition': 'Stressed'}}, 'Disease': {'nodes': 220, 'edges': 360, 'attributes': {'condition': 'Disease'}}}

Conserved edges (in all conditions): 360
Disease-specific edges: 0

Temporal edge changes (Healthy→Stressed→Disease): [{'added': 0, 'removed': 0, 'net_change': 0}, {'added': 0, 'removed': 0, 'net_change': 0}]


In [39]:
# ---------- Presence queries ----------
some_e = next(iter(G.edge_to_idx.keys()))
print("\nEdge presence for", some_e, ":", G.edge_presence_across_layers(edge_id=some_e))
some_p = random.choice(proteins)
print("Node presence for", some_p, ":", G.node_presence_across_layers(some_p))


Edge presence for edge_0 : ['Healthy', 'Stressed', 'Disease']
Node presence for P87 : ['Healthy', 'Stressed', 'Disease']


In [41]:
# ---------- Traversal checks ----------
q = random.choice(proteins)
print(f"\nNeighbors({q}) =>", G.neighbors(q)[:10])
print(f"Out({q}) =>", G.out_neighbors(q)[:10])
print(f"In({q}) =>", G.in_neighbors(q)[:10])


Neighbors(P110) => ['P55', 'P120', 'P65', 'P88', 'P98']
Out(P110) => ['P65', 'P55', 'P98']
In(P110) => ['P55', 'P120', 'P65', 'P88', 'P98']


In [43]:
# ---------- Subgraph slice & copy ----------
H = G.subgraph_from_layer("Disease", resolve_layer_weights=True)
assert set(H.nodes()).issubset(set(G.nodes()))
assert set(H.edges()).issubset(set(G.edges()))
print("\nDisease subgraph: nodes =", H.number_of_nodes(), "edges =", H.number_of_edges())

K = G.copy()
assert set(K.nodes()) == set(G.nodes())
assert set(K.edges()) == set(G.edges())
# hyperedge shape preserved
any_hyper = next(e for e,k in G.edge_kind.items() if k == "hyper")
assert K.edge_kind.get(any_hyper) == "hyper"
# layer sets preserved
for lid in G.list_layers(include_default=True):
    assert K._layers[lid]["nodes"] == G._layers[lid]["nodes"]
    assert K._layers[lid]["edges"] == G._layers[lid]["edges"]
print("copy() OK")


Disease subgraph: nodes = 210 edges = 360
copy() OK


In [44]:
# ---------- Remove operations stress ----------
to_drop_nodes = random.sample(proteins, 5)
for n in to_drop_nodes:
    if n in G.entity_to_idx:
        G.remove_node(n)
print("\nAfter removing 5 proteins: nodes =", G.number_of_nodes(), "edges =", G.number_of_edges())

to_drop_edges = list(G.edge_to_idx.keys())[:10]
for eid in to_drop_edges:
    if eid in G.edge_to_idx:
        G.remove_edge(eid)
print("After removing 10 edges: nodes =", G.number_of_nodes(), "edges =", G.number_of_edges())


After removing 5 proteins: nodes = 205 edges = 333
After removing 10 edges: nodes = 205 edges = 323


In [47]:
# ---------- Audit & memory ----------
audit = G.audit_attributes()
print("\nAudit:", audit)
mem_bytes = G.memory_usage()
print("Approx memory usage (bytes):", int(mem_bytes))
assert mem_bytes > 0

print("\nReality-check finished ✅")


Audit: {'extra_node_rows': ['edge_rxn_5', 'edge_rxn_9', 'edge_rxn_3', 'edge_rxn_8', 'edge_rxn_1', 'edge_rxn_2', 'edge_rxn_6', 'edge_rxn_4', 'edge_rxn_10', 'edge_rxn_7'], 'extra_edge_rows': [], 'missing_node_rows': [], 'missing_edge_rows': ['edge_167', 'edge_102', 'edge_197', 'edge_198', 'edge_279', 'edge_195', 'edge_277', 'edge_287', 'edge_328', 'edge_27', 'edge_278', 'edge_242', 'edge_35', 'edge_202', 'edge_18', 'edge_235', 'edge_210', 'edge_226', 'edge_284', 'edge_216', 'edge_79', 'edge_101', 'edge_127', 'edge_266', 'edge_324', 'edge_31', 'edge_311', 'edge_132', 'edge_34', 'edge_317', 'edge_105', 'edge_58', 'edge_275', 'edge_133', 'edge_74', 'edge_269', 'edge_144', 'edge_248', 'edge_12', 'edge_296', 'edge_115', 'edge_151', 'edge_59', 'edge_135', 'edge_314', 'edge_162', 'edge_350', 'edge_199', 'edge_303', 'edge_89', 'edge_228', 'edge_224', 'edge_158', 'edge_146', 'edge_352', 'edge_131', 'edge_36', 'edge_221', 'edge_90', 'edge_161', 'edge_353', 'edge_16', 'edge_192', 'edge_99', 'edge_