In [1]:
import sys
sys.path.insert(0, r"C:\Users\PC\Desktop\graphglue - Copie - Copie")
from graphglue.core.incgraph import IncidenceGraph


In [3]:
# ---------- Setup ----------
G = IncidenceGraph(directed=True)
conditions = ["Healthy", "Stressed", "Disease"]
for c in conditions:
    G.add_layer(c, condition=c)

# Entities
proteins = [f"P{i}" for i in range(1, 151)]   # P1..P150
transcripts = [f"T{i}" for i in range(1, 61)]  # T1..T60 (treat as nodes)
enz_edge_entities = [f"edge_rxn_{i}" for i in range(1, 11)]  # edge-entities for reactions

# Seed some node attributes
for p in proteins[:10]:
    G.add_node(p, layer="Healthy", family="kinase")
for p in proteins[10:]:
    G.add_node(p, layer="Healthy")
for t in transcripts:
    G.add_node(t, layer="Healthy", kind="transcript")
for ee in enz_edge_entities:
    G.add_edge_entity(ee, layer="Healthy", role="enzyme")

# Propagate initial nodes to all layers (cheaply)
for lid in ["Stressed", "Disease"]:
    G._layers[lid]["nodes"].update(G._layers["Healthy"]["nodes"])

In [5]:
# ---------- Build PPI edges in all layers ----------
import random

def rand_weight(base=1.0, jitter=0.5):
    return max(0.05, base + (random.random() - 0.5) * 2 * jitter)

ppis = []
for _ in range(320):
    u, v = random.sample(proteins, 2)
    w = rand_weight(1.2, 0.6)
    e = G.add_edge(u, v, layer="Healthy", weight=w, edge_directed=False)
    ppis.append(e)

# Stress/disease layer variants (override per-layer weights)
for eid in ppis:
    # Stressed: mostly +10% with jitter
    G.add_edge_to_layer("Stressed", eid)
    G.set_edge_layer_attrs("Stressed", eid, weight=G.edge_weights[eid] * rand_weight(1.10, 0.1))
    # Disease: some edges get weaker; others stronger
    G.add_edge_to_layer("Disease", eid)
    factor = 0.7 if random.random() < 0.4 else 1.3
    G.set_edge_layer_attrs("Disease", eid, weight=G.edge_weights[eid] * rand_weight(factor, 0.15))

In [7]:
# ---------- Complexes as undirected hyperedges ----------
complexes = []
for _ in range(12):
    members = set(random.sample(proteins, random.choice([3, 4, 5])))
    hid = G.add_hyperedge(members=members, layer="Healthy", weight=rand_weight(1.0, 0.2))
    complexes.append(hid)
    # complex exists in all layers (same membership)
    for lid in ["Stressed", "Disease"]:
        G.add_edge_to_layer(lid, hid)

In [9]:
# ---------- Directed signaling cascades as hyperedges ----------
cascades = []
while len(cascades) < 8:
    head = set(random.sample(proteins, random.choice([1, 2])))
    tail = set(random.sample(proteins, random.choice([2, 3, 4])))
    if head & tail:
        continue  # resample until disjoint
    hid = G.add_hyperedge(head=head, tail=tail, layer="Healthy", weight=rand_weight(1.0, 0.4))
    cascades.append(hid)
    for lid in ["Stressed", "Disease"]:
        G.add_edge_to_layer(lid, hid)

In [11]:
# ---------- Reactions connecting nodes to edge-entities ----------
for ee in enz_edge_entities:
    s, t = random.sample(proteins, 2)
    G.add_edge(s, ee, layer="Healthy", edge_type="node_edge", weight=1.0 + random.random())
    G.add_edge(ee, t, layer="Healthy", edge_type="node_edge", weight=1.0 + random.random())
    # propagate across layers
    for lid in ["Stressed", "Disease"]:
        G._layers[lid]["edges"].update(G._layers["Healthy"]["edges"])

In [13]:
# ---------- Basic sanity ----------
print("Nodes:", G.number_of_nodes(), "Edges:", G.number_of_edges())

# Only true "nodes" are counted by number_of_nodes() (proteins + transcripts)
expected_nodes = len(set(proteins)) + len(set(transcripts))  # 150 + 60 = 210
assert G.number_of_nodes() >= expected_nodes, f"Expected ≥{expected_nodes}, got {G.number_of_nodes()}"

# Edge-entities are tracked as entity_type == 'edge' (not included in number_of_nodes)
edge_entity_ids = set(enz_edge_entities)
edge_entity_count = sum(1 for _id, et in G.entity_types.items() if et == "edge" and _id in edge_entity_ids)
assert edge_entity_count == len(edge_entity_ids), f"Expected {len(edge_entity_ids)} edge-entities, got {edge_entity_count}"

# Edges: PPIs (320) + complexes (12) + cascades (8) + reaction links (10*2) = 360 minimum
assert G.number_of_edges() >= 320 + 12 + 8 + (10 * 2)


Nodes: 210 Edges: 360


In [15]:
# ---------- Views & top edges by condition ----------
import polars as pl

for cond in conditions:
    EV = G.edges_view(layer=cond, resolved_weight=True)
    print(f"[{cond}] edges_view rows =", EV.height)
    top = (
        EV
        .filter(pl.col("kind") == "binary")
        .sort("effective_weight", descending=True)
        .select(["edge_id", "source", "target", "effective_weight"])
        .head(5)
    )
    print(f"\nTop 5 binary edges by effective_weight in {cond}:")
    print(top)

[Healthy] edges_view rows = 360

Top 5 binary edges by effective_weight in Healthy:
shape: (5, 4)
┌──────────┬────────────┬────────────┬──────────────────┐
│ edge_id  ┆ source     ┆ target     ┆ effective_weight │
│ ---      ┆ ---        ┆ ---        ┆ ---              │
│ str      ┆ str        ┆ str        ┆ f64              │
╞══════════╪════════════╪════════════╪══════════════════╡
│ edge_344 ┆ P119       ┆ edge_rxn_3 ┆ 1.925049         │
│ edge_356 ┆ P13        ┆ edge_rxn_9 ┆ 1.870316         │
│ edge_345 ┆ edge_rxn_3 ┆ P131       ┆ 1.823379         │
│ edge_349 ┆ edge_rxn_5 ┆ P23        ┆ 1.800912         │
│ edge_134 ┆ P27        ┆ P72        ┆ 1.793373         │
└──────────┴────────────┴────────────┴──────────────────┘
[Stressed] edges_view rows = 360

Top 5 binary edges by effective_weight in Stressed:
shape: (5, 4)
┌──────────┬────────────┬────────────┬──────────────────┐
│ edge_id  ┆ source     ┆ target     ┆ effective_weight │
│ ---      ┆ ---        ┆ ---        ┆ ---      

In [17]:
# ---------- Layer analytics ----------
stats = G.layer_statistics()
print("\nLayer stats:", stats)

conserved = G.conserved_edges(min_layers=3)  # present in all 3 conditions
print("\nConserved edges (in all conditions):", len(conserved))

disease_specific = G.layer_specific_edges("Disease")
print("Disease-specific edges:", len(disease_specific))

changes = G.temporal_dynamics(["Healthy", "Stressed", "Disease"], metric="edge_change")
print("\nTemporal edge changes (Healthy→Stressed→Disease):", changes)
assert len(changes) == 2


Layer stats: {'Healthy': {'nodes': 220, 'edges': 360, 'attributes': {'condition': 'Healthy'}}, 'Stressed': {'nodes': 220, 'edges': 360, 'attributes': {'condition': 'Stressed'}}, 'Disease': {'nodes': 220, 'edges': 360, 'attributes': {'condition': 'Disease'}}}

Conserved edges (in all conditions): 360
Disease-specific edges: 0

Temporal edge changes (Healthy→Stressed→Disease): [{'added': 0, 'removed': 0, 'net_change': 0}, {'added': 0, 'removed': 0, 'net_change': 0}]


In [19]:
# ---------- Presence queries ----------
some_e = next(iter(G.edge_to_idx.keys()))
print("\nEdge presence for", some_e, ":", G.edge_presence_across_layers(edge_id=some_e))
some_p = random.choice(proteins)
print("Node presence for", some_p, ":", G.node_presence_across_layers(some_p))


Edge presence for edge_0 : ['Healthy', 'Stressed', 'Disease']
Node presence for P132 : ['Healthy', 'Stressed', 'Disease']


In [21]:
# ---------- Traversal checks ----------
q = random.choice(proteins)
print(f"\nNeighbors({q}) =>", G.neighbors(q)[:10])
print(f"Out({q}) =>", G.out_neighbors(q)[:10])
print(f"In({q}) =>", G.in_neighbors(q)[:10])


Neighbors(P10) => ['P43', 'P19', 'P73', 'P37', 'P46', 'P146', 'P124', 'P142', 'P71', 'P118']
Out(P10) => ['P43', 'P19', 'P73', 'P37', 'P46', 'P146', 'P124', 'P142', 'P71', 'P118']
In(P10) => ['P73', 'P43', 'P19', 'P37', 'P118', 'P142', 'P46', 'P25']


In [23]:
# ---------- Subgraph slice & copy ----------
H = G.subgraph_from_layer("Disease", resolve_layer_weights=True)
assert set(H.nodes()).issubset(set(G.nodes()))
assert set(H.edges()).issubset(set(G.edges()))
print("\nDisease subgraph: nodes =", H.number_of_nodes(), "edges =", H.number_of_edges())

K = G.copy()
assert set(K.nodes()) == set(G.nodes())
assert set(K.edges()) == set(G.edges())
# hyperedge shape preserved
any_hyper = next(e for e,k in G.edge_kind.items() if k == "hyper")
assert K.edge_kind.get(any_hyper) == "hyper"
# layer sets preserved
for lid in G.list_layers(include_default=True):
    assert K._layers[lid]["nodes"] == G._layers[lid]["nodes"]
    assert K._layers[lid]["edges"] == G._layers[lid]["edges"]
print("copy() OK")


Disease subgraph: nodes = 210 edges = 360
copy() OK


In [25]:
# ---------- Remove operations stress ----------
to_drop_nodes = random.sample(proteins, 5)
for n in to_drop_nodes:
    if n in G.entity_to_idx:
        G.remove_node(n)
print("\nAfter removing 5 proteins: nodes =", G.number_of_nodes(), "edges =", G.number_of_edges())

to_drop_edges = list(G.edge_to_idx.keys())[:10]
for eid in to_drop_edges:
    if eid in G.edge_to_idx:
        G.remove_edge(eid)
print("After removing 10 edges: nodes =", G.number_of_nodes(), "edges =", G.number_of_edges())


After removing 5 proteins: nodes = 205 edges = 339
After removing 10 edges: nodes = 205 edges = 329


In [27]:
# ---------- Audit & memory ----------
audit = G.audit_attributes()
print("\nAudit:", audit)
mem_bytes = G.memory_usage()
print("Approx memory usage (bytes):", int(mem_bytes))
assert mem_bytes > 0

print("\nReality-check finished ✅")


Audit: {'extra_node_rows': ['edge_rxn_7', 'edge_rxn_3', 'edge_rxn_6', 'edge_rxn_5', 'edge_rxn_1', 'edge_rxn_2', 'edge_rxn_10', 'edge_rxn_9', 'edge_rxn_8', 'edge_rxn_4'], 'extra_edge_rows': [], 'missing_node_rows': [], 'missing_edge_rows': ['edge_283', 'edge_15', 'edge_105', 'edge_53', 'edge_147', 'edge_133', 'edge_148', 'edge_249', 'edge_122', 'edge_336', 'edge_357', 'edge_305', 'edge_170', 'edge_58', 'edge_356', 'edge_125', 'edge_166', 'edge_224', 'edge_326', 'edge_32', 'edge_277', 'edge_61', 'edge_342', 'edge_98', 'edge_219', 'edge_210', 'edge_191', 'edge_108', 'edge_67', 'edge_266', 'edge_154', 'edge_142', 'edge_215', 'edge_168', 'edge_41', 'edge_36', 'edge_113', 'edge_49', 'edge_104', 'edge_43', 'edge_131', 'edge_114', 'edge_308', 'edge_331', 'edge_95', 'edge_188', 'edge_203', 'edge_116', 'edge_227', 'edge_298', 'edge_74', 'edge_153', 'edge_291', 'edge_16', 'edge_232', 'edge_284', 'edge_146', 'edge_82', 'edge_19', 'edge_24', 'edge_204', 'edge_228', 'edge_307', 'edge_121', 'edge_31

In [29]:
events = G.history()           # list[dict]
df = G.history(as_df=True)     # Polars DF [DataFrame]

In [31]:
print(df.head())
events[:3]

shape: (5, 10)
┌─────────┬─────────────────────────────┬─────────┬─────────────────┬───┬────────┬─────────┬─────────┬────────────┐
│ version ┆ ts_utc                      ┆ mono_ns ┆ op              ┆ … ┆ result ┆ node_id ┆ layer   ┆ attributes │
│ ---     ┆ ---                         ┆ ---     ┆ ---             ┆   ┆ ---    ┆ ---     ┆ ---     ┆ ---        │
│ i64     ┆ str                         ┆ i64     ┆ str             ┆   ┆ str    ┆ str     ┆ str     ┆ struct[1]  │
╞═════════╪═════════════════════════════╪═════════╪═════════════════╪═══╪════════╪═════════╪═════════╪════════════╡
│ 1       ┆ 2025-10-04T17:28:39.150961Z ┆ 2782600 ┆ set_layer_attrs ┆ … ┆ null   ┆ null    ┆ null    ┆ null       │
│ 2       ┆ 2025-10-04T17:28:39.150961Z ┆ 3163600 ┆ set_layer_attrs ┆ … ┆ null   ┆ null    ┆ null    ┆ null       │
│ 3       ┆ 2025-10-04T17:28:39.151953Z ┆ 3573300 ┆ set_layer_attrs ┆ … ┆ null   ┆ null    ┆ null    ┆ null       │
│ 4       ┆ 2025-10-04T17:28:39.152987Z ┆ 4569100 ┆ add_n

[{'version': 1,
  'ts_utc': '2025-10-04T17:28:39.150961Z',
  'mono_ns': 2782600,
  'op': 'set_layer_attrs',
  'layer_id': 'Healthy',
  'attrs': {'condition': 'Healthy'},
  'result': None},
 {'version': 2,
  'ts_utc': '2025-10-04T17:28:39.150961Z',
  'mono_ns': 3163600,
  'op': 'set_layer_attrs',
  'layer_id': 'Stressed',
  'attrs': {'condition': 'Stressed'},
  'result': None},
 {'version': 3,
  'ts_utc': '2025-10-04T17:28:39.151953Z',
  'mono_ns': 3573300,
  'op': 'set_layer_attrs',
  'layer_id': 'Disease',
  'attrs': {'condition': 'Disease'},
  'result': None}]

In [33]:
import sys, pathlib

# add repo root to Python path
sys.path.append(str(pathlib.Path.cwd().parent))

# replace `yourpkg` with the actual package name (folder name containing core/io)
from graphglue.io import csv as graph_csv

In [35]:
csv1_path = "csv1_edges.csv"
pl.DataFrame({
    "source": ["A","A","B","C","D"],
    "target": ["B","C","C","D","A"],
    "weight": [1, 2, 3, 1, 5],
    "directed": [True, True, False, True, True],
    "layer": ["L1","L1","L1","L2","L2"],
}).write_csv(csv1_path)

pl.read_csv(csv1_path).head()


source,target,weight,directed,layer
str,str,i64,bool,str
"""A""","""B""",1,True,"""L1"""
"""A""","""C""",2,True,"""L1"""
"""B""","""C""",3,False,"""L1"""
"""C""","""D""",1,True,"""L2"""
"""D""","""A""",5,True,"""L2"""


In [45]:
G = graph_csv.load_csv_to_graph(
    csv1_path,
    schema="auto",            # or 'edge_list'/'incidence'/'adjacency'/'hyperedge'/'lil'
    default_layer=None,       # fallback if no layer column is present
    default_directed=None,    # fallback if no directed column and cannot infer
    default_weight=1.0,
)

# Quick sanity: show first rows of an edges view (columns depend on your IncidenceGraph implementation)
edges = G.edges_view(layer=None, include_directed=True, resolved_weight=True)
edges.head()


edge_id,kind,directed,global_weight,effective_weight,source,target,edge_type
str,str,bool,f64,f64,str,str,str
"""edge_0""","""binary""",True,1.0,1.0,"""A""","""B""","""regular"""
"""edge_1""","""binary""",True,2.0,2.0,"""A""","""C""","""regular"""
"""edge_2""","""binary""",True,3.0,3.0,"""B""","""C""","""regular"""
"""edge_3""","""binary""",True,1.0,1.0,"""C""","""D""","""regular"""
"""edge_4""","""binary""",True,5.0,5.0,"""D""","""A""","""regular"""


In [47]:
# Count entities and edges (attribute names based on your class; adjust if different)
num_entities = len(G.entity_index)        # nodes + edge-entities
num_edges    = len(G.edge_index)          # binary + hyper

print("entities:", num_entities, "edges:", num_edges)

# A light “degree” summary from edges_view for binary edges only (skip hyper)
df = G.edges_view(include_directed=True, resolved_weight=True, with_attributes=False)

# Try to find endpoint column names robustly
cols = {c.lower(): c for c in df.columns}
# Common possibilities:
src_col = next((cols[c] for c in ["source","src","u","from"]), None)
dst_col = next((cols[c] for c in ["target","dst","v","to"]), None)

if src_col and dst_col:
    # out-degree (directed) / degree (undirected)
    out_deg = df.group_by(src_col).len().rename({src_col: "node", "len": "out_degree"})
    in_deg  = df.group_by(dst_col).len().rename({dst_col: "node", "len": "in_degree"})
    deg = out_deg.join(in_deg, on="node", how="outer").fill_null(0)
    deg = deg.with_columns((pl.col("out_degree")+pl.col("in_degree")).alias("total_degree"))
    deg.sort("total_degree", descending=True).head(10)
else:
    print("Skip degree summary: endpoint columns not found in edges_view output (likely hyperedge-only or different schema).")


AttributeError: 'IncidenceGraph' object has no attribute 'entity_index'

In [49]:
# Add a new node and an edge on layer L3
G.add_node("E")
eid = G.add_edge("E", "A", layer="L3", directed=True, weight=2.5)

# Per-layer weight override example:
G.set_edge_layer_attrs("L3", eid, weight=3.0)

# Inspect the updated edges
G.edges_view(include_directed=True, resolved_weight=True).tail()


edge_id,kind,directed,global_weight,effective_weight,source,target,edge_type
str,str,bool,f64,f64,str,str,str
"""edge_1""","""binary""",True,2.0,2.0,"""A""","""C""","""regular"""
"""edge_2""","""binary""",True,3.0,3.0,"""B""","""C""","""regular"""
"""edge_3""","""binary""",True,1.0,1.0,"""C""","""D""","""regular"""
"""edge_4""","""binary""",True,5.0,5.0,"""D""","""A""","""regular"""
"""edge_5""","""binary""",True,2.5,2.5,"""E""","""A""","""regular"""


In [53]:
csv2_path = "csv2_edges_view.csv"
G.edges_view(layer=None, include_directed=True, resolved_weight=True).write_csv(csv2_path)
csv2_path


'csv2_edges_view.csv'

In [57]:
def export_edge_list_csv(G, path, layer=None):
    df = G.edges_view(layer=layer, include_directed=True, resolved_weight=True)
    cols = {c.lower(): c for c in df.columns}
    src_col = next((cols[c] for c in ["source","src","u","from"]), None)
    dst_col = next((cols[c] for c in ["target","dst","v","to"]), None)
    dir_col = next((cols[c] for c in ["directed"]), None)
    w_eff   = next((cols[c] for c in ["effective_weight","weight","w"]), None)

    if not (src_col and dst_col):
        raise ValueError("No binary endpoint columns found; the view may be hyperedge-only. Try the generic edges_view export.")

    out = pl.DataFrame({
        "source": df[src_col],
        "target": df[dst_col],
        "weight": df[w_eff] if w_eff else pl.Series([1.0]*df.height),
        "directed": df[dir_col] if dir_col in df.columns else pl.Series([None]*df.height),
        "layer": pl.Series([layer]*df.height) if layer else pl.Series([None]*df.height),
    })
    out.write_csv(path)

# Usage:
csv2_edge_list_path = "csv2_edge_list.csv"
export_edge_list_csv(G, csv2_edge_list_path, layer=None)
csv2_edge_list_path


'csv2_edge_list.csv'

In [59]:
# In-memory look at last few events
hist = G.history(as_df=True)   # DF [DataFrame]
hist.tail()

# Save to Parquet/CSV/JSON [JavaScript Object Notation]/NDJSON [Newline-Delimited JSON]
G.export_history("graph_history.parquet")


24