In [1]:
import networkx as nx
import networkit as nk

from pathlib import Path
from panricci import RicciFlow
from panricci.distributions.variation_graph import DistributionNodes
# from panricci.distributions.sequence_graph import DistributionNodes
from panricci.utils import GFALoader

In [2]:
dirsave_graphs = "../output/ricci-flow-examples-2"

In [6]:
# DATA:
# path_gfa = "../data/test1.gfa"
# path_chkpt = "../output/test3/ricci-flow/test3-ricciflow-5.edgelist"

path_gfa = "/data/analysis-paper/experiments/mini-experiment-standard-not_alpha_consistent-not_fixblock/gfa-unchop/nodes/penalization0-min_len0-min_coverage0-alpha1/DQA1-3117.gfa"
path_chkpt = "../output/graph-alignment/ricci-flow/DQA1-3117-alpha1-ricciflow-5.edgelist"

# path_gfa = "/data/analysis-paper/experiments/mini-experiment-standard-not_alpha_consistent-not_fixblock/gfa-unchop/nodes/penalization0-min_len0-min_coverage0-alpha10/DQA1-3117.gfa"
# path_chkpt = "../output/graph-alignment/ricci-flow/DQA1-3117-alpha10-ricciflow-5.edgelist"



___
## Apply Ricci-Flow to a Variation Graph

In [7]:
# load graph
gfa_loader = GFALoader(undirected=False)
G = gfa_loader(path_gfa)

# compute distribution of nodes
distribution = DistributionNodes(G, alpha=0.5)

# Initialize ricci-flow
ricci_flow = RicciFlow(G, distribution, dirsave_graphs=dirsave_graphs, save_last=False, save_intermediate_graphs=True)
G_ricci = ricci_flow.run(iterations=15, name=Path(path_gfa).stem)

RicciFlow:   0%|          | 0/15 [00:00<?, ?it/s]

RicciFlow: 100%|██████████| 15/15 [00:10<00:00,  1.40it/s]


In [8]:
G_ricci.edges.data()

OutEdgeDataView([('1', '1305', {'curvature': 0.0, 'weight': 0.5}), ('2', '909', {'curvature': 0.0, 'weight': 0.5}), ('3', '1182', {'curvature': 0.0, 'weight': 0.5}), ('3', '1439', {'curvature': 0.0, 'weight': 0.5}), ('4', '1055', {'curvature': 0.0, 'weight': 0.5}), ('4', '1464', {'curvature': 0.0, 'weight': 0.5}), ('5', '1474', {'curvature': 0.0, 'weight': 0.5}), ('6', '409', {'curvature': 0.0, 'weight': 0.5}), ('7', '293', {'curvature': 0.0, 'weight': 0.5}), ('7', '878', {'curvature': 0.0, 'weight': 0.5}), ('8', '664', {'curvature': 0.0, 'weight': 0.5}), ('9', '10', {'curvature': 0.0, 'weight': 0.5}), ('9', '968', {'curvature': 0.0, 'weight': 0.5}), ('10', '946', {'curvature': 0.0, 'weight': 0.5}), ('11', '351', {'curvature': 0.0, 'weight': 0.5}), ('12', '13', {'curvature': 0.0, 'weight': 0.5}), ('13', '14', {'curvature': 0.0, 'weight': 0.5}), ('13', '488', {'curvature': 0.0, 'weight': 0.5}), ('13', '378', {'curvature': 0.0, 'weight': 0.5}), ('14', '462', {'curvature': 0.0, 'weight': 

____
## plot embeddings


In [None]:
import seaborn as sns
import pandas as pd
from panricci.alignment.node_embeddings import NodeEmbeddings

compute_node_embeddings = NodeEmbeddings(
                                        ricci_embedding=True, # two coordinates considering shortest path [d(source, node),d(node,sink)]  
                                        seq_embedding=False   # 2 x 4^k coordinates, k-mer distributions for sequence spelled by the shortest paths between (source,node) and (node,sink) 
                                        )
node_emb = compute_node_embeddings(G_ricci)
coords = pd.DataFrame.from_dict(node_emb,orient="index",columns=["from_source","to_sink"])
sns.scatterplot(coords, x="from_source",y="to_sink",)

___

## Results

**Checkpoints can be loaded**

In [None]:
from sklearn.cluster import DBSCAN

In [None]:
G_chkpt = gfa_loader(path_gfa, path_chkpt)

In [None]:
node_emb = compute_node_embeddings(G_chkpt)
coords = pd.DataFrame.from_dict(node_emb,orient="index",columns=["from_source","to_sink"])
sns.scatterplot(coords, x="to_sink",y="from_source",)

In [None]:
# Cluster coordinates using DBSCAN
X = coords
clustering = DBSCAN(eps=1.5, min_samples=5).fit(X)
coords["cluster"] = clustering.labels_
coords.reset_index(names="node", inplace=True)
# coords.to_csv("alpha50-labels.csv")

In [None]:
sns.scatterplot(coords, x="to_sink",y="from_source", hue="cluster")

In [None]:
coords.sort_values(by="cluster")