In [1]:
from panricci import RicciFlow
from panricci.distributions.variation_graph import DistributionNodes
from panricci.alignment import GraphAlignment, parse_alignment
from panricci.utils import GFALoader

___
# Alignment of two variation graphs 

### 1. Load graphs and apply Ricci-Flow

In [2]:
# Two GFA files for the same region (DQA1-3117) taken from the same experiment
# directory; the only difference in the paths is the trailing alpha1 / alpha50.
path1, path2 = (
    "/data/analysis-paper/experiments/mini-experiment-standard-not_alpha_consistent-not_fixblock/gfa-unchop/nodes/penalization0-min_len0-min_coverage0-alpha1/DQA1-3117.gfa",
    "/data/analysis-paper/experiments/mini-experiment-standard-not_alpha_consistent-not_fixblock/gfa-unchop/nodes/penalization0-min_len0-min_coverage0-alpha50/DQA1-3117.gfa",
)

In [3]:
def run_ricci_flow(
    path_gfa,
    name,
    alpha=0.5,
    iterations=5,
    dirsave_graphs="../output/graph-alignment/ricci-flow",
):
    """Load a variation graph from a GFA file and run Ricci flow on it.

    Parameters
    ----------
    path_gfa
        Path of the GFA file; handed to a ``GFALoader(undirected=False)`` instance.
    name
        Label forwarded unchanged to ``RicciFlow.run`` as ``name``.
    alpha
        Forwarded to ``DistributionNodes`` as ``alpha``. Defaults to 0.5, the
        value that used to be hard-coded here.
    iterations
        Forwarded to ``RicciFlow.run`` as ``iterations``. Defaults to 5, the
        value that used to be hard-coded here.
    dirsave_graphs
        Forwarded to ``RicciFlow`` as ``dirsave_graphs``. Defaults to the
        directory that used to be hard-coded here.

    Returns
    -------
    The object returned by ``RicciFlow.run`` (in this notebook it is passed on
    to ``GraphAlignment`` and ``parse_alignment`` as a graph).
    """
    # Load the graph; undirected=False is kept from the original pipeline.
    gfa_loader = GFALoader(undirected=False)
    graph = gfa_loader(path_gfa)

    # Node distribution used by the Ricci-flow computation.
    distribution = DistributionNodes(graph, alpha=alpha)

    # Initialise Ricci flow and run it. save_last=False together with
    # save_intermediate_graphs=True is preserved as-is from the original call.
    ricci_flow = RicciFlow(graph, distribution, dirsave_graphs=dirsave_graphs)
    return ricci_flow.run(
        iterations=iterations,
        save_last=False,
        save_intermediate_graphs=True,
        name=name,
    )

In [4]:
# Apply the same Ricci-flow pipeline to both graphs; each run gets its own
# name so the two runs can be told apart.
G1, G2 = (
    run_ricci_flow(path1, "DQA1-3117-alpha1"),
    run_ricci_flow(path2, "DQA1-3117-alpha50"),
)

RicciFlow:   0%|          | 0/5 [00:00<?, ?it/s]

RicciFlow: 100%|██████████| 5/5 [00:03<00:00,  1.40it/s]
RicciFlow: 100%|██████████| 5/5 [00:00<00:00,  6.42it/s]


### 2. Align the two graphs

In [5]:
# Build the aligner once, then align the two Ricci-flow graphs.
# NOTE(review): dirsave is presumably where alignment artefacts are written — confirm.
aligner = GraphAlignment(
    dirsave="../output/graph-alignment",
)
alignment = aligner(
    G1,
    G2,
    name="DQA1-3117-alpha1-alpha50",
)

In [6]:
# Display the alignment as a table: per the output below, one row per aligned
# node pair with its alignment cost, node ids, labels and node depths.
parse_alignment(alignment, G1, G2)

Unnamed: 0,edge,cost_alignment,node1,node2,label1,label2,node_depth1,node_depth2
0,"[965-1, 99-2]",168.936823,965,99,C,C,0.2,0.2
1,"[999-1, 93-2]",159.289894,999,93,TTTAT,AATATAA,1.0,1.0
2,"[865-1, 89-2]",169.041005,865,89,AGAC,AAGTCC,1.0,1.0
3,"[91-1, 84-2]",165.869217,91,84,A,AC,0.8,0.8
4,"[961-1, 82-2]",169.317525,961,82,TG,TG,0.2,0.2
...,...,...,...,...,...,...,...,...
336,"[1001-1, 237-2]",168.870152,1001,237,A,TGATGAGATTTATGACTGCAAGGTGGAGCACTGGGGCCTGGATGAG...,0.8,0.8
337,"[247-1, 100-2]",167.121765,247,100,A,A,0.4,0.4
338,"[100-1, 163-2]",166.453387,100,163,AA,AACTTGGATGGCCAGATGTTGAGA,1.0,1.0
339,"[939-1, 10-2]",164.843641,939,10,G,G,0.6,0.6


### 3. Compute similarity between the graphs

In [7]:
from panricci.similarity import Jaccard

In [12]:
# Jaccard similarity between the two graphs, computed from the alignment.
# NOTE(review): threshold_cost=500 is a magic number; presumably aligned pairs
# whose cost exceeds it are ignored — confirm against panricci.similarity.
# The costs visible in the table above are all around 160-170.
jaccard = Jaccard(
    threshold_cost=500,
)
jaccard(
    alignment,
    G1,
    G2,
)

0.3583815028901734