In [1]:
from panricci import RicciFlow
from panricci.distributions.variation_graph import DistributionNodes
from panricci.alignment import GraphAlignment, parse_alignment
from panricci.utils import GFALoader

___
# Alignment of two variation graphs 

### 1. Load graphs and apply Ricci-Flow

In [2]:
# Both GFA files come from the same experiment directory and the same locus file;
# only the `alpha` value in the parameter directory name differs (1 vs 50).
GFA_ROOT = "/data/analysis-paper/experiments/mini-experiment-standard-not_alpha_consistent-not_fixblock/gfa-unchop/nodes"
PARAMS_TEMPLATE = "penalization0-min_len0-min_coverage0-alpha{alpha}"
path1, path2 = (
    f"{GFA_ROOT}/{PARAMS_TEMPLATE.format(alpha=alpha)}/DQA1-3117.gfa"
    for alpha in (1, 50)
)

In [3]:
def run_ricci_flow(
    path_gfa,
    name,
    alpha=0.5,
    iterations=5,
    dirsave_graphs="../output/graph-alignment/ricci-flow",
):
    """Load a GFA file as a directed graph and run Ricci-Flow on it.

    Parameters
    ----------
    path_gfa : path to the GFA file, passed as-is to ``GFALoader``.
    name : label forwarded to ``RicciFlow.run`` as ``name``.
    alpha : value forwarded to ``DistributionNodes`` (default 0.5).
        NOTE(review): this is unrelated to the ``alpha1``/``alpha50`` that
        appears in the input file paths — confirm the naming is not confusing.
    iterations : number of Ricci-Flow iterations (default 5).
    dirsave_graphs : directory handed to ``RicciFlow`` as ``dirsave_graphs``.

    Returns
    -------
    Whatever ``RicciFlow.run`` returns (used downstream as the graph after
    Ricci-Flow).
    """
    # Load the graph; undirected=False asks the loader for a directed graph.
    G = GFALoader(undirected=False)(path_gfa)

    # Node distribution that RicciFlow takes as its second argument.
    distribution = DistributionNodes(G, alpha=alpha)

    # Intermediate graphs are saved at each step; the final one is not saved separately.
    ricci_flow = RicciFlow(G, distribution, dirsave_graphs=dirsave_graphs)
    return ricci_flow.run(
        iterations=iterations,
        save_last=False,
        save_intermediate_graphs=True,
        name=name,
    )

In [4]:
# Apply Ricci-Flow to each input graph; the run name mirrors the alpha value
# found in the corresponding input path.
G1 = run_ricci_flow(path_gfa=path1, name="DQA1-3117-alpha1")
G2 = run_ricci_flow(path_gfa=path2, name="DQA1-3117-alpha50")

RicciFlow:   0%|          | 0/5 [00:00<?, ?it/s]

RicciFlow: 100%|██████████| 5/5 [00:03<00:00,  1.41it/s]
RicciFlow: 100%|██████████| 5/5 [00:00<00:00,  6.38it/s]


### 2. Align the two graphs

In [5]:
# Build the aligner; `dirsave` is presumably where it writes its results — confirm.
aligner = GraphAlignment(dirsave="../output/graph-alignment")
# Align the two graphs obtained after Ricci-Flow; `name` labels this alignment run.
alignment = aligner(G1, G2, name="DQA1-3117-alpha1-alpha50")

In [6]:
# Show the raw alignment: per the recorded output, a list of ((node, node), cost) tuples
# whose node ids carry a "-1"/"-2" suffix (presumably the graph of origin — confirm).
alignment

[(('965-1', '99-2'), 168.9368234842379),
 (('93-2', '999-1'), 159.28989361198998),
 (('865-1', '89-2'), 169.04100460424564),
 (('84-2', '91-1'), 165.86921679789688),
 (('82-2', '961-1'), 169.31752544825812),
 (('817-1', '88-2'), 159.75253746261237),
 (('815-1', '9-2'), 165.39574980481464),
 (('78-2', '868-1'), 159.39900011601034),
 (('7-2', '921-1'), 164.185403661256),
 (('67-2', '775-1'), 166.0354286016337),
 (('63-2', '787-1'), 169.2961103318189),
 (('62-2', '840-1'), 169.0187061981841),
 (('6-2', '97-1'), 165.50697546831464),
 (('597-1', '97-2'), 164.94605075867105),
 (('580-1', '72-2'), 165.82200919479527),
 (('58-2', '788-1'), 164.02233252597466),
 (('57-2', '862-1'), 165.96931834797354),
 (('56-2', '600-1'), 166.45961339304338),
 (('558-1', '76-2'), 169.60374020212632),
 (('556-1', '90-2'), 160.05593769690054),
 (('54-2', '962-1'), 165.76102318901007),
 (('53-2', '671-1'), 160.13521460867574),
 (('5-2', '605-1'), 165.09256822536364),
 (('498-1', '87-2'), 163.6048733166414),
 (('4

In [7]:
# Tabulate the alignment: per the recorded output, one row per matched pair with its
# cost plus each node's id, label and depth.
parse_alignment(alignment, G1, G2)

Unnamed: 0,edge,cost_alignment,node1,node2,label1,label2,node_depth1,node_depth2
0,"[965-1, 99-2]",168.936823,965,99,C,C,0.2,0.2
1,"[999-1, 93-2]",159.289894,999,93,TTTAT,AATATAA,1.0,1.0
2,"[865-1, 89-2]",169.041005,865,89,AGAC,AAGTCC,1.0,1.0
3,"[91-1, 84-2]",165.869217,91,84,A,AC,0.8,0.8
4,"[961-1, 82-2]",169.317525,961,82,TG,TG,0.2,0.2
...,...,...,...,...,...,...,...,...
336,"[1001-1, 303-2]",165.952288,1001,303,A,GTGCCAAAAAATGAAGCCCATAATATTTGAAAGTCAGTTTCTTCCA...,0.8,0.8
337,"[247-1, 100-2]",167.121765,247,100,A,A,0.4,0.4
338,"[100-1, 163-2]",166.453387,100,163,AA,AACTTGGATGGCCAGATGTTGAGA,1.0,1.0
339,"[939-1, 10-2]",164.843641,939,10,G,G,0.6,0.6


### 3. Compute similarity between the graphs

In [8]:
from panricci.similarity import Jaccard

In [9]:
# NOTE(review): threshold_cost=500 is a magic number. The costs visible in the
# alignment output are all roughly 159-170, so presumably no pair is filtered
# out by this threshold — confirm against the Jaccard implementation.
jaccard = Jaccard(threshold_cost=500)
# Similarity score between the two graphs based on the alignment.
jaccard(alignment, G1, G2)

0.3583815028901734

In [10]:
# Size of the first graph (presumably its node count — confirm the graph type).
len(G1)

1562

In [11]:
# Size of the second graph (presumably its node count — confirm the graph type).
len(G2)

341

In [12]:
# Number of aligned pairs; in the recorded run this equals len(G2), the smaller graph.
len(alignment)

341