In [1]:
from panricci import RicciFlow
from panricci.distributions.variation_graph import DistributionNodes
from panricci.alignment import GraphAlignment, parse_alignment
from panricci.utils import GFALoader

___
# Alignment of two variation graphs 

### 1. Load graphs and apply Ricci-Flow

In [2]:
path1 = "/data/analysis-paper/experiments/mini-experiment-standard-not_alpha_consistent-not_fixblock/gfa-unchop/nodes/penalization0-min_len0-min_coverage0-alpha1/DQA1-3117.gfa"
path2 = "/data/analysis-paper/experiments/mini-experiment-standard-not_alpha_consistent-not_fixblock/gfa-unchop/nodes/penalization0-min_len0-min_coverage0-alpha10/DQA1-3117.gfa"

In [3]:
def run_ricci_flow(path_gfa, name):
    # load graph
    gfa_loader = GFALoader(undirected=False)
    G = gfa_loader(path_gfa)

    # compute distribution of nodes
    distribution = DistributionNodes(G, alpha=0.5)

    # Initialize ricci-flow
    ricci_flow = RicciFlow(G, distribution, dirsave_graphs="../output/graph-alignment/ricci-flow")
    G_ricci = ricci_flow.run(iterations=5, save_last=False, save_intermediate_graphs=True, name=name)

    return G_ricci

In [4]:
G1 = run_ricci_flow(path1, "DQA1-3117-alpha1")
G2 = run_ricci_flow(path2, "DQA1-3117-alpha10")

RicciFlow: 100%|██████████| 5/5 [00:03<00:00,  1.41it/s]
RicciFlow: 100%|██████████| 5/5 [00:01<00:00,  2.87it/s]


### 2. Align them 

In [14]:
aligner = GraphAlignment(dirsave="../output/graph-alignment", ricci_embedding = False, seq_embedding = True, kmer_size=4)
alignment = aligner(G1, G2, name="DQA1-3117-alpha1-alpha10")

In [16]:
parse_alignment(alignment, G1, G2).sort_values(by="cost_alignment")

Unnamed: 0,edge,cost_alignment,node1,node2,label1,label2,node_depth1,node_depth2
327,"[300-1, 188-2]",0.002893,300,188,AAAAATATCAAAAGTAAAAATGTATTCTCAAAACTTTAAATTTATG...,AAAAATATCAAAAGTAAAAATGTATTCTCAAAACTTTAAATTTATG...,0.1,0.1
395,"[1473-1, 187-2]",0.008055,1473,187,TTTGAATC,ATTTGAATCTC,1.0,1.0
559,"[1179-1, 353-2]",0.027863,1179,353,TACAGTACATTGTATCTGTTCCCTTACCTACCTGACTCTTCCACTA...,CGATATCCTCCTCAGAGAACTTACTACAGTACATTGTATCTGTTCC...,0.3,0.3
63,"[870-1, 507-2]",0.028338,870,507,G,CATGGGCAATGCAGAGAACCCTCCAT,0.4,0.4
465,"[1341-1, 260-2]",0.028390,1341,260,T,CTTTTAAGATTAGAAAAAAATGAAATTTCAGAGCCTAAGAAAATG,0.9,0.9
...,...,...,...,...,...,...,...,...
170,"[354-1, 439-2]",0.160643,354,439,G,CAGAGAAATAAGTTA,0.8,0.8
240,"[502-1, 279-2]",0.167273,502,279,TG,TGGAGAAATAAATTA,0.4,0.4
464,"[1349-1, 580-2]",0.171804,1349,580,T,CACAATTGCTCTACAGCTCAGAACAGCAACTGCTG,0.4,0.4
307,"[210-1, 400-2]",0.216239,210,400,A,GGATAAAGGAA,0.8,0.8


### 3. Compute Similarity between graphs

In [8]:
from panricci.similarity import Jaccard

In [9]:
jaccard = Jaccard(threshold_cost=500)
jaccard(alignment, G1, G2)

0.5997310623038996

In [10]:
len(G1)

1562

In [11]:
len(G2)

669

In [12]:
len(alignment)

669

In [13]:
df_align

NameError: name 'df_align' is not defined