In [13]:
from pathlib import Path
from panricci import RicciFlow, NormalizedRicciFlow
from panricci.distributions.variation_graph import DistributionNodes
from panricci.alignment import GraphAlignment, parse_alignment
from panricci.utils import GFALoader

___
# Alignment of two variation graphs 

### 1. Load graphs and apply Ricci-Flow

In [14]:
# --- Experiment configuration -------------------------------------------------
LOSS_FUNCTION = "depth_and_len"   # decomposition whose graph is loaded as path2
NEW_LOSS_NAME = "pen_strings"     # only used to name the output directory
ALPHA = 243                       # alpha value embedded in the input/output paths
PATH_SAVE = Path(f"../output/pangeblocks-strings-{NEW_LOSS_NAME}-alpha{ALPHA}")
PATH_SAVE.mkdir(exist_ok=True, parents=True)
NORMALIZED = False                # True -> NormalizedRicciFlow, False -> RicciFlow

# Alternative first graph (vg), kept for reference:
# path1 = "/data/analysis-paper/experiments/vg/sars-cov-2/50-SARS-CoV2-MSA.gfa"

# First graph: "strings" decomposition.
# This was previously assigned to `path2` and immediately overwritten, which left
# `path1` undefined on a fresh kernel (Restart & Run All raised NameError).
path1 = f"/data/analysis-paper/experiments-decompositions/sars-cov-2-50-row_maximal-not_alpha_consistent-not_fixblock/gfa-unchop/strings/penalization0-min_len0-min_coverage0-alpha{ALPHA}/50-SARS-CoV2-MSA.gfa"
# Second graph: decomposition selected by LOSS_FUNCTION.
path2 = f"/data/analysis-paper/experiments-decompositions/sars-cov-2-50-row_maximal-not_alpha_consistent-not_fixblock/gfa-unchop/{LOSS_FUNCTION}/penalization0-min_len0-min_coverage0-alpha{ALPHA}/50-SARS-CoV2-MSA.gfa"

In [15]:
# Alternative input pair, currently disabled: the DQA1-3117 "nodes" graphs built with alpha1 and alpha10.
# path1 = "/data/analysis-paper/experiments/mini-experiment-standard-not_alpha_consistent-not_fixblock/gfa-unchop/nodes/penalization0-min_len0-min_coverage0-alpha1/DQA1-3117.gfa"
# path2 = "/data/analysis-paper/experiments/mini-experiment-standard-not_alpha_consistent-not_fixblock/gfa-unchop/nodes/penalization0-min_len0-min_coverage0-alpha10/DQA1-3117.gfa"

In [16]:
def run_ricci_flow(path_gfa, name, normalized=False, *,
                   iterations=5, alpha=0.5, sigma=10, dirsave=None):
    """Load a GFA graph and run Ricci flow (or normalized Ricci flow) on it.

    Parameters
    ----------
    path_gfa : str or Path
        Path of the GFA file handed to ``GFALoader``.
    name : str
        Label forwarded to ``ricci_flow.run(..., name=name)``.
    normalized : bool, default False
        Truthy selects ``NormalizedRicciFlow``; falsy selects ``RicciFlow``.
    iterations : int, default 5
        Number of iterations passed to ``ricci_flow.run``.
    alpha : float, default 0.5
        ``alpha`` argument of ``DistributionNodes``.
    sigma : default 10
        ``sigma`` argument of ``NormalizedRicciFlow`` (ignored otherwise).
    dirsave : Path or None, default None
        Directory for the intermediate graphs. When None, falls back to
        ``PATH_SAVE/alignment/normalized-ricci-flow`` or
        ``PATH_SAVE/alignment/ricci-flow`` depending on ``normalized``.

    Returns
    -------
    The object returned by ``ricci_flow.run``.
    """
    # Load the graph as a directed graph.
    graph = GFALoader(undirected=False)(path_gfa)

    # Per-node distribution consumed by the flow.
    distribution = DistributionNodes(graph, alpha=alpha)

    # Options shared by both flow variants.
    save_options = dict(save_last=False, save_intermediate_graphs=True)

    # Truthiness test instead of `is True`: values such as numpy.bool_(True)
    # are not the `True` singleton and used to fall through to the plain flow.
    if normalized:
        if dirsave is None:
            dirsave = PATH_SAVE.joinpath("alignment/normalized-ricci-flow")
        ricci_flow = NormalizedRicciFlow(graph,
                                         distribution,
                                         sigma=sigma,
                                         dirsave_graphs=dirsave,
                                         **save_options)
    else:
        if dirsave is None:
            dirsave = PATH_SAVE.joinpath("alignment/ricci-flow")
        ricci_flow = RicciFlow(graph,
                               distribution,
                               dirsave_graphs=dirsave,
                               **save_options)

    return ricci_flow.run(iterations=iterations, name=name)

In [17]:
# Run the flow on both input graphs with the same NORMALIZED setting;
# the second argument is the label passed through to ricci_flow.run.
# NOTE(review): `path1` and `path2` must both be defined by an earlier cell — verify on a fresh kernel.
G1 = run_ricci_flow(path1, "pangeblocks-strings", normalized=NORMALIZED)
G2 = run_ricci_flow(path2, "pangeblocks-pos_strings", normalized=NORMALIZED)

RicciFlow: 100%|██████████| 5/5 [00:03<00:00,  1.62it/s]
RicciFlow: 100%|██████████| 5/5 [00:03<00:00,  1.67it/s]


### 2. Align the two graphs

In [18]:
# Configure the aligner with the Ricci embedding enabled and the sequence
# embedding disabled.
# NOTE(review): kmer_size presumably only matters for the sequence embedding — confirm.
aligner = GraphAlignment(
    dirsave="../output/graph-alignment",
    ricci_embedding=True,
    seq_embedding=False,
    kmer_size=4,
)

# Align the two graphs produced by the Ricci-flow step.
alignment = aligner(G1, G2, name="pangeblocks-ricci_embedding")

In [19]:
# Tabulate the alignment, sort it by the "cost_alignment" column and
# write it as a tab-separated file under PATH_SAVE.
(
    parse_alignment(alignment, G1, G2)
    .sort_values(by="cost_alignment")
    .to_csv(
        PATH_SAVE.joinpath("alignment-pangeblocks-strings-pos_strings.tsv"),
        sep="\t",
    )
)

### 3. Compute similarity between the graphs

In [20]:
from panricci.similarity import Jaccard

In [21]:
# Jaccard similarity of the two graphs, computed from their alignment.
# NOTE(review): threshold_cost=500 is a magic number — presumably aligned pairs
# whose cost exceeds it are not counted; confirm against panricci.similarity.Jaccard.
jaccard = Jaccard(threshold_cost=500)
jaccard(alignment, G1, G2)

1.0

In [22]:
# Size of the first graph (presumably its node count — confirm the graph type).
len(G1)

1358

In [23]:
# Size of the second graph, for comparison with len(G1).
len(G2)

1358

In [24]:
# Number of entries in the alignment, for comparison with the two graph sizes.
len(alignment)

1358