In [2]:
from pathlib import Path

In [3]:
from panricci import RicciFlow, NormalizedRicciFlow
from panricci.distributions.variation_graph import DistributionNodes
from panricci.alignment import GraphAlignment, parse_alignment
from panricci.utils import GFALoader

[Ricci-Flow] 2024-06-24@11:36:06.784 | Environment variable FAISS_OPT_LEVEL is not set, so let's pick the instruction set according to the current CPU
[Ricci-Flow] 2024-06-24@11:36:06.786 | Loading faiss with AVX2 support.
[Ricci-Flow] 2024-06-24@11:36:07.325 | Successfully loaded faiss with AVX2 support.


___
# Alignment of two variation graphs 

### 1. Load graphs and apply Ricci-Flow

In [5]:
# Experiment configuration.
# LOSS_FUNCTION and ALPHA are only referenced by the commented-out
# alternative input paths kept at the end of this cell.
LOSS_FUNCTION = "depth_and_len"
NEW_LOSS_NAME = "pen_strings"
ALPHA = 243

# Output directory for this run; created up front (with parents) so later
# cells can write into it. Plain string: there is nothing to interpolate.
PATH_SAVE = Path("../output-prueba-no-cost-label/100-sars-cov-2")
PATH_SAVE.mkdir(exist_ok=True, parents=True)

# Passed as `normalized=` to run_ricci_flow for both graphs.
NORMALIZED = False
# path1 = "/data/analysis-paper/experiments/vg/sars-cov-2/50-SARS-CoV2-MSA.gfa"
# path2 = f"/data/analysis-paper/experiments-decompositions/sars-cov-2-50-row_maximal-not_alpha_consistent-not_fixblock/gfa-unchop/strings/penalization0-min_len0-min_coverage0-alpha{ALPHA}/50-SARS-CoV2-MSA.gfa"
# path2 = f"/data/analysis-paper/experiments-decompositions/sars-cov-2-50-row_maximal-not_alpha_consistent-not_fixblock/gfa-unchop/{LOSS_FUNCTION}/penalization0-min_len0-min_coverage0-alpha{ALPHA}/50-SARS-CoV2-MSA.gfa"

In [6]:
# Both inputs currently refer to the same GFA file, i.e. the graph is
# aligned against itself.
# NOTE(review): absolute, user-specific path -- adjust for your machine.
path1 = "/home/avila/panricci/data/100-sars-cov-2-ena.gfa"
path2 = path1  # a "-duplicated.gfa" variant was previously used here

In [7]:
def run_ricci_flow(path_gfa, name, normalized=False, iterations=5, alpha=0.5, sigma=10):
    """Load a graph from a GFA file and run Ricci flow on it.

    Parameters
    ----------
    path_gfa : str or Path
        GFA file handed to ``GFALoader``.
    name : str
        Forwarded as ``name`` to the flow's ``run`` method.
    normalized : bool, default False
        If truthy, ``NormalizedRicciFlow`` is used; otherwise ``RicciFlow``.
    iterations : int, default 5
        Forwarded as ``iterations`` to the flow's ``run`` method.
    alpha : float, default 0.5
        Forwarded as ``alpha`` to ``DistributionNodes``.
    sigma : number, default 10
        Forwarded as ``sigma`` to ``NormalizedRicciFlow``; unused when
        ``normalized`` is falsy.

    Returns
    -------
    The value returned by the flow's ``run`` method.

    Notes
    -----
    Relies on the notebook-level ``PATH_SAVE``: the flow is configured with
    ``save_intermediate_graphs=True`` and ``dirsave_graphs`` set to
    ``PATH_SAVE/alignment/ricci-flow`` (or ``.../normalized-ricci-flow``).
    """
    # Load the graph (the loader is configured with undirected=False).
    gfa_loader = GFALoader(undirected=False)
    G = gfa_loader(path_gfa)

    # Per-node distribution consumed by the flow.
    distribution = DistributionNodes(G, alpha=alpha)

    # Truthiness test instead of `is True`, so truthy flags that are not the
    # `True` singleton (e.g. numpy.bool_) still select the normalized flow.
    if normalized:
        ricci_flow = NormalizedRicciFlow(
            G,
            distribution,
            sigma=sigma,
            save_last=False,
            save_intermediate_graphs=True,
            dirsave_graphs=PATH_SAVE.joinpath("alignment/normalized-ricci-flow"),
        )
    else:
        ricci_flow = RicciFlow(
            G,
            distribution,
            save_last=False,
            save_intermediate_graphs=True,
            dirsave_graphs=PATH_SAVE.joinpath("alignment/ricci-flow"),
        )

    return ricci_flow.run(iterations=iterations, name=name)

In [8]:
# Ricci flow on the first input graph.
G1 = run_ricci_flow(path_gfa=path1, name="pangeblocks-1", normalized=NORMALIZED)

RicciFlow:   0%|          | 0/5 [00:00<?, ?it/s][Ricci-Flow] 2024-06-24@11:37:25.183 | iteration 1
[Ricci-Flow] 2024-06-24@11:37:25.184 | Ricci-Flow iteration 1
[Ricci-Flow] 2024-06-24@11:37:26.111 | curvature of edge K(10,1045)0.0001249843769529635 is > tol=1e-11
RicciFlow:  20%|██        | 1/5 [00:00<00:03,  1.08it/s][Ricci-Flow] 2024-06-24@11:37:26.113 | iteration 2
[Ricci-Flow] 2024-06-24@11:37:26.113 | Ricci-Flow iteration 2
[Ricci-Flow] 2024-06-24@11:37:27.018 | curvature of edge K(10,1045)9.920634920224103e-07 is > tol=1e-11
RicciFlow:  40%|████      | 2/5 [00:01<00:02,  1.09it/s][Ricci-Flow] 2024-06-24@11:37:27.019 | iteration 3
[Ricci-Flow] 2024-06-24@11:37:27.020 | Ricci-Flow iteration 3
[Ricci-Flow] 2024-06-24@11:37:27.936 | curvature of edge K(10,1045)7.874019769538165e-09 is > tol=1e-11
RicciFlow:  60%|██████    | 3/5 [00:02<00:01,  1.09it/s][Ricci-Flow] 2024-06-24@11:37:27.937 | iteration 4
[Ricci-Flow] 2024-06-24@11:37:27.938 | Ricci-Flow iteration 4
[Ricci-Flow] 2024-06

In [9]:
# Ricci flow on the second input graph.
G2 = run_ricci_flow(path_gfa=path2, name="pangeblocks-2", normalized=NORMALIZED)

RicciFlow:   0%|          | 0/5 [00:00<?, ?it/s][Ricci-Flow] 2024-06-24@11:37:54.982 | iteration 1
[Ricci-Flow] 2024-06-24@11:37:54.982 | Ricci-Flow iteration 1
[Ricci-Flow] 2024-06-24@11:37:55.895 | curvature of edge K(10,1045)0.0001249843769529635 is > tol=1e-11
RicciFlow:  20%|██        | 1/5 [00:00<00:03,  1.09it/s][Ricci-Flow] 2024-06-24@11:37:55.897 | iteration 2
[Ricci-Flow] 2024-06-24@11:37:55.898 | Ricci-Flow iteration 2
[Ricci-Flow] 2024-06-24@11:37:56.809 | curvature of edge K(10,1045)9.920634920224103e-07 is > tol=1e-11
RicciFlow:  40%|████      | 2/5 [00:01<00:02,  1.09it/s][Ricci-Flow] 2024-06-24@11:37:56.811 | iteration 3
[Ricci-Flow] 2024-06-24@11:37:56.812 | Ricci-Flow iteration 3
[Ricci-Flow] 2024-06-24@11:37:57.718 | curvature of edge K(10,1045)7.874019769538165e-09 is > tol=1e-11
RicciFlow:  60%|██████    | 3/5 [00:02<00:01,  1.10it/s][Ricci-Flow] 2024-06-24@11:37:57.720 | iteration 4
[Ricci-Flow] 2024-06-24@11:37:57.720 | Ricci-Flow iteration 4
[Ricci-Flow] 2024-06

### 2. Align the two graphs

In [10]:
# Configure the aligner with the Ricci embedding enabled and the sequence
# embedding disabled, then align the two graphs.
# NOTE(review): dirsave points outside PATH_SAVE -- confirm this is intended.
aligner = GraphAlignment(
    dirsave="../output/graph-alignment",
    ricci_embedding=True,
    seq_embedding=False,
    kmer_size=4,
)
alignment = aligner(G1, G2, name="pangeblocks-ricci_embedding")

[Ricci-Flow] 2024-06-24@11:39:01.648 | Creating bipartite graph
[Ricci-Flow] 2024-06-24@11:39:01.651 | start - create_bipartite_graph
[Ricci-Flow] 2024-06-24@11:39:02.443 | end - compute_node_embeddings
[Ricci-Flow] 2024-06-24@11:39:03.090 | end - compute_node_embeddings
[Ricci-Flow] 2024-06-24@11:39:22.345 | end - create_bipartite_graph
[Ricci-Flow] 2024-06-24@11:39:22.350 | Saving bipartite graph
[Ricci-Flow] 2024-06-24@11:39:28.364 | Starting alignment on bipartite graph: minimum-weight-full-matching
[Ricci-Flow] 2024-06-24@11:39:34.424 | filtering optimal alignment
[Ricci-Flow] 2024-06-24@11:39:34.425 | start - parse_optimal_alignment
[Ricci-Flow] 2024-06-24@11:39:34.431 | end - parse_optimal_alignment
[Ricci-Flow] 2024-06-24@11:39:34.434 | Done!


In [11]:
# Tabulate the alignment, order it by alignment cost and write it as a TSV
# into the run's output directory.
(
    parse_alignment(alignment, G1, G2)
    .sort_values(by="cost_alignment")
    .to_csv(PATH_SAVE / "alignment-pangeblocks.tsv", sep="\t")
)

### 3. Compute similarity between the graphs

In [None]:
from panricci.similarity import Jaccard

In [None]:
# Similarity of the two graphs based on the alignment computed above.
# NOTE(review): threshold_cost=500 is a magic number -- presumably a cutoff on
# the alignment cost; confirm and consider moving it to the config cell.
jaccard = Jaccard(threshold_cost=500)
jaccard(alignment, G1, G2)

In [None]:
# Size of the first graph (presumably its node count -- confirm).
len(G1)

In [None]:
# Size of the second graph (presumably its node count -- confirm).
len(G2)

In [None]:
# Number of entries in the alignment, for comparison with the graph sizes.
len(alignment)