In [1]:
import logging
import os

import pandas as pd

from napistu import utils
from napistu.gcs import downloads

# Set up logging
# basicConfig() configures the root logger; level=DEBUG means debug-level records
# from any library that logs (not only napistu) are emitted in the notebook output.
logging.basicConfig(level=logging.DEBUG)
# Named logger used by later cells of this notebook for progress messages.
logger = logging.getLogger("napistu")

import httpx

# Local directory that holds the Napistu assets used by this notebook.
# Defaults to the original location under the user's home directory; set the
# NAPISTU_DATA_DIR environment variable to point the notebook somewhere else
# without editing this cell. An unset or empty variable falls back to the default.
NAPISTU_DATA_DIR = os.path.expanduser(
    os.environ.get("NAPISTU_DATA_DIR") or "~/Desktop/DATA/napistu_data"
)
# Name of the public asset whose sub-assets are resolved further down.
NAPISTU_ASSET = "human_consensus_w_distances"


In [2]:
# Path (relative to the kernel's working directory) of a pickled graph for a
# test pathway; it is what NapistuGraph.from_pickle() reads further down.
# NOTE(review): the asset-loading cell below assigns napistu_graph_path again,
# so this value only takes effect when that cell is skipped.
napistu_graph_path = "napistu_data/test_pathway/test_pathway/napistu_graph.pkl"

In [None]:
def _load_subasset_path(subasset):
    """Return the local path of one sub-asset of NAPISTU_ASSET.

    Thin wrapper around ``downloads.load_public_napistu_asset`` that pins the
    asset name and the data directory configured at the top of the notebook.
    """
    return downloads.load_public_napistu_asset(
        asset=NAPISTU_ASSET,
        data_dir=NAPISTU_DATA_DIR,
        subasset=subasset,
    )


# Resolve the four sub-assets used below (sbml_dfs, napistu_graph,
# species_identifiers, precomputed_distances). Per the original note on this
# cell, each call downloads from a public GCS bucket, or, if the file already
# exists in NAPISTU_DATA_DIR, just returns the path of the existing asset.
sbml_dfs_path = _load_subasset_path("sbml_dfs")
napistu_graph_path = _load_subasset_path("napistu_graph")
species_identifiers_path = _load_subasset_path("species_identifiers")
precomputed_distances_path = _load_subasset_path("precomputed_distances")



In [3]:
from napistu import utils as napistu_utils
from napistu.network.ng_core import NapistuGraph
from napistu.sbml_dfs_core import SBML_dfs

In [4]:
from napistu import source
from napistu import identifiers
from napistu.network import neighborhoods

In [5]:
# Load the pathway model and its graph from the pickle paths resolved above.
# sbml_dfs must be loaded here: the neighborhood cells further down pass it to
# find_and_prune_neighborhoods / _precompute_neighbors / find_neighborhoods and
# raise NameError on a fresh kernel if it is left undefined.
# NOTE(review): from_pickle presumably unpickles the file, which can execute
# arbitrary code - only load assets from a source you trust.
sbml_dfs = SBML_dfs.from_pickle(sbml_dfs_path)
napistu_graph = NapistuGraph.from_pickle(napistu_graph_path)

In [6]:
# Convert the graph into two pandas objects for inspection.
# NOTE(review): presumably x holds the vertices and y the edges - confirm the
# order against NapistuGraph.to_pandas_dfs before relying on it.
x, y = napistu_graph.to_pandas_dfs()

In [1]:
# Display the first object returned by to_pandas_dfs(). This needs the cell that
# assigns x to have run in the current kernel; the NameError recorded for this
# cell comes from executing it first (execution count 1) in a fresh kernel.
x

NameError: name 'x' is not defined

In [None]:
# Species identifiers are stored as a tab-separated text file.
species_identifiers = pd.read_csv(species_identifiers_path, sep="\t")

# Precomputed distances are stored as parquet and read through napistu's helper.
precomputed_distances = utils.load_parquet(precomputed_distances_path)

In [None]:
from napistu.network.neighborhoods import find_and_prune_neighborhoods

# Compartmentalized species id used as the focal node of the neighborhood.
SC_ID = "SC00002412"

# Keyword options for the one-step neighborhood search + pruning call.
neighborhood_options = {
    "compartmentalized_species": SC_ID,
    "precomputed_distances": precomputed_distances,
    "top_n": 200,
    "order": 4,
    "verbose": True,
}

neighborhood = find_and_prune_neighborhoods(
    sbml_dfs,
    napistu_graph,
    **neighborhood_options,
)

In [None]:
# Look up the status of one species and order the result by its "r_name" column.
# NOTE(review): the id here is "S00002412" (S prefix) while the neighbouring
# cells use "SC00002412" (SC prefix) - confirm this is the intended species id.
sbml_dfs.species_status("S00002412").sort_values("r_name")

In [None]:
import math

# Call the module through an alias: the result of find_neighborhoods() is bound
# to the name `neighborhoods` below (later cells read it under that name), which
# replaces the imported module of the same name. Going through the alias keeps
# this cell re-runnable instead of failing with AttributeError on the second run.
from napistu.network import neighborhoods as neighborhood_module

# Focal node(s) and search settings shared by both calls in this cell.
compartmentalized_species = ["SC00002412"]
order = 4
top_n = 200

precomputed_neighbors = neighborhood_module._precompute_neighbors(
    compartmentalized_species,
    precomputed_distances,
    sbml_dfs,
    network_type="hourglass",
    order=order,
    # request 10% more than top_n; original author's open question:
    top_n=math.ceil(top_n * 1.1),  # ties when using head()?
)

# Unpruned neighborhoods, one entry per focal compartmentalized species.
neighborhoods = neighborhood_module.find_neighborhoods(
    sbml_dfs,
    napistu_graph,
    compartmentalized_species=compartmentalized_species,
    precomputed_neighbors=precomputed_neighbors,
    order=order,  # reuse the setting above rather than repeating the literal
    verbose=True,
)

In [None]:
from napistu.network.constants import NEIGHBORHOOD_DICT_KEYS, NAPISTU_GRAPH_EDGES, NAPISTU_GRAPH_VERTICES, NAPISTU_GRAPH_NODE_TYPES, DISTANCES
from napistu.network import neighborhoods as neighborhood_module
from napistu.constants import SBML_DFS


In [None]:
# Debugging walk-through of neighborhood pruning: for every focal species in
# `neighborhoods`, reduce the graph, vertices, edges and reaction sources to the
# vertex set returned by neighborhood_module._prune_vertex_set, printing the
# intermediate tables along the way. Results are collected by focal species id.
pruned_neighborhood_dicts = dict()

for an_sc_id, one_neighborhood in neighborhoods.items():
    raw_graph = one_neighborhood[NEIGHBORHOOD_DICT_KEYS.GRAPH]

    # vertices retained after pruning to top_n (the selection rule itself lives
    # in neighborhood_module._prune_vertex_set)
    pruned_vertices = neighborhood_module._prune_vertex_set(
        one_neighborhood, top_n=top_n
    )

    print("pruned vertices")
    print(pruned_vertices)

    # One row per vertex of the unpruned graph, in graph order, so the default
    # integer index of this frame doubles as the vertex index used below.
    all_neighbors = pd.DataFrame(
        {NAPISTU_GRAPH_VERTICES.NAME: raw_graph.vs[NAPISTU_GRAPH_VERTICES.NAME]}
    )

    print("all neighbors")
    print(all_neighbors)

    # positions of the retained vertices within the unpruned graph
    pruned_vertices_indices = all_neighbors[
        all_neighbors[NAPISTU_GRAPH_VERTICES.NAME].isin(
            pruned_vertices[NAPISTU_GRAPH_VERTICES.NAME]
        )
    ].index.tolist()

    pruned_neighborhood = raw_graph.subgraph(
        raw_graph.vs[pruned_vertices_indices],
        implementation="auto",
    )

    # edge attributes of the induced subgraph, one row per edge
    pruned_edges = pd.DataFrame([e.attributes() for e in pruned_neighborhood.es])

    print("node type")
    print(pruned_vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE])
    print("reaction spec")
    print(NAPISTU_GRAPH_NODE_TYPES.REACTION)

    # names of the reaction vertices that survive pruning
    pruned_reactions = pruned_vertices.loc[
        pruned_vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
        == NAPISTU_GRAPH_NODE_TYPES.REACTION,
        NAPISTU_GRAPH_VERTICES.NAME,
    ]

    print("prune reactions")
    print(pruned_reactions)

    # single lookup instead of re-indexing the dict in every branch
    reaction_sources = one_neighborhood[NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES]

    if pruned_reactions.shape[0] != 0:
        # pruned_reactions are the reactions that are KEPT: the mask below
        # retains their rows, so the message reports retention, not removal
        logger.info(
            f"Retaining {pruned_reactions.shape[0]} reactions when filtering reaction_sources"
        )

        if reaction_sources is None:
            logger.info("No reaction sources found in one_neighborhood")
            # allow for missing source information since this is currently optional
            pruned_reaction_sources = reaction_sources
        else:
            source_filtering_mask = reaction_sources[SBML_DFS.R_ID].isin(
                pruned_reactions
            )

            logger.info(
                f"source_filtering_mask contains {source_filtering_mask.sum()} reaction sources of {source_filtering_mask.shape[0]} total sources"
            )

            pruned_reaction_sources = reaction_sources[source_filtering_mask]
    else:
        # no reaction vertices retained: sources are passed through unchanged
        pruned_reaction_sources = reaction_sources

    pruned_neighborhood_dict = {
        NEIGHBORHOOD_DICT_KEYS.GRAPH: pruned_neighborhood,
        NEIGHBORHOOD_DICT_KEYS.VERTICES: pruned_vertices,
        NEIGHBORHOOD_DICT_KEYS.EDGES: pruned_edges,
        NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES: pruned_reaction_sources,
    }

    # check the assembled pieces with napistu's own validator before keeping them
    neighborhood_module._validate_neighborhood_consistency(
        pruned_neighborhood_dict, an_sc_id
    )
    pruned_neighborhood_dicts[an_sc_id] = pruned_neighborhood_dict


In [None]:
# Re-select the neighborhood for the focal species last visited by the loop
# above and pull out its unpruned graph and vertex table.
one_neighborhood = neighborhoods[an_sc_id]
raw_graph = one_neighborhood[NEIGHBORHOOD_DICT_KEYS.GRAPH]
vertices = one_neighborhood[NEIGHBORHOOD_DICT_KEYS.VERTICES]

# Reaction vertex names after pruning ...
is_pruned_reaction = (
    pruned_vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
    == NAPISTU_GRAPH_NODE_TYPES.REACTION
)
pruned_reactions = set(
    pruned_vertices.loc[is_pruned_reaction, NAPISTU_GRAPH_VERTICES.NAME].tolist()
)

# ... and before pruning.
is_original_reaction = (
    vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE] == NAPISTU_GRAPH_NODE_TYPES.REACTION
)
original_reactions = set(
    vertices.loc[is_original_reaction, NAPISTU_GRAPH_VERTICES.NAME].tolist()
)

# Reactions present in the unpruned neighborhood but absent after pruning.
reactions_to_remove = original_reactions.difference(pruned_reactions)

reactions_to_remove







In [None]:

# Step-by-step version of the vertex-set pruning for `one_neighborhood`: per
# node orientation, keep the species whose path weight is within the top_n
# cutoff, add the entities on the stored paths to them, and subset the vertex
# table to the union across orientations.
neighborhood_vertices = one_neighborhood[NEIGHBORHOOD_DICT_KEYS.VERTICES]

# species vertices only, indexed by their orientation
indexed_neighborhood_species = neighborhood_vertices[
    neighborhood_vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
    == NAPISTU_GRAPH_NODE_TYPES.SPECIES
].set_index("node_orientation")

pruned_oriented_neighbors = list()
for a_node_orientation in indexed_neighborhood_species.index.unique().tolist():
    # A list indexer always yields a DataFrame, even when only one row carries
    # this label, so no Series -> frame coercion is needed and dtypes are kept.
    vertex_subset = indexed_neighborhood_species.loc[[a_node_orientation]]

    sorted_vertex_set = vertex_subset.sort_values(DISTANCES.PATH_WEIGHT)
    # path weight of the top_n-th row (or of the last row if fewer exist)
    weight_cutoff = sorted_vertex_set[DISTANCES.PATH_WEIGHT].iloc[
        min(top_n - 1, sorted_vertex_set.shape[0] - 1)
    ]

    # "<=" keeps every row tied with the cutoff, so more than top_n may remain
    top_neighbors = sorted_vertex_set[
        sorted_vertex_set[DISTANCES.PATH_WEIGHT] <= weight_cutoff
    ][NAPISTU_GRAPH_VERTICES.NAME].tolist()

    # include reactions and other species necessary to reach the top neighbors
    # by pulling in the past solutions to weighted shortest paths problems
    if a_node_orientation in one_neighborhood["neighborhood_path_entities"].keys():
        # path to/from focal node to each species
        neighborhood_path_entities = one_neighborhood["neighborhood_path_entities"][
            a_node_orientation
        ]

        top_neighbors = set().union(
            *[neighborhood_path_entities[p] for p in top_neighbors]
        )

    pruned_oriented_neighbors.append(top_neighbors)

# combine all neighbors
pruned_neighbors = set().union(*pruned_oriented_neighbors)

print("pruned neighbors")
print(pruned_neighbors)

# vertex table restricted to the retained names, with a fresh 0..n-1 index
pruned_vertices = neighborhood_vertices[
    neighborhood_vertices[NAPISTU_GRAPH_VERTICES.NAME].isin(pruned_neighbors)
].reset_index(drop=True)


In [None]:
# Reaction vertex names in the pruned vertex table ...
pruned_reaction_rows = (
    pruned_vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
    == NAPISTU_GRAPH_NODE_TYPES.REACTION
)
pruned_reactions = set(
    pruned_vertices.loc[pruned_reaction_rows, NAPISTU_GRAPH_VERTICES.NAME].tolist()
)

# ... and in the unpruned vertex table.
original_reaction_rows = (
    vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE] == NAPISTU_GRAPH_NODE_TYPES.REACTION
)
original_reactions = set(
    vertices.loc[original_reaction_rows, NAPISTU_GRAPH_VERTICES.NAME].tolist()
)
        

In [None]:
# Same per-orientation selection as the loop over orientations, run for a single
# orientation so the intermediate objects can be inspected afterwards.
a_node_orientation = "downstream"

# A list indexer always yields a DataFrame, even when only one row carries this
# label, so no Series -> frame coercion is needed and column dtypes are kept.
vertex_subset = indexed_neighborhood_species.loc[[a_node_orientation]]

sorted_vertex_set = vertex_subset.sort_values(DISTANCES.PATH_WEIGHT)
# path weight of the top_n-th row (or of the last row if fewer exist)
weight_cutoff = sorted_vertex_set[DISTANCES.PATH_WEIGHT].iloc[
    min(top_n - 1, sorted_vertex_set.shape[0] - 1)
]

# "<=" keeps every row tied with the cutoff, so more than top_n may remain
top_neighbors = sorted_vertex_set[
    sorted_vertex_set[DISTANCES.PATH_WEIGHT] <= weight_cutoff
][NAPISTU_GRAPH_VERTICES.NAME].tolist()

# include reactions and other species necessary to reach the top neighbors
# by pulling in the past solutions to weighted shortest paths problems
if a_node_orientation in one_neighborhood["neighborhood_path_entities"].keys():
    # path to/from focal node to each species
    neighborhood_path_entities = one_neighborhood["neighborhood_path_entities"][
        a_node_orientation
    ]

    top_neighbors = set().union(
        *[neighborhood_path_entities[p] for p in top_neighbors]
    )


In [None]:
# Inspect the species vertices selected for the orientation examined in the
# preceding cell.
vertex_subset

In [None]:
??neighborhoods.find_neighborhoods