# Network propagation development

- connecting results to graphs
- applying PPR

In [4]:
import os

import pandas as pd
import numpy as np

from napistu.ingestion import sbml
from napistu import sbml_dfs_core
from napistu import mechanism_matching

In [5]:
PATH_TO_TEST_DATA = os.path.expanduser("~/Desktop/GITHUB/napistu/lib/napistu-py/src/tests/test_data")
example_pathway = os.path.join(PATH_TO_TEST_DATA, "reactome_glucose_metabolism.sbml")
assert os.path.exists(example_pathway)

In [6]:
sbml_dfs = sbml_dfs_core.SBML_dfs(sbml.SBML(example_pathway))

species_identifiers = sbml_dfs.get_identifiers("species").query("bqb == 'BQB_IS'").query("ontology != 'reactome'")

INFO:napistu.utils:creating an edgelist linking index levels s_id, entry and linking it to levels defined by ontology, identifier
DEBUG:napistu.utils:label is not defined in table_schema; adding a constant (1)


In [7]:
# create a table whose index is s_ids and columns are faux-measurements
example_data = species_identifiers.groupby("ontology").head(10)[["ontology", "identifier"]]

example_data["results_a"] = np.random.randn(len(example_data))
example_data["results_b"] = np.random.randn(len(example_data))

example_data

Unnamed: 0,ontology,identifier,results_a,results_b
0,chebi,17925,0.609637,-0.533822
5,uniprot,Q9NQR9,0.084097,-0.627968
13,chebi,58225,0.670558,0.017215
16,chebi,15377,0.522962,-0.449284
19,chebi,18367,0.93228,0.483269
23,uniprot,O43826,1.379095,0.27731
46,chebi,57540,0.155778,-1.23979
49,chebi,30797,0.01817,-0.882434
52,chebi,57945,-0.208488,-0.59194
55,chebi,30744,-0.37365,1.207034


In [8]:
# pivot (identifier, ontology) to columns for each ontology
example_data_wide = example_data.pivot(columns = "ontology", values = "identifier", index = ["results_a", "results_b"]).reset_index().rename_axis(None, axis = 1)

example_data_wide

Unnamed: 0,results_a,results_b,chebi,uniprot
0,-1.178641,2.468686,,Q9UBX3
1,-1.119519,-0.615802,,P35557
2,-0.703161,-0.134402,,Q9BUM1
3,-0.404701,-1.620773,,P35575
4,-0.37365,1.207034,30744.0,
5,-0.208488,-0.59194,57945.0,
6,-0.074808,-1.055856,,Q16822
7,-0.041608,1.847863,,P53007
8,0.01817,-0.882434,30797.0,
9,0.036413,1.058785,16810.0,


In [9]:
results_tables = dict()
for ont in example_data["ontology"].unique():
    results_tables[ont] = example_data.query("ontology == @ont").drop(columns = "ontology")

results_tables

{'chebi':    identifier  results_a  results_b
 0       17925   0.609637  -0.533822
 13      58225   0.670558   0.017215
 16      15377   0.522962  -0.449284
 19      18367   0.932280   0.483269
 46      57540   0.155778  -1.239790
 49      30797   0.018170  -0.882434
 52      57945  -0.208488  -0.591940
 55      30744  -0.373650   1.207034
 58      15378   0.547976  -0.580348
 89      16810   0.036413   1.058785,
 'uniprot':     identifier  results_a  results_b
 5       Q9NQR9   0.084097  -0.627968
 23      O43826   1.379095   0.277310
 61      Q9UBX3  -1.178641   2.468686
 127     P53007  -0.041608   1.847863
 178     P35558   0.598112  -1.938725
 241     Q16822  -0.074808  -1.055856
 316     P35575  -0.404701  -1.620773
 399     Q9BUM1  -0.703161  -0.134402
 469     P35557  -1.119519  -0.615802
 522     Q14397   0.511818  -0.686399}

In [12]:
import utils

# options, for matching
# 1. match by identifier and a set of ontologies (provided by arg).
matched_s_ids = mechanism_matching.features_to_pathway_species(
    feature_identifiers = example_data.drop(columns = "ontology"), 
    species_identifiers = species_identifiers,
    ontologies = {"uniprot", "chebi"},
    feature_id_var = "identifier",
)

# 2. match by identifier and ontology.
utils.match_by_ontology_and_identifier(
    feature_identifiers = example_data, 
    species_identifiers = species_identifiers,
    ontologies = {"uniprot", "chebi"},
    feature_id_var = "identifier",
)

# 3. format wide identifier sets into a table with a single identifier column and apply strategy #2.
utils.match_features_to_wide_pathway_species(
    example_data_wide,
    species_identifiers,
    ontologies = {"uniprot", "chebi"},
    feature_id_var = "identifier",
)

# 4. format multiple tables by applying strategy #3 multiple times; storing results as separate `species_data` tables.

DEBUG:utils:Matching 10 features to 10 species for ontology uniprot
DEBUG:utils:Matching 10 features to 38 species for ontology chebi
INFO:utils:Found 20 total matches across 2 ontologies
DEBUG:utils:Validated ontology columns: {'uniprot', 'chebi'}
INFO:utils:Using columns as results: ['results_a', 'results_b']
DEBUG:utils:Final long format shape: (20, 4)
DEBUG:utils:Matching 10 features to 10 species for ontology uniprot
DEBUG:utils:Matching 10 features to 38 species for ontology chebi
INFO:utils:Found 20 total matches across 2 ontologies


Unnamed: 0,results_a,results_b,identifier,s_id,entry,ontology,url,bqb,s_name,s_Source
0,-1.178641,2.468686,Q9UBX3,S00000012,0,uniprot,https://purl.uniprot.org/uniprot/Q9UBX3,BQB_IS,SLC25A10,<napistu.source.Source object at 0x15b6f4b10>
1,-1.119519,-0.615802,P35557,S00000057,0,uniprot,https://purl.uniprot.org/uniprot/P35557,BQB_IS,GCK,<napistu.source.Source object at 0x15b621ad0>
2,-0.703161,-0.134402,Q9BUM1,S00000051,0,uniprot,https://purl.uniprot.org/uniprot/Q9BUM1,BQB_IS,G6PC3,<napistu.source.Source object at 0x15b620910>
3,-0.404701,-1.620773,P35575,S00000042,0,uniprot,https://purl.uniprot.org/uniprot/P35575,BQB_IS,G6PC,<napistu.source.Source object at 0x15b6ed610>
4,-0.074808,-1.055856,Q16822,S00000036,0,uniprot,https://purl.uniprot.org/uniprot/Q16822,BQB_IS,PCK2,<napistu.source.Source object at 0x15b6ea250>
5,-0.041608,1.847863,P53007,S00000019,0,uniprot,https://purl.uniprot.org/uniprot/P53007,BQB_IS,SLC25A1,<napistu.source.Source object at 0x15b65fcd0>
6,0.084097,-0.627968,Q9NQR9,S00000001,0,uniprot,https://purl.uniprot.org/uniprot/Q9NQR9,BQB_IS,G6PC2,<napistu.source.Source object at 0x15ba1aad0>
7,0.511818,-0.686399,Q14397,S00000058,0,uniprot,https://purl.uniprot.org/uniprot/Q14397,BQB_IS,GCKR,<napistu.source.Source object at 0x15b621050>
8,0.598112,-1.938725,P35558,S00000028,0,uniprot,https://purl.uniprot.org/uniprot/P35558,BQB_IS,PCK1,<napistu.source.Source object at 0x15ba04190>
9,1.379095,0.27731,O43826,S00000005,0,uniprot,https://purl.uniprot.org/uniprot/O43826,BQB_IS,SLC37A4,<napistu.source.Source object at 0x15b66fa10>


In [1]:
import test_utils

test_utils.test_validate_wide_ontologies()
test_utils.test_match_by_ontology_and_identifier()

INFO:utils:Auto-detected ontology columns: {'uniprot', 'chebi'}
DEBUG:utils:Validated ontology columns: {'uniprot', 'chebi'}
DEBUG:utils:Validated ontology columns: {'chebi'}
DEBUG:utils:Validated ontology columns: {'chebi'}
DEBUG:utils:Validated ontology columns: {'uniprot', 'chebi'}
DEBUG:utils:Validated ontology columns: {'reactome', 'ensembl_gene'}
DEBUG:utils:Matching 2 features to 2 species for ontology chebi
INFO:utils:Found 1 total matches across 1 ontologies
DEBUG:utils:Matching 2 features to 2 species for ontology uniprot
DEBUG:utils:Matching 2 features to 2 species for ontology chebi
INFO:utils:Found 2 total matches across 2 ontologies
DEBUG:utils:Matching 2 features to 2 species for ontology uniprot
DEBUG:utils:Matching 2 features to 2 species for ontology chebi
INFO:utils:Found 2 total matches across 2 ontologies
DEBUG:utils:Matching 1 features to 2 species for ontology chebi
DEBUG:utils:Matching 2 features to 2 species for ontology chebi
INFO:utils:Found 1 total matches a

<function napistu.mechanism_matching.features_to_pathway_species(feature_identifiers: 'pd.DataFrame', species_identifiers: 'pd.DataFrame', ontologies: 'set', feature_id_var: 'str') -> 'pd.DataFrame'>