# Demo for path context retriever

In [1]:
from path_cont_retr import PathContextRetrieval

In [2]:
# Shared HGCR configuration file handed to the retriever.
HGCR_CONFIG_PATH = '/lustre/acslab/shared/LLM_stuff/llm_expl_shared_data/hgcr_config_051024_shared.json'

# Build the retriever. Per the logged output, construction initialises the
# ranker model, node embeddings, co-occurrence graph and context embeddings.
path_cont_retr_obj = PathContextRetrieval(config_path=HGCR_CONFIG_PATH)

Init ranker model...
Init node embeddings...
Init co-occur graph...
graph-tool network constructed!
Init context embeddings...
Using MedCPT


Opening np chunks: 100%|█████████████████████████████████████████████████████████████| 38/38 [00:00<00:00, 722.21it/s]
Opening json index chunks: 100%|██████████████████████████████████████████████████████| 38/38 [00:04<00:00,  9.34it/s]
Constructing PMID lookup index: 100%|█████████████████████████████████████████████████| 38/38 [00:17<00:00,  2.22it/s]


In [None]:
# Optional cell -- skip it to keep the co-occurrence graph that was loaded when
# `path_cont_retr_obj` was constructed.
# Only run it if you want to switch to the abstract-level co-occurrence graph.
#
# NOTE(review): this cell mutates `path_cont_retr_obj` in place, so every query
# cell below runs against whichever graph was initialised last. The saved
# outputs in this notebook were produced WITHOUT running this cell (its
# execution count is empty). Presumably re-creating the object restores the
# default graph -- confirm.

# Point the retriever's configuration at the alternative graph file.
path_cont_retr_obj.config_dict['mcg_path'] = (
    '/lustre/acslab/shared/LLM_stuff/llm_expl_shared_data/2021_11_22_cooc_graph_mcl.pkl'
)

# Re-run graph initialisation (logs "Init co-occur graph...").
# Presumably `init_mcg` reads `config_dict['mcg_path']` -- confirm.
path_cont_retr_obj.init_mcg()

Init co-occur graph...


## Querying

In [3]:
# Query endpoints as concept identifiers (CUIs): EDNRB -> Bosentan.
source_cui, target_cui = 'C1414263', 'C0252643'

In [4]:
# Build the shortest-paths table between the source and target concepts.
# `n_paths_sample_size` is kept small because, for the abstract-level
# co-occurrence graph, the number of paths may be huge.
# NOTE(review): `n_eval_runs` presumably controls how many scoring runs feed the
# `score_mean` / `score_std` columns -- confirm against the method.
sp_df = path_cont_retr_obj.construct_shortest_paths_df(
    source_cui, target_cui,
    n_eval_runs=3,
    n_paths_sample_size=20,
)

Evaluating paths: 100%|█████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.10s/it]


In [5]:
# Inspect the result. In the saved output the only path returned is the direct
# source-target edge (a single two-node path).
sp_df

Unnamed: 0,path,score_std,score_mean,dec_path,context_pmids
0,"[C1414263, C0252643]",0.000242,0.993591,"[EDNRB gene, bosentan]","{('C1414263', 'C0252643'): ['14506620', '79216..."


### Removing trivial (existing) edge between source and target

In [6]:
# Same query, but with the direct source-target edge masked out so that the
# search has to go through intermediate concepts (the saved output shows
# 20 evaluated paths, each with two intermediate nodes).
# NOTE: this rebinds `sp_df`, replacing the result of the previous query.
# `n_paths_sample_size` is kept small because, for the abstract-level
# co-occurrence graph, the number of paths may be huge.
sp_df = path_cont_retr_obj.construct_shortest_paths_df(
    source_cui, target_cui,
    remove_trivial_edge=True,
    n_paths_sample_size=20,
    n_eval_runs=3,
)

Attempting to mask out trivial edge C1414263 - C0252643


Evaluating paths: 100%|███████████████████████████████████████████████████████████████| 20/20 [00:14<00:00,  1.42it/s]


In [7]:
# Inspect the indirect paths. In the saved output the rows appear in descending
# `score_mean` order.
sp_df

Unnamed: 0,path,score_std,score_mean,dec_path,context_pmids
9,"[C1414263, C4320607, C0216784, C0252643]",0.00041,0.998342,"[EDNRB gene, PGR-AS1 gene, valsartan, bosentan]","{('C1414263', 'C4320607'): ['19353416'], ('C43..."
17,"[C1414263, C0045283, C0041956, C0252643]",0.0,0.997743,"[EDNRB gene, tempol, Ureteral obstruction, bos...","{('C1414263', 'C0045283'): ['15452035'], ('C00..."
16,"[C1414263, C0533668, C0343084, C0252643]",0.0,0.994953,"[EDNRB gene, ANGPT1 protein, human, Capillary ...","{('C1414263', 'C0533668'): ['31957020'], ('C05..."
2,"[C1414263, C0045283, C0030493, C0252643]",0.0,0.994007,"[EDNRB gene, tempol, Paraquat, bosentan]","{('C1414263', 'C0045283'): ['15452035'], ('C00..."
4,"[C1414263, C0040057, C3887559, C0252643]",0.0,0.993401,"[EDNRB gene, Thromboxane A2, Recombinant Human...","{('C1414263', 'C0040057'): ['8832063', '788928..."
6,"[C1414263, C4320607, C0730345, C0252643]",0.0,0.991829,"[EDNRB gene, PGR-AS1 gene, Microalbuminuria, b...","{('C1414263', 'C4320607'): ['19353416'], ('C43..."
1,"[C1414263, C0127082, C0241910, C0252643]",0.0,0.989649,"[EDNRB gene, Interstitial Collagenase, Autoimm...","{('C1414263', 'C0127082'): ['17555880'], ('C01..."
0,"[C1414263, C0207072, C0074554, C0252643]",0.0,0.987545,"[EDNRB gene, Glial Cell Line-Derived Neurotrop...","{('C1414263', 'C0207072'): ['12925014', '11302..."
14,"[C1414263, C0014356, C0004943, C0252643]",0.0,0.985537,"[EDNRB gene, Enterocolitis, Behcet Syndrome, b...","{('C1414263', 'C0014356'): ['35302172'], ('C00..."
3,"[C1414263, C0383327, C0206061, C0252643]",0.0,0.984322,"[EDNRB gene, Interleukin-18, Pneumonia, Inters...","{('C1414263', 'C0383327'): ['15075358'], ('C03..."


In [8]:
# Decoded concept names along each of the first five paths in `sp_df`.
sp_df['dec_path'].head(5).to_list()

[['EDNRB gene', 'PGR-AS1 gene', 'valsartan', 'bosentan'],
 ['EDNRB gene', 'tempol', 'Ureteral obstruction', 'bosentan'],
 ['EDNRB gene',
  'ANGPT1 protein, human',
  'Capillary Leak Syndrome',
  'bosentan'],
 ['EDNRB gene', 'tempol', 'Paraquat', 'bosentan'],
 ['EDNRB gene',
  'Thromboxane A2',
  'Recombinant Human Macrophage Inflammatory Protein-1 Beta',
  'bosentan']]

In [9]:
# Supporting PMIDs for the first five paths in `sp_df`; in the saved output each
# entry maps a (CUI, CUI) edge to a list of PMID strings.
sp_df['context_pmids'].head(5).to_list()

[{('C1414263', 'C4320607'): ['19353416'],
  ('C4320607', 'C0216784'): ['16103268', '12023686', '26175121', '22392065'],
  ('C0216784', 'C0252643'): ['20392896']},
 {('C1414263', 'C0045283'): ['15452035'],
  ('C0045283', 'C0041956'): ['24550650'],
  ('C0041956', 'C0252643'): ['25157662']},
 {('C1414263', 'C0533668'): ['31957020'],
  ('C0533668', 'C0343084'): ['33351228'],
  ('C0343084', 'C0252643'): ['31012372']},
 {('C1414263', 'C0045283'): ['15452035'],
  ('C0045283', 'C0030493'): ['17640563'],
  ('C0030493', 'C0252643'): ['24155875']},
 {('C1414263', 'C0040057'): ['8832063', '7889281'],
  ('C0040057', 'C3887559'): ['2119043'],
  ('C3887559', 'C0252643'): ['8587341']}]