# MEDLINE co-occurrence graph demo

In [2]:
import numpy as np
import pandas as pd
from tqdm import tqdm

In [28]:
from util.co_oc_graph import MedlineCoocGraph
from util import text_util

## Opening co-occurrence graph

In [4]:
# Create the MEDLINE co-occurrence graph wrapper. The graph contents are
# read from a checkpoint afterwards via load_graph().
# NOTE(review): verbose=True presumably enables progress messages — confirm.
mcg_title_only_obj = MedlineCoocGraph(verbose=True)

In [5]:
# Restore the co-occurrence graph from a saved checkpoint file.
# NOTE(review): absolute cluster path; a configurable data directory would
# make the notebook portable. The file is a .pkl, presumably unpickled by
# load_graph() — only load checkpoints from a trusted source.
title_graph_ckpt_path = (
    '/lustre/acslab/shared/LLM_stuff'
    '/subgraph_retrieval/medline_cooc_graphs/'
    '2021_11_22_cooc_graph_titles_mcl.pkl'
)

mcg_title_only_obj.load_graph(title_graph_ckpt_path)

Loading MedlineCoocGraph object from checkpoint...
Done!


In [7]:
# Number of PMIDs known to the graph, taken as the length of its
# idx_to_pmid_list attribute.
# NOTE(review): the name suggests position i holds the PMID for internal
# index i — confirm against MedlineCoocGraph.

len(mcg_title_only_obj.idx_to_pmid_list)

28439282

In [22]:
# Fetch the PMIDs recorded for the co-occurrence of two CUIs, then show
# how many there are.

cooc_pmids = mcg_title_only_obj.get_cooc_pmids('C0017725', 'C0333668')
len(cooc_pmids)

147

## Performing shortest path search

In [9]:
# Build the graph-tool network representation of the co-occurrence graph.
# NOTE(review): presumably a prerequisite for find_shortest_paths() — confirm.
mcg_title_only_obj.construct_gt_network()

graph-tool network constructed!


In [15]:
# Endpoints of the search, given as CUIs.
source_cui, target_cui = 'C0733530', 'C0006873'

# Each returned path is a list of CUIs running from source_cui to target_cui.
sp_cui_list = mcg_title_only_obj.find_shortest_paths(source_cui, target_cui)

In [21]:
# Display the shortest paths found; each path is a list of CUIs.
sp_cui_list

[['C0733530', 'C0184758', 'C0006873'],
 ['C0733530', 'C0205177', 'C0006873'],
 ['C0733530', 'C1280500', 'C0006873'],
 ['C0733530', 'C0205314', 'C0006873'],
 ['C0733530', 'C1628982', 'C0006873']]

In [24]:
# Pick one of the shortest paths and collect, for every edge along it,
# the PMIDs in which the two endpoint CUIs co-occur.
selected_path = sp_cui_list[2]

# cut_date is optional: when given, only abstracts dated on or before the
# threshold are retrieved.
pmids_list_per_sp_edge_dict = mcg_title_only_obj.retrieve_abstr_ids_from_sp_nodes(
    selected_path, cut_date='1977'
)

In [25]:
# Display the mapping from each path edge (a CUI pair) to its list of PMID strings.
pmids_list_per_sp_edge_dict

{('C0733530', 'C1280500'): ['212990', '280160'],
 ('C1280500', 'C0006873'): ['924695',
  '5481568',
  '17743585',
  '822452',
  '4708887',
  '4435127',
  '4532020',
  '4415376',
  '972997',
  '1071389',
  '1071398',
  '1126112',
  '1235156']}

## Obtaining abstract texts

In [15]:
from agatha.util.sqlite3_lookup import Sqlite3LookupTable

In [26]:
# Location of the Agatha sentences SQLite database.
# NOTE(review): absolute cluster path; consider making the data root configurable.
agatha_sent_db_path = '/lustre/acslab/shared/Agatha_shared/2021_11_22_full/sentences.sqlite3'

In [27]:
# Open the sentences database as a lookup table; it is passed to the
# text_util helpers that fetch abstract text.
# NOTE(review): key/value schema is not visible here — confirm in agatha.
sents_db = Sqlite3LookupTable(agatha_sent_db_path)

In [35]:
# Look up the abstract text for a single PMID and display it.

example_pmid = '212990'
abstr_text = text_util.get_abstr_text(example_pmid, sents_db)
abstr_text

'Effect of a novel adenosine deaminase inhibitor (co-vidarabine, co-V) upon the antiviral activity in vitro and in vivo of vidarabine (Vira-Atm) for DNA virus replication. A new potent inhibitor of adenosine deaminase (co-vidarabine) was used in combination studies with adenine arabinoside (vidarabine, Vira-ATM) to protect this purine nucleoside from enzymatic deamination to the more weakly active metabolite, hypoxanthine arabinoside. Comparing the combination to vidarabine alone, a significant increase (10-fold) of the antiviral activity of the combined drugs was observed against herpes and vaccinia viruses in tissue culture and subcutaneously, against cranial herpesvirus infections in mice. Several other investigators have also recently reported several-fold enhancement of vidarabine activity by newly described deaminase inhibitors. They observed that plaque formation by several large DNA-containing viruses (herpes, vaccinia, varicella zoster) and an RNA-containing oncogenic virus wa

In [40]:
# Gather the abstract texts for every edge of the selected path. The result
# is keyed by edge (CUI pair); each value maps PMID -> abstract text.

path_context_dict = text_util.get_path_context(
    pmids_list_per_sp_edge_dict, sents_db
)

In [41]:
# Show the PMID -> abstract text mapping for one edge of the path.
first_edge = ('C0733530', 'C1280500')
path_context_dict[first_edge]

{'212990': 'Effect of a novel adenosine deaminase inhibitor (co-vidarabine, co-V) upon the antiviral activity in vitro and in vivo of vidarabine (Vira-Atm) for DNA virus replication. A new potent inhibitor of adenosine deaminase (co-vidarabine) was used in combination studies with adenine arabinoside (vidarabine, Vira-ATM) to protect this purine nucleoside from enzymatic deamination to the more weakly active metabolite, hypoxanthine arabinoside. Comparing the combination to vidarabine alone, a significant increase (10-fold) of the antiviral activity of the combined drugs was observed against herpes and vaccinia viruses in tissue culture and subcutaneously, against cranial herpesvirus infections in mice. Several other investigators have also recently reported several-fold enhancement of vidarabine activity by newly described deaminase inhibitors. They observed that plaque formation by several large DNA-containing viruses (herpes, vaccinia, varicella zoster) and an RNA-containing oncogen