In [None]:
import sys
import os
import pandas as pd
from tqdm import tqdm

# Make the modules in the parent directory importable by the imports below.
# Note: '..' is resolved against the kernel's current working directory
# (normally the folder containing this notebook), not the notebook file itself.
sys.path.append(os.path.abspath('..'))

import search_set
import indicator
import literature


## Example Code for Generating Literature Indicators
This example notebook shows how to use the clinical score package to generate literature indicators for the genes and drugs present in DGIdb data. It demonstrates three of the four search strategies: *Raw String*, *NCBI GeneID*, and *PubTator3*.

### Generate DGIdb Search Set
Trim the DGIdb interactions database file to just the set of relevant interactions for a given gene.

In [None]:
# Gene symbol for this run; it is reused by the PMID lookup in a later cell.
gene = 'BCL2'
# Build the search set for this gene.
# NOTE(review): presumably writes the trimmed DGIdb interactions file that is
# read back in the scoring section — confirm the output location in search_set.
search_set.generate_search_set(gene)

### Grab PMIDs, Fetch Abstracts
Use a desired search method to obtain a list of relevant PubMed IDs to retrieve abstracts for downstream indicator analysis.  
  
Currently defined search methods are: *Raw String*, *NCBI GeneID*, *PubTator3*, *PubTator3+Drug*  
  
After PMIDs are identified, fetch all the abstracts.

In [None]:
# Search step: collect the PubMed IDs for `gene` using the PubTator3 search method.
pmids = literature.fetch_pmids_by_pubtator3(gene)
# Fetch step: retrieve the abstracts for every PMID found above.
# NOTE(review): both calls presumably query remote services and may be slow — confirm.
abstracts = literature.fetch_abstracts(pmids)

In [None]:
# Display the fetched abstracts as this cell's output.
# NOTE(review): this renders the whole object, which may be large — consider
# showing only a preview once the return type of fetch_abstracts is confirmed.
abstracts

### Generate Scores
Use the retrieved abstracts to determine the literature indicators for each gene–drug interaction pair.


In [None]:
# Load the search set for `gene` instead of a hard-coded, dated file name, so the
# cell still works when the notebook is re-run on another day or for another gene.
# NOTE(review): assumes files are named search/<YYYY-MM-DD>_<gene>_clin_score.csv,
# as in the previously hard-coded path — confirm against search_set.generate_search_set.
search_dir = 'search'
file_suffix = f'_{gene}_clin_score.csv'
matching_files = sorted(
    name for name in os.listdir(search_dir) if name.endswith(file_suffix)
)
if not matching_files:
    raise FileNotFoundError(
        f"No '*{file_suffix}' file found in '{search_dir}/'; "
        "run search_set.generate_search_set(gene) first."
    )
# ISO-dated prefixes sort chronologically, so the last name is the newest file.
search_set_path = os.path.join(search_dir, matching_files[-1])

# Read the interactions and keep only the first row for each (Drug, Gene) pair.
dgidb_df = (
    pd.read_csv(search_set_path)
    .drop_duplicates(subset=['Drug', 'Gene'], keep='first')
)
dgidb_df.head()

In [None]:
# Compute the literature indicators from the fetched abstracts and the
# de-duplicated interaction table; the return value is shown as the cell output.
# NOTE(review): stop=100 presumably caps how many items are processed for this
# example — confirm its meaning in indicator.generate_indicators.
indicator.generate_indicators(abstracts, dgidb_df, stop=100)