In [1]:
import concurrent
import concurrent.futures
import json
import os
import time

import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

from evaluation_functions import evaluate
from openai_functions import extract_ade_terms

## Set Up

### Variables

In [2]:
# Input files for the split being processed: the drug label text and the
# matching manual ADE annotations. The training split is active.
drug_file, manual_file = (
    'data/train_drug_label_text.csv',
    'data/train_drug_label_text_manual_ades.csv',
)

# To process the test split instead, use these paths:
# drug_file, manual_file = (
#     'data/test_drug_label_text.csv',
#     'data/test_drug_label_text_manual_ades.csv',
# )

# my_max = 10000

In [3]:
# Load the drug label sections and the manual ADE annotations for the chosen split.
drugs = pd.read_csv(drug_file)
manual_ades = pd.read_csv(manual_file)

# The split name is the file-name prefix before the first underscore
# (assumes the file is named "train_..." or "test_..."). Taking the basename
# first keeps this correct no matter how many directories precede the file.
set_type = os.path.basename(drug_file).split('_')[0]

# Add one synthetic "all-concat" section per drug: the text of all of its real
# sections joined with single spaces. Existing "all-concat" rows are excluded so
# they are never folded into the concatenation.
all_sections = (
    drugs.query("section_name != 'all-concat'")
    .groupby('drug_name')['section_text']
    .apply(' '.join)
    .reset_index()
)
# Insert at position 1 so the new frame has the column order
# drug_name, section_name, section_text before it is appended to `drugs`.
all_sections.insert(1, "section_name", ["all-concat"] * all_sections.shape[0])
drugs = pd.concat([drugs, all_sections])

In [4]:
# Sanity check: (rows, columns) of `drugs` once the "all-concat" rows have been appended.
drugs.shape

(340, 3)

## Run GPT

In [5]:
# Per-run results keyed by run name (run_key -> DataFrame of GPT output). The run
# loop fills it and the evaluation cells read it. Re-running this cell discards
# whatever is currently held in memory.
outputs = {}

In [6]:
# Load the API settings. The context manager closes the file handle as soon as
# the JSON has been parsed instead of leaving it open for the kernel's lifetime.
with open('./config.json') as config_file:
    config = json.load(config_file)

organization = ""

# Top-level key of config.json that selects which provider's settings are used.
api_source = 'OpenAI'

# Credentials and endpoint come from config.json so they are never hard-coded here.
api_key = config[api_source]['openai_api_key']
api_endpoint = config[api_source]['openai_api_endpoint']

gpt_model = config[api_source]["gpt_model"]
# gpt_model = "gpt-4-turbo-preview"
# gpt_model = "gpt-3.5-turbo-0125"

# Sampling temperature passed to the extraction call; also embedded in output file names.
temperature = 0

In [7]:
# How many times the same configuration is repeated (one output file per run).
nruns = 3

# Available system messages, keyed by the tag that ends up in output file names.
system_options = {
    "no-system-prompt": "",
    "pharmexpert-v0": "You are an expert in pharmacology.",
    "pharmexpert-v1": (
        "You are an expert in medical natural language processing, "
        "adverse drug reactions, pharmacology, and clinical trials."
    ),
}

# Available user prompts, keyed the same way. Each one is a str.format template
# with a single '{}' placeholder for the label text. They are spelled out line by
# line so that the leading newline and the space before the final newline are visible.
prompt_options = {
    "fatal-prompt-v2": (
        "\n"
        "Extract all adverse reactions as they appear, including all synonyms.\n"
        "mentioned in the text and provide them as a comma-separated list.\n"
        "If a fatal event is listed add 'death' to the list.\n"
        "The text is :'{}' \n"
    ),
    "fatal-prompt-v3": (
        "\n"
        "Extract all adverse reactions as they appear, including all synonyms.\n"
        "mentioned in the text and provide them as a comma-separated list. If a\n"
        "negated adverse reaction appears extract it and include a <negated> tag.\n"
        "If a fatal event is listed add 'death' to the list.\n"
        "The text is :'{}' \n"
    ),
}

# Active selection for this session.
system_name = "pharmexpert-v0"
prompt_name = "fatal-prompt-v2"
system_content = system_options[system_name]
prompt = prompt_options[prompt_name]

# Extra generation settings that become part of the file name, joined with '-'.
gpt_params = ["temp{}".format(temperature)]

# <api>_<model>_<prompt>_<system>_<params>_<split>; shown as the cell output.
output_file_basename = (
    f"{api_source}_{gpt_model}_{prompt_name}_{system_name}_{'-'.join(gpt_params)}_{set_type}"
)
output_file_basename

'OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0_train'

In [8]:
# run GPT
def run_iteration(row):
    """Extract ADE terms for a single row of `drugs`.

    Parameters
    ----------
    row : pandas.Series
        Must provide 'drug_name', 'section_name' and 'section_text'.

    Returns
    -------
    list or None
        [drug_name, section_name, gpt_output] on success. None when the call
        raised; the error is printed and the row is left out of the saved
        results, so it is attempted again the next time this cell runs.
    """
    name, section = row['drug_name'], row['section_name']
    text = row['section_text']
    try:
        gpt_out = extract_ade_terms(api_source, config, gpt_model, system_content, prompt, text, temperature)
        return [name, section, gpt_out]
    except Exception as err:
        print(f"Encountered an exception for row: {name} {section}. Error message below:")
        print(err)
        return None


# The per-run CSVs are written under results/; create it up front so a fresh
# checkout does not fail only after all the API calls have been made.
os.makedirs('results', exist_ok=True)

for i in range(nruns):
    run_key = "{}_run{}".format(output_file_basename, i)
    results_path = 'results/{}.csv'.format(run_key)
    print(run_key)
    if run_key in outputs:
        print(f"Run {run_key} already started will pick up from where it was left off.")
    elif os.path.exists(results_path):
        outputs[run_key] = pd.read_csv(results_path)
        print(f"Run {run_key} started, loading from disk and pick up from where it was left off.")

    start = time.time()

    # Split the rows into those already answered by an earlier (possibly partial)
    # run and those that still need an API call.
    previous = outputs.get(run_key)
    results = []
    rows_to_run = []
    for _, row in drugs.iterrows():
        name, section = row['drug_name'], row['section_name']
        if previous is not None:
            # Boolean mask rather than DataFrame.query with interpolated values:
            # a name containing a quote character would break the query string.
            match = previous[(previous['drug_name'] == name) & (previous['section_name'] == section)]
            # Reuse a stored answer only when it is unambiguous (exactly one row).
            if match.shape[0] == 1:
                results.append([name, section, match['gpt_output'].values[0]])
                continue
        rows_to_run.append(row)

    print(f"Loaded {len(results)} rows from file since they were already run.")
    print(f"There remains {len(rows_to_run)} rows to run.")

    # Issue the remaining requests from a pool of 10 threads; map() yields the
    # results in the same order as rows_to_run.
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        results.extend(tqdm(
            executor.map(run_iteration, rows_to_run),
            total=len(rows_to_run)
        ))

    # Failed rows came back as None and are dropped here.
    gpt_output = pd.DataFrame(
        [r for r in results if r is not None],
        columns=['drug_name', 'section_name', 'gpt_output']
    )
    end = time.time()

    # Only record and persist the run when at least one row succeeded.
    if gpt_output.shape[0] > 0:
        outputs[run_key] = gpt_output
        gpt_output.to_csv(results_path)

    print(f"Run: {run_key}, time elapsed: {end-start}s.")

OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0_train_run0
Run OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0_train_run0 started, loading from disk and pick up from where it was left off.
Loaded 340 rows from file since they were already run.
There remains 0 rows to run.


0it [00:00, ?it/s]


Run: OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0_train_run0, time elapsed: 0.2753791809082031s.
OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0_train_run1
Run OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0_train_run1 started, loading from disk and pick up from where it was left off.
Loaded 340 rows from file since they were already run.
There remains 0 rows to run.


0it [00:00, ?it/s]


Run: OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0_train_run1, time elapsed: 0.25836968421936035s.
OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0_train_run2
Run OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0_train_run2 started, loading from disk and pick up from where it was left off.
Loaded 340 rows from file since they were already run.
There remains 0 rows to run.


0it [00:00, ?it/s]

Run: OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0_train_run2, time elapsed: 0.2555088996887207s.





## Evaluation

In [9]:
# List the runs currently held in memory, in sorted order, one per line.
# Iterating a dict yields its keys, so sorted(outputs) sorts the run names.
ordered_run_keys = sorted(outputs)
for run_key in ordered_run_keys:
    print(run_key)

OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0_train_run0
OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0_train_run1
OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0_train_run2


In [10]:
# Hard-coded scratch data: two sets of lower-cased reaction terms, `gpt` and `man`.
# NOTE(review): presumably the GPT-extracted and manually annotated terms for one
# drug label, kept for ad-hoc comparison — confirm the source or remove this cell.
# The cell displays the number of distinct terms in `man`.
gpt = {'thrombocytopenia', 'anemia', 'death', 'acute renal failure', 'pulmonary toxicity', 'hepatic failure', 'nausea', 'infusion reactions', 'pulmonary hypertension', 'hepatic toxicity', 'decreased lymphocyte', 'dyspnea', 'tumor lysis syndrome', 'diarrhea', 'fatigue', 'pyrexia', 'edema peripheral', 'hypertension', 'decreased platelets', 'cardiac toxicities', 'hemolytic uremic syndrome', 'headache', 'venous thrombosis', 'thrombotic thrombocytopenic purpura', 'cough', 'posterior reversible encephalopathy syndrome'}
man = {'cardiac failure congestive', 'died', 'thrombocytopenia', 'decreased total white blood cell count', 'leukopenia', 'dizziness', 'deafness', 'pulmonary toxicity', 'hepatic failure', 'pain in extremity', 'asthenia', 'cataract', 'rash', 'bronchopneumonia', 'hypophosphatemia', 'hypoalbuminemia', 'bronchitis', 'tumor lysis syndrome', 'diarrhea', 'decreased absolute neutrophil count', 'pyrexia', 'edema peripheral', 'musculoskeletal pain', 'muscle spasm', 'hypertension', 'ttp', 'cardiac toxicities', 'decreased phosphorus', 'myocardial ischemia', 'infection adverse events', 'anxiety', 'hypomagnesemia', 'decreased potassium', 'posterior reversible encephalopathy syndrome', 'insomnia', 'mortality', 'decreased appetite', 'hypoesthesia', 'dysphonia', 'viral infection', 'renal failure', 'hypercalcemia', 'febrile neutropenia', 'pain', 'lymphopenia', 'renal disorders', 'respiratory tract infection', 'infusion reactions', 'disease progression', 'congestive heart failure', 'pruritus', 'pneumonia', 'upper respiratory tract infection', 'hepatic toxicity', 'renal impairment', 'chills', 'muscle spasms', 'back pain', 'deaths', 'epistaxis', 'decreased sodium', 'hemolytic uremic syndrome', 'oropharyngeal pain', 'thrombotic thrombocytopenic purpura', 'dehydration', 'hyperkalemia', 'musculoskeletal chest pain', 'hypocalcemia', 'hyperuricemia', 'myalgia', 'dyspnea exertional', 'peripheral neuropathies nec', 'blurred vision', 'erythema', 'delirium', 'acute renal failure', 'renal failure acute', 'nausea', 'vomiting', 'nasopharyngitis', 'sepsis', 'hypotension', 'decreased lymphocyte', 'fatigue', 'fatal', 'hypertensive crisis', 'vision blurred', 'deep vein thrombosis', 'infusion site reaction', 'embolic and thrombotic events, venous', 'headache', 'cardiac failure', 'decreased lymphocytes', 'cardiac adverse events', 'hypertensive emergency', 'pulmonary edema', 'pulmonary embolism', 'myocardial infarction', 'decreased hemoglobin', 'influenza', 'paresthesia', 'hyperglycemia', 'neutropenia', 
'abdominal pain upper', 'anemia', 'toothache', 'cardiac disorders', 'multi-organ failure', 'cardiac arrest', 'pres', 'hyperhidrosis', 'pulmonary hypertension', 'hypokalemia', 'dyspnea', 'dyspepsia', 'urinary tract infection', 'abdominal pain', 'decreased platelets', 'renal adverse events', 'venous thrombosis', 'muscular weakness', 'infections', 'cough', 'hus', 'arthralgia', 'multiple myeloma', 'constipation', 'hyponatremia'}
len(man)

128

In [74]:
# Evaluate every run held in `outputs` against the manual annotations, first with
# the 'strict' method and then with the 'lenient' one. According to the log it
# prints, each call also saves its results to disk.
# NOTE(review): the saved output under this cell is stale. It carries execution
# count 74, lists run keys (gpt-3.5, temp0.5/1.0/1.5) that no cell above creates,
# and ends in an interrupted traceback. Re-run after Restart & Run All before
# relying on it.
evaluate(outputs, manual_ades, 'strict')
evaluate(outputs, manual_ades, 'lenient')

Running strict evaluation and saving results to disk.
OpenAI_gpt-3.5-turbo-0125_fatal-prompt-v2_pharmexpert-v0_temp0.5_train_run0


100%|██████████| 101/101 [00:02<00:00, 39.27it/s]


OpenAI_gpt-3.5-turbo-0125_fatal-prompt-v2_pharmexpert-v0_temp0.5_train_run1


100%|██████████| 101/101 [00:02<00:00, 40.66it/s]


OpenAI_gpt-3.5-turbo-0125_fatal-prompt-v2_pharmexpert-v0_temp0.5_train_run2


100%|██████████| 101/101 [00:02<00:00, 40.83it/s]


OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0.5_train_run0


100%|██████████| 101/101 [00:02<00:00, 40.23it/s]


OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0.5_train_run1


100%|██████████| 101/101 [00:02<00:00, 40.55it/s]


OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0.5_train_run2


100%|██████████| 101/101 [00:02<00:00, 40.45it/s]


OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp1.0_train_run0


100%|██████████| 101/101 [00:02<00:00, 40.64it/s]


OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp1.0_train_run1


100%|██████████| 101/101 [00:02<00:00, 40.37it/s]


OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp1.0_train_run2


100%|██████████| 101/101 [00:02<00:00, 40.61it/s]


OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp1.5_train_run0


100%|██████████| 101/101 [00:02<00:00, 39.17it/s]


Running lenient evaluation and saving results to disk.
OpenAI_gpt-3.5-turbo-0125_fatal-prompt-v2_pharmexpert-v0_temp0.5_train_run0


100%|██████████| 101/101 [00:12<00:00,  8.34it/s]


OpenAI_gpt-3.5-turbo-0125_fatal-prompt-v2_pharmexpert-v0_temp0.5_train_run1


100%|██████████| 101/101 [00:13<00:00,  7.71it/s]


OpenAI_gpt-3.5-turbo-0125_fatal-prompt-v2_pharmexpert-v0_temp0.5_train_run2


100%|██████████| 101/101 [00:12<00:00,  8.39it/s]


OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0.5_train_run0


100%|██████████| 101/101 [00:21<00:00,  4.73it/s]


OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0.5_train_run1


100%|██████████| 101/101 [00:20<00:00,  4.84it/s]


OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0.5_train_run2


100%|██████████| 101/101 [00:20<00:00,  4.95it/s]


OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp1.0_train_run0


100%|██████████| 101/101 [00:20<00:00,  4.93it/s]


OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp1.0_train_run1


100%|██████████| 101/101 [00:21<00:00,  4.79it/s]


OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp1.0_train_run2


100%|██████████| 101/101 [00:20<00:00,  4.84it/s]


OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp1.5_train_run0


 19%|█▉        | 19/101 [00:06<00:27,  2.96it/s]

Unexpected exception formatting exception. Falling back to standard exception



Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3508, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/l5/4jn07y290nncb4wnyhsq33hh0000gt/T/ipykernel_65176/1979341734.py", line 2, in <module>
    evaluate(outputs, manual_ades, 'lenient')
  File "/Users/TatonettiN/Projects/onsides-task1/evaluation_functions.py", line 112, in evaluate
    if eval_method == 'embed':
  File "/Users/TatonettiN/Projects/onsides-task1/evaluation_functions.py", line 98, in evaluation_granular
    else:
  File "/Users/TatonettiN/Projects/onsides-task1/evaluation_functions.py", line 68, in evaluation_subtype
  File "/Users/TatonettiN/Projects/onsides-task1/common_string.py", line 42, in common_lenient_performance
    common = [longest_common_substring_percentage(man_out, x) for x in gpt_output]
  File "/Users/TatonettiN/Projects/onsides-task1/common_string.py", 

Encountered an exception for row: SAPHRIS all-concat. Error message below:
Request timed out.


In [11]:
# Needed only for the embedding-based evaluation; execute it a single time.
# Encoding all manual annotation strings is the slow part (expected to finish in
# under five minutes). The vectors are stored on `manual_ades`, one per row, in
# a new 'embeds' column.
embed_model_name = 'llmrails/ember-v1'
embed_model = SentenceTransformer(embed_model_name)
man_embeds = embed_model.encode(list(manual_ades['reaction_string']))
manual_ades['embeds'] = [embedding for embedding in man_embeds]

In [12]:
# Embedding-based evaluation of every run in `outputs`; uses the model loaded in
# the embedding cell and passes its name along with it.
evaluate(
    outputs,
    manual_ades,
    'embed',
    embed_model=embed_model,
    embed_model_name=embed_model_name,
)

Running embed evaluation and saving results to disk.
OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0_train_run0
saving results to results/OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0_train_run0_ember-v1_granular.csv and results/OpenAI_gpt-4-1106-preview_fatal-prompt-v2_pharmexpert-v0_temp0_train_run0_ember-v1_overall.csv


  output['gpt_output'] = output['gpt_output'].str.lower().str.replace('.', '').str.replace('\n-', ', ').str.split(', ')
  9%|▉         | 9/101 [00:15<02:41,  1.75s/it]


KeyboardInterrupt: 