In [2]:
import ast
import concurrent
import concurrent.futures  # `import concurrent` alone does not load the futures submodule
import csv
import os
import time

import numpy as np
import openai
import pandas as pd
from openai import OpenAI
from tqdm import tqdm

import constants

## Set Up

### Functions

In [3]:
# function for running GPT
def extract_ade_terms(gpt_model, prompt, text, openai_api):
  client = OpenAI(api_key=openai_api,)
  chat_completion = client.chat.completions.create(
      messages=[
          {"role": "system", "content": "You are an expert in pharmacology."},
          {
              "role": "user",
              "content": prompt.format(text)
          }
      ],
      model=gpt_model,
  )
  term = chat_completion.choices[0].message.content
  return term

In [4]:
def evaluation(manual_ades, gpt_output, limit = 1000):
    drugs = gpt_output['drug_name'].unique()
    drugs_set = set()
    results = []
    for drug in tqdm(drugs):
        drugs_set.add(drug)
        if len(drugs_set) > limit:
            break
        
        drug_df = manual_ades.query("(drug_name == '{}') & (section_name == 'adverse reactions')".format(drug))
        manual = set(drug_df['reaction_string'].to_list())
        gpt_drug = (gpt_output[
            (gpt_output['drug_name'] == drug)
            &
            (gpt_output['section_name'] == "adverse reactions")
            ]["gpt_output"].astype(str)
            .str.lower()
            .str.replace('\n-', ', ')
            .str.split(",").tolist())
    
        try:
            gpt_drug = [x.strip() for x in gpt_drug[0]]
            gpt_drug = set(gpt_drug)
        except:
            results.append([drug, len(manual), len(gpt_drug), np.nan, np.nan,
                             np.nan, np.nan, np.nan, np.nan])
            continue

        TP = len(manual.intersection(gpt_drug))
        FP = len(gpt_drug.difference(manual))
        FN = len(manual.difference(gpt_drug))
        precision = TP/(TP+FP)
        recall = TP/(TP+FN)
        if precision != 0 and recall != 0:
            f1 = (2 * precision * recall)/(precision + recall)# 2*TP/(2*TP+FP+FN)
        else:
            f1 = np.NAN

        results.append([drug, len(manual), len(gpt_drug), TP, FP, FN, precision, recall, f1])
    results = pd.DataFrame(results, columns=['drug_name', 'n_manual', 'n_gpt', 'tp', 'fp', 'fn', 'precision', 'recall', 'f1'])
    return results

In [5]:
openai.organization = ""
openai.api_key = '' #constants.AZURE_OPENAI_KEY

### Variables

In [99]:
# Input CSVs. The part of `drug_file` before the first underscore ('train')
# is reused later in the notebook as the data-set label.
drug_file = 'train_drug_label_text_potential_terms.csv'
manual_file = 'train_drug_label_text_manual_ades.csv'
# Cap on the number of distinct drugs considered when selecting rows for GPT.
my_max = 10000
# Model name passed to the chat-completions call.
gpt_model = 'gpt-4-1106-preview'

In [100]:
# Label for this prompt variant; it is embedded in the output CSV name and in
# the row appended to the results log.
prompt_name = 'potential-terms-v3'
# User-message template, filled with str.format:
#   {l} -> the list of potential terms, {t} -> the label section text.
prompt = """
Extract all adverse reactions, including all synonyms, mentioned in the text and provide them as a comma-separated list. 
Use the list of potential adverse reactions as a starting point, but add any that are missing and remove any incorrect terms. 
If a fatal event is listed add 'death' to the list.
The list is :{l}. The text is :{t}. 
"""

## Run GPT

In [101]:
drugs = pd.read_csv(drug_file)
import ast
drugs['str_match'] = drugs['str_match'].apply(lambda x: ', '.join(ast.literal_eval(x)))
manual_ades = pd.read_csv(manual_file)
set_type = drug_file.split('_')[0] # assuming file follows format "train_..." or "test...."
print('{}_{}_{}.csv'.format(gpt_model, prompt_name, set_type))

gpt-4-1106-preview_potential-terms-v3_train.csv


In [102]:
# if there is a max
new_rows = list()
unique_drugs = set()
for i, row in drugs.iterrows():
    unique_drugs.add(row["drug_name"])
    if len(unique_drugs) > my_max: 
        break
    if row['section_name'] != 'adverse reactions':
        continue

    new_rows.append(row)

In [103]:
# function for running GPT
def extract_ade_terms(gpt_model, prompt, text, openai_api, potential_terms):
  client = OpenAI(api_key=openai_api,)
  chat_completion = client.chat.completions.create(
      messages=[
          {"role": "system", "content": "You are an expert in pharmacology."},
          {
              "role": "user",
              "content": prompt.format(t = text, l = potential_terms)
          }
      ],
      model=gpt_model,
  )
  term = chat_completion.choices[0].message.content
  return term

import ast
def run_iteration(row):
    name, section = row['drug_name'], row['section_name']
    text = row['section_text']
    potential_terms = row['str_match']
    #try:
    gpt_out = extract_ade_terms(gpt_model, prompt, text, openai.api_key, potential_terms)
    return [name, section, gpt_out]
    #except:
    #    return None

In [104]:
# run GPT    
start = time.time()
        
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as exec:
    results = list(tqdm(
		exec.map(run_iteration, new_rows), 
		total=len(new_rows)
	))

gpt_output = pd.DataFrame(
    [r for r in results if r is not None],
    columns=['drug_name', 'section_name', 'gpt_output']
)
gpt_output.to_csv('{}_{}_{}.csv'.format(gpt_model, prompt_name, set_type))
end = time.time()

  0%|          | 0/101 [00:00<?, ?it/s]

100%|██████████| 101/101 [02:28<00:00,  1.47s/it]


In [105]:
# Elapsed wall-clock seconds between `start` and `end` of the GPT run cell.
print(end - start)

148.34500002861023


In [106]:
# Per-drug scores. Note: this rebinds `results`, which until now held the raw
# list returned by the GPT run, to the evaluation DataFrame.
results = evaluation(manual_ades, gpt_output)

100%|██████████| 101/101 [00:00<00:00, 552.90it/s]


In [107]:
[tp_total, fp_total, fn_total] =  results[['tp', 'fp', 'fn']].sum()
precision = tp_total/(tp_total+fp_total)
recall =  tp_total/(tp_total+fn_total)
f1 = (2 * precision * recall)/(precision + recall) # 2*tp_total/(2*tp_total+fp_total+fn_total) 
print("prompt: {}".format(prompt_name))
print("precision: {}\nrecall: {}\nf1: {}".format(precision, recall, f1)) 

prompt: potential-terms-v3
precision: 0.8615080237414816
recall: 0.7343076634813566
f1: 0.7928383572729112


In [39]:
# model, data, prompt, precision, recall, f1
with open('gpt_model_results.csv', 'a') as file:
    file.write('{}, {}, {}, {}, {}, {}\n'.format(gpt_model, set_type, prompt_name, precision, recall, f1))