In [1]:
import concurrent
import concurrent.futures
import json
import os
import time

import numpy as np
import pandas as pd
from openai import OpenAI, AzureOpenAI
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

In [2]:
# Load API credentials and model settings from the local JSON config file.
# The context manager closes the file handle as soon as parsing is done.
with open('./config.json') as config_file:
    config = json.load(config_file)

organization = ""

# Top-level key of `config` whose settings are used for every request.
api_source = 'OpenAI'

api_key = config[api_source]['openai_api_key']
api_endpoint = config[api_source]['openai_api_endpoint']

gpt_model = config[api_source]["gpt_model"]
# Forwarded unchanged as the `temperature` argument of each chat completion.
temperature = 0

In [3]:
# Number of times the whole term list is sent through the model.
nruns = 3

# System message: frames the task and gives three worked "is a" examples.
_MEDICAL_EXPERT_EXAMPLES = """
    You are a medical professional. You will be asked to relate two conditions through 'is a' statements.
    For example, Haematemesis is GI bleeding.
    For example, Acute myocardial infarction is Acute myocardial infarction.
    For example, Hepatitis is Acute Liver Injury.
    You will be given similar statements and asked to respond with only "True" or "False".
    """

# User message template: slot 1 is the term, slot 2 is the condition.
_IS_A_TEMPLATE = "\n{} is {}.\n"

# Available system messages, looked up by a short label.
system_options = {"medical-expert-examples": _MEDICAL_EXPERT_EXAMPLES}

# Available user prompt templates, looked up by a short label.
prompt_options = {"is-a": _IS_A_TEMPLATE}

In [4]:
# Labels of the system message and prompt template used for this run.
system_name, prompt_name = "medical-expert-examples", "is-a"

# Resolve the labels to the actual text sent with each request.
system_content = system_options[system_name]
prompt = prompt_options[prompt_name]

In [6]:
# Read the adverse-reaction label table, then keep one row per distinct
# (term, id) pair from its two MedDRA preferred-term columns.
adr_section = pd.read_csv('./data/adverse_reactions_active_labels.csv')
pt_columns = ['pt_meddra_term', 'pt_meddra_id']
meddra_pts = adr_section.loc[:, pt_columns].drop_duplicates()
print(meddra_pts.shape)

(4370, 2)


In [7]:
# Clients already built by gpt_call, keyed by their connection settings, so
# repeated calls share one client instead of constructing a new one each time.
_GPT_CLIENTS = {}


def _get_gpt_client(api_source, config, api_version):
    """Return the cached client for `api_source`, building it on first use."""
    if api_source == 'OpenAI':
        cache_key = (api_source, config[api_source]['openai_api_key'])
    elif api_source == 'Azure':
        cache_key = (
            api_source,
            config[api_source]['openai_api_key'],
            config[api_source]['openai_api_endpoint'],
            api_version,
        )
    else:
        raise ValueError(f"Unexpected API source requested: {api_source}")

    client = _GPT_CLIENTS.get(cache_key)
    if client is None:
        if api_source == 'OpenAI':
            client = OpenAI(api_key=config[api_source]['openai_api_key'])
        else:
            client = AzureOpenAI(
                api_key=config[api_source]['openai_api_key'],
                api_version=api_version,
                azure_endpoint=config[api_source]['openai_api_endpoint'],
            )
        # setdefault keeps whichever client was stored first if two threads
        # reach this point together; the extra instance is simply discarded.
        client = _GPT_CLIENTS.setdefault(cache_key, client)
    return client


def gpt_call(api_source, config, gpt_model, system_content, prompt, meddra_term, condition, temperature,
             api_version="2023-12-01-preview"):
    """Ask the chat model one "<term> is <condition>" question.

    Parameters
    ----------
    api_source : str
        'OpenAI' or 'Azure'; also the key of `config` holding the credentials.
    config : dict
        Settings keyed by API source. 'openai_api_key' is read for both
        sources, 'openai_api_endpoint' only for 'Azure'.
    gpt_model : str
        Passed as `model` to the chat completion request.
    system_content : str
        Content of the system message.
    prompt : str
        Template with two positional `{}` slots, filled with `meddra_term`
        and `condition` (in that order) to form the user message.
    meddra_term, condition : str
        Values substituted into `prompt`.
    temperature : float
        Passed as `temperature` to the chat completion request.
    api_version : str, optional
        API version handed to the Azure client; ignored for 'OpenAI'.

    Returns
    -------
    The message content of the first choice in the completion.

    Raises
    ------
    ValueError
        If `api_source` is neither 'OpenAI' nor 'Azure'.
    """
    client = _get_gpt_client(api_source, config, api_version)

    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_content},
            {
                "role": "user",
                "content": prompt.format(meddra_term, condition)
            }
        ],
        model=gpt_model,
        temperature=temperature,
    )
    return chat_completion.choices[0].message.content

In [8]:
# One [term, id] list per distinct preferred term; each is a work item for run_iteration.
rows_to_run = meddra_pts.to_numpy().tolist()

In [9]:
def run_iteration(row, conditions=('Acute Kidney Injury', 'Acute Liver Injury',
                                   'Acute Myocardial Infarction', 'GI bleeding')):
    """Query the model once per condition for a single MedDRA term.

    Parameters
    ----------
    row : sequence
        `row[0]` is the term text and `row[1]` is its id.
    conditions : iterable of str, optional
        Conditions to test the term against, in output order. Defaults to
        the four conditions this notebook studies.

    Returns
    -------
    list
        `[meddra_term, meddra_id]` followed by one entry per condition: the
        value returned by `gpt_call`, or `np.nan` if that call raised.
    """
    meddra_term, meddra_id = row[0], row[1]

    response = []
    for condition in conditions:
        try:
            answer = gpt_call(api_source, config, gpt_model, system_content,
                              prompt, meddra_term, condition, temperature)
        except Exception as err:
            print(f"Encountered an exception for row: {meddra_term} {condition}. Error message below:")
            print(err)
            # Record a scalar missing value (not a one-element list) so the
            # returned row stays flat: one scalar per condition.
            answer = np.nan
        response.append(answer)

    return [meddra_term, meddra_id] + response

In [10]:
# Smoke test on two hand-written rows before the full run. Each call goes
# through gpt_call once per condition. Only the value of the last expression
# is shown as the cell output; the first call's result is discarded.
# NOTE(review): 1111111 looks like a placeholder id — confirm.
run_iteration(['Haematemesis', 1111111])
run_iteration(['acute liver injury', 1111111])

['acute liver injury', 1111111, 'False', 'True', 'False', 'False']

In [None]:
# Number of worker threads issuing requests concurrently within one run.
max_workers = 25

# Maps run index -> list of rows returned by run_iteration, in the same
# order as rows_to_run (Executor.map preserves input order).
output = {}
for i in range(nruns):
    # `executor` rather than `exec`, so the builtin of that name is not shadowed.
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = list(tqdm(
            executor.map(run_iteration, rows_to_run),
            total=len(rows_to_run)
        ))

    output[i] = results

 88%|████████████████████████████████▍    | 3826/4370 [1:26:27<10:22,  1.15s/it]

In [12]:
# Inspect the first row of the run stored under key 1 (keys are the 0-based loop indices).
output[1][0]

['Abdominal discomfort', 10000059, 'False', 'False', 'False', 'False']

In [13]:
# Flatten every run into one table, prefixing each row with its run index.
# The four answer columns follow the condition order used by run_iteration:
# Acute Kidney Injury, Acute Liver Injury, Acute Myocardial Infarction, GI bleeding.
final = [
    [run_index, *row]
    for run_index, run_rows in output.items()
    for row in run_rows
]
output_columns = ['run', 'meddra_term', 'meddra_pt_id', 'AKI',  'ALI', 'AMI', 'GIB']
gpt_output = pd.DataFrame(final, columns=output_columns)

In [14]:
def _answer_to_flag(answer):
    """Convert one model answer to 1/0, tolerating non-text entries.

    Text containing "true" (case-insensitive) becomes 1, any other text 0.
    Numbers are returned unchanged so re-running this cell on already
    converted columns is a no-op. Anything else (for example the entry
    left behind by a failed request) becomes NaN rather than raising.
    """
    if isinstance(answer, str):
        return 1 if 'true' in answer.lower() else 0
    if isinstance(answer, (int, float, np.integer, np.floating)):
        return answer
    return np.nan


# Apply the same conversion to each of the four condition columns.
for condition_column in ['AKI', 'ALI', 'AMI', 'GIB']:
    gpt_output[condition_column] = [_answer_to_flag(x) for x in gpt_output[condition_column]]


In [18]:
# Persist gpt_output as CSV; index=False leaves the row index out of the file.
gpt_output.to_csv('data/gpt_annotation_meddra_pt_isa.csv', index=False)

In [16]:
# Per-term totals over all runs: every remaining column is summed within
# each (term, id) group, so each condition column counts its "true" answers.
(
    gpt_output
    .groupby(['meddra_term', 'meddra_pt_id'])
    .sum()
    .reset_index()
    .head()
)

Unnamed: 0,meddra_term,meddra_pt_id,run,AKI,ALI,AMI,GIB
0,Abdominal abscess,10060921,3,0,0,0,0
1,Abdominal adhesions,10000050,3,0,0,0,0
2,Abdominal discomfort,10000059,3,0,0,0,0
3,Abdominal distension,10000060,3,0,0,0,0
4,Abdominal hernia,10060954,3,0,0,0,0


In [17]:
# Same per-term totals, restricted to terms with at least one positive GIB answer.
(
    gpt_output
    .groupby(['meddra_term', 'meddra_pt_id'])
    .sum()
    .reset_index()
    .loc[lambda totals: totals['GIB'] > 0]
)

Unnamed: 0,meddra_term,meddra_pt_id,run,AKI,ALI,AMI,GIB
180,Anal haemorrhage,10049555,3,0,0,0,3
1146,Diarrhoea haemorrhagic,10012741,3,0,0,0,3
1177,Diverticulum intestinal haemorrhagic,10013560,3,0,0,0,3
1204,Duodenal ulcer,10013836,3,0,0,0,3
1205,Duodenal ulcer haemorrhage,10013839,3,0,0,0,3
1340,Enterocolitis haemorrhagic,10014896,3,0,0,0,2
1576,Gastric antral vascular ectasia,10051585,3,0,0,0,3
1581,Gastric haemorrhage,10017788,3,0,0,0,3
1587,Gastric ulcer haemorrhage,10017826,3,0,0,0,3
1592,Gastritis haemorrhagic,10017866,3,0,0,0,3
