In [None]:
# Essential libraries for data manipulation and processing
import pandas as pd  # For data manipulation and analysis
import numpy as np  # For numerical operations and computations

# Utility libraries for progress tracking and time management
from tqdm import tqdm  # For progress bars during iterations
import time  # For time-related functions


In [None]:
# Instruction template for the model. This is a plain (non-f) string on purpose:
# no `note` exists when this cell runs, so `{note}` is kept as a literal
# placeholder to be filled per note, e.g. `prompt.format(note=some_note)`.
prompt = """
Please review the patient's medical chart as a clinician. Analyze the medical notes provided and identify the primary cause of death. If multiple causes are present, list up to three in order of relevance, separated by commas without spaces.
-    	Use standardized medical diagnoses, avoiding terms that describe states or events (e.g., avoid 'asystole').
-    	When appropriate, generalize specific conditions to broader diagnoses (e.g., use 'sepsis' rather than 'severe sepsis').
-    	Keep diagnoses simple and consistent across all answers.
-    	If the cause of death cannot be determined, return 'NA'.
-    	Provide only the list of causes without any additional explanation.
Medical Notes: {note}
"""

In [None]:
import os
from openai import AzureOpenAI

# Azure OpenAI client shared by the cells below. Connection settings are read
# from environment variables so no credentials are stored in the notebook; a
# missing variable raises KeyError naming the setting that must be exported.
client = AzureOpenAI(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    api_version=os.environ["OPENAI_API_VERSION"],
)


In [None]:
#df = pd.read_csv('../data/early_death_notes.csv') 

In [None]:
# Enable tqdm progress bars for Pandas apply operations.
# This registers the `progress_apply` method that a later cell calls on df['note'].
tqdm.pandas()

# Helper to turn the model's comma-separated reply into a fixed-width list
def _parse_causes(reply, limit=3):
    """
    Splits a comma-separated reply into exactly `limit` entries.

    Each item is stripped of surrounding whitespace, empty items are dropped,
    extra items beyond `limit` are discarded, and missing slots are None.
    """
    causes = [part.strip() for part in reply.split(',')]
    causes = [cause for cause in causes if cause][:limit]
    return causes + [None] * (limit - len(causes))


# Function to extract causes of death from a note using GPT model
def extract_causes_of_death(note, model="gpt-3.5-turbo", prompt_template=None,
                            max_retries=5, wait_time=5):
    """
    Extracts up to three causes of death from a given medical note.

    Parameters:
        note (str): The medical note to process.
        model (str): Model (or Azure deployment) name passed to the chat
            completions call.
        prompt_template (str | None): Optional instruction text containing a
            literal "{note}" placeholder, which is replaced by the note. When
            None, the built-in one-line extraction prompt is used.
        max_retries (int): Maximum number of attempts when requests are
            rate-limited (HTTP 429).
        wait_time (int | float): Initial wait in seconds between rate-limited
            attempts; doubled after every wait.

    Returns:
        list: Always exactly three entries. Causes are whitespace-stripped and
        padded with None if fewer than three are found. [None, None, None] is
        returned on an empty reply, a non-rate-limit error, or when all
        retries are exhausted.

    NOTE(review): the detailed clinical `prompt` defined earlier in this
    notebook is not used by default; pass it via `prompt_template` if it is
    the intended instruction text.
    """
    if prompt_template is None:
        prompt = f"Extract up to three causes of death from the following note: {note}"
    else:
        # str.replace (not str.format) so braces inside the template or the note are harmless
        prompt = prompt_template.replace("{note}", str(note))

    for attempt in range(1, max_retries + 1):
        try:
            # Call the GPT model
            response = client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}]
            )
            has_choice = bool(response) and bool(response.choices)
            reply = response.choices[0].message.content if has_choice else None
        except Exception as e:
            # Handle rate-limiting errors (prefer the status code, fall back to the message text)
            rate_limited = getattr(e, "status_code", None) == 429 or "429" in str(e)
            if not rate_limited:
                # Handle other exceptions
                print(f"Error processing note: {e}")
                return [None, None, None]
            if attempt == max_retries:
                # No attempts left, so do not sleep before giving up
                break
            print(f"Rate limit hit. Waiting {wait_time} seconds before retrying... (Attempt {attempt}/{max_retries})")
            time.sleep(wait_time)
            wait_time *= 2  # Exponential backoff for retries
            continue

        # An empty reply is a final answer for this note; re-requesting it
        # without any limit would loop forever.
        if not reply or not reply.strip():
            print("Model returned an empty reply; no causes extracted.")
            return [None, None, None]

        return _parse_causes(reply)

    # Return placeholder if all retries fail
    print(f"Failed to process note after {max_retries} retries.")
    return [None, None, None]

In [None]:
# Apply the function to the 'note' column and create new columns for each cause of death.
# NOTE(review): `df` must already be loaded; the `pd.read_csv` cell earlier in
# this notebook is commented out, so a fresh kernel will not define it.
REQUEST_DELAY_SECONDS = 1  # Pause after every API call to avoid overwhelming the API


def _extract_with_delay(note):
    """Returns exactly three cause slots for one note, then pauses briefly."""
    causes = list(extract_causes_of_death(note))
    # The delay belongs here, inside the per-row call; a single sleep after
    # the apply would only run once, after every request had already been sent.
    time.sleep(REQUEST_DELAY_SECONDS)
    # Pad/truncate so every row fits the three target columns
    return (causes + [None] * 3)[:3]


cause_columns = ['Cause1', 'Cause2', 'Cause3']
cause_rows = df['note'].progress_apply(_extract_with_delay)

# Build the three columns in one step instead of creating a pd.Series per row
df[cause_columns] = pd.DataFrame(cause_rows.tolist(), index=df.index, columns=cause_columns)


In [None]:
#df.to_csv('medical_data_with_causes.csv', index=False)