In [1]:
# !pip3 install openai

In [2]:
# Importing necessary libraries
import os
import openai
from openai import OpenAI
import json
import pandas as pd

In [3]:
# Hand the API key to the client instance explicitly. Assigning it to the OpenAI
# class after a client has been constructed does not change that client.
# When the variable is unset this passes None, which is the constructor's default.
client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))

In [4]:
def analyze_conversation(conversation):
    """Classify a patient/doctor conversation with the chat-completions API.

    The model is asked for a triage level, a medical speciality and a
    conversation context, and is shown one worked example before the real
    input.

    Args:
        conversation: Text of the conversation to classify.

    Returns:
        dict: The JSON object parsed from the model reply (expected keys:
        "triage", "speciality", "context"). If the request fails, the reply
        is not valid JSON, or the JSON is not an object, a dict with all
        three keys set to "error" is returned instead.
    """
    medical_specialties = """
        allergies, skin, emergencies, general, internal, genetics, brain, 
        women, eyes, diseases, children, operations, urinary, heart, hormones, 
        digestion, infections, kidneys, lungs, sports, operations, bones, vessels, throat
    """

    conversation_context = """
        diagnostic, treatment, follow-up, preventive, mental, education, emergency
    """

    system_message = f"""Determine if the following medical case is by picking one from the list:urgent, non-urgent.
                        identify the relevant medical specialty from this list: {medical_specialties}
                        In addition, please also define the context of the conversation from on of the following list: {conversation_context}
                        
                        Please return your response in json format as the following. Make sure all values and keys are in lower case.
                        {{
                            "triage": "non-urgent",
                            "speciality": "general",
                            "context": "diagnostic"
                        }}
                        """

    conversation_example = """
    patient: what will happen after the incubation period for covid 19?
    doctor: in brief: symptoms if you are infected, symptoms will emerge: tiredness, dry cough, fever worsening over 5-14 days. 
    you will also become more infective so self-isolation and good hygiene are vital.
    """

    expected_response = """
    {
        "triage": "non-urgent",
        "speciality": "general",
        "context": "diagnostic"
    }
    """

    try:
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": system_message},
                # One-shot demonstration: the example answer is sent as an
                # assistant turn so it reads as a previous model reply rather
                # than as a second system instruction.
                {"role": "user", "content": conversation_example},
                {"role": "assistant", "content": expected_response},
                {"role": "user", "content": conversation}
            ]
        )
        raw_response = completion.choices[0].message
        result_dict = json.loads(raw_response.content)

        # Callers index the result by key, so anything other than a JSON
        # object is treated like any other failed classification.
        if not isinstance(result_dict, dict):
            raise ValueError(
                f"expected a JSON object, got {type(result_dict).__name__}"
            )

        return result_dict
    except Exception as e:
        # Deliberate best-effort: one bad row must not abort a long batch run.
        # The failure is reported and marked with an "error" sentinel instead.
        print(f"An error occurred: {e}")
        return {
            "triage": "error",
            "speciality": "error",
            "context": "error"
        }

In [5]:
# conversation1 = """
#     patient: i have a tight and painful chest with a dry cough, no fever and no headaches. could it possibly be coronavirus?
#     doctor: possible. top symptoms include fever, dry cough and sob. an obvious possibility. 
#     if so, your best step is to self-quarntine. remember at your age low risk of complication and 
#     typically will pass without issue. if worsening sob be seen. call your provider or check with local health department. 
#     these are healthtap guidelines: https://www.healthtap.com/blog/covid-19-care-guidelines/self-quarantine-guide.
# """

# conversation2 = """
#     patient: what will happen after the incubation period for covid 19?
#     doctor: in brief: symptoms if you are infected, symptoms will emerge: tiredness, dry cough, fever worsening over 5-14 days. 
#     you will also become more infective so self-isolation and good hygiene are vital.
#     only be concerned about covid-19 if: - you have been in contact with someone with a conformed diagnosis of covid-19 - you have visited a high risk area - symptoms worsen and include persistent fever and dry cough would you like to video or text chat with me?
# """

# conversation3 = "Hello how are you. I am so happy."

# response = analyze_conversation(conversation3)
# response

In [6]:
# Example conversation used as a smoke test for analyze_conversation.
# NOTE: this cell is live (not commented out), so running it calls analyze_conversation once.
conversation_example = """
Patient: I've been experiencing severe chest pain for the last two hours.
Doctor: Do you have any other symptoms?
Patient: Yes, I'm feeling dizzy and my left arm feels numb.
"""

result_dict = analyze_conversation(conversation_example)
result_dict

{'triage': 'urgent', 'speciality': 'heart', 'context': 'emergency'}

In [7]:
# df = pd.read_csv('datasets/final_dataset.csv')
# df.head()

In [8]:
# df.isna().sum()

In [9]:
# df['triage'] = None
# df['speciality'] = None
# df['context'] = None
# df['need_analyze'] = True

In [10]:
# Initialize the triage dataset.
# df.to_csv("datasets/triage_dataset_raw.csv", index=False)

In [11]:
# Load the working copy of the dataset (the same file the chunk loop rewrites
# after every processed chunk) and preview its first rows.
df = pd.read_csv('datasets/triage_dataset_raw.csv')
df.head()

  df = pd.read_csv('datasets/triage_dataset.csv')


Unnamed: 0,Description,Patient,Doctor,triage,speciality,context,need_analyze
0,"If you are a doctor, please answer the medical...","I am 35 years old unmarried , i was diagonized...",you should get a few other labs checked if thi...,non-urgent,infections,diagnostic,False
1,What could be the lump in abdominal section?,I have been having abdominal pain and burning ...,Hi...it can be gastroenteritis... take a cours...,non-urgent,digestion,diagnostic,False
2,what are the side effects of thyroxine 100 mg ...,"sir, Day before yesterday i had an oil fried i...","Hi, You suffer from some colon infection due ...",urgent,general,diagnostic,False
3,"If you are a doctor, please answer the medical...","friend has a lump where their coccyx is, has b...","hello and welcome to chatbot, a painful lump o...",urgent,orthopedics,diagnostic,False
4,Answer this question truthfully,Which demographic should raise suspicion of a ...,The demographic that should alert you to possi...,non-urgent,prevention,educational,False


In [12]:
# Show the (rows, columns) dimensions of the loaded frame.
df.shape

(349320, 7)

In [13]:
# Define the chunk size (number of rows per loop)
chunk_size = 100

# Calculate the total number of chunks with ceiling division, so a row count
# that is an exact multiple of chunk_size does not produce an extra, empty chunk
total_chunks = (len(df) + chunk_size - 1) // chunk_size
print("Total chunks: ", total_chunks)

Total chunks:  3494


In [14]:
def extract_info(row):
    """Fill in the triage labels of one row that is still flagged for analysis.

    Args:
        row: Mapping-like row (e.g. a pandas Series) with the keys 'Patient',
            'Doctor', 'triage', 'speciality', 'context' and 'need_analyze'.

    Returns:
        The same row object. If 'need_analyze' was truthy, its three label
        fields hold the values returned by analyze_conversation and
        'need_analyze' is set to False; otherwise the row is returned untouched.
    """
    if row['need_analyze']:
        conversation = "Patient: " + row['Patient'] + "\nDoctor: " + row['Doctor']
        result = analyze_conversation(conversation)

        # Store each label on its own. A key missing from the model reply is
        # recorded with the same "error" sentinel analyze_conversation uses,
        # instead of raising KeyError and aborting the whole chunk.
        row['triage'] = result.get('triage', 'error')
        row['speciality'] = result.get('speciality', 'error')
        row['context'] = result.get('context', 'error')
        row['need_analyze'] = False

    return row

In [None]:
# Loop through the DataFrame in chunks, saving progress to disk after each one
# so an interrupted run can be resumed without repeating finished chunks.
for i in range(total_chunks):
    start_idx = i * chunk_size
    end_idx = (i + 1) * chunk_size
    current_chunk = df.iloc[start_idx:end_idx]

    print(f"Processing chunk {i + 1}/{total_chunks}: ")

    # Skip empty slices and chunks with nothing left to analyse. Every row's
    # flag is checked, not only the first, so a chunk that still contains
    # pending rows is never skipped; extract_info ignores rows already done.
    if current_chunk.empty or not current_chunk['need_analyze'].any():
        print('Skip.')
        continue

    current_chunk = current_chunk.apply(extract_info, axis=1)
    df.iloc[start_idx:end_idx] = current_chunk

    # Checkpoint: rewrite the whole working file with the labels gathered so far.
    df.to_csv("datasets/triage_dataset_raw.csv", index=False)
    print("Complete.")

In [61]:
# Preview the first rows of the chunk most recently assigned by the processing loop.
current_chunk.head()

Unnamed: 0,Description,Patient,Doctor,triage,speciality,context,need_analyze
600,"If you are a doctor, please answer the medical...",I took my husband to the ER today with an abce...,hi and thank you so much for this query. i am ...,,,,True
601,"If you are a doctor, please answer the medical...",Hi I am suffering pain in Testicle and also ge...,hit hank you for asking chatdoctori have gone ...,,,,True
602,What causes painful lump near ear?,I had an ultrasound which showed tumors in my ...,HI.There are no known diseases that cause the ...,,,,True
603,What is the best medication for thin and less ...,"My hair have become very thin, and less. I con...",Hello. Thank you for writing to us at healthca...,,,,True
604,Please summerize the given abstract to a title,"With the relative ubiquity of smartphones, con...",Adoption of COVID-19 Contact Tracing Apps: A B...,,,,True


In [129]:
# Select the rows whose need_analyze flag is False and rename the
# "Patient" column to "question".
analyzed = df['need_analyze'].eq(False)
triage_data = df.loc[analyzed].rename(columns={"Patient": "question"})

In [130]:
# Normalise the triage labels: trim surrounding whitespace, then lower-case.
stripped_labels = triage_data['triage'].str.strip()
triage_data['triage'] = stripped_labels.str.lower()

In [131]:
def make_triage_uniform(row):
    """Collapse variant triage labels onto 'urgent' / 'non-urgent'.

    'urgency' and 'emergency' become 'urgent'; 'preventive' becomes
    'non-urgent'. Any other value is left as it is. The row is modified
    in place and returned.
    """
    label = row['triage']
    if label in ('urgency', 'emergency'):
        row['triage'] = 'urgent'
    elif label == 'preventive':
        row['triage'] = 'non-urgent'
    return row

In [132]:
# Run make_triage_uniform over every row (axis=1 passes one row at a time).
triage_data = triage_data.apply(make_triage_uniform, axis=1)

In [133]:
# Remove the rows whose triage value is the literal 'error'.
error = triage_data['triage'].eq('error')
error_index = triage_data.loc[error].index
triage_data = triage_data.drop(index=error_index)

In [134]:
# Sanity check: list the questions still labelled 'preventive' (expected: none).
# The mask has a descriptive name so the built-in filter() is not shadowed, and
# a single .loc lookup replaces the chained indexing.
preventive_mask = triage_data['triage'] == 'preventive'
triage_data.loc[preventive_mask, 'question']

Series([], Name: question, dtype: object)

In [135]:
# List the distinct triage labels present in the column.
triage_data['triage'].unique()

array(['non-urgent', 'urgent'], dtype=object)

In [136]:
# Drop the columns that are not needed in the exported triage dataset.
# Rebinding the result (instead of inplace=True) together with errors='ignore'
# keeps this cell safe to re-run: a second run is a no-op rather than a KeyError.
triage_data = triage_data.drop(
    columns=['Description', 'Doctor', 'speciality', 'context', 'need_analyze'],
    errors='ignore',
)

In [139]:
# Show the (rows, columns) dimensions of triage_data.
triage_data.shape

(42513, 2)

In [137]:
# Preview the first rows of triage_data.
triage_data.head()

Unnamed: 0,question,triage
0,"I am 35 years old unmarried , i was diagonized...",non-urgent
1,I have been having abdominal pain and burning ...,non-urgent
2,"sir, Day before yesterday i had an oil fried i...",urgent
3,"friend has a lump where their coccyx is, has b...",urgent
4,Which demographic should raise suspicion of a ...,non-urgent


In [138]:
# Write triage_data to disk; index=False leaves the DataFrame index out of the file.
triage_data.to_csv("datasets/triage_dataset.csv", index=False)