In [2]:
# Prompt for the LLM "judge" that scores a doctor-patient transcript.
# The judge is asked to answer each criterion below with 1 (Yes) or 0 (No) and
# to return the answers as a single CSV row, in the order listed.
# The criteria are numbered 2-26 (25 answers); the numbering starts at 2, so no
# item 1 is sent to the model.
# The string ends with "Here is the Transcript:", so the transcript text is
# expected to be appended directly after it.
system_message_judge = """
System Message for Auto-Evaluation:

You are tasked with evaluating a doctor-patient dialogue transcript based on predefined criteria. Your evaluation will consist of answering specific questions about the transcript with Yes or No responses. However, your output should be 1 for Yes and 0 for No. Generate a CSV row with the appropriate 1 or 0 for each question in the order specified below.

Instructions:
- Carefully review the transcript.
- For each question, determine if the answer is Yes or No.
- Output 1 for Yes and 0 for No.
- Generate a single CSV row with your responses, maintaining the order of the questions as provided.

Evaluation Criteria and CSV Column Names:

2. Patient Name
    -Was the patient's name mentioned? Dont write the name of the patient. Just 1 or 0 based on instruction
3. Dyspnea (shortness of breath) at rest, while walking or climbing stairs?
    - Did the doctor ask about dyspnea (shortness of breath) at rest, while walking, or climbing stairs?
4. Paroxysmal Nocturnal Dyspnea (PND) (sudden shortness of breath at night)
    - Did the doctor ask about paroxysmal nocturnal dyspnea (PND) (sudden shortness of breath at night)?
5. Orthopnea (shortness of breath while lying flat)
    - Did the doctor ask about orthopnea (shortness of breath while lying flat)?
6. Ankle edema or general lower extremity swelling (swelling in ankles or legs)
    - Did the doctor ask about ankle edema or general lower extremity swelling (swelling in ankles or legs)?
7. Nocturnal cough (coughing at night)
    - Did the doctor ask about nocturnal cough (coughing at night)?
8. Chest pain
    - Did the doctor ask about chest pain?
9. Fatigue
    - Did the doctor ask about fatigue?
10. Worsening mental status that is acute (sudden confusion or mental changes)
    - Did the doctor ask about worsening mental status that is acute (sudden confusion or mental changes)?
11. Temperature (body temperature)
    - Did the doctor measure the patient's temperature (body temperature)?
12. Heart rate (pulse)
    - Did the doctor measure the patient's heart rate (pulse)?
13. Respiratory rate (number of breaths per minute)
    - Did the doctor measure the patient's respiratory rate (number of breaths per minute)?
14. Oxygen saturation (oxygen level in the blood)
    - Did the doctor measure the patient's oxygen saturation (oxygen level in the blood)?
15. Blood pressure (systolic and diastolic) (upper and lower blood pressure numbers)
    - Did the doctor measure the patient's blood pressure (systolic and diastolic) (upper and lower blood pressure numbers)?
16. Weight (body weight)
    - Did the doctor measure the patient's weight (body weight)?
17. Did the doctor ask the patient what medications they are on?
    - Did the doctor ask the patient what medications they are on?
18. Be sympathetic when the patient reports symptoms or pain.
    - Was the doctor sympathetic when the patient reported symptoms or pain?
19. Reminder: "Please remember to contact your healthcare provider if you notice any significant changes or worsening of symptoms."
    - Did the doctor remind the patient to contact their healthcare provider if they notice any significant changes or worsening of symptoms?
20. End the Conversation: Express care and encourage the patient to reach out if they need further assistance.
    - Did the doctor express care and encourage the patient to reach out if they need further assistance at the end of the conversation?
21. Premature end to conversation?
    - Was there a premature end to the conversation?
22. Conversation flows naturally: no repetition
    - Did the conversation flow naturally without repetition?
23. Patient uses plain language
    - Did the patient use plain language?
24. Patient is consistent about their symptoms
    - Was the patient consistent about their symptoms?
25. Patient doesn’t invent information that contradicts the prompt (confabulations)
    - Did the patient avoid inventing information that contradicts the prompt (confabulations)?
26. Patient allows the doctor to ask questions and doesn’t take over direction of the conversation (offering information before it’s asked for)
    - Did the patient allow the doctor to ask questions and not take over the direction of the conversation (offering information before it’s asked for)?


Please review the transcript and provide a CSV row with 1 for Yes and 0 for No for each of the above criteria in the specified order.

Here is the Transcript:
"""


# Criteria removed from the prompt above (kept here for reference):

# 1. Transcript Number: 
#     -insert the Transcript Number here. this is located at the top of the prompt

# 27. Any additional observations or suggestions for improvement?
#     - Are there any additional observations or suggestions for improvement?

In [3]:
import os
import json
import csv
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
from dotenv import load_dotenv

# Load API keys from .env file
load_dotenv()

# load_dotenv() already copies the .env entries into os.environ, so the values
# do not need to be re-assigned. Writing os.getenv(NAME) back into os.environ
# is a no-op when NAME is set and raises an opaque
# "TypeError: str expected, not NoneType" when it is not, so fail with a clear
# message for the one key the model cannot run without instead.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or environment."
    )
# NOTE(review): LANGCHAIN_ENDPOINT / LANGCHAIN_API_KEY / LANGCHAIN_PROJECT are
# left optional here -- presumably they only configure LangSmith tracing and
# are picked up from the environment by LangChain itself; confirm.

# Specify the path to your JSON file
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
json_file_path = '/Users/faridgholitabar/Coding/reco/reco_analysis/data/patients/patients_1.0_with_transcripts.json'

# Open and read the JSON file (explicit UTF-8 so the result does not depend on
# the platform's default text encoding)
with open(json_file_path, 'r', encoding='utf-8') as json_file:
    patients = json.load(json_file)

# List of transcript numbers to process
transcript_numbers = [
    '19557627', '19516114', '18136989', '17707918', '17566649',
    '15345003', '15338322', '14807966', '14540393', '14185111',
    '13912736', '13727871', '13727153', '13166275', '12807868',
    '12390274', '12305811', '11242742', '11080025', '10339317'
]

# Specify the CSV file path (make sure it is a file, not a directory)
csv_file_path = '/Users/faridgholitabar/Coding/reco/reco_analysis/notebooks/judge/output.csv'

# Create the ChatOpenAI model instance
# model = ChatOpenAI(temperature=0.7, model_name='gpt-3.5-turbo-1106')
# NOTE(review): temperature=0.7 makes the judge's answers vary between runs;
# consider temperature=0 if reproducible scores are required.
model = ChatOpenAI(temperature=0.7, model_name='gpt-4-turbo')


# Function to validate and parse the response
def _to_binary_flag(value):
    """Convert one CSV field to 0 or 1.

    Surrounding whitespace and quote characters are ignored. Anything that is
    not an integer, or is an integer other than 0 or 1, is mapped to 0.
    """
    try:
        flag = int(value.strip().strip('"\'').strip())
    except ValueError:
        # Empty / non-numeric field: treated as "No".
        return 0
    return flag if flag in (0, 1) else 0


def _extract_csv_row(text):
    """Pick the line of ``text`` that is the judge's 0/1 CSV row.

    Chat models often wrap the row in a code fence or add a sentence before or
    after it. Each line is stripped of whitespace and backticks; among the
    lines made up solely of comma-separated 0/1 values, the one with the most
    fields is returned. If no line qualifies, ``text`` is returned unchanged so
    the caller falls back to splitting the whole reply.
    """
    best_line = None
    best_count = 0
    for raw_line in text.splitlines():
        line = raw_line.strip().strip('`').strip()
        if not line:
            continue
        tokens = [tok.strip().strip('"\'').strip() for tok in line.split(',')]
        if all(tok in ('0', '1') for tok in tokens) and len(tokens) > best_count:
            best_line, best_count = line, len(tokens)
    return text if best_line is None else best_line


def parse_response(response_content, expected_fields=25):
    """Turn the model's reply into a fixed-length list of 0/1 integers.

    Args:
        response_content: Raw text returned by the model. ``None`` or an empty
            string is treated as a reply with no answers.
        expected_fields: Number of answers the returned list must contain.

    Returns:
        A list of exactly ``expected_fields`` ints, each 0 or 1. Missing
        fields are padded with 0, extra fields are dropped, and any field that
        is not a valid 0/1 value becomes 0.
    """
    row_text = _extract_csv_row(response_content or "")
    fields = row_text.split(',')

    # Force the row to the expected width: pad short rows, truncate long ones.
    if len(fields) < expected_fields:
        fields += [''] * (expected_fields - len(fields))
    else:
        fields = fields[:expected_fields]

    return [_to_binary_flag(field) for field in fields]



# Column names of the output CSV: the transcript number followed by one column
# per judged criterion, in the same order as the criteria in the judge prompt.
header = [
    "Transcript Number",
    "Patient Name",
    "Dyspnea (shortness of breath) at rest, while walking or climbing stairs?",
    "Paroxysmal Nocturnal Dyspnea (PND) (sudden shortness of breath at night)",
    "Orthopnea (shortness of breath while lying flat)",
    "Ankle edema or general lower extremity swelling (swelling in ankles or legs)",
    "Nocturnal cough (coughing at night)",
    "Chest pain",
    "Fatigue",
    "Worsening mental status that is acute (sudden confusion or mental changes)",
    "Temperature (body temperature)",
    "Heart rate (pulse)",
    "Respiratory rate (number of breaths per minute)",
    "Oxygen saturation (oxygen level in the blood)",
    "Blood pressure (systolic and diastolic) (upper and lower blood pressure numbers)",
    "Weight (body weight)",
    "Did the doctor ask the patient what medications they are on?",
    "Be sympathetic when the patient reports symptoms or pain.",
    "Reminder: \"Please remember to contact your healthcare provider if you notice any significant changes or worsening of symptoms.\"",
    "End the Conversation: Express care and encourage the patient to reach out if they need further assistance.",
    "Premature end to conversation?",
    "Conversation flows naturally: no repetition",
    "Patient uses plain language",
    "Patient is consistent about their symptoms",
    "Patient doesn’t invent information that contradicts the prompt (confabulations)",
    "Patient allows the doctor to ask questions and doesn’t take over direction of the conversation (offering information before it’s asked for)"
]

# Number of 0/1 answers expected from the judge: every column except the
# transcript number. Derived from the header so rows and header cannot drift.
num_criteria = len(header) - 1

# Transcript numbers that could not be scored, reported after the run instead
# of being dropped silently.
skipped_transcripts = []

# Write the header and one row per transcript to the CSV file. The encoding is
# explicit because the header contains non-ASCII apostrophes.
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(header)

    # Loop through each transcript number, invoke the model, and write the results
    for transcript_number in transcript_numbers:
        if transcript_number not in patients:
            skipped_transcripts.append(transcript_number)
            continue

        transcript = patients[transcript_number].get('chat_transcript')
        if not transcript:
            # Patient record exists but has no transcript to judge.
            skipped_transcripts.append(transcript_number)
            continue

        full_prompt = f"Transcript Number: {transcript_number}\n\n{system_message_judge}\n{transcript}"

        # Define the messages for the chat model.
        # NOTE(review): instructions and transcript are sent together as one
        # system message; HumanMessage is imported but not used.
        messages = [
            SystemMessage(content=full_prompt),
        ]

        # Get the response
        response = model.invoke(messages)
        response_content = response.content.strip()

        # Process the response to extract the CSV row
        csv_row = parse_response(response_content, expected_fields=num_criteria)
        csv_row.insert(0, transcript_number)  # Add transcript number as the first column

        # Write the CSV row to the file
        csv_writer.writerow(csv_row)

if skipped_transcripts:
    print(f"Skipped transcripts (missing from the JSON file or without a chat transcript): {skipped_transcripts}")

print(f"CSV file has been created at: {csv_file_path}")


CSV file has been created at: /Users/faridgholitabar/Coding/reco/reco_analysis/notebooks/judge/output.csv
