## Import and Setup

In [47]:
import os
import json
import csv
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
from dotenv import load_dotenv

In [48]:
# Load API keys from .env file
load_dotenv()

# load_dotenv() places the .env values into os.environ, so there is nothing
# to re-assign here. The previous `os.environ[k] = os.getenv(k)` lines only
# had an effect when a variable was missing, in which case they crashed with
# an opaque "TypeError: str expected, not NoneType". Fail fast instead and
# name the missing settings.
REQUIRED_ENV_VARS = (
    "LANGCHAIN_ENDPOINT",
    "LANGCHAIN_API_KEY",
    "LANGCHAIN_PROJECT",
    "OPENAI_API_KEY",
)

missing_env_vars = [name for name in REQUIRED_ENV_VARS if name not in os.environ]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Define them in the .env file or in the shell environment."
    )

In [49]:
# Build the chat model that acts as the summary judge.
# NOTE(review): temperature=0.7 makes the 0/1 scores vary between runs;
# consider a lower temperature if repeatable evaluations are needed.
model = ChatOpenAI(
    model_name='gpt-3.5-turbo-1106',
    temperature=0.7,
)
# Alternative judge model:
# model = ChatOpenAI(temperature=0.7, model_name='gpt-4-turbo')

In [50]:
### Specify patient transcript file to read in
# Kept as a string because it is only interpolated into file names; a float
# would silently collapse versions such as 1.10 into "1.1".
transcripts_version = "1.0"

## System Prompt

In [51]:
# Define the system message for the evaluation.
# The criteria are numbered 2-12 (eleven questions); the model is asked to
# answer each with 1 (Yes) or 0 (No) as one CSV row, in this order.
# Every question is phrased so that 1 is the desirable outcome, including the
# last one: 1 means the summary does NOT attempt a diagnosis.
system_message_summary_judge = """
System Message for Auto-Evaluation:

You are tasked with evaluating a summary of a doctor-patient dialogue transcript based on predefined criteria. Your evaluation will consist of answering specific questions about the summary with Yes or No responses. However, your output should be 1 for Yes and 0 for No. Generate a CSV row with the appropriate 1 or 0 for each question in the order specified below.

Instructions:
- Carefully review the transcript and accompanying summary.
- For each question, determine if the answer is Yes or No.
- Output 1 for Yes and 0 for No.
- Generate a single CSV row with your responses, maintaining the order of the questions as provided.

Evaluation Criteria and CSV Column Names:

2. Summary introduces patient by name and reports and reports their primary symptom(s)
    - Does the summary introduce patient by name and report their primary symptom(s)? Don't write the name of the patient. Just 1 or 0 based on instruction
3. Summary reports patient's current symptoms
    - Does the summary reports the patient's current symptoms?
4. Summary reports patient's vital signs
    - Did the summary report the patient's vital signs?
5. Summary reports patient's medications
    - Did the summary report the patient's medications?
6. Summary includes summarizing bullet points at the bottom that give an overview of the content of the transcript
    - Does the summary include summarizing bullet points at the bottom that give an overview of the content of the transcript?
7. No disagreement between symptoms in summary and symptoms in transcript
    - Can you confirm that there is no disagreement between symptoms in the summary and symptoms in the transcript?
8. No disagreement between vital signs in summary and vital signs in transcript
    - Can you confirm that there is no disagreement between vital signs in the summary and vital signs in the transcript?
9. No disagreement between meds in summary and meds in transcript
    - Can you confirm that there is no disagreement between meds in the summary and meds in the transcript?
10. Formatting of summary is consistent and appropriate
    - Is the formatting of the summary consistent and appropriate?
11. Language in summary is fitting for the audience (doctor)
    - Is the language in the summary fitting for the audience (a doctor)?
12. Summarization engine does not try to diagnose
    - Can you confirm that the summary does not include any attempt to diagnose?


Please review the summary and transcript and provide a CSV row with 1 for Yes and 0 for No for each of the above criteria in the specified order.

Here is the Summary, followed by the Transcript:
"""


# The following criteria were removed from the prompt:

# 1. Transcript Number: 
#     -insert the Transcript Number here. this is located at the top of the prompt

# 13. Any additional observations or suggestions for improvement?
#     - Are there any additional observations or suggestions for improvement?

## Import Transcript & Summary Files

In [52]:
# Specify the path to the transcripts JSON file
json_file_path = f"../data/patients/patients_{transcripts_version}_with_transcripts.json"

# Read the transcripts. The encoding is passed explicitly so that decoding
# does not depend on the platform's default locale encoding.
with open(json_file_path, 'r', encoding='utf-8') as json_file:
    patients = json.load(json_file)

# Specify the path to the summaries JSON file
summaries_json_file_path = f"../data/patients/patients_{transcripts_version}_summaries.json"

# Read the summaries with the same explicit encoding
with open(summaries_json_file_path, 'r', encoding='utf-8') as json_file:
    summaries = json.load(json_file)

# Specify the CSV file path (make sure it is a file, not a directory)
csv_file_path = f"../data/evaluations/summaries_{transcripts_version}_evaluation.csv"

In [53]:
# Function to validate and parse the response
def parse_response(response_content, expected_fields=11):
    """Turn the judge model's reply into a fixed-length list of 0/1 ints.

    The reply is expected to be one comma-separated row of 0/1 answers.
    Chat models often wrap that row in extra lines (a header row, code
    fences, a lead-in sentence), so when the reply spans several lines the
    line containing the most valid 0/1 fields is used; ties go to the
    earliest such line. A single-line reply is parsed exactly as before.

    Args:
        response_content: Raw text returned by the model. ``None`` or an
            empty string is treated as a reply with no fields.
        expected_fields: Number of values the returned list must contain.

    Returns:
        A list of exactly ``expected_fields`` ints, each 0 or 1. Extra
        fields are dropped; missing or invalid fields (non-numeric text,
        numbers other than 0/1) are reported as 0.
    """
    def to_binary(value):
        # Return 0 or 1 for a valid field, or None when the field is invalid.
        try:
            int_value = int(value)
        except (TypeError, ValueError):
            return None
        return int_value if int_value in (0, 1) else None

    # One candidate row per non-blank line of the reply.
    candidate_rows = [
        [field.strip() for field in line.split(',')]
        for line in (response_content or '').splitlines()
        if line.strip()
    ]

    # Pick the row that looks most like the answer row. max() returns the
    # first maximal element, so a single-line reply is always chosen as-is.
    best_row = max(
        candidate_rows,
        key=lambda row: sum(to_binary(field) is not None for field in row),
        default=[],
    )

    # Drop extra fields, coerce invalid ones to 0, then pad missing ones with 0.
    response_list = [to_binary(field) or 0 for field in best_row[:expected_fields]]
    response_list += [0] * (expected_fields - len(response_list))

    return response_list

In [54]:
# Evaluate every transcript/summary pair with the judge model and write one
# CSV row per transcript. The encoding is explicit so the output file does
# not depend on the platform's default locale encoding.
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)
    header = [
        "Transcript Number",
        "Summary introduces patient by name and reports and reports their primary symptom(s)",
        "Summary reports patient's current symptoms",
        "Summary reports patient's vital signs",
        "Summary reports patient's medications",
        "Summary includes summarizing bullet points at the bottom that give an overview of the content of the transcript",
        "No disagreement between symptoms in summary and symptoms in transcript",
        "No disagreement between vital signs in summary and vital signs in transcript",
        "No disagreement between meds in summary and meds in transcript",
        "Formatting of summary is consistent and appropriate",
        "Language in summary is fitting for the audience (doctor)",
        "Summarization engine does not try to diagnose"
    ]

    csv_writer.writerow(header)

    # Loop through each transcript number, invoke the model, and write the results
    for transcript_number, patient in patients.items():
        # The lookup that can actually fail is the summary: skip transcripts
        # without one instead of aborting the run with a KeyError and
        # leaving a partially written CSV.
        if transcript_number not in summaries:
            print(f"Skipping transcript {transcript_number}: no summary found")
            continue

        transcript = patient['chat_transcript']
        summary = summaries[transcript_number]['summary']
        full_prompt = f"Transcript Number: {transcript_number}\n\n{system_message_summary_judge}\n{summary}\n{transcript}"

        # Define the messages for the chat model
        messages = [
            SystemMessage(content=full_prompt),
        ]

        # Get the response
        response = model.invoke(messages)
        response_content = response.content.strip()

        # Process the response to extract the CSV row
        csv_row = parse_response(response_content)
        csv_row.insert(0, transcript_number)  # Add transcript number as the first column

        # Write the CSV row to the file
        csv_writer.writerow(csv_row)

print(f"CSV file has been created at: {csv_file_path}")


CSV file has been created at: ../data/evaluations/summaries_1.0_evaluation.csv
