In [None]:
import json
import os
from pathlib import Path

import jsonschema
import pandas as pd
from dotenv import load_dotenv

from discharge_summaries.openai_llm.chat_models import AzureOpenAIChatModel
from discharge_summaries.openai_llm.message import Message, Role
from discharge_summaries.openai_llm.token_count import (
    num_tokens_from_messages_azure_engine,
)
from discharge_summaries.schemas.mimic import PhysicianNote
from discharge_summaries.schemas.rcp_guidelines import RCPGuidelines

In [None]:
# Load variables from a local .env file into the process environment
# (python-dotenv). The Azure OpenAI endpoint and key are read from the
# environment with os.getenv when the chat model is constructed below.
load_dotenv()

In [None]:
# Azure OpenAI engine name; passed both to the chat model and to the token counter.
AZURE_ENGINE = "gpt-4-32k"
# Azure OpenAI REST API version; passed alongside the engine in both places.
AZURE_API_VERSION = "2023-07-01-preview"

In [None]:
# JSON schema generated from the RCPGuidelines model.
rcp_schema = RCPGuidelines.schema()

# Load the hand-written example summary that sits next to this notebook.
example_path = Path.cwd() / "example.json"
with example_path.open() as example_file:
    example = json.load(example_file)

# Raises jsonschema.ValidationError if the example does not conform to the
# schema, so a stale example is caught before it is placed in the prompt.
jsonschema.validate(instance=example, schema=rcp_schema)

In [None]:
# Workbook holding the practice clinical notes, relative to the notebook's
# parent directory.
notes_workbook_path = (
    Path.cwd().parent
    / "data"
    / "rcp"
    / "5. Activity-practice discharge summary writing task_0.xlsx"
)

# header=4 takes the fifth spreadsheet row as the header row; the two unnamed
# columns are given explicit names for use in later cells.
notes_df = pd.read_excel(
    notes_workbook_path,
    sheet_name="Notes",
    header=4,
).rename(columns={"Unnamed: 0": "timestamp", "Unnamed: 1": "text"})
notes_df.head()

In [None]:
# Split the sheet into one DataFrame per clinical note. Notes are separated by
# two (or more) consecutive rows in which every column is empty.
blank_rows = notes_df.isnull().all(axis=1)

# A row starts a separator when it and the row after it are both blank.
# fill_value=False keeps the mask strictly boolean (a plain shift would put a
# NaN in the last position).
consecutive_blank_rows = blank_rows & blank_rows.shift(-1, fill_value=False)

# Work with integer positions rather than index labels, because the slices
# below use .iloc.
separator_positions = consecutive_blank_rows.to_numpy().nonzero()[0]

split_dfs = []
start_index = 0
for end_index in map(int, separator_positions):
    chunk = notes_df.iloc[start_index:end_index]
    # Runs of three or more blank rows (or blank rows at the very top) yield
    # empty or all-blank chunks; skip them so downstream code never sees a
    # note with no rows.
    if not chunk.dropna(how="all").empty:
        split_dfs.append(chunk)
    # Never move backwards when separators overlap within a long blank run.
    start_index = max(start_index, end_index + 2)

# Whatever follows the last separator is the final note, unless it is empty
# or consists only of trailing blank rows.
final_chunk = notes_df.iloc[start_index:]
if not final_chunk.dropna(how="all").empty:
    split_dfs.append(final_chunk)

In [None]:
# Build one PhysicianNote per chunk: the timestamp is taken from the chunk's
# first row and the text is the chunk's non-null "text" cells joined with
# newlines.
# NOTE(review): hadm_id="0" looks like a placeholder admission id - confirm.
notes = [
    PhysicianNote(
        timestamp=note_rows["timestamp"].tolist()[0],
        text="\n".join(note_rows["text"].dropna().tolist()),
        hadm_id="0",
    )
    for note_rows in split_dfs
]

In [None]:
# System prompt: sets the task, embeds the RCP JSON schema the answer must
# follow, and includes one validated example summary.
#
# RCPGuidelines.schema_json() already returns a JSON string, so it is embedded
# as-is; wrapping it in json.dumps would double-encode it into a quoted,
# backslash-escaped blob. The note title format described here matches the
# "Clinical Note N: timestamp" titles produced for the user message.
system_message = Message(
    role=Role.SYSTEM,
    content=f"""You are a consultant doctor tasked with writing a patients discharge summary.
Only the information in the clinical notes provided by the user can be used for this task.
Each clinical note has a title of the format Clinical Note [number]: [timestamp].

The discharge summary must be written in accordance with the following json schema.
{RCPGuidelines.schema_json()}
If the information is not present to fill in a field, answer it with an empty string or list.

An example of a valid discharge summary is provided below.
{json.dumps(example)}
""",
)

In [None]:
# Render every note as a titled section ("Clinical Note N: timestamp" followed
# by the note text), numbered from 1, with a blank line between sections.
note_sections = [
    f"Clinical Note {number}: {note.timestamp}\n{note.text}"
    for number, note in enumerate(notes, start=1)
]
notes_string = "\n\n".join(note_sections)

# The user turn carries the instruction followed by all rendered notes.
user_message = Message(
    role=Role.USER,
    content=(
        "Generate a discharge summary json for the following clinical notes.\n"
        + notes_string
    ),
)

In [None]:
# Endpoint and key come from the environment; os.getenv yields None when a
# variable is not set.
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
azure_key = os.getenv("AZURE_OPENAI_KEY")

# temperature=0 is passed to the chat model.
# NOTE(review): timeout=20 is presumably in seconds - confirm against
# AzureOpenAIChatModel.
llm = AzureOpenAIChatModel(
    api_base=azure_endpoint,
    api_key=azure_key,
    api_version=AZURE_API_VERSION,
    engine=AZURE_ENGINE,
    temperature=0,
    timeout=20,
)

In [None]:
# Send the system prompt followed by the user turn containing the notes.
conversation = [system_message, user_message]
response = llm.query(conversation)

In [None]:
# Parse the model's reply as JSON (raises json.JSONDecodeError if it is not
# valid JSON) and save it, pretty-printed, next to the notebook.
discharge_summary = json.loads(response.content)
output_path = Path.cwd() / "output.json"
output_path.write_text(json.dumps(discharge_summary, indent=4))

In [None]:
# Token count of the system prompt alone (schema + example); the user message
# is not included in this count.
system_prompt_only = [system_message]
num_tokens_from_messages_azure_engine(
    system_prompt_only, AZURE_ENGINE, AZURE_API_VERSION
)