In [1]:
import os
import ast
import concurrent
import json
import os
import pandas as pd
import tiktoken
from csv import writer
from IPython.display import display, Markdown, Latex
from openai import OpenAI
from PyPDF2 import PdfReader
from scipy import spatial
from tenacity import retry, wait_random_exponential, stop_after_attempt
from tqdm import tqdm
from termcolor import colored

GPT_MODEL = "gpt-4o-mini"
EMBEDDING_MODEL = "text-embedding-ada-002"
client = OpenAI()

In [2]:
# chat completion request
@retry(wait=wait_random_exponential(min=1, max=40), stop=stop_after_attempt(3))
def chat_completion_request(messages, tools=None, model=GPT_MODEL):
    try:
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            tools=tools,
            tool_choice="required",
        )
        return response
    except Exception as e:
        print("Unable to generate ChatCompletion response")
        print(f"Exception: {e}")
        return e

In [3]:
# conversation completion request
class Conversation:
    def __init__(self):
        self.conversation_history = []

    def add_message(self, role, content):
        message = {"role": role, "content": content}
        self.conversation_history.append(message)

    def display_conversation(self, detailed=False):
        role_to_color = {
            "system": "red",
            "user": "green",
            "assistant": "blue",
            "function": "magenta",
        }
        for message in self.conversation_history:
            print(
                colored(
                    f"{message['role']}: {message['content']}\n\n",
                    role_to_color[message["role"]],
                )
            )

In [4]:
tools = [
    {
        "type": "function",
        "function": {
            "name": "read_conversation_and_summarize_into_electronic_medical_record",
            "description": """This function reads a conversation between a doctor and a patient or the patient's family, 
            extracts key information, and generates an electronic medical record in standard written format.""",
            # "strict": True,
            "parameters": {
                "type": "object",
                "properties": {
                    "presenting_complaint": {
                        "type": "array",
                        "description": "A list of the patient's main health concerns or reasons for seeking medical attention, which may include multiple complaints.",
                        "items": {
                            "type": "string",
                            "description": "A presenting complaint.",
                        },
                    },
                    "symptoms": {  
                        "type": "array",
                        "description": "A list of symptoms reported by the patient, which may include multiple distinct symptoms.",
                        "items": {
                            "type": "string",
                            "description": "A symptom.",
                        },
                    },
                    "physical_examination_findings": {
                        "type": "array",
                        "description": "Results from the physical examination, including vital signs (e.g., blood pressure, temperature, pulse) and evaluations of organ and body functions.",
                        "items": {
                            "type": "string",
                            "description": "A specific finding from the physical examination.",
                        },
                    },
                    "test_results": {
                        "type": "array",
                        "description": "Results from diagnostic tests such as blood tests, urine tests, or imaging (e.g., X-rays, CT, MRI).",
                        "items": {
                            "type": "string",
                            "description": "A specific test result.",
                        },
                    },
                    "diagnosis": {
                        "type": "string",
                        "description": "The diagnosis made by the doctor, including the name of the disease or condition.",
                    },
                    "treatment_plan": {
                        "type": "array",
                        "description": "The treatment plan prescribed by the doctor, which may include medications, surgeries, rehabilitation, or lifestyle advice.",
                        "items": {
                            "type": "string",
                            "description": "A specific treatment recommendation or action plan.",
                        },
                    },
                    "prescription_info": {
                        "type": "array",
                        "description": "Details of prescribed medications, including the drug name, dosage, route of administration, and duration of treatment.",
                        "items": {
                            "type": "string",
                            "description": "Details of a prescribed medication.",
                        },
                    },
                    "follow_up": {
                        "type": "array",
                        "description": "Information about follow-up plans, including the next scheduled appointment and observations of the patient's progress or changes in their condition.",
                        "items": {
                            "type": "string",
                            "description": "A specific follow-up detail or plan.",
                        },
                    },
                },
                "required": ["presenting_complaint", "symptoms", "diagnosis", "treatment_plan"],  
                "additionalProperties": False,
            },
        },
    },
]


In [5]:
system_message = "You are a medical assistant. You are talking to a patient who is describing their symptoms to you. You need to summarize the conversation into the patient's electronic medical record."
conversation=Conversation()
conversation.add_message("system", system_message)

In [6]:
test_message = """
1
00:00:00,000 --> 00:00:03,600
SPEAKER_01|うつ病の診断ってどうやるんですか?具体的に

2
00:00:03,600 --> 00:00:08,140
SPEAKER_00|うつ病の診断は、普通に症状がどれぐらいあるかだよね

3
00:00:08,140 --> 00:00:12,439
SPEAKER_00|落ち込んでるとか、調子が悪いとか、寝れてないとか、食べれてないとか

4
00:00:12,439 --> 00:00:14,339
SPEAKER_00|そういうことやる気が出てないとか

5
00:00:14,339 --> 00:00:18,379
SPEAKER_01|患者さんが来てそれを言ったら、じゃああなたうつ病ですって診断するんですか?

6
00:00:18,580 --> 00:00:21,300
SPEAKER_00|まあそうだよね、うつ症状だよね

7
00:00:21,300 --> 00:00:25,400
SPEAKER_00|それであとは時系列を聞いたりとか、いつ頃からなったのかで

8
00:00:25,400 --> 00:00:30,900
SPEAKER_00|うつ病なのか、PTSDなのか、発達障害、認知障害としてうつ状態なのか

9
00:00:30,900 --> 00:00:35,679
SPEAKER_00|一過性のストレスなのか、適応障害なのか、まあいろいろ判断したりするけどね

10
00:00:35,679 --> 00:00:36,020
SPEAKER_00|ありがとうございます


"""

In [7]:
conversation.add_message("user", test_message)
chat_response = chat_completion_request(conversation.conversation_history, tools=tools)
print(chat_response)

ChatCompletion(id='chatcmpl-ACgoWYU83FEXf8UejwI0vtQMjjhzR', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_IQFhtlKu4N9sLSMTjOV6HBmt', function=Function(arguments='{"presenting_complaint":["うつ病の診断","うつ症状","調子が悪い","睡眠障害","食事障害"],"symptoms":["落ち込んでいる","やる気が出ていない","寝れていない","食べれていない"],"diagnosis":"うつ病","treatment_plan":["時系列の評価","他の精神障害との鑑別診断を行う"]}', name='read_conversation_and_summarize_into_electronic_medical_record'), type='function')]))], created=1727587876, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier=None, system_fingerprint='fp_f85bea6784', usage=CompletionUsage(completion_tokens=118, prompt_tokens=857, total_tokens=975, completion_tokens_details={'reasoning_tokens': 0}))


In [8]:
print(chat_response.choices[0].message.tool_calls[0].function.arguments)

{"presenting_complaint":["うつ病の診断","うつ症状","調子が悪い","睡眠障害","食事障害"],"symptoms":["落ち込んでいる","やる気が出ていない","寝れていない","食べれていない"],"diagnosis":"うつ病","treatment_plan":["時系列の評価","他の精神障害との鑑別診断を行う"]}


In [1]:
tools = [
    {
        "type": "function",
        "function": {
            "name": "read_conversation_and_summarize_into_electronic_medical_record",
            "description": """
            あなたは医療アシスタントです。医師と患者、または患者の家族との対話から、以下の重要な医療情報を抽出し、電子カルテに記録してください:
            - 主訴 (presenting_complaint)
            - 症状 (symptoms)
            - 身体所見 (physical_examination_findings)
            - 検査結果 (test_results)
            - 診断 (diagnosis)
            - 治療方針 (treatment_plan)
            - 処方内容 (prescription_info)
            - フォローアップ計画 (follow_up)

            **重要な注意事項**:
            - すでに記録された情報がある場合、それを再度記録しないでください。同じ情報を異なる表現で述べていても、既に記録された情報と同じであれば記録する必要はありません。
            - すでに記録された情報は{previous_summary}に含まれています。以下に示す内容はすでに記録されています:
              - 主訴: {previous_summary['presenting_complaint']}
              - 症状: {previous_summary['symptoms']}
              - 診断: {previous_summary['diagnosis']}
              - 治療方針: {previous_summary['treatment_plan']}
            
            **新しい情報のみ**を以下の対話から抽出してください:
            {current_sentence}
            """,
            "parameters": {
                "type": "object",
                "properties": {
                    "current_sentence": {
                        "type": "string",
                        "description": "新しい対話内容。この対話から新しい医療情報を抽出します。",
                    },
                    "previous_summary": {
                        "type": "object",
                        "description": "すでに記録された電子カルテの情報。",
                        "properties": {
                            "presenting_complaint": {
                                "type": "array",
                                "description": "すでに記録された主訴のリスト。",
                                "items": {
                                    "type": "string"
                                }
                            },
                            "symptoms": {
                                "type": "array",
                                "description": "すでに記録された症状のリスト。",
                                "items": {
                                    "type": "string"
                                }
                            },
                            "physical_examination_findings": {
                                "type": "array",
                                "description": "すでに記録された身体所見のリスト。",
                                "items": {
                                    "type": "string"
                                }
                            },
                            "test_results": {
                                "type": "array",
                                "description": "すでに記録された検査結果のリスト。",
                                "items": {
                                    "type": "string"
                                }
                            },
                            "diagnosis": {
                                "type": "string",
                                "description": "すでに記録された診断内容。",
                            },
                            "treatment_plan": {
                                "type": "array",
                                "description": "すでに記録された治療計画のリスト。",
                                "items": {
                                    "type": "string"
                                }
                            },
                            "prescription_info": {
                                "type": "array",
                                "description": "すでに記録された処方内容のリスト。",
                                "items": {
                                    "type": "string"
                                }
                            },
                            "follow_up": {
                                "type": "array",
                                "description": "すでに記録されたフォローアップ計画のリスト。",
                                "items": {
                                    "type": "string"
                                }
                            },
                        }
                    },
                    "presenting_complaint": {
                        "type": "array",
                        "description": "新しく抽出された患者が訴えている主な健康上の問題や、診察を求める理由。",
                        "items": {
                            "type": "string"
                        },
                    },
                    "symptoms": {  
                        "type": "array",
                        "description": "新しく抽出された患者が報告する症状。",
                        "items": {
                            "type": "string"
                        },
                    },
                    "physical_examination_findings": {
                        "type": "array",
                        "description": "新しく抽出された身体検査の所見。",
                        "items": {
                            "type": "string"
                        },
                    },
                    "test_results": {
                        "type": "array",
                        "description": "新しく抽出された検査結果。",
                        "items": {
                            "type": "string"
                        },
                    },
                    "diagnosis": {
                        "type": "string",
                        "description": "新しく抽出された診断。",
                    },
                    "treatment_plan": {
                        "type": "array",
                        "description": "新しく抽出された治療計画。",
                        "items": {
                            "type": "string"
                        },
                    },
                    "prescription_info": {
                        "type": "array",
                        "description": "新しく抽出された処方の詳細。",
                        "items": {
                            "type": "string"
                        },
                    },
                    "follow_up": {
                        "type": "array",
                        "description": "新しく抽出されたフォローアップの詳細。",
                        "items": {
                            "type": "string"
                        },
                    },
                },
                "required": ["previous_summary", "current_sentence", "presenting_complaint", "symptoms", "diagnosis", "treatment_plan"],  
                "additionalProperties": False,
            },
        },
    },
]


import openai
import pandas as pd

class Conversation:
    def __init__(self):
        self.conversation_history = []

    def add_message(self, role, content):
        message = {"role": role, "content": content}
        self.conversation_history.append(message)

    def display_conversation(self, detailed=False):
        role_to_color = {
            "system": "red",
            "user": "green",
            "assistant": "blue",
            "function": "magenta",
        }
        for message in self.conversation_history:
            print(
                colored(
                    f"{message['role']}: {message['content']}\n\n",
                    role_to_color[message["role"]],
                )
            )
conv = Conversation()
system_prompt = """
あなたは医療アシスタントとして、医師と患者の会話から病歴に関する情報を抽出する役割を担っています。次の項目に基づいて情報を整理してください（すべての項目が必ずしも含まれているわけではありませんが、存在する情報は抽出してください）：
- 主訴 (presenting_complaint)
- 症状 (symptoms)
- 身体所見 (physical_examination_findings)
- 検査結果 (test_results)
- 診断 (diagnosis)
- 治療方針 (treatment_plan)
- 処方内容 (prescription_info)
- フォローアップ計画 (follow_up)

以下の会話から、それぞれの項目に該当する情報を抽出し、順次電子カルテ(MER)を作成してください:
"""
conv.add_message("system", system_prompt)
# 假设我们有一个 DataFrame 存储表格内容
data = {
    "Speaker": ["SPEAKER_01", "SPEAKER_00", "SPEAKER_00", "SPEAKER_01"],
    "Dialogue": ["うつ病の診断ってどうやるんですか?具体的に", "うつ病の診断は、普通に症状がどれぐらいあるかだよね", "落ち込んでるとか、調子が悪いとか、寝れてないとか、食べれてないとか", "そういうことやる気が出てないとか"],
    "EMR": ["", "", "", ""]
}

# 转换为 DataFrame
df = pd.DataFrame(data)

In [2]:
def process_table(conv, df, tools):
    mer_records = []

    for index, row in df.iterrows():
        # 获取当前的说话者和对话内容
        dialogue = row['Dialogue']

        # 添加到上下文中
        conv.add_message("user", dialogue)

        # 生成当前对话的 MER
        mer = generate_mer_from_dialogue(conv, dialogue, tools)

        # 存储生成的 MER
        mer_records.append(mer)

        # 将 MER 填入 DataFrame
        df.at[index, 'EMR'] = mer

    return df
# 调用 OpenAI 的 API 进行聊天总结
def generate_mer_from_dialogue(conv, current_sentence, tools):
    # 添加当前对话
    conv.add_message("user", current_sentence)
    
    # 进行 API 请求，生成聊天补全
    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=conv.conversation_history,
        tools=tools,
        tool_choice="required",
    )
    
    # 获取 API 生成的结果
    print(response.choices[0].message.tool_calls[0].function.arguments)
    
    tool_call_id = response.choices[0].message.tool_calls[0].id
    tool_function_name = response.choices[0].message.tool_calls[0].function.name
    content = response.choices[0].message.tool_calls[0].function.arguments
    
    conv.conversation_history.append(
        {
            "role": "assistant",
            "tool_call_id": tool_call_id,
            "tool_function_name": tool_function_name,
            "content": content,
        }
    )
    
    mer_summary = response.choices[0].message.content
    print(f"{mer_summary=}")
    
    # 返回提取出的电子病历记录
    return mer_summary

In [3]:
print(conv.conversation_history)
df_with_mer = process_table(conv, df, tools)
df_with_mer

[{'role': 'system', 'content': '\nあなたは医療アシスタントとして、医師と患者の会話から病歴に関する情報を抽出する役割を担っています。次の項目に基づいて情報を整理してください（すべての項目が必ずしも含まれているわけではありませんが、存在する情報は抽出してください）：\n- 主訴 (presenting_complaint)\n- 症状 (symptoms)\n- 身体所見 (physical_examination_findings)\n- 検査結果 (test_results)\n- 診断 (diagnosis)\n- 治療方針 (treatment_plan)\n- 処方内容 (prescription_info)\n- フォローアップ計画 (follow_up)\n\n以下の会話から、それぞれの項目に該当する情報を抽出し、順次電子カルテ(MER)を作成してください:\n'}]
{"current_sentence": "うつ病の診断ってどうやるんですか?具体的に", "previous_summary": {"presenting_complaint": [], "symptoms": [], "physical_examination_findings": [], "test_results": [], "diagnosis": "", "treatment_plan": [], "prescription_info": [], "follow_up": []}, "presenting_complaint": [], "symptoms": [], "diagnosis": ""}
mer_summary=None
{"current_sentence":"うつ病の診断は、普通に症状がどれぐらいあるかだよね","previous_summary":{"presenting_complaint":[],"symptoms":[],"physical_examination_findings":[],"test_results":[],"diagnosis":"","treatment_plan":[],"prescription_info":[],"follow_up":[]},"presenting_complaint":[

Unnamed: 0,Speaker,Dialogue,EMR
0,SPEAKER_01,うつ病の診断ってどうやるんですか?具体的に,
1,SPEAKER_00,うつ病の診断は、普通に症状がどれぐらいあるかだよね,
2,SPEAKER_00,落ち込んでるとか、調子が悪いとか、寝れてないとか、食べれてないとか,
3,SPEAKER_01,そういうことやる気が出てないとか,
