In [3]:
import datetime
import json
import os
import re
import uuid
from typing import Any, List, TypedDict

import leidenalg
import networkx as nx
from langchain.text_splitter import RecursiveCharacterTextSplitter, MarkdownHeaderTextSplitter
from langchain_community.document_loaders import DirectoryLoader, UnstructuredMarkdownLoader
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.vectorstores import FAISS
from langgraph.graph import StateGraph, END
from PIL import Image
from sentence_transformers import CrossEncoder

from model_loader import get_llm, get_embedding_model, get_vision_pipeline

# Restrict this process to the CUDA devices numbered 0 and 2.
# NOTE(review): this runs after the imports above; presumably none of them has
# initialized CUDA yet, otherwise the setting would be ignored — confirm.
os.environ["CUDA_VISIBLE_DEVICES"]="0,2"

  from .autonotebook import tqdm as notebook_tqdm


# RecursiveSplitter

In [None]:
# Directory the FAISS index is loaded from and saved to by initialize_retriever().
FAISS_INDEX_PATH = "./faiss_index"

# Shared model handles created once via the project-local model_loader helpers.
text_llm = get_llm()
embedding_model = get_embedding_model()
vision_pipeline = get_vision_pipeline()

# Web search tool used by research_node() when local retrieval returns little text.
web_search_tool = DuckDuckGoSearchRun()

def _load_page_chunks(doc_dir: str) -> list:
    """Split every ``.md`` file directly inside *doc_dir* into page-tagged chunks.

    Each file is first split on ``####`` headers; the digits found in a header
    become the chunk's ``page`` metadata. Sections whose header contains no
    digits (including text before the first header) are dropped. Remaining
    sections are further split into 1000-character chunks with 100 overlap.
    """
    header_splitter = MarkdownHeaderTextSplitter(
        headers_to_split_on=[("####", "PageHeader")],
        strip_headers=True,
    )
    chunk_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

    chunks = []
    # Sorted so that repeated index builds see the files in the same order.
    for filename in sorted(os.listdir(doc_dir)):
        if not filename.endswith(".md"):
            continue

        with open(os.path.join(doc_dir, filename), 'r', encoding='utf-8') as f:
            content = f.read()

        for section in header_splitter.split_text(content):
            header_value = section.metadata.get('PageHeader', '')
            page_number = ''.join(filter(str.isdigit, header_value))
            if not page_number:
                continue

            # Replace the splitter metadata with the fields used for citations.
            section.metadata = {'source': filename, 'page': page_number}
            chunks.extend(chunk_splitter.split_documents([section]))
    return chunks


def initialize_retriever(doc_dir: str = "./data/split_file/anatomy", k: int = 5):
    """Return a FAISS retriever, loading a saved index or building a new one.

    If ``FAISS_INDEX_PATH`` exists the index is loaded from it. Otherwise the
    Markdown files in *doc_dir* are chunked, embedded with ``embedding_model``,
    indexed, and the index is saved to ``FAISS_INDEX_PATH``.

    Args:
        doc_dir: Directory containing the source ``.md`` files.
        k: Number of documents the retriever returns per query.

    Returns:
        The retriever, or ``None`` when *doc_dir* is missing or yields no chunks.
    """
    if os.path.exists(FAISS_INDEX_PATH):
        print(f"기존의 faiss 인덱스를 {FAISS_INDEX_PATH}에서 불러옴")
        # NOTE(security): allow_dangerous_deserialization=True permits loading
        # pickled data from the index directory; only load an index that this
        # project wrote itself.
        vectorstore = FAISS.load_local(
            FAISS_INDEX_PATH,
            embeddings=embedding_model,
            allow_dangerous_deserialization=True
        )
        return vectorstore.as_retriever(search_kwargs={"k": k})

    # A missing corpus directory is reported like an empty corpus instead of
    # raising FileNotFoundError from os.listdir().
    if not os.path.isdir(doc_dir):
        print(f"경고: 문서 디렉터리를 찾을 수 없습니다: {doc_dir}")
        return None

    all_final_chunks = _load_page_chunks(doc_dir)
    if not all_final_chunks:
        print("경고: 처리할 청크가 없습니다.")
        return None

    vectorstore = FAISS.from_documents(documents=all_final_chunks, embedding=embedding_model)
    vectorstore.save_local(FAISS_INDEX_PATH)
    print("FAISS 인덱스 생성이 완료되었습니다.")
    return vectorstore.as_retriever(search_kwargs={"k": k})


def build_image_map(doc_dir="./data/split_file/anatomy"):
    """Map lower-cased Markdown image alt texts to existing image file paths.

    Walks *doc_dir* recursively, scans every ``.md`` file for ``![alt](path)``
    links, resolves each path relative to the Markdown file's directory, and
    keeps only targets that exist on disk. A later link with the same alt text
    overwrites an earlier one. Files that cannot be processed are reported and
    skipped.

    Returns:
        dict of ``alt.strip().lower()`` -> normalized image path.
    """
    image_link = re.compile(r'!\[(.*?)\]\((.*?)\)')
    mapping = {}

    for folder, _, names in os.walk(doc_dir):
        for name in names:
            if not name.endswith(".md"):
                continue

            md_path = os.path.join(folder, name)
            try:
                with open(md_path, 'r', encoding="utf-8") as handle:
                    text = handle.read()

                for alt, target in image_link.findall(text):
                    resolved = os.path.normpath(
                        os.path.join(os.path.dirname(md_path), target)
                    )
                    if os.path.exists(resolved):
                        mapping[alt.strip().lower()] = resolved
            except Exception as e:
                # Best effort: report the file and move on to the next one.
                print(f"파일 처리 중 오류 : {name}, {e}")

    print(f"{len(mapping)}개의 이미지 경로 매핑")
    return mapping

# Load or build the retriever once at module execution time; the result is
# None when no chunks could be indexed.
anatomy_retriever = initialize_retriever()
# Image lookup is disabled together with the commented-out image-analysis node.
# image_path_map = build_image_map()

def search_anatomy_docs(retriever: Any, query: str) -> str:
    """Run *query* through *retriever* and format the hits as one text block.

    Args:
        retriever: Object exposing ``invoke(query)`` that returns documents
            with ``metadata`` (dict) and ``page_content`` (str); may be None.
        query: Search string passed to the retriever.

    Returns:
        The hits joined by blank lines, each prefixed with a
        ``--- (출처 : <source>, 페이지 : <page>) ---`` header, or a fixed
        message when *retriever* is falsy.
    """
    if not retriever:
        return "Retriever가 초기화되지 않음"

    formatted_docs = []
    for doc in retriever.invoke(query):
        # Missing metadata falls back to placeholders rather than raising.
        source_file = doc.metadata.get("source", "알 수 없음")
        page_num = doc.metadata.get("page", "N/A")
        formatted_docs.append(
            f"--- (출처 : {source_file}, 페이지 : {page_num}) ---\n{doc.page_content}"
        )
    return "\n\n".join(formatted_docs)

def analyze_anatomy_image(topic: str, image_map: dict) -> str:
    """Describe the local anatomy image registered for *topic*.

    Args:
        topic: Image topic; matched against *image_map* after lower-casing
            and stripping whitespace.
        image_map: Mapping of normalized alt text to image file path.

    Returns:
        The vision model's answer text, or a message explaining why no
        analysis was produced. Exceptions raised while opening the image or
        running the model are reported in the returned string, not raised.
    """
    if vision_pipeline is None:
        return "비전 모델이 로드되지 않아 이미지 분석을 실험할 수 없음"

    image_path = image_map.get(topic.lower().strip())
    if not image_path:
        return f"'{topic}'과 관련된 이미지를 로컬 문서에서 찾을 수 없음"

    try:
        # convert() returns a separate copy, so the file handle can be closed
        # as soon as the conversion is done.
        with Image.open(image_path) as source:
            image = source.convert("RGB")

        prompt = f"USER: <image>\n이 해부학 이미지를 상세히 설명해줘. 이미지에 나타난 주요 근육, 뼈, 구조물들의 이름과 특징을 전문가의 시각에서 분석해줘. 이 이미지는 {topic}에 관한 것이다.\nASSISTANT:"
        outputs = vision_pipeline(image, prompt, generate_kwargs={"max_new_tokens": 512})

        if outputs and "generated_text" in outputs[0]:
            segments = outputs[0]["generated_text"].split("ASSISTANT:")
            # With the marker present, keep the text right after its first
            # occurrence; without it, keep the whole generation instead of
            # failing with an IndexError.
            answer = segments[1] if len(segments) > 1 else segments[0]
            return answer.strip()
        return "이미지 분석 실패"
    except Exception as e:
        return f"이미지 분석 중 오류 발생 : {e}"
        
    
class AgentState(TypedDict) :
    """State dictionary passed between the nodes of the LangGraph workflow."""
    # Question exactly as supplied in the workflow input.
    original_question: str
    # Text-retrieval query written by query_analyst_node.
    search_query: str
    # Image topic written by query_analyst_node.
    image_topic: str
    # Formatted retrieval text (plus optional web results) written by research_node.
    documents: str
    # Disabled together with the image-analysis node:
    # image_analysis: str
    # Draft answer written by content_synthesis_node.
    draft: str
    # Final answer written by the reviewer node.
    final_answer: str

def query_analyst_node(state: AgentState):
    """Ask the LLM for a search query and an image topic for the question.

    The LLM is asked to answer with a JSON object. The first ``{`` through the
    last ``}`` of the response is parsed; when no JSON is found, parsing fails,
    or ``search_query`` is missing, blank or not a string, the original
    question is used as the search query. ``image_topic`` falls back to ``""``
    when missing or not a string.

    Returns:
        dict with ``search_query`` and ``image_topic`` state updates.
    """
    question = state["original_question"]
    prompt = f"""당신은 자연어 이해 전문가입니다. 다음 사용자 질문을 분석하여, 텍스트 검색을 위한 검색 쿼리와 이미지 분석을 위한 핵심 주제를 추출하세요.
                질문: "{question}"
                출력은 반드시 다음 JSON 형식을 따라야 합니다: {{"search_query": "...", "image_topic": "..."}}
            """

    response = text_llm.invoke(prompt)
    fallback = {"search_query": question, "image_topic": ""}

    json_match = re.search(r'\{.*\}', response, re.DOTALL)
    if json_match is None:
        return fallback

    try:
        parsed_response = json.loads(json_match.group(0))
    except json.JSONDecodeError:
        return fallback

    search_query = parsed_response.get("search_query")
    image_topic = parsed_response.get("image_topic")

    # An empty or non-string query (e.g. JSON null) would otherwise reach the
    # retriever and the web search unchanged.
    if not isinstance(search_query, str) or not search_query.strip():
        search_query = question
    if not isinstance(image_topic, str):
        image_topic = ""

    return {"search_query": search_query, "image_topic": image_topic}
    
def research_node(state: AgentState):
    """Collect context for the search query, adding web results when needed.

    Local retrieval runs first. A web search is appended when the local text
    is empty, contains "찾을 수 없습니다", or is shorter than 200 characters.

    Returns:
        dict with the ``documents`` state update.
    """
    query = state["search_query"]
    local_text = search_anatomy_docs(anatomy_retriever, query)

    has_enough_local_text = (
        bool(local_text)
        and "찾을 수 없습니다" not in local_text
        and len(local_text) >= 200
    )
    if has_enough_local_text:
        return {"documents": local_text}

    web_text = web_search_tool.run(query)
    return {"documents": local_text + "\n\n--- 웹 검색 결과 ---\n" + web_text}

# def image_analysis_node(state: AgentState) :
#     topic = state.get("image_topic")
#     if not topic :
#         return {"image_analysis" : "이미지 분석 주제가 없습니다."}
#     analysis = analyze_anatomy_image(topic, image_path_map)
#     return {"image_analysis" : analysis}
        
def content_synthesis_node(state: AgentState):
    """Have the LLM write a draft answer from the question and collected text.

    Returns:
        dict with the ``draft`` state update holding the raw LLM output.
    """
    question = state['original_question']
    documents = state['documents']

    prompt = f"""당신은 의학 콘텐츠 작성가입니다. 아래의 텍스트 정보를 종합하여, 사용자의 원본 질문에 대한 상세한 답변 초안을 작성하세요.

                [원본 질문]
                {question}

                [수집된 텍스트 정보]
                {documents}

                [작성할 답변 초안]
            """
    return {"draft": text_llm.invoke(prompt)}

def final_review_node(state: AgentState):
    """Have the LLM turn the draft into a final Korean answer with citations.

    The prompt contains the review criteria, one good and one bad example,
    the collected context and the draft. The LLM output is stored unmodified.

    Returns:
        dict with the ``final_answer`` state update.
    """
    documents = state['documents']
    draft = state['draft']

    prompt = f"""USER :
                    당신은 수석 의학 에디터입니다. 아래의 답변 초안과 원본 컨텍스트를 검토하여 최종 답변을 생성하세요.
                    다음 기준과 예시를 반드시 따라주세요:
                    1. 사실 관계가 정확한지 확인하세요.
                    2. 전문 용어를 최대한 쉬운 말로 풀어 설명하세요.
                    3. **답변 내용의 근거가 되는 출처를 반드시 문장 끝에 '(출처: 파일명, 페이지: X)' 형식으로 명시하세요.**
                    4. 최종 사용자가 읽기 쉽도록 문장을 다듬고 구조화하세요.
                    5. 최종 답변은 반드시 한국어로 작성되어야 합니다.

                    ---
                    [좋은 답변 예시]
                    경골(Tibia)은 정강이뼈라고도 불리며, 다리 하부의 안쪽에 위치한 두 개의 뼈 중 더 크고 튼튼한 뼈입니다. (출처: 2_Osteology.md, 페이지: 158) 이 뼈의 위쪽 끝부분은 넓고 평평한 관절면을 이루어 대퇴골(넙다리뼈)과 무릎 관절을 형성합니다. (출처: 2_Osteology.md, 페이지: 158)

                    ---
                    [나쁜 답변 예시]
                    ## 원본 컨텍스트
                    경골(Tibia)은 다리 하부의 안쪽에 위치한 두 개의 뼈 중 더 크고 튼튼한 뼈이며, 정강이뼈라고도 불린다. 이 뼈의 위쪽 끝부분은 넓고 평평한 관절면을 이루어 대퇴골(넙다리뼈)과 무릎 관절을 형성한다. (출처: 2_Osteology.md, 페이지: 158)

                    ## 답변 초안
                    경골은 다리에 있는 뼈입니다. superior articular surface를 가지고 있습니다.

                    ## 원본 질문
                    경골에 대해 설명해 주세요.

                    ## 수집된 텍스트 정보
                    경골(Tibia)에 대한 정보입니다.

                    ## 이미지 분석 결과
                    경골 이미지가 확인되었습니다.

                    ## 작성할 답변 초안
                    안녕하세요! 경골에 대해 설명해 드릴게요. 경골은 다리에 있는 뼈입니다. superior articular surface를 가지고 있습니다.
                    (-> 이유: 불필요한 정보(원본 컨텍스트, 답변 초안, 원본 질문, 수집된 텍스트 정보, 이미지 분석 결과, 작성할 답변 초안) 포함, 불필요한 인사말 포함, 전문 용어 설명 없음, 출처 누락)
                    ---

                    **[중요 지침]**
                    **위의 '좋은 답변 예시'와 같이, 다른 부가적인 설명이나 제목 없이 오직 '최종 답변'의 본문 내용만 생성해야 합니다.**

                    [원본 컨텍스트]
                    {documents}

                    [답변 초안]
                    {draft}

                    [최종 답변]
                    """
    return {"final_answer": text_llm.invoke(prompt)}

def final_review_node_postprocess(state: AgentState):
    """Have the LLM produce the final English answer and trim echoed prompt text.

    Uses the same review prompt layout as ``final_review_node`` but asks for an
    English answer. Only the text after the last ``[최종 답변]`` marker in the
    LLM output is kept (the whole output when the marker is absent), with
    surrounding whitespace removed.

    Returns:
        dict with the ``final_answer`` state update.
    """
    documents = state['documents']
    draft = state['draft']

    prompt = f"""USER :
                    당신은 수석 의학 에디터입니다. 아래의 답변 초안과 원본 컨텍스트를 검토하여 최종 답변을 생성하세요.
                    다음 기준과 예시를 반드시 따라주세요:
                    1. 사실 관계가 정확한지 확인하세요.
                    2. 전문 용어를 최대한 쉬운 말로 풀어 설명하세요.
                    3. **답변 내용의 근거가 되는 출처를 반드시 문장 끝에 '(출처: 파일명, 페이지: X)' 형식으로 명시하세요.**
                    4. 최종 사용자가 읽기 쉽도록 문장을 다듬고 구조화하세요.
                    5. 최종 답변은 반드시 영어로 작성되어야 합니다.

                    ---
                    [좋은 답변 예시]
                    The tibia, also known as the shin bone, is the larger and stronger of the two bones located on the medial side of the lower leg. (Source: 2_Osteology.md, page: 158) The upper end of this bone forms a broad, flat articular surface that articulates with the femur (thigh bone) to form the knee joint. (Source: 2_Osteology.md, page: 158)

                    ---
                    [나쁜 답변 예시]
                    ## Original Context
                    The tibia is the larger and stronger of the two bones located on the medial side of the lower leg, also known as the shin bone. The upper end of this bone forms a broad, flat articular surface that articulates with the femur (thigh bone) to form the knee joint. (Source: 2_Osteology.md, page: 158)

                    ## Draft Answer
                    The tibia is a bone in the leg. It has a superior articular surface.

                    ## Original Question
                    Please explain about the tibia.

                    ## Collected Text Information
                    Information about the tibia.

                    ## Image Analysis Result
                    Tibia image confirmed.

                    ## Draft Answer to Generate
                    Hello! I will explain about the tibia. The tibia is a bone in the leg. It has a superior articular surface.
                    (-> Reason: Includes unnecessary information (Original Context, Draft Answer, Original Question, Collected Text Information, Image Analysis Result, Draft Answer to Generate), includes unnecessary greetings, lacks explanation of technical terms, missing source citation)
                    ---

                    **[중요 지침]**
                    **위의 '좋은 답변 예시'와 같이, 다른 부가적인 설명이나 제목 없이 오직 '최종 답변'의 본문 내용만 생성해야 합니다.**

                    [원본 컨텍스트]
                    {documents}

                    [답변 초안]
                    {draft}

                    [최종 답변]
                    """
    llm_output = text_llm.invoke(prompt)
    try:
        # Everything after the last marker; the full text when it never occurs.
        answer = llm_output.rpartition('[최종 답변]')[2].strip()
    except Exception:
        answer = llm_output

    return {"final_answer": answer}

# Assemble the graph: each node receives the AgentState and returns a partial update.
workflow = StateGraph(AgentState)
workflow.add_node("query_analyst", query_analyst_node)
workflow.add_node("researcher", research_node)
# Image analysis is currently disabled.
# workflow.add_node("image_analyzer", image_analysis_node)
workflow.add_node("synthesizer", content_synthesis_node)
# The reviewer uses the post-processing variant, not final_review_node.
workflow.add_node("reviewer", final_review_node_postprocess)

# Linear pipeline: query_analyst -> researcher -> synthesizer -> reviewer -> END.
workflow.set_entry_point("query_analyst")
workflow.add_edge("query_analyst", "researcher")
workflow.add_edge("researcher", "synthesizer")
workflow.add_edge("synthesizer", "reviewer")
workflow.add_edge("reviewer", END)

# Compiled graph; invoked once per question by the batch loop below.
app = workflow.compile()

# Evaluation questions grouped by source file; the trailing comment on each
# entry is the page the question was written from.
questions = [
        ############## 1_Embryology.md : pages 6-36
        "What are the two essential components of a higher organism cell as defined in the text?", # Page 7
        "Describe the four main phases of indirect cell division (karyokinesis) as outlined in the text.", # Page 7
        "During the early segmentation of the human ovum, when the blastodermic vesicle is formed, into what two cell layers does it differentiate, and what are their respective roles?", # Page 13
        "What is the primary role of the yolk-sac in the embryo's early development?", # Page 20
        "How does the embryo separate from the yolk-sac, and what does the enclosed part of the yolk-sac form?", # Page 19
        "What significant developments occur in a human embryo during the Second Week?", # Page 33
        "What are the key characteristics of the human embryo by the end of the Third Week?", # Page 33
        "What significant changes are typically observed in the fetus during the fifth month, and what is its approximate total length by the end of this month?", # Page 34
        
        ############## 2_Osteology.md : pages 37-173
        "What are the three groups into which the cells of a primitive segment differentiate, and what do they form?", # Page 38
        "How is each vertebral body formed from primitive segments during development?", # Page 38
        "What are the sphenoidal air sinuses, and where are they located within the sphenoid bone?", # Page 88
        "Describe the sphenoidal rostrum and its articulation.", # Page 88
        "How is the body of the humerus described in relation to the radial fossa, a slight depression that receives the anterior border of the head of the radius when the forearm is flexed?", # Page 129
        "What is the tibia, and where is it located in the human leg?", # Page 158
        "Describe the superior articular surface of the tibia's upper extremity.", # Page 158
        "Which of the metatarsal bones is the longest and extends backward into the recess formed by the three cuneiform bones?", # Page 170
        
        ############## 3_Syndesmology.md : pages 174-242
        "What are joints or articulations, and how are immovable joints characterized?", # Page 174
        "How does the articular lamella differ from ordinary bone tissue?", # Page 174
        "What are the four distinct joints that comprise the articulation of the atlas with the axis, and what is the function of each?", # Page 185
        "Where is the synovial membrane located in relation to the glenoid cavity and humerus, and how does it interact with the Biceps brachii tendon?", # Page 207
        "List some of the bursae located near the shoulder-joint and specify which ones communicate with the synovial cavity.", # Page 207
        "What types of movements are possible at the metacarpophalangeal joints of the fingers, excluding the thumb, and specifically when are the movements of abduction and adduction limited?", # Page 220
        "What is the function of the plantar calcaneonavicular ligament, and what condition results if it yields?", # Page 236
        "How are the navicular bone and the three cuneiform bones connected, and what type of movement do they permit?", # Page 236

        ############## 4_Myology.md : pages 243-331
        "How does the nervous system serve as an indicator for the origin and migration paths of developing muscles, despite not influencing muscle differentiation?", # Page 250
        "Describe the structural components of striped or voluntary muscle, from bundles to individual fibers.", # Page 250
        "How does the angular head of the Quadratus labii superioris, one of the muscles of the mouth, contribute to the function of the nose?", # Page 260
        "Among the muscles of the thorax, where do the Intercostales externi originate and insert, and how does the direction of their fibers differ on the back and front of the thorax?", # Page 275
        "What is the triangular ligament and where is it located?", # Page 290
        "What structures perforate the superficial layer (inferior fascia) of the urogenital diaphragm?", # Page 290
        "Where does the Extensor digitorum longus muscle originate, and what structures are located between it and the Tibialis anterior?", # Page 322
        "What is the Peronæus tertius, and where is it inserted?", # Page 322

        ############## 5_Angiology.md : pages 333-360
        "What are the main characteristics of the middle coat (tunica media) of arteries, and how does its composition vary with vessel size?", # Page 334
        "Describe the composition and variations of the external coat (tunica adventitia) in arteries.", # Page 334
        "What changes do the vitelline and umbilical veins undergo in the developing liver, and what is the impact of these changes on blood circulation?", # Page 340
        "How do the Vitelline Veins develop into parts of the portal and hepatic veins?", # Page 345
        "What happens to the Umbilical Veins during embryonic development and after birth?", # Page 345
        "What are the two main sacs that constitute the pericardium, and what are the characteristics and functions of each?", # Page 350
        "What are the three phases of a cardiac cycle and what happens during each?", # Page 358
        # The trailing comma matters: without it this string is implicitly
        # concatenated with the first question of the next section.
        "What are the main peculiarities observed in the fetal heart's vascular system?", # Page 359

        ############## 6.md : pages 362-423
        "What type of division occurs when the aorta divides into the two common iliac arteries?", # Page 362
        "The fourth, or terminal, part of the lingual artery runs along what surface of the tongue to its tip, and what is its other name?", # Page 370
        "What organs does the internal carotid artery supply in adults, and how does its size compare to other arteries?", # Page 377
        "How do the spinal branches of the vertebral artery enter the vertebral canal, and how do they subsequently divide to supply blood?", # Page 385
        "How is the brachial artery positioned at the elbow joint, and by which muscles is it covered or separated?", # Page 392
        "At what vertebral level does the abdominal aorta terminate, and how does it divide at that point?", # Page 400
        "What muscles and structures do the intrapelvic branches of the internal pudendal artery supply, and which artery do they occasionally replace?", # Page 408
        "Which muscle tendons does the second perforating artery pierce, and into what branches does it subsequently divide?", # Page 415
        
        ############## 7.md : pages 425-450
        "Compared to arteries, what are the characteristics of veins in terms of size and number, and how does the total capacity of the venous system compare to that of the arterial system?", # Page 425
        "Where does the superficial temporal vein begin, with what veins does it communicate, and how do its branches unite to form the main trunk?", # Page 428
        "Where do the lingual veins originate on the tongue, along what artery's course do they pass, and where do they terminate?", # Page 431
        "Cerebral veins are divided into external and internal groups; which parts of the brain do each of these groups drain, and what are the main external veins belonging to each group?", # Page 434
        "Where are the superficial veins of the upper extremity located directly beneath the skin, and with what other veins do they communicate to return blood to the heart?", # Page 438
        "From what structures do the bronchial veins return blood, and into which veins do the right and left bronchial veins respectively open?", # Page 441
        "Which part of the thigh does the femoral vein accompany the femoral artery through, and near its termination, what major vein does it join?", # Page 444
        "What is the peculiarity concerning the termination point of the inferior vena cava when it occasionally joins the azygos vein, and how does this affect the blood flow in the body?", # Page 447
        
        ############## 8.md : pages 451-470
        "Where does the lymphatic system transport lymph for entry into the bloodstream, and what role do lymph nodes play in this process?", # Page 451
        "What are the valves of the lymphatic vessels composed of, what shape do they have, and at what intervals are these valves placed within the lymphatic vessels, and where are they most frequently found?", # Page 453
        "From what regions do the posterior auricular lymph glands primarily receive lymphatic drainage, and where do their efferent vessels lead?", # Page 457
        "Where do the lymphatic vessels from the anterior parts of the nasal cavities terminate, and where do those from the posterior two-thirds and accessory air sinuses lead?", # Page 458
        "How are the lymph glands of the upper extremity divided into two main sets, and what are the characteristics of each set?", # Page 460
        "Where are the popliteal lymph glands located, from what vessels and joints do they receive lymphatic drainage, and where do their efferent vessels primarily lead?", # Page 462
        "Where are the right lateral aortic lymph glands located, and from what organs do they receive lymphatic drainage?", # Page 465
        "Where do the lymphatic vessels of the prostate primarily terminate, and where do specific trunks from its posterior and anterior surfaces lead?", # Page 468

        ############## 9.md : pages 473-622
        "What is the function of neuroglia in the brain and spinal cord, and what is its origin?", # Page 473
        "What is the average depth of the anterior median fissure of the spinal cord, and what structures does it contain?", # Page 490
        "Where is the Pons located in the brain, and what are the characteristics of its dorsal surface?", # Page 506
        "What are the main fissures and sulci used to divide the cerebral hemisphere into lobes, and how is the lateral cerebral fissure structured among them?", # Page 523
        "How do tactile discrimination fibers travel within the spinal cord, and to which area of the cerebral cortex are they ultimately conveyed?", # Page 539
        "Where are the olfactory nerves distributed in the nose, and what are the characteristics of their nerve fibers?", # Page 556
        "Where does the Posterior Auricular Nerve originate, what branches does it divide into, and which muscles do these branches supply?", # Page 572
        "How is the brachial plexus formed from the union of nerves, and how does it divide into its three main cords?", # Page 589
        "Which arteries do the articular branches of the Common Peroneal Nerve accompany to the knee, and where does the recurrent articular nerve branch off and ascend to?", # Page 605
        
        ############## 10.md : pages 623-674
        "What is the shape of the cartilage of the septum, and with which bones are its anterior, posterior, and inferior margins connected?", # Page 623
        "From which part of the optic cup does the retina develop, and into what cell layers does it differentiate to form the various nervous elements and supporting structures of the retina?", # Page 629
        "Where is the iris located in the eye, and how does it divide the space between the cornea and the lens?", # Page 634
        "What is the structure of the lens, and specifically, how are the radiating lines arranged in a fetal lens?", # Page 640
        "The ear, or organ of hearing, can be divided into three main parts. What are these three parts?", # Page 647
        "What specialized glands are found in the skin lining the external acoustic meatus, and what is their function?", # Page 651
        "What type of cartilage covers the vestibular surface and circumference of the base of the stapes, and to what ligament is it attached at the margin of the fenestra vestibuli?", # Page 657
        "What are the main types of hair cells found on the inner and outer sides of the rods of Corti, and what are the names of the supporting cells associated with them?", # Page 663
        "What layers compose the epidermis, and what characteristics do the cells of the deepest layer, the stratum mucosum, exhibit?", # Page 668
        
        ############## 11.md : pages 675-815
        "How does the size of the larynx change in males and females after puberty, and what features become particularly prominent in the male larynx?", # Page 675
        "In what way does the fetal lung resemble a gland, and what changes occur in the alveoli after the first respiration?", # Page 691
        "What are the characteristics of the filiform papillae of the tongue, where are they primarily distributed, and what is their function?", # Page 706
        "When tracing the vertical disposition of the omental bursa, how does the loop formed by its wall below the transverse colon relate to the peritoneum of the main cavity, and how many layers does the greater omentum consist of?", # Page 722
        "What are the average length and breadth of the cecum, what organs does it typically contact within the abdominal cavity, and what degree of mobility does it possess?", # Page 737
        "What organs comprise the urogenital apparatus, and what embryonic structures precede the permanent organs during fetal development?", # Page 754
        "What is the shape of the fundus of the bladder, in which direction is it oriented, and by what structures is it separated from the rectum?", # Page 768
        "From where does the body (corpus penis) of the penis extend, and what anatomical structures are intimately bound within it?", # Page 784
        "What is the homologue of the bulb of the vestibule in males, and how is it composed?", # Page 799
        
        ############## 12.md : pages 816-852
        "What bony structure has its upper border obscured by the attachment of the temporal fascia, and what bone does its anterior end connect to?", # Page 816
        "From what foramen on the face do the supraorbital branches of the ophthalmic nerve emerge, and what other foramina does a line drawn from this foramen to the lower border of the mandible pass over?", # Page 820
        "What is the characteristic of the cervical region where the spinous processes are sunken, and which cervical spinous process sometimes forms a projection?", # Page 825
        "What bones and cartilages form the lower boundary of the front of the thorax, and what body position makes this boundary most plainly visible?", # Page 828
        "What are the smooth, white transverse lines commonly seen on the skin after abdominal distension from pregnancy or other causes, and what is their characteristic feature?", # Page 832
        "Across which two regions of the abdomen does the transverse colon pass, and where is its lower border situated in relation to the umbilicus?", # Page 836
        "What anatomical positions do the three transverse furrows on the front of the wrist correspond to, from above downward?", # Page 841
        "Along which muscle's medial margin can the brachial artery be recognized, and what is its position relative to the humerus in the upper two-thirds versus the lower third of the arm?", # Page 844
        "In what direction does the depression between the Quadriceps femoris and the Adductors extend obliquely from the apex of the femoral triangle, and when is this depression present?", # Page 848
]
# Results are grouped in one directory per run date (month and day only).
today = datetime.date.today()
date_str = f"{today.month}월{today.day}일"
output_dir = f"./result/LangGraph/RecursiveSplitter/{date_str}"
os.makedirs(output_dir, exist_ok=True)

# Written in place of any key that is absent from the final state.
missing_value = "오류 : 최종 답변 생성 못함"
section_break = "---\n\n"

for index, query in enumerate(questions, start=1):
    final_state = app.invoke({"original_question": query})

    # The uuid4 suffix gives every result file a distinct name.
    filepath = os.path.join(output_dir, f"result_{index}_{uuid.uuid4()}.txt")

    question = final_state.get("original_question", missing_value)
    answer = final_state.get("final_answer", missing_value)
    context = final_state.get("documents", missing_value)

    with open(filepath, 'w', encoding="utf-8") as out:
        out.writelines([
            f"[[원본 질문]] : \n{question}\n\n",
            section_break,
            f"[[모델 답변]] : \n{answer}\n\n",
            section_break,
            f"[[참고 컨텍스트]] : \n{context}\n\n",
            section_break,
        ])

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Loading checkpoint shards:  58%|█████▊    | 7/12 [01:43<01:21, 16.34s/it]

# Graph Base-Leiden

In [None]:
# import os
# import re
# import json
# import uuid
# import datetime
# import leidenalg
# import networkx as nx
# from PIL import Image
# from typing import TypedDict, List
# from langgraph.graph import StateGraph, END
# from sentence_transformers import CrossEncoder
# from langchain_community.vectorstores import FAISS
# from langchain_community.tools import DuckDuckGoSearchRun
# from model_loader import get_llm, get_embedding_model, get_vision_pipeline
# from langchain_community.document_loaders import DirectoryLoader, UnstructuredMarkdownLoader
# from langchain.text_splitter import RecursiveCharacterTextSplitter, MarkdownHeaderTextSplitter

# os.environ["CUDA_VISIBLE_DEVICES"]="1,2"

  from .autonotebook import tqdm as notebook_tqdm


: 

In [None]:
# KG_PATH = "./data/knowledge_graph/knowledge_graph.graphml"
# MD_DIR = "./data/split_file/anatomy"
# CROSS_ENCODER_MODEL = "cross-encoder/ms-marco-MiniLM-L6-v2"

# text_llm = get_llm(provider="ollama", ollama_model_name="gemma3:27b")
# embedding_model = get_embedding_model()
# vision_pipeline = get_vision_pipeline()

# web_search_tool = DuckDuckGoSearchRun()

# try :
#     G = nx.read_graphml(KG_PATH)
# except FileNotFoundError :
#     G = None

# if G :
#     print("지식 그래프 샘플")
#     print(list(G.nodes)[0:20])

# cross_encoder = CrossEncoder(CROSS_ENCODER_MODEL)

# md_content_map = {}
# for filename in os.listdir(MD_DIR) :
#     if filename.endswith(".md") :
#         filepath = os.path.join(MD_DIR, filename)
#         with open(filepath, 'r', encoding="utf-8") as f :
#             md_content_map[filename] = f.read()

# node_name_map = {name.lower(): name for name in G.nodes()}

# def extract_entities_from_query(query: str) -> List[str] :
#     prompt =f"""Given the user's question, extract all relevant anatomical or medical entities.
# The entities should be precise and directly mentioned in the question.
# Return the entities as a comma-separated list.

# Question: "{query}"
# Entities:"""
#     response = text_llm.invoke(prompt)
#     match = re.search(r"Entities:\s*(.*)", response, re.DOTALL)

#     if match :
#         entities_str = match.group(1)
#         entities = [e.strip().lower() for e in entities_str.split(',') if e.strip()]
#     else :
#         entities = [e.strip().lower() for e in response.split(',') if e.strip()]
#     print(f"질문 엔티티 : {entities}")
#     return entities

# import igraph as ig
# # Expand the query entities to every graph node that shares a Leiden community
# # with at least one of them. Entities are matched to node names
# # case-insensitively; entities with no matching node are skipped. Returns the
# # original node names as a list built from a set (order not guaranteed).
# # NOTE(review): the partition is recomputed on every call and no seed is passed
# # to find_partition, so community assignments may differ between calls --
# # TODO confirm against the leidenalg docs and consider computing it once.
# def expand_entities_with_leiden(graph: nx.Graph, initial_entities: List[str]) -> List[str] :
#     if not initial_entities :
#         return []
    
#     g_ig = ig.Graph.from_networkx(graph)

#     partition = leidenalg.find_partition(g_ig, leidenalg.ModularityVertexPartition)
#     expanded_entities = set()

#     node_name_map_lower = {name.lower(): name for name in g_ig.vs["_nx_name"]}
#     node_index_map = {name: i for i, name in enumerate(g_ig.vs["_nx_name"])}

#     for entity in initial_entities :
#         original_node_name = node_name_map_lower.get(entity.lower())
#         if original_node_name :
#             node_idx = node_index_map[original_node_name]
#             community_id = partition.membership[node_idx]

#             # Full scan over every vertex for each matched entity.
#             for i, v in enumerate(g_ig.vs) :
#                 if partition.membership[i] == community_id :
#                     expanded_entities.add(v["_nx_name"])
#     return list(expanded_entities)

# def get_pages_from_graph(graph: nx.Graph, entities: List[str]) -> set :
#     pages = set()
#     for entity in entities :
#         normalized_entity = entity.lower()
#         original_node_name = node_name_map.get(normalized_entity)

#         if original_node_name and original_node_name in graph.nodes :
#             page_data = graph.nodes[original_node_name].get("source_page", '')
#             # print(f"##################page_data : {page_data}")
#             pages.update(p.strip() for p in page_data.split(',') if p.strip())
#     return pages

# # For each page number, search the loaded markdown files for a
# # "#### Page <n>" header and capture the text up to the next "#### Page"
# # header or the end of the file. Only the first file that matches is used.
# # NOTE(review): page_num is interpolated into the pattern without re.escape,
# # and because of the `break` a page number that occurs in several files is
# # only returned for the first one -- verify that page numbers are unique
# # across files.
# def get_content_from_pages(source_pages: set) -> List[dict] :
#     page_contents = []
#     for page_num in source_pages :
#         for filename, content in md_content_map.items() :
#             match = re.search(rf"####\s+Page\s+{page_num}\s*\n(.*?)(?=####\s+Page|\Z)", content, re.DOTALL)
#             if match:
#                 page_contents.append({
#                     "source": filename,
#                     "page": page_num,
#                     "content": match.group(1).strip()
#                 })
#                 break
#     return page_contents

# # Split each page's text into chunks of at most `chunk_size` characters,
# # preferring to cut just after the last '.', '!' or '?' inside the window.
# # Each chunk keeps the page's "source" and "page" fields.
# # NOTE(review): the tail branch (`end >= len(text)`) never advances `start`,
# # so for any non-empty page the `while` loop keeps appending the same final
# # chunk forever. Set `start = len(text)` in that branch before re-enabling
# # this cell.
# def smart_chunking(pages: List[dict], chunk_size: int=500) -> List[dict] :
#     chunks = []
#     for page in pages :
#         text = page["content"]
#         start = 0
#         while start < len(text) :
#             end = start + chunk_size
#             if end >= len(text) :
#                 chunk_text = text[start:]
#             else :
#                 last_punc_idx = max(text.rfind(p, start, end) for p in ".!?")
#                 if last_punc_idx != -1 :
#                     chunk_text = text[start : last_punc_idx + 1]
#                     start = last_punc_idx + 1
#                 else :
#                     chunk_text = text[start:end]
#                     start = end

#             chunks.append({
#                 "source" : page["source"],
#                 "page" : page["page"],
#                 "content" : chunk_text.strip()
#             })
#     return chunks

# def rerank_with_cross_encoder(query: str, chunks: List[dict], top_k: int = 5) -> str :
#     if not chunks :
#         return ""
    
#     pairs = [(query, chunk["content"]) for chunk in chunks]
#     scores = cross_encoder.predict(pairs)

#     sorted_chunks = sorted(zip(chunks, scores), key=lambda x : x[1], reverse=True)

#     top_chunks = [item[0] for item in sorted_chunks[:top_k]]

#     formatted_docs = []
#     for chunk in top_chunks :
#         source_file = chunk.get("source", "알 수 없음")
#         page_num = chunk.get("page", "N/A")
#         formatted_docs.append(f"---(출처 : {source_file}, 페이지 : {page_num}) ---\n{chunk['content']}")

#     return "\n\n".join(formatted_docs)

# # Graph-based retrieval pipeline: extract entities from the query, expand them
# # through Leiden communities, map the expanded entities to source pages, load
# # and chunk those pages, then rerank the chunks against the query.
# # NOTE(review): existing_entities is only printed; the expansion below is fed
# # the unfiltered `entities`. The `e in G.nodes` test is also an exact,
# # case-sensitive match, whereas expand_entities_with_leiden matches
# # case-insensitively, so step 2 can under-report.
# def graph_based_retriever(query: str) -> str :
#     if G is None :
#         return "지식 그래프가 로드되지 않음"
    
#     entities = extract_entities_from_query(query)
#     print(f"1. 추출된 엔티티 : {entities}")
    
#     existing_entities = [e for e in entities if e in G.nodes]
#     print(f"2. 그래프에 존재하는 엔티티 : {existing_entities}")

#     expanded_entities = expand_entities_with_leiden(G, entities)
#     print(f"3. 확장된 엔티티 : {expanded_entities}")

#     pages_to_search = get_pages_from_graph(G, expanded_entities)
#     print(f"4. 검색할 페이지 : {pages_to_search}")
    
#     contents = get_content_from_pages(pages_to_search)
#     chunks = smart_chunking(contents)
#     final_context = rerank_with_cross_encoder(query, chunks)

#     return final_context

# class AgentState(TypedDict) :
#     original_question: str
#     search_query: str
#     image_topic: str
#     documents: str
#     # image_analysis: str
#     draft: str
#     final_answer: str

# def query_analyst_node(state: AgentState) :
#     prompt = f"""당신은 자연어 이해 전문가입니다. 다음 사용자 질문을 분석하여, 텍스트 검색을 위한 검색 쿼리와 이미지 분석을 위한 핵심 주제를 추출하세요.
#                 질문: "{state['original_question']}"
#                 출력은 반드시 다음 JSON 형식을 따라야 합니다: {{"search_query": "...", "image_topic": "..."}}
#             """
    
#     response = text_llm.invoke(prompt)
#     try :
#         clean_response = re.search(r'\{.*\}', response, re.DOTALL)
#         if clean_response :
#             parsed_response = json.loads(clean_response.group(0))
#             return {"search_query": parsed_response.get("search_query", ""), "image_topic": parsed_response.get("image_topic", "")}
#         else :
#             return {"search_query": state["original_question"], "image_topic": ""}
#     except (json.JSONDecodeError, KeyError) :
#         return {"search_query": state["original_question"], "image_topic": ""}
    
# def research_node(state: AgentState) :
#     docs = graph_based_retriever(state["search_query"])
#     if not docs or len(docs) < 200 :
#         web_results = web_search_tool.run(state["search_query"])
#         docs += "\n\n--- 웹 검색 결과 ---\n" + web_results
#     return {"documents": docs}

# def content_synthesis_node(state: AgentState) :
#     prompt = f"""당신은 의학 콘텐츠 작성가입니다. 아래의 텍스트 정보를 종합하여, 사용자의 원본 질문에 대한 상세한 답변 초안을 작성하세요.

#                 [원본 질문]
#                 {state['original_question']}

#                 [수집된 텍스트 정보]
#                 {state['documents']}

#                 [작성할 답변 초안]
#             """
#     draft = text_llm.invoke(prompt)
#     return {"draft" : draft}

# def final_review_node_postprocess(state: AgentState) :
#     prompt = f"""USER :
#                     당신은 수석 의학 에디터입니다. 아래의 답변 초안과 원본 컨텍스트를 검토하여 최종 답변을 생성하세요.
#                     다음 기준과 예시를 반드시 따라주세요:
#                     1. 사실 관계가 정확한지 확인하세요.
#                     2. 전문 용어를 최대한 쉬운 말로 풀어 설명하세요.
#                     3. **답변 내용의 근거가 되는 출처를 반드시 문장 끝에 '(출처: 파일명, 페이지: X)' 형식으로 명시하세요.**
#                     4. 최종 사용자가 읽기 쉽도록 문장을 다듬고 구조화하세요.
#                     5. 최종 답변은 반드시 영어로 작성되어야 합니다.

#                     ---
#                     [좋은 답변 예시]
#                     The tibia, also known as the shin bone, is the larger and stronger of the two bones located on the medial side of the lower leg. (Source: 2_Osteology.md, page: 158) The upper end of this bone forms a broad, flat articular surface that articulates with the femur (thigh bone) to form the knee joint. (Source: 2_Osteology.md, page: 158)

#                     ---
#                     [나쁜 답변 예시]
#                     ## Original Context
#                     The tibia is the larger and stronger of the two bones located on the medial side of the lower leg, also known as the shin bone. The upper end of this bone forms a broad, flat articular surface that articulates with the femur (thigh bone) to form the knee joint. (Source: 2_Osteology.md, page: 158)

#                     ## Draft Answer
#                     The tibia is a bone in the leg. It has a superior articular surface.

#                     ## Original Question
#                     Please explain about the tibia.

#                     ## Collected Text Information
#                     Information about the tibia.

#                     ## Image Analysis Result
#                     Tibia image confirmed.

#                     ## Draft Answer to Generate
#                     Hello! I will explain about the tibia. The tibia is a bone in the leg. It has a superior articular surface.
#                     (-> Reason: Includes unnecessary information (Original Context, Draft Answer, Original Question, Collected Text Information, Image Analysis Result, Draft Answer to Generate), includes unnecessary greetings, lacks explanation of technical terms, missing source citation)
#                     ---

#                     **[중요 지침]**
#                     **위의 '좋은 답변 예시'와 같이, 다른 부가적인 설명이나 제목 없이 오직 '최종 답변'의 본문 내용만 생성해야 합니다.**

#                     [원본 컨텍스트]
#                     {state['documents']}

#                     [답변 초안]
#                     {state['draft']}

#                     [최종 답변]
#                     """
#     raw_output = text_llm.invoke(prompt)
#     try:
#         final_answer = raw_output.split('[최종 답변]')[-1]
#         final_answer = final_answer.strip()
#     except Exception:
#         final_answer = raw_output
        
#     return {"final_answer": final_answer}

# workflow = StateGraph(AgentState)
# workflow.add_node("query_analyst", query_analyst_node)
# workflow.add_node("researcher", research_node)
# # workflow.add_node("image_analyzer", image_analysis_node)
# workflow.add_node("synthesizer", content_synthesis_node)
# workflow.add_node("reviewer", final_review_node_postprocess)

# workflow.set_entry_point("query_analyst")
# workflow.add_edge("query_analyst", "researcher")
# workflow.add_edge("researcher", "synthesizer")
# workflow.add_edge("synthesizer", "reviewer")
# workflow.add_edge("reviewer", END)

# app = workflow.compile()

# questions = [
#         ############## 1_Embryology.md
#         "What are the two essential components of a higher organism cell as defined in the text?", # 7페이지
#         "Describe the four main phases of indirect cell division (karyokinesis) as outlined in the text.", # 7페이지
#         "What is the primary role of the yolk-sac in the embryo's early development?", # 20페이지
#         "How does the embryo separate from the yolk-sac, and what does the enclosed part of the yolk-sac form?", # 19페이지
#         "What significant developments occur in a human embryo during the Second Week?", # 33페이지
#         "What are the key characteristics of the human embryo by the end of the Third Week?", # 33페이지
        
#         ############## 2_Osteology.md
#         "What are the three groups into which the cells of a primitive segment differentiate, and what do they form?", # 38페이지
#         "How is each vertebral body formed from primitive segments during development?", # 38페이지
#         "What are the sphenoidal air sinuses, and where are they located within the sphenoid bone?", # 88페이지
#         "Describe the sphenoidal rostrum and its articulation.",# 88
#         "What is the tibia, and where is it located in the human leg?", # 158
#         "Describe the superior articular surface of the tibia's upper extremity.", # 158

#         ############## 3_Syndesmology.md
#         "What are joints or articulations, and how are immovable joints characterized?", # 174
#         "How does the articular lamella differ from ordinary bone tissue?", # 174
#         "Where is the synovial membrane located in relation to the glenoid cavity and humerus, and how does it interact with the Biceps brachii tendon?", # 207
#         "List some of the bursae located near the shoulder-joint and specify which ones communicate with the synovial cavity.", # 207
#         "What is the function of the plantar calcaneonavicular ligament, and what condition results if it yields?", # 236
#         "How are the navicular bone and the three cuneiform bones connected, and what type of movement do they permit?", # 236

#         ############## 4_Myology.md
#         "How does the nervous system serve as an indicator for the origin and migration paths of developing muscles, despite not influencing muscle differentiation?", # 250
#         "Describe the structural components of striped or voluntary muscle, from bundles to individual fibers.", # 250
#         "What is the triangular ligament and where is it located?", # 290
#         "What structures perforate the superficial layer (inferior fascia) of the urogenital diaphragm?", # 290
#         "Where does the Extensor digitorum longus muscle originate, and what structures are located between it and the Tibialis anterior?", # 322
#         "What is the Peronæus tertius, and where is it inserted?", # 322

#         ############## 5_Angiology.md
#         "What are the main characteristics of the middle coat (tunica media) of arteries, and how does its composition vary with vessel size?", # 334
#         "Describe the composition and variations of the external coat (tunica adventitia) in arteries.", # 334
#         "How do the Vitelline Veins develop into parts of the portal and hepatic veins?", # 345
#         "What happens to the Umbilical Veins during embryonic development and after birth?", # 345
#         "What are the three phases of a cardiac cycle and what happens during each?", # 358
#         "What are the main peculiarities observed in the fetal heart's vascular system?" # 359
# ]
# output_dir = "./result/LangGraph/Leiden"
# os.makedirs(output_dir, exist_ok=True)

# for i, q in enumerate(questions) :
#     inputs = {"original_question": q}
#     final_state = app.invoke(inputs)

#     today = datetime.date.today()
#     date_str = f"{today.month}월{today.day}일"
#     unique_id = uuid.uuid4()
#     filename = f"{date_str}_{i+1}_{unique_id}.txt"
#     filepath = os.path.join(output_dir, filename)

#     question = final_state.get("original_question", "오류 : 최종 답변 생성 못함")
#     context = final_state.get("documents", "오류 : 최종 답변 생성 못함")
#     answer = final_state.get("final_answer", "오류 : 최종 답변 생성 못함")
#     with open(filepath, 'w', encoding="utf-8") as f :
#         f.write(f"[[원본 질문]] : \n{question}\n\n")
#         f.write("---\n\n")
#         f.write(f"[[모델 답변]] : \n{answer}\n\n")
#         f.write("---\n\n")
#         f.write(f"[[참고 컨텍스트]] : \n{context}\n\n")
#         f.write("---\n\n")

  return Ollama(model=ollama_model_name)
  return HuggingFaceEmbeddings(
Loading checkpoint shards: 100%|██████████| 4/4 [00:01<00:00,  2.31it/s]
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Loading checkpoint shards: 100%|██████████| 3/3 [00:03<00:00,  1.19s/it]
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Device set to use cuda:0


지식 그래프 샘플
['- Organ: testes', 'mitochondria', 'germinal cells', 'ovaries', 'granules', 'nucleus', 'hyaloplasm', 'Embryology', 'cell', 'Field of Study', 'cytoplasm', 'spongioplasm', 'tissues', 'somatic cells', '- Node: monaster', 'chromatin filaments', 'man', 'diaster', 'nuclear membrane', 'protoplasm']
질문 엔티티 : ['cell', 'organism']
1. 추출된 엔티티 : ['cell', 'organism']
2. 그래프에 존재하는 엔티티 : ['cell']
3. 확장된 엔티티 : ['Early Development and Imbedding of the Human Ovum', 'Body-stalk', 'ameboid movement', 'granules', 'Teacher', 'ovarian pregnancy', 'human ovum', 'chromosomes', 'protoplasm', 'sarcous element and the membrane of Krause', 'follicle', 'nuclear membrane', 'pores of spongioplasm', 'eosinophil corpuscles', 'nucleus', 'uterine mucous membrane', 'uterine tube', 'centrosome', 'attraction sphere', 'cell', 'segmentation', 'decidua', 'line of meeting of the embryonic and amniotic parts of the ectoderm', 'thromboplastin', 'mitochondria', 'mature ovum', 'ovum', 'female pronucleus', 'lymphocyte', '

In [None]:
# text_llm = get_llm()
# embedding_model = get_embedding_model()
# vision_pipeline = get_vision_pipeline()

# web_search_tool = DuckDuckGoSearchRun()

# # Return the text of the top 5 chunks the FAISS retriever yields for `query`
# # over the anatomy markdown files, joined by blank lines.
# # NOTE(review): every call reloads the directory, re-splits it and rebuilds
# # the FAISS index from scratch; the bare `return` below also yields None
# # despite the `-> str` annotation, which callers must handle.
# def search_anatomy_docs(query: str) -> str :
#     loader = DirectoryLoader("./data/split_file/anatomy", glob="**/*.md", loader_cls=UnstructuredMarkdownLoader)
#     docs = loader.load()
#     if not docs :
#         return
#     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
#     splits = text_splitter.split_documents(docs)
#     vectorstore = FAISS.from_documents(documents=splits, embedding=embedding_model)
#     retriever = vectorstore.as_retriever(search_kwargs={"k":5})
#     retrieved_docs = retriever.invoke(query)
#     return "\n\n".join([doc.page_content for doc in retrieved_docs])

# # Look for a file referenced from the markdown files under doc_dir whose link
# # target contains `topic` (case-insensitive) and exists on disk, then ask
# # vision_pipeline to describe it. Returns the text after "ASSISTANT:" in the
# # generated output, or a Korean status/error message string.
# # NOTE(review): the regex matches every markdown link `[..](..)`, not only
# # image links, and files that fail to open/read are skipped silently.
# def analyze_anatomy_image(topic: str) -> str :
#     if vision_pipeline is None :
#         return "비전 모델이 로드되지 않아 이미지 분석을 실험할 수 없음"
    
#     image_path = None
#     doc_dir = "./data/split_file/anatomy"
#     for root, _, files in os.walk(doc_dir) :
#         for file in files :
#             if file.endswith(".md") :
#                 try :
#                     with open(os.path.join(root, file), 'r', encoding="utf-8") as f :
#                         content = f.read()
#                         matches = re.findall(r'\[.*?\]\((.*?)\)', content)
#                         for path in matches :
#                             if topic.lower() in path.lower() :
#                                 # Link targets are resolved relative to the markdown file's directory.
#                                 potential_path = os.path.join(os.path.dirname(os.path.join(root, file)), path)
#                                 if os.path.exists(potential_path) :
#                                     image_path = potential_path
#                                     break

#                 except Exception :
#                     continue
#             # The inner `break` only leaves the link loop; these checks stop the file and directory walks.
#             if image_path :
#                 break
#         if image_path :
#             break

#     if not image_path : 
#         return f"'{topic}'과 관련된 이미지를 로컬 문서에서 찾을 수 없습니다."
    
#     try :
#         image = Image.open(image_path).convert("RGB")
#         prompt = f"USER : <image>\n이 해부학 이미지를 상세히 설명해줘. 이미지에 나타난 주요 근육, 뼈, 구조물들의 이름과 특징을 전문가의 시각에서 분석해줘. 이 이미지는 {topic}에 관한 것이다.\nASSISTANT:"
#         outputs = vision_pipeline(image, prompt=prompt, generate_kwargs={"max_new_tokens":512})

#         if outputs and "generated_text" in outputs[0] :
#             # An IndexError here (no "ASSISTANT:" marker) is caught by the except below.
#             return outputs[0]["generated_text"].split("ASSISTANT:")[1].strip()
#         return "이미지 분석 실패"
    
#     except Exception as e :
#         return f"이미지 분석 중 오류 발생 : {e}"
    
# class AgentState(TypedDict) :
#     original_question: str
#     search_query: str
#     image_topic: str
#     documents: str
#     image_analysis: str
#     draft: str
#     final_answer: str

# def query_analyst_node(state: AgentState) :
#     prompt = f"""당신은 자연어 이해 전문가입니다. 다음 사용자 질문을 분석하여, 텍스트 검색을 위한 검색 쿼리와 이미지 분석을 위한 핵심 주제를 추출하세요.
#                 질문: "{state['original_question']}"
#                 출력은 반드시 다음 JSON 형식을 따라야 합니다: {{"search_query": "...", "image_topic": "..."}}
#             """
    
#     response = text_llm.invoke(prompt)
#     try :
#         clean_response = re.search(r'\{.*\}', response, re.DOTALL)
#         if clean_response :
#             parsed_response = json.loads(clean_response.group(0))
#             return {"search_query": parsed_response.get("search_query", ""), "image_topic": parsed_response.get("image_topic", "")}
#         else :
#             return {"search_query": state["original_question"], "image_topic": ""}
#     except (json.JSONDecodeError, KeyError) :
#         return {"search_query": state["original_question"], "image_topic": ""}
    
# # Fetch local context for the search query and append a web search when the
# # local result contains the not-found marker or is shorter than 200 characters.
# # NOTE(review): search_anatomy_docs returns None when no documents load, in
# # which case the `in docs` test raises TypeError -- guard with `not docs`.
# def research_node(state: AgentState) :
#     docs = search_anatomy_docs(state["search_query"])
#     if "찾을 수 없습니다" in docs or len(docs) < 200 :
#         web_results = web_search_tool.run(state["search_query"])
#         docs += "\n\n--- 웹 검색 결과 ---\n" + web_results
#     return {"documents": docs}

# def image_analysis_node(state: AgentState) :
#     if not state.get("image_topic") :
#         return {"image_analysis" : "이미지 분석 주제가 없습니다."}
#     analysis = analyze_anatomy_image(state["image_topic"])
#     return {"image_analysis" : analysis}
        
# def content_synthesis_node(state: AgentState) :
#     prompt = f"""당신은 의학 콘텐츠 작성가입니다. 아래의 텍스트 정보와 이미지 분석 결과를 종합하여, 사용자의 원본 질문에 대한 상세한 답변 초안을 작성하세요.

#                 [원본 질문]
#                 {state['original_question']}

#                 [수집된 텍스트 정보]
#                 {state['documents']}

#                 [이미지 분석 결과]
#                 {state['image_analysis']}

#                 [작성할 답변 초안]
#             """
#     draft = text_llm.invoke(prompt)
#     return {"draft" : draft}

# def final_review_node(state: AgentState) :
#     prompt = f"""당신은 수석 의학 에디터입니다. 아래의 답변 초안을 검토하고 최종 답변을 생성하세요. 
# 다음 기준을 따라주세요:
# 1. 사실 관계가 정확한지 확인하세요.
# 2. 전문 용어를 최대한 쉬운 말로 풀어 설명하세요.
# 3. 최종 사용자가 읽기 쉽도록 문장을 다듬고 구조화하세요.
# 4. 최종 답변은 반드시 한국어로 작성되어야 합니다.

# [답변 초안]
# {state['draft']}

# [최종 답변]
# """
#     final_answer = text_llm.invoke(prompt)
#     return {"final_answer" : final_answer}

# workflow = StateGraph(AgentState)
# workflow.add_node("query_analyst", query_analyst_node)
# workflow.add_node("researcher", research_node)
# workflow.add_node("image_analyzer", image_analysis_node)
# workflow.add_node("synthesizer", content_synthesis_node)
# workflow.add_node("reviewer", final_review_node)

# workflow.set_entry_point("query_analyst")
# workflow.add_edge("query_analyst", "researcher")
# workflow.add_edge("researcher", "image_analyzer")
# workflow.add_edge("image_analyzer", "synthesizer")
# workflow.add_edge("synthesizer", "reviewer")
# workflow.add_edge("reviewer", END)

# app = workflow.compile()

# questions = [
#         ############## 1_Embryology.md
#         "What are the two essential components of a higher organism cell as defined in the text?", # 7페이지
#         "Describe the four main phases of indirect cell division (karyokinesis) as outlined in the text.", # 7페이지
#         "What is the primary role of the yolk-sac in the embryo's early development?", # 20페이지
#         "How does the embryo separate from the yolk-sac, and what does the enclosed part of the yolk-sac form?", # 19페이지
#         "What significant developments occur in a human embryo during the Second Week?", # 33페이지
#         "What are the key characteristics of the human embryo by the end of the Third Week?", # 33페이지
        
#         ############## 2_Osteology.md
#         "What are the three groups into which the cells of a primitive segment differentiate, and what do they form?", # 38페이지
#         "How is each vertebral body formed from primitive segments during development?", # 38페이지
#         "What are the sphenoidal air sinuses, and where are they located within the sphenoid bone?", # 88페이지
#         "Describe the sphenoidal rostrum and its articulation.",# 88
#         "What is the tibia, and where is it located in the human leg?", # 158
#         "Describe the superior articular surface of the tibia's upper extremity.", # 158

#         ############## 3_Syndesmology.md
#         "What are joints or articulations, and how are immovable joints characterized?", # 174
#         "How does the articular lamella differ from ordinary bone tissue?", # 174
#         "Where is the synovial membrane located in relation to the glenoid cavity and humerus, and how does it interact with the Biceps brachii tendon?", # 207
#         "List some of the bursae located near the shoulder-joint and specify which ones communicate with the synovial cavity.", # 207
#         "What is the function of the plantar calcaneonavicular ligament, and what condition results if it yields?", # 236
#         "How are the navicular bone and the three cuneiform bones connected, and what type of movement do they permit?", # 236

#         ############## 4_Myology.md
#         "How does the nervous system serve as an indicator for the origin and migration paths of developing muscles, despite not influencing muscle differentiation?", # 250
#         "Describe the structural components of striped or voluntary muscle, from bundles to individual fibers.", # 250
#         "What is the triangular ligament and where is it located?", # 290
#         "What structures perforate the superficial layer (inferior fascia) of the urogenital diaphragm?", # 290
#         "Where does the Extensor digitorum longus muscle originate, and what structures are located between it and the Tibialis anterior?", # 322
#         "What is the Peronæus tertius, and where is it inserted?", # 322

#         ############## 5_Angiology.md
#         "What are the main characteristics of the middle coat (tunica media) of arteries, and how does its composition vary with vessel size?", # 334
#         "Describe the composition and variations of the external coat (tunica adventitia) in arteries.", # 334
#         "How do the Vitelline Veins develop into parts of the portal and hepatic veins?", # 345
#         "What happens to the Umbilical Veins during embryonic development and after birth?", # 345
#         "What are the three phases of a cardiac cycle and what happens during each?", # 358
#         "What are the main peculiarities observed in the fetal heart's vascular system?" # 359
# ]
# output_dir = "./result/LangGraph"
# os.makedirs(output_dir, exist_ok=True)

# # Run every question through the compiled graph and save each final answer to
# # its own text file under output_dir.
# # NOTE(review): the file name uses the 0-based `i` here, while the Leiden
# # variant of this loop earlier in the file uses `i+1`; only the answer is
# # written (no question or context).
# for i, q in enumerate(questions) :
#     inputs = {"original_question": q}
#     final_state = app.invoke(inputs)

#     today = datetime.date.today()
#     date_str = f"{today.month}월{today.day}일"
#     unique_id = uuid.uuid4()
#     filename = f"{date_str}_{i}_{unique_id}.txt"
#     filepath = os.path.join(output_dir, filename)

#     answer = final_state.get("final_answer", "오류 : 최종 답변 생성 못함")
#     with open(filepath, 'w', encoding="utf-8") as f :
#         f.write(answer)