In [5]:
import pickle
import json
import os
import re
from model_loader.config import generation_loader
from neo4j import GraphDatabase

# Connection settings for the Neo4j AuraDB instance.
# Each value can be overridden with an environment variable of the same name,
# so credentials do not have to live in the notebook.
# SECURITY NOTE(review): the fallback password below is committed in plain
# text. It is kept only so existing runs keep working; rotate it and drop the
# fallback once the environment variables are in place.
AURA_DB_URI = os.environ.get("AURA_DB_URI", "neo4j+s://d17d6106.databases.neo4j.io")
AURA_DB_USER = os.environ.get("AURA_DB_USER", "neo4j")
AURA_DB_PASSWORD = os.environ.get("AURA_DB_PASSWORD", "MlE0wTiwcSsSl58AOlczEfn_oD4FhwF1q-Hf4fn0678")

def _normalize_relation_type(raw_relation):
    """Turn a free-text relation label into a safe Cypher relationship type.

    Spaces become underscores and the label is upper-cased. Every remaining
    character that is not a letter, digit or underscore is also replaced by an
    underscore, so the label cannot break out of the back-quoted identifier it
    is embedded in. Returns "" when no letter or digit is left.
    """
    label = str(raw_relation).replace(" ", "_").upper()
    label = re.sub(r"\W", "_", label)
    return label if label.strip("_") else ""


def load_data_to_neo4j(json_file_path):
    """Load one extracted-results JSON file into Neo4j AuraDB.

    The file is expected to hold a "filename", a "full_original_text" and a
    "paragraphs_data" list whose items carry "paragraph_id",
    "original_paragraph_text", "entities" and "relations".

    Written graph:
        (:Document {filename, full_text})-[:HAS_PARAGRAPH]->
        (:Paragraph {paragraph_id, filename, text})-[:CONTAINS_ENTITY]->
        (:Entity {name, type})
    plus one relationship per relation between entities of the same paragraph.

    Args:
        json_file_path: Path of the JSON file to load.

    Returns:
        None. Any failure is reported on stdout and the file is abandoned
        (best-effort loading, the caller continues with the next file).
    """
    try:
        # Parse the file before opening a connection so an unreadable file
        # does not cost a round trip to the database.
        with open(json_file_path, 'r', encoding="utf-8") as f:
            data = json.load(f)

        filename = data.get("filename")

        with GraphDatabase.driver(AURA_DB_URI, auth=(AURA_DB_USER, AURA_DB_PASSWORD)) as driver:
            with driver.session(database="neo4j") as session:
                # The document node does not depend on the paragraph, so it is
                # written once per file instead of once per paragraph.
                session.run(
                    "MERGE (d:Document {filename: $filename}) "
                    "SET d.full_text = $full_text",
                    {"filename": filename, "full_text": data.get("full_original_text")},
                )

                for item in data.get('paragraphs_data', []):
                    paragraph_id = item.get("paragraph_id")
                    paragraph_key = {"paragraph_id": paragraph_id, "filename": filename}

                    # Nodes are merged on their own first and the relationship
                    # is merged between the bound variables. Merging the whole
                    # pattern with an unbound node would create a duplicate
                    # node whenever the relationship does not exist yet.
                    session.run(
                        "MERGE (d:Document {filename: $filename}) "
                        "MERGE (p:Paragraph {paragraph_id: $paragraph_id, filename: $filename}) "
                        "SET p.text = $text "
                        "MERGE (d)-[:HAS_PARAGRAPH]->(p)",
                        {**paragraph_key, "text": item.get("original_paragraph_text")},
                    )

                    for node in item.get('entities', []):
                        session.run(
                            "MATCH (p:Paragraph {paragraph_id: $paragraph_id, filename: $filename}) "
                            "MERGE (e:Entity {name: $name, type: $type}) "
                            "MERGE (p)-[:CONTAINS_ENTITY]->(e)",
                            {**paragraph_key, "name": node["name"], "type": node["type"]},
                        )

                    for relationship in item.get('relations', []):
                        head_name = relationship["head"]
                        tail_name = relationship["tail"]

                        # Relationship types cannot be passed as query
                        # parameters, so the label is sanitised and
                        # back-quoted before being placed in the query text.
                        rel_type = _normalize_relation_type(relationship["relation"])
                        if not rel_type:
                            continue

                        session.run(
                            "MATCH (p:Paragraph {paragraph_id: $paragraph_id, filename: $filename}) "
                            "MATCH (p)-[:CONTAINS_ENTITY]->(source:Entity {name: $head_name}) "
                            "MATCH (p)-[:CONTAINS_ENTITY]->(target:Entity {name: $tail_name}) "
                            f"MERGE (source)-[r:`{rel_type}`]->(target) "
                            # Keep the originating paragraph on the relationship (optional metadata).
                            "ON CREATE SET r.source_paragraph_id = $paragraph_id, r.source_filename = $filename",
                            {**paragraph_key, "head_name": head_name, "tail_name": tail_name},
                        )
            print(f"JSON data from {json_file_path} loaded into Neo4j AuraDB.")

    except Exception as e:
        print(f"Error loading {json_file_path} to Neo4j: {e}")

# Folder that holds the extracted JSON files to be loaded.
json_directory = "./data/extracted_results"

# When the folder is missing, create it and seed it with a small sample file
# that shows the expected JSON layout.
if not os.path.exists(json_directory):
    print(f"Directory '{json_directory}' not found. Please ensure your JSON files are in this directory.")
    os.makedirs(json_directory, exist_ok=True)
    example_json_path = os.path.join(json_directory, "example_data.json")

    example_paragraphs = [
        {
            "paragraph_id": 0,
            "original_paragraph_text": "The heart is an organ that pumps blood.",
            "entities": [
                {"name": "heart", "type": "Organ"},
                {"name": "blood", "type": "Substance"},
            ],
            "relations": [
                {"head": "heart", "relation": "pumps", "tail": "blood"},
            ],
        },
        {
            "paragraph_id": 1,
            "original_paragraph_text": "Bones are part of the skeleton.",
            "entities": [
                {"name": "bones", "type": "Organ"},
                {"name": "skeleton", "type": "Organ"},
            ],
            "relations": [
                {"head": "bones", "relation": "part_of", "tail": "skeleton"},
            ],
        },
    ]
    example_document = {
        "filename": "example_doc.md",
        "full_original_text": "This is an example document about cell biology and anatomy.",
        "paragraphs_data": example_paragraphs,
    }

    with open(example_json_path, 'w', encoding='utf-8') as f:
        f.write(json.dumps(example_document, indent=4, ensure_ascii=False))
    print(f"Example JSON data created at {example_json_path}. Please run the script again after modifying connection details.")


# Load every *.json file found directly inside the data folder.
print(f"Loading JSON data from directory: {json_directory}")
for filename in os.listdir(json_directory):
    if not filename.endswith(".json"):
        continue
    json_file_path = os.path.join(json_directory, filename)
    load_data_to_neo4j(json_file_path)

# Closing banner, one print per line.
for banner_line in (
    "\n---",
    "JSON 데이터 로드 완료. 이제 QASystem을 사용할 준비가 되었습니다.",
    "---\n",
):
    print(banner_line)

Loading JSON data from directory: ./data/extracted_results
JSON data from ./data/extracted_results/2_Osteology.json loaded into Neo4j AuraDB.
JSON data from ./data/extracted_results/3_Syndesmology.json loaded into Neo4j AuraDB.
JSON data from ./data/extracted_results/4_Myology.json loaded into Neo4j AuraDB.
JSON data from ./data/extracted_results/5_Angiology.json loaded into Neo4j AuraDB.
JSON data from ./data/extracted_results/9_Neurology.json loaded into Neo4j AuraDB.
JSON data from ./data/extracted_results/1_Embryology.json loaded into Neo4j AuraDB.

---
JSON 데이터 로드 완료. 이제 QASystem을 사용할 준비가 되었습니다.
---



In [None]:
import json
import re
from neo4j import GraphDatabase
from model_loader.config import generation_loader 

class QASystem :
    def __init__(self, uri="neo4j+s://d17d6106.databases.neo4j.io", user="neo4j", password="MlE0wTiwcSsSl58AOlczEfn_oD4FhwF1q-Hf4fn0678") :
        self.driver = GraphDatabase.driver(uri, auth=(user, password))
        try:
            self.driver.verify_connectivity()
            print("Neo4j AuraDB database connected successfully.")
        except Exception as e:
            print(f"Failed to connect to Neo4j AuraDB: {e}")
            raise

        self.llm_loader = generation_loader

        self.known_entities = ['Field', 'Field_of_Study', 'Kingdom', 'Life_Stage', 'Organ', 'Organism', 'Process', 'Document', 'Paragraph']
        self.known_relations = ['abducts', 'absent_in', 'absorb', 'absorbs', 'accessories_to', 'accessory_to', 'accommodate', 'accompanied by', 'accompanied_by', 'accompanies', 'accomplished_by', 'accumulates_in', 'accumulates_within', 'acquires', 'act_on', 'act_upon', 'acting_on', 'acts_as', 'acts_from', 'acts_in', 'acts_on', 'acts_upon', 'acts_with', 'adapt', 'adduct', 'adducts', 'adhere_to', 'adherent_to', 'adheres', 'adheres to', 'adheres_to', 'adjacent_to', 'admits', 'admits_of', 'affected_by', 'affects', 'affords', 'affords insertion to', 'affords origin to', 'after', 'allows', 'allows_of', 'allows_passage_of', 'also_known_as', 'alternate_with', 'alternative_name', 'analogous_to', 'analyzes', 'anastomoses', 'and', 'antagonist_of', 'antagonists_of', 'antagonizes', 'appear in', 'appear_in', 'appears', 'appears_at', 'appears_in', 'applied', 'applied_to', 'applies', 'applies_to', 'approaches', 'approximate', 'approximates', 'are', 'are_at', 'are_compressed', 'are_located_on', 'are_made_of', 'are_more_numerous_in', 'are_part_of', 'are_seen_in', 'are_widened', 'aris e from', 'arise', 'arise s', 'arise s from', 'arise_by', 'arise_from', 'arises', 'arises by', 'arises from', 'arises_above', 'arises_below', 'arises_by', 'arises_from', 'arises_through', 'arising_from', 'around', 'arrange', 'arranged_around', 'arranged_in', 'arranged_into', 'arranged_on', 'arranged_parallel_with', 'arterializes', 'articulate with', 'articulates', 'articulates with', 'articulates_with', 'ascend', 'ascend along', 'ascends', 'ascends_to', 'assist', 'assisted_by', 'assists', 'assists in forming', 'assists_in_forming', 'associated', 'associated with', 'associated_with', 'assume', 'assumed', 'assumes', 'atrophies', 'atrophy', 'attached', 'attached to', 'attached_to', 'attaches', 'attaches to', 'attaches_behind', 'attaches_to', 'attachment', 'attachment of', 'attachment_of', 'attachments', 'attends', 'averages', 'based_on', 'bears', 'become', 'becomes', 'begins', 'begins at', 'begins_at', 
'behind', 'believes', 'belongs_to', 'below', 'bends', 'beneath', 'best_marked_in', 'between', 'binds', 'blend', 'blend_with', 'blended with', 'blended_with', 'blending_with', 'blends', 'blends with', 'blends_with', 'bound', 'bound to', 'bound_to', 'bounded_by', 'bounds', 'braces', 'branch off from', 'bridges over', 'brought_into', 'called', 'can be separated', 'can_form', 'capable_of', 'carried_by', 'carried_into', 'carried_to', 'carries', 'carry', 'caudal_to', 'caused_by', 'causes', 'central_organ_of', 'changed from', 'characteristic_of', 'checks', 'circulates_through', 'circumscribes', 'close_to', 'closed_by', 'closed_in', 'closes', 'coated with', 'coated_with', 'collects', 'comes into contact with', 'commence_in', 'commences', 'commences_at', 'commences_in', 'common_to', 'communicate', 'communicates', 'communicates with', 'communicates_through', 'communicates_with', 'communicating_with', 'compared_to', 'comparison', 'compensates', 'complete', 'completed', 'completed_by', 'completes', 'completes_in', 'component', 'composed of', 'composed_of', 'compress', 'compressed', 'compresses', 'comprises', 'conceal', 'conceals', 'concerned_in', 'concerns', 'conducts', 'connect', 'connect_to', 'connected', 'connected at', 'connected by', 'connected with', 'connected_by', 'connected_to', 'connected_with', 'connecting', 'connects', 'connects to', 'connects_to', 'connects_with', 'considers', 'consist of', 'consist_of', 'consists', 'consists of', 'consists_in', 'consists_of', 'constitute', 'constitutes', 'constricts', 'contacts', 'contain', 'contained_within', 'contains', 'contains_no', 'continued_from', 'continued_into', 'continued_over', 'continues_from', 'continues_to', 'continuous', 'continuous with', 'continuous_into', 'continuous_over', 'continuous_to', 'continuous_with', 'contributes', 'contributes_to', 'controls', 'converge', 'converge to', 'converges_to', 'converges_toward', 'converted into', 'converted_into', 'converts', 'converts_into', 'convey', 'conveys', 
'correspond_to', 'corresponds', 'corresponds to', 'corresponds with', 'corresponds_to', 'corresponds_with', 'corrugates', 'counteracts', 'courses_toward', 'covered', 'covered by', 'covered_by', 'covered_with', 'covering', 'covering over', 'covers', 'crosses', 'crosses_over', 'crossing', 'crossing_over', 'curves_around', 'curves_downward', 'cut_by', 'cuts off', 'deals with', 'decreases', 'decussate', 'decussate_with', 'deepens', 'deeper_than', 'deflected_to', 'degenerates', 'demonstrated', 'denser_than', 'depends_on', 'depicts', 'deposit', 'deposited_on', 'depress', 'depresses', 'derived from', 'derived_from', 'descend', 'descend_on', 'descend_upon', 'descends', 'descends with', 'descends_between', 'descends_from', 'descends_to', 'described_as', 'described_by', 'described_with', 'describes', 'destroys', 'determined_by', 'determines', 'develop_between', 'developed', 'developed from', 'developed in', 'developed_after', 'developed_around', 'developed_from', 'developed_in', 'developed_into', 'developed_when', 'develops from', 'develops to', 'develops_between', 'develops_from', 'develops_in', 'devoid of', 'differ in', 'differentiated_from', 'differentiated_into', 'differentiates_into', 'differs from', 'differs_from', 'dilated_to_form', 'diminished_by', 'diminishes', 'directed', 'directed toward', 'directed_from', 'directed_on', 'directed_to', 'directed_toward', 'directs', 'disappears', 'dissolves', 'distinguished_from', 'distorts', 'distributing', 'diverge_from', 'diverges from', 'divide', 'divide_into', 'divided', 'divided into', 'divided_by', 'divided_into', 'divides', 'divides into', 'divides_opposite', 'divisible_into', 'does_not_belong_to', 'does_not_exist_in', 'does_not_participate_in', 'does_not_relate_to', 'drained_by', 'draining', 'drains', 'drains_to', 'drawn_backward', 'drawn_up', 'draws', 'draws back', 'draws forward', 'draws_down', 'driven from', 'during', 'effected', 'effected_by', 'effects', 'ejects', 'elevates', 'elongate', 'elongates', 'embraces', 
'emigrated_from', 'empties', 'empties_into', 'encircle', 'encircles', 'enclose', 'enclosed_between', 'enclosed_by', 'enclosed_in', 'enclosed_within', 'encloses', 'encroaches', 'end', 'end_in', 'end_of', 'end_on', 'ending', 'ends at', 'ends_at', 'ends_in', 'enjoys', 'enlarge', 'enlarges', 'ensheathes', 'enters', 'enters_into', 'enveloped_by', 'envelopes', 'envelops', 'equals', 'equals_in_size', 'especially_in', 'establishes', 'everts', 'example_of', 'excavate', 'except', 'excluded_from', 'excludes', 'exerts', 'exist_in', 'exists_between', 'exists_in', 'exists_on', 'exists_with', 'expands_into', 'expands_over', 'expansion_of', 'expel', 'expels', 'expended_in', 'explains', 'exposed_by', 'exposed_to', 'expressed_in', 'expresses', 'extend', 'extend_above', 'extend_between', 'extend_to', 'extended', 'extends', 'extends from', 'extends into', 'extends to', 'extends_across', 'extends_along', 'extends_between', 'extends_from', 'extends_into', 'extends_on', 'extends_over', 'extends_through', 'extends_to', 'extends_toward', 'exudes', 'facilitates', 'fades_into', 'falls in front of', 'falls_on', 'fill_up', 'filled_by', 'filled_with', 'fills', 'fills up', 'fits_into', 'fix', 'fixed', 'fixed_by', 'fixed_into', 'fixed_to', 'fixes', 'flex', 'flexes', 'flexure', 'flows_from', 'flows_into', 'flows_through', 'follows', 'for', 'for the attachment of', 'for the origins of', 'for_attachment', 'for_insertion_of', 'for_passage_of', 'for_reception_of', 'form', 'formed', 'formed_by', 'formed_from', 'formed_in', 'formed_into', 'formed_on', 'formed_partly_by', 'formed_with', 'forms', 'forms part of', 'forms_concavity_for', 'found_between', 'found_in', 'found_on', 'from', 'furnish', 'furnished with', 'fused_with', 'fuses', 'fuses with', 'fuses_with', 'generate', 'generated', 'generated by', 'generated_by', 'generates', 'give_off', 'given_off_from', 'given_to', 'gives', 'gives attachment to', 'gives insertion to', 'gives origin from', 'gives origin to', 'gives passage to', 'gives up', 
'gives_attachment_to', 'gives_entrance_to', 'gives_exit_to', 'gives_insertion', 'gives_insertion_to', 'gives_off', 'gives_origin', 'gives_origin_to', 'gives_rise_to', 'glide_upon', 'glides', 'glides_on', 'glides_over', 'gliding', 'grasped_by', 'grooved', 'grooved_for', 'grouped around', 'groups_around', 'grows', 'grows into', 'grows_from', 'grows_into', 'grows_out_into', 'guarded_by', 'guards', 'harmonizes', 'has', 'has_axis', 'has_capacity', 'has_feature', 'has_part', 'has_part_of', 'has_shape', 'has_structure', 'have', 'held_in_position_by', 'hides', 'holds', 'holds_together', 'homologue_of', 'immersing', 'impregnated_by', 'in', 'in contact with', 'in relation with', 'inclines', 'inclines_through', 'included_as', 'includes', 'incorporated with', 'increase_strength_of', 'increased_by', 'increases', 'increases in quantity', 'increases_in', 'increases_in_quantity', 'indicate', 'indicates', 'influenced_by', 'initiates', 'inserte d into', 'inserted', 'inserted between', 'inserted into', 'inserted_by', 'inserted_in', 'inserted_into', 'insertion', 'inserts_into', 'inserts_on', 'instead_of', 'interlace_with', 'interlaces_in', 'intermingle_with', 'intermingled', 'intermingling', 'interposed between', 'interposed_between', 'intersect', 'intersects', 'intervene_between', 'intervenes between', 'intervenes_between', 'into', 'invades', 'invaginated by', 'invaginates', 'invest', 'invested by', 'invested with', 'invested_by', 'investment_for', 'invests', 'involves', 'is', 'is a', 'is a part of', 'is caused_by', 'is continuous with', 'is described as', 'is directed toward', 'is distinct from', 'is in contact with', 'is in relation', 'is inserted into', 'is positioned on', 'is_a', 'is_a_part_of', 'is_broad', 'is_caused_by', 'is_closed', 'is_composed_of', 'is_connected_with', 'is_continuous_with', 'is_covered_by', 'is_curved_to', 'is_cut_off', 'is_destitute_of', 'is_enlarged', 'is_expansion_from', 'is_extensive', 'is_filled_with', 'is_formed_in', 'is_freest', 'is_grooved_by', 
'is_in', 'is_increased_in', 'is_inserted_into', 'is_larger_in', 'is_lodged_in', 'is_measured_in', 'is_more_plentiful_in', 'is_not_much_larger_than', 'is_part_of', 'is_relaxed', 'is_relaxed_during', 'is_seen_on', 'is_separate_from', 'is_strengthened_by', 'is_stretched', 'is_stretched_during', 'is_tendon_of', 'is_tense_during', 'is_the', 'is_thicker_at', 'is_thinner_at', 'is_tightened_by', 'is_united_with', 'join', 'join with', 'join_with', 'joined', 'joined_by', 'joined_to', 'joining', 'joins', 'joins with', 'lacks', 'larger_in', 'lasts', 'lateral_to', 'lead_from', 'leads_from', 'leads_into', 'leads_to', 'lengthened', 'lent_by', 'less_than', 'lessened_during', 'lessens', 'lie_behind', 'lies beneath', 'lies between', 'lies on', 'lies_around', 'lies_behind', 'lies_below', 'lies_in', 'lies_on', 'lifts', 'like', 'limited to', 'limited_by', 'limited_to', 'limits', 'lined', 'lined_by', 'lines', 'located_beyond', 'located_in', 'located_near', 'location_of', 'lodgement_for', 'lodges', 'lodges_in', 'looks_backward', 'loses', 'made_of', 'made_up of', 'maintains', 'marked by', 'marked_by', 'marks', 'marks_end_of', 'may_be', 'measured_between', 'measured_from', 'measured_in', 'measured_to', 'measurement_of', 'measures', 'mediates', 'meets', 'migrate_from', 'migrate_into', 'migrate_over', 'migrates_towards', 'misleads', 'missing_from', 'mistranslated', 'modified by', 'modified_into', 'moved_through', 'moves', 'moves_around', 'moves_toward', 'named', 'nears', 'nourishes', 'obliterated', 'obliterated_after', 'obliterates', 'observed_beneath', 'observed_in', 'obtained_by', 'occludes', 'occupied_by', 'occupies', 'occur during', 'occurs', 'occurs_at', 'occurs_in', 'occurs_on', 'of', 'open_into', 'opening_of', 'opens', 'opens into', 'opens_into', 'opens_on', 'opens_onto', 'opposite', 'origin', 'origin_from', 'originate', 'originate_from', 'originates', 'originates from', 'originates_from', 'ossification', 'ossified', 'ossified from', 'ossified_by', 'ossified_from', 'ossified_in', 
'ossifies', 'ossifies_in', 'ossifies_to', 'overhangs', 'overlapped_by', 'overlaps', 'overlies', 'parallel', 'parallel_to', 'part_of', 'pass through', 'pass_across', 'pass_between', 'pass_down', 'pass_from', 'pass_over', 'pass_through', 'pass_to', 'passage_for', 'passes', 'passes along', 'passes from', 'passes in front of', 'passes to', 'passes_along', 'passes_around', 'passes_behind', 'passes_beneath', 'passes_close_to', 'passes_from', 'passes_into', 'passes_out', 'passes_over', 'passes_through', 'passes_to', 'penetrates', 'perforated_by', 'perforates', 'performs', 'permits', 'permitted_by', 'persists_as', 'persists_from', 'persists_throughout', 'pierces', 'placed behind', 'placed in front of', 'placed_between', 'possesses', 'preceded_by', 'precedes', 'presents', 'preserves', 'pressed_against', 'prevented_by', 'prevents', 'proceeds', 'proceeds_to', 'proceeds_toward', 'produced_in', 'produces', 'prohibits', 'projected_from', 'projects', 'projects_as', 'projects_behind', 'projects_to', 'projects_toward', 'prolonged_into', 'prolonged_on', 'prominent', 'prone_to', 'propagates_to', 'property_of', 'proposed', 'protect', 'protected_by', 'protects', 'protrudes', 'proves', 'provides', 'pulls', 'push', 'pushes', 'radiate_from', 'radiate_through', 'radiate_toward', 'radiates', 'radiates_from', 'radiates_to', 'raise', 'raised', 'raises', 'ramifications_in', 'reaches', 'reaches_from', 'reaches_to', 'recedes_into', 'receive', 'received_by', 'received_in', 'received_into', 'receives', 'receiving chambers', 'reception', 'reduced_to', 'reflected_along', 'reflected_over', 'reflected_upon', 'regards', 'regenerates_by', 'regulate', 'regulates', 'reinforced', 'reinforced_by', 'related_to', 'relates', 'relates_to', 'relation_with', 'removes', 'renders', 'replace', 'replaces', 'represents', 'required_by', 'requires', 'resembles', 'resists', 'resolves_into', 'restores', 'rests', 'rests_against', 'rests_on', 'rests_upon', 'result_in', 'results in', 'results_from', 'retains', 'retards', 
'retract', 'retracts', 'return', 'returns', 'returns_from', 'returns_to', 'reveals', 'revolves', 'rises_from', 'rolls', 'rolls_along', 'roofs', 'rotate', 'rotated', 'rotates', 'rotates_around', 'rough_for', 'run', 'run_across', 'run_in', 'run_into', 'runs', 'runs from', 'runs_across', 'runs_from', 'runs_in_same_direction_as', 'runs_parallel_with', 'runs_through', 'runs_to', 'same_as', 'secretes', 'seen_in', 'sends', 'sends off', 'separate', 'separated', 'separated_by', 'separated_from', 'separates', 'separates_from', 'separating', 'serves', 'serves as', 'serves for', 'serves_for', 'serves_to', 'serves_to_protect', 'shielded_by', 'shows', 'signifies', 'similar_to', 'situated', 'situated_at', 'situated_behind', 'situated_beneath', 'situated_near', 'smaller_in', 'split_into', 'splits', 'splits_into', 'spread_over', 'spreads', 'spreads_into', 'spreads_over', 'spreads_to', 'spring', 'spring_from', 'springs_from', 'stabilizes', 'stain', 'stains', 'stains with', 'starts', 'starts_in', 'steadies', 'steady', 'straightened', 'strengthen', 'strengthened by', 'strengthened_by', 'strengthens', 'stretches', 'stretches across', 'stretches_across', 'stretches_between', 'stretches_from', 'stretches_to', 'stronger', 'strongest', 'studies', 'subdivided', 'subdivided by', 'subdivided into', 'subdivided_at', 'subdivides', 'subdivides_into', 'subserves', 'substitute_for', 'succeeded_by', 'suffices', 'suggests', 'superficial_to', 'supinates', 'supplied', 'supplied_by', 'supplied_through', 'supplied_with', 'supplies', 'supported_by', 'supports', 'surmounted_by', 'surmounts', 'surround', 'surrounded_by', 'surrounding', 'surrounds', 'swallowed_by', 'takes', 'takes_no_part_in', 'takes_origin', 'takes_origin_from', 'takes_place', 'takes_place_around', 'takes_place_between', 'takes_place_in', 'takes_place_through', 'takes_place_with', 'targets', 'tensor_of', 'tensors_of', 'termed', 'terminates', 'terminates_in', 'thickened', 'thicker_in', 'thicker_over', 'thicker_than_at', 'thickest', 
'thickest_in', 'thinned', 'thinner_in', 'thinner_over', 'thinnest', 'through_which_pass', 'tightens', 'tilts', 'to', 'traced_by', 'traced_to', 'transferred_to', 'transformed_into', 'transitions_to', 'transmission', 'transmit', 'transmits', 'transmits_through', 'transmitted to', 'transmitted_by', 'transmitted_through', 'transmitted_to', 'travel', 'travels_to', 'traversed_by', 'traverses', 'turning component', 'turns', 'turns_around', 'under', 'undergoes', 'unite', 'unite_below', 'unite_to', 'unite_to_form', 'united by', 'united with', 'united_by', 'united_in', 'united_to', 'united_with', 'unites', 'unites with', 'unites_at', 'unites_into', 'unites_with', 'uniting', 'unlocks', 'upon', 'used_in', 'uses', 'varies', 'varies_in', 'varies_in_proportion_to', 'varies_with', 'widest_in', 'with', 'withstands', 'worked_out', 'works with', 'wrinkles', 'yields']

        self.entity_relation_extraction_prompt_template = """
            Extract entities and their relations from the following sentence.

            **Entities** should be **unique nouns or concepts**, extracted as **noun phrases** whenever possible. Identify **concrete objects or concepts** rather than complex activities or phenomena as entities.

            **Relations** should clearly describe the connection between two entities, preferring **reusable predicate verbs** for a knowledge graph. Use **concise verbs** or clear, hyphenated forms like **'part_of' or 'includes'**.

            Output the result **only in the following JSON format**, with no other explanations or text:

            ```json
            {{
                "entities": [
                    {{"name": "Entity1", "type": "Type (e.g., Organ, System, Substance, Function, Disease)"}},
                    {{"name": "Entity2", "type": "Type"}}
                ],
                "relations": [
                    {{"head": "Entity1", "relation": "Relation_Type (e.g., part_of, causes)", "tail": "Entity2"}},
                    {{"head": "Entity3", "relation": "generates", "tail": "Entity4"}}
                ]
            }}

            sentence : "{text_to_analyze}"
            JSON result :
        """

        self.cypher_query_generation_prompt_template = """
            Given the following user question and extracted entities/relations, generate a Cypher query to retrieve relevant information from a knowledge graph.
            The knowledge graph contains nodes like 'Entity' with properties 'name', 'type', and 'paragraph_id'. It also has 'Paragraph' nodes with 'text' and 'paragraph_id', and 'Document' nodes with 'filename'.
            Entities are linked to Paragraphs via `CONTAINS_ENTITY`. Paragraphs are linked to Documents via `HAS_PARAGRAPH`.

            Here are some examples of Cypher queries:
            - To find the tail entity related by 'part_of' from 'skeleton': MATCH (n:Entity {{name: 'skeleton'}})-[:part_of]->(m:Entity) RETURN m.name
            - To find the relation between 'bones' and 'skeleton': MATCH (n:Entity {{name: 'bones'}})-[r]->(m:Entity {{name: 'skeleton'}}) RETURN type(r)
            - To find all relations connected to 'heart' and the paragraph text they originate from: MATCH (n:Entity {{name: 'heart'}})-[r]-(m), (p:Paragraph)-[:CONTAINS_ENTITY]->(n) RETURN n.name, type(r), m.name, p.text LIMIT 5
            - To find entities related by 'exists_in' and their paragraph: MATCH (n:Entity)-[:exists_in]->(m:Entity), (p:Paragraph)-[:CONTAINS_ENTITY]->(n) RETURN n.name, m.name, p.text LIMIT 5
            - To find paragraph text discussing 'embryology': MATCH (p:Paragraph)-[:CONTAINS_ENTITY]->(e:Entity {{name: 'embryology'}}) RETURN p.text LIMIT 5
            - To find all entities and relations within a specific paragraph (e.g., paragraph_id 1 from example_doc.md): MATCH (p:Paragraph {{paragraph_id: 1, filename: 'example_doc.md'}})-[:CONTAINS_ENTITY]->(e), (e)-[r]-(m) RETURN e.name, type(r), m.name, p.text LIMIT 5

            User Question: "{question}"
            Extracted Entities: {entities}
            Extracted Relations: {relations}

            Generate only the Cypher query. Do not include any other text or explanation.
            Cypher Query:
        """

        self.answer_generation_prompt_template = """
            Based on the following context and the original question, provide a concise and direct answer.
            If the context does not contain enough information, state that.

            Original Question: "{question}"

            Context:
            {context_text}

            Answer:
        """
    
    def close(self):
        if self.driver:
            self.driver.close()
            print("Neo4j AuraDB database connection closed.")

    def _call_llm_generate(self, prompt) :
        if self.llm_loader:
            if hasattr(self.llm_loader, "tokenizer") and hasattr(self.llm_loader, "model"):
                tokenizer = self.llm_loader.tokenizer
                model = self.llm_loader.model

                input_ids = tokenizer.encode(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)
                attention_mask = (input_ids != tokenizer.pad_token_id).long().to(model.device)

                output = model.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    max_new_tokens=500,
                    temperature=0.0,
                    do_sample=False,
                    top_p=0.85,
                    repetition_penalty=1.2,
                    early_stopping=True,
                    num_beams=3,
                    pad_token_id=tokenizer.pad_token_id,
                    eos_token_id=tokenizer.eos_token_id
                )
                generated_ids = output[0][input_ids.shape[-1]:]
                raw_answer = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
                return raw_answer
            else:
                raw_answer = self.llm_loader.generate(prompt)
                return raw_answer
        else:
            print("generation_loader가 로드되지 않음")


    def _extract_entities_relations(self, question) :
        prompt = self.entity_relation_extraction_prompt_template.format(text_to_analyze=question)
        raw_llm_output = self._call_llm_generate(prompt)

        try :
            json_start = raw_llm_output.find("{")
            json_end = raw_llm_output.rfind("}") + 1
            if json_start != -1 and json_end != -1 and json_end > json_start :
                json_str = raw_llm_output[json_start:json_end]
                extracted_data = json.loads(json_str)
                return extracted_data.get("entities", []), extracted_data.get("relations", [])
            else :
                print(f"LLM 답변에서 유효한 JSON 형태를 찾을 수 없음 : {raw_llm_output}")
                return [], []
            
        except json.JSONDecodeError as e :
            print(f"개체 추출 과정에서 JSON 디코딩 오류 발생: {e}")
            print(f"오류 발생 원문: {raw_llm_output}")
            return [], []
    
    def _calculate_relation_relevance(self, question, relation_tuple):
        head, rel_type, tail = relation_tuple
        score = 0
        question_lower = question.lower()
        if head.lower() in question_lower:
            score += 1
        if tail.lower() in question_lower:
            score += 1
        normalized_rel_type = rel_type.lower().replace('_', ' ')
        if normalized_rel_type in question_lower: 
            score += 0.5 
        return score

    def _generate_cypher_query(self, question, extracted_entities, extracted_relations) :
        ent_names = [e["name"] for e in extracted_entities]
        
        MAX_RELATIONS_FOR_LLM = 10 
        
        scored_relations = []
        for r in extracted_relations:
            rel_tuple = (r["head"], r["relation"], r["tail"])
            score = self._calculate_relation_relevance(question, rel_tuple)
            scored_relations.append((score, rel_tuple))
        
        scored_relations.sort(key=lambda x: x[0], reverse=True)
        
        filtered_rel_tuples = [rel_tuple for _, rel_tuple in scored_relations[:MAX_RELATIONS_FOR_LLM]]

        prompt = self.cypher_query_generation_prompt_template.format(
            question=question,
            entities=ent_names,
            relations=filtered_rel_tuples 
        )

        cypher_query = self._call_llm_generate(prompt)
        cypher_query = cypher_query.split("\n")[0].strip() 
        cypher_query = re.sub(r'```(?:cypher)?\s*', '', cypher_query, 1)
        cypher_query = cypher_query.replace("```", "").strip()

        print(f"Generated Cypher Query : {cypher_query}")
        return cypher_query 

    def _execute_cypher_on_knowledge_graph(self, cypher_query):
        """Run ``cypher_query`` and flatten each record into one text line.

        The query text is LLM-generated from a user question, so it is run in
        a read-access session as a guard against accidental or injected
        writes. This is defence in depth only; a read-only database user is
        the proper access control.

        Args:
            cypher_query: Cypher text to execute.

        Returns:
            One "key: value, key: value" string per record, skipping None
            values and records with no non-None value. An empty list is
            returned when the query fails; the error is printed.
        """
        # Local import keeps this method self-contained; the neo4j package is
        # already a dependency of this file.
        from neo4j import READ_ACCESS

        with self.driver.session(default_access_mode=READ_ACCESS) as session:
            try:
                result = session.run(cypher_query)
                context_texts = []
                for record in result:
                    fields = [
                        f"{key}: {value}"
                        for key, value in record.items()
                        if value is not None
                    ]
                    if fields:
                        context_texts.append(", ".join(fields))
                return context_texts
            except Exception as e:
                print(f"Cypher 쿼리 실행 중 오류 발생: {e}")
                return []

    def _generate_final_answer(self, question, context_texts) :
        if not context_texts :
            return f"관련된 정보를 찾을 수 없습니다."
        
        combined_text = "\n\n".join(context_texts)

        prompt = self.answer_generation_prompt_template.format(
            question=question,
            context_text=combined_text
        )
        final_answer = self._call_llm_generate(prompt)
        return final_answer
    
    def answer_question(self, question) :
        print(f"\n{'='*50}\nUser Question : {question}")

        extracted_entities, extracted_relations = self._extract_entities_relations(question)
        print(f"Extracted Entities : {extracted_entities}")
        print(f"Extracted Relations : {extracted_relations}")

        if not extracted_relations and not extracted_entities :
            return "질문에서 개체나 관계를 추출할 수 없습니다."
        
        cypher_query = self._generate_cypher_query(question, extracted_entities, extracted_relations)
        
        if not cypher_query :
            return "질문에서 Cypher 쿼리를 생성하지 못했습니다."
        
        relevant_texts = self._execute_cypher_on_knowledge_graph(cypher_query)

        if not relevant_texts :
            return "지식 그래프에서 관련된 정보를 찾을 수 없습니다."
        
        final_answer = self._generate_final_answer(question, relevant_texts)
        return final_answer


if __name__ == "__main__":
    # Connect to the graph database, then attach the generation model loader.
    qa_system = QASystem(uri=AURA_DB_URI, user=AURA_DB_USER, password=AURA_DB_PASSWORD)

    qa_system.llm_loader = generation_loader

    # Evaluation questions grouped by source document; the trailing comment on
    # each entry is the page of the source the question was written from.
    questions = [
        ############## 1_Embryology.md
        "What are the two essential components of a higher organism cell as defined in the text?",  # p. 7
        "Describe the four main phases of indirect cell division (karyokinesis) as outlined in the text.",  # p. 7
        "What is the primary role of the yolk-sac in the embryo's early development?",  # p. 20
        "How does the embryo separate from the yolk-sac, and what does the enclosed part of the yolk-sac form?",  # p. 19
        "What significant developments occur in a human embryo during the Second Week?",  # p. 33
        "What are the key characteristics of the human embryo by the end of the Third Week?",  # p. 33

        ############## 2_Osteology.md
        "What are the three groups into which the cells of a primitive segment differentiate, and what do they form?",  # p. 38
        "How is each vertebral body formed from primitive segments during development?",  # p. 38
        "What are the sphenoidal air sinuses, and where are they located within the sphenoid bone?",  # p. 88
        "Describe the sphenoidal rostrum and its articulation.",  # p. 88
        "What is the tibia, and where is it located in the human leg?",  # p. 158
        "Describe the superior articular surface of the tibia's upper extremity.",  # p. 158

        ############## 3_Syndesmology.md
        "What are joints or articulations, and how are immovable joints characterized?",  # p. 174
        "How does the articular lamella differ from ordinary bone tissue?",  # p. 174
        "Where is the synovial membrane located in relation to the glenoid cavity and humerus, and how does it interact with the Biceps brachii tendon?",  # p. 207
        "List some of the bursae located near the shoulder-joint and specify which ones communicate with the synovial cavity.",  # p. 207
        "What is the function of the plantar calcaneonavicular ligament, and what condition results if it yields?",  # p. 236
        "How are the navicular bone and the three cuneiform bones connected, and what type of movement do they permit?",  # p. 236

        ############## 4_Myology.md
        "How does the nervous system serve as an indicator for the origin and migration paths of developing muscles, despite not influencing muscle differentiation?",  # p. 250
        "Describe the structural components of striped or voluntary muscle, from bundles to individual fibers.",  # p. 250
        "What is the triangular ligament and where is it located?",  # p. 290
        "What structures perforate the superficial layer (inferior fascia) of the urogenital diaphragm?",  # p. 290
        "Where does the Extensor digitorum longus muscle originate, and what structures are located between it and the Tibialis anterior?",  # p. 322
        "What is the Peronæus tertius, and where is it inserted?",  # p. 322

        ############## 5_Angiology.md
        "What are the main characteristics of the middle coat (tunica media) of arteries, and how does its composition vary with vessel size?",  # p. 334
        "Describe the composition and variations of the external coat (tunica adventitia) in arteries.",  # p. 334
        "How do the Vitelline Veins develop into parts of the portal and hepatic veins?",  # p. 345
        "What happens to the Umbilical Veins during embryonic development and after birth?",  # p. 345
        "What are the three phases of a cardiac cycle and what happens during each?",  # p. 358
        "What are the main peculiarities observed in the fetal heart's vascular system?",  # p. 359
    ]

    # Close the system even if a question fails or the run is interrupted.
    try:
        for question in questions:
            reply = qa_system.answer_question(question)
            print(f"Answer : {reply}")
            print(f"{'='*50}\n")
    finally:
        qa_system.close()

Neo4j AuraDB database connected successfully.

User Question : What are the two essential components of a higher organism cell as defined in the text?
Extracted Entities : [{'name': 'components', 'type': 'Substance'}, {'name': 'organism cell', 'type': 'Cell'}, {'name': 'text', 'type': 'Document'}]
Extracted Relations : [{'head': 'components', 'relation': 'part_of', 'tail': 'organism cell'}, {'head': 'organism cell', 'relation': 'defined_in', 'tail': 'text'}]
Generated Cypher Query : 
Answer : 질문에서 Cypher 쿼리를 생성하지 못했습니다.


User Question : Describe the four main phases of indirect cell division (karyokinesis) as outlined in the text.
Extracted Entities : [{'name': 'four main phases', 'type': 'Concept'}, {'name': 'indirect cell division', 'type': 'Process'}, {'name': 'karyokinesis', 'type': 'Process'}, {'name': 'text', 'type': 'Document'}]
Extracted Relations : [{'head': 'karyokinesis', 'relation': 'is_a', 'tail': 'indirect cell division'}, {'head': 'four main phases', 'relation': 'part_o



Generated Cypher Query : MATCH (n:Entity {name: 'cells'})-[:part_of]->(s:Entity {name: 'segment'}), (g:Entity)-[:formed_from]->(n) RETURN g.name, s.name LIMIT 3
Answer : 지식 그래프에서 관련된 정보를 찾을 수 없습니다.


User Question : How is each vertebral body formed from primitive segments during development?
Extracted Entities : [{'name': 'vertebral body', 'type': 'Organ'}, {'name': 'primitive segments', 'type': 'Structure'}, {'name': 'development', 'type': 'Process'}]
Extracted Relations : [{'head': 'vertebral body', 'relation': 'formed_from', 'tail': 'primitive segments'}, {'head': 'vertebral body', 'relation': 'occurs_during', 'tail': 'development'}]
Generated Cypher Query : 
Answer : 질문에서 Cypher 쿼리를 생성하지 못했습니다.


User Question : What are the sphenoidal air sinuses, and where are they located within the sphenoid bone?
Extracted Entities : [{'name': 'sphenoidal air sinuses', 'type': 'Organ'}, {'name': 'sphenoid bone', 'type': 'Bone'}]
Extracted Relations : [{'head': 'sphenoidal air sinuses', 'relati



Answer : 지식 그래프에서 관련된 정보를 찾을 수 없습니다.


User Question : What is the function of the plantar calcaneonavicular ligament, and what condition results if it yields?
Extracted Entities : [{'name': 'plantar calcaneonavicular ligament', 'type': 'Ligament'}, {'name': 'function', 'type': 'Function'}, {'name': 'condition', 'type': 'Disease'}]
Extracted Relations : [{'head': 'plantar calcaneonavicular ligament', 'relation': 'has', 'tail': 'function'}, {'head': 'plantar calcaneonavicular ligament', 'relation': 'causes', 'tail': 'condition'}]
Generated Cypher Query : 
Answer : 질문에서 Cypher 쿼리를 생성하지 못했습니다.


User Question : How are the navicular bone and the three cuneiform bones connected, and what type of movement do they permit?
Extracted Entities : [{'name': 'navicular bone', 'type': 'Bone'}, {'name': 'three cuneiform bones', 'type': 'Bone'}, {'name': 'movement', 'type': 'Function'}]
Extracted Relations : [{'head': 'navicular bone', 'relation': 'connected_to', 'tail': 'three cuneiform bones'}, {'h



Answer : 지식 그래프에서 관련된 정보를 찾을 수 없습니다.


User Question : How does the nervous system serve as an indicator for the origin and migration paths of developing muscles, despite not influencing muscle differentiation?
Extracted Entities : [{'name': 'nervous system', 'type': 'System'}, {'name': 'origin', 'type': 'Concept'}, {'name': 'migration paths', 'type': 'Concept'}, {'name': 'developing muscles', 'type': 'Organ'}, {'name': 'muscle differentiation', 'type': 'Function'}]
Extracted Relations : [{'head': 'nervous system', 'relation': 'serves_as', 'tail': 'indicator'}, {'head': 'nervous system', 'relation': 'indicates', 'tail': 'origin'}, {'head': 'nervous system', 'relation': 'indicates', 'tail': 'migration paths'}, {'head': 'developing muscles', 'relation': 'has', 'tail': 'origin'}, {'head': 'developing muscles', 'relation': 'has', 'tail': 'migration paths'}, {'head': 'nervous system', 'relation': 'does_not_influence', 'tail': 'muscle differentiation'}]
Generated Cypher Query : MATCH (n:Enti



Answer : 지식 그래프에서 관련된 정보를 찾을 수 없습니다.


User Question : Describe the structural components of striped or voluntary muscle, from bundles to individual fibers.
Extracted Entities : [{'name': 'structural components', 'type': 'Anatomy'}, {'name': 'striped muscle', 'type': 'Tissue'}, {'name': 'voluntary muscle', 'type': 'Tissue'}, {'name': 'bundles', 'type': 'Anatomy'}, {'name': 'individual fibers', 'type': 'Anatomy'}]
Extracted Relations : [{'head': 'structural components', 'relation': 'part_of', 'tail': 'striped muscle'}, {'head': 'structural components', 'relation': 'part_of', 'tail': 'voluntary muscle'}, {'head': 'bundles', 'relation': 'part_of', 'tail': 'striped muscle'}, {'head': 'bundles', 'relation': 'part_of', 'tail': 'voluntary muscle'}, {'head': 'individual fibers', 'relation': 'part_of', 'tail': 'bundles'}]
Generated Cypher Query : MATCH (e1:Entity {name: 'structural components'})-[r1]->(e2:Entity {name: 'striped muscle'}),
Cypher 쿼리 실행 중 오류 발생: {code: Neo.ClientError.Statement.S



Answer : 지식 그래프에서 관련된 정보를 찾을 수 없습니다.


User Question : Where does the Extensor digitorum longus muscle originate, and what structures are located between it and the Tibialis anterior?
Extracted Entities : [{'name': 'Extensor digitorum longus muscle', 'type': 'Muscle'}, {'name': 'structures', 'type': 'Anatomical_Structure'}, {'name': 'Tibialis anterior', 'type': 'Muscle'}]
Extracted Relations : [{'head': 'Extensor digitorum longus muscle', 'relation': 'originates_from', 'tail': 'origin'}, {'head': 'structures', 'relation': 'located_between', 'tail': 'Extensor digitorum longus muscle'}, {'head': 'structures', 'relation': 'located_between', 'tail': 'Tibialis anterior'}]
Generated Cypher Query : 
Answer : 질문에서 Cypher 쿼리를 생성하지 못했습니다.


User Question : What is the Peronæus tertius, and where is it inserted?
Extracted Entities : [{'name': 'Peronæus tertius', 'type': 'Anatomical Structure'}, {'name': 'insertion point', 'type': 'Location'}]
Extracted Relations : [{'head': 'Peronæus tertius', 'r



Generated Cypher Query : MATCH (n:Entity {name: 'external coat'})-[:is_a]->(m:Entity {name: 'tunica adventitia'}), (t:Entity {name: 'tunica adventitia'})-[:part_of]->(a:Entity {name: 'arteries'}), (p:Paragraph)-[:CONTAINS_ENTITY]->(n), (q:Paragraph)-[:CONTAINS_ENTITY]->(t), (r:Paragraph)-[:CONTAINS_ENTITY]->(a) RETURN n.name, t.name, a.name, p.text, q.text, r.text LIMIT 5
Answer : 지식 그래프에서 관련된 정보를 찾을 수 없습니다.


User Question : How do the Vitelline Veins develop into parts of the portal and hepatic veins?
Neo4j AuraDB database connection closed.


KeyboardInterrupt: 

: 

In [7]:
# import json
# import os

# def extract_unique_types_and_relations(base_path='./data/extracted_results'):
#     unique_entity_types = set()
#     unique_relation_types = set()

#     json_files = [
#         "1_Embryology.json",
#         "2_Osteology.json",
#         "3_Syndesmology.json",
#         "4_Myology.json",
#         "5_Angiology.json"
#     ]

#     for filename in json_files:
#         file_path = os.path.join(base_path, filename)
        
#         if not os.path.exists(file_path):
#             print(f"Warning: File not found at {file_path}. Skipping.")
#             continue

#         try:
#             with open(file_path, 'r', encoding='utf-8') as f:
#                 data = json.load(f)

#                 for paragraph_data in data.get("paragraphs_data", []):
#                     for entity in paragraph_data.get("entities", []):
#                         if "type" in entity and entity["type"]:
#                             unique_entity_types.add(entity["type"])
                    
#                     for relation in paragraph_data.get("relations", []):
#                         if "relation" in relation and relation["relation"]:
#                             unique_relation_types.add(relation["relation"])
                
#         except json.JSONDecodeError as e:
#             print(f"Error decoding JSON from {file_path}: {e}")
#         except Exception as e:
#             print(f"An unexpected error occurred while processing {file_path}: {e}")

#     return sorted(list(unique_entity_types)), sorted(list(unique_relation_types))

# if __name__ == "__main__":
#     entity_types, relation_types = extract_unique_types_and_relations()

#     print(f"unique_entity_types = {entity_types}")
#     print(f"unique_relation_types = {relation_types}")