In [1]:
import oracledb
from dotenv import load_dotenv
import os
from db_func import *
from llm import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load connection settings from the local .env file into the process environment.
load_dotenv('.env')

# init_oracle_client() needs the Oracle Instant Client directory. Allow each
# machine to point at its own install via ORACLE_CLIENT_LIB_DIR (which may be
# set in .env); the original hard-coded path remains the default.
oracle_client_lib_dir = os.getenv(
    'ORACLE_CLIENT_LIB_DIR',
    r"C:\instant_client\instantclient_21_19",
)
oracledb.init_oracle_client(lib_dir=oracle_client_lib_dir)

# Credentials and DSN are read from the environment, never from literals.
conn = oracledb.connect(
    user=os.getenv('user'),
    password=os.getenv('password'),
    dsn=os.getenv('ORACLE_DSN')
)
# Cursor on the connection above, kept as a module-level name for later cells.
cur = conn.cursor()

In [3]:
# Natural-language question (Korean) that drives this text-to-SQL run.
korean_text = 'NE 사용 중 MAP < 65가 발생한 환자 ID를 중복 없이 반환해줘.'

# translate() returns three values; the two term collections may be None,
# so normalise them to empty lists before use.
text_en, medical_short_word, medical_real_term = translate(korean_text)
medical_short_word = medical_short_word or []
medical_real_term = medical_real_term or []

# Step 1: clean the abbreviations only when at least one was extracted.
clean_medical_words = (
    return_clean_medical_words(medical_short_word) if medical_short_word else []
)

# Step 2: build the full list of medical words to map.
all_medical_terms = clean_medical_words + medical_real_term

# Step 3: query the medical-word DB mapping only when there is something to map.
if all_medical_terms:
    medical_word_doc, medical_word_meta = medical_word_db_mapping(all_medical_terms)
else:
    medical_word_doc, medical_word_meta = [], []

# Step 4: select tables (intended to work even when medical_word_doc is empty),
# then fetch their details and match columns against the translated text.
selected_tables = return_table_list(text_en, medical_word_doc)
for_query_table_info = return_table_detail_info(selected_tables)
matched_columns = find_match_column(text_en, for_query_table_info)

# Build the intermediate JSON description, render it to SQL, and derive the
# bind-style query with its parameter dict.
sql_json = return_json_for_sqlglot(text_en, matched_columns, medical_word_meta)
sql = return_sql(sql_json)
bind_query, bind_dict = change_bind_query(sql)

Batches: 100%|██████████| 1/1 [00:00<00:00,  2.84it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 70.00it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 77.88it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 58.76it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 60.52it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 62.51it/s]


In [4]:
# Request an explanation for the generated query from the original question and
# the SQL JSON; no execution summary is supplied (None).
llm_natural_answer = generate_medical_sql_explanation_json(
    korean_text, sql_json, execution_summary=None
)

# Bare expression so the notebook displays the returned explanation.
llm_natural_answer

'NE 사용 중 MAP 값이 65 미만인 환자 ID를 중복 없이 선택합니다. 조인을 통해 inputevents와 chartevents 테이블을 연결하고, WHERE 조건으로 MAP 값이 65 미만인 데이터를 필터링합니다.'

In [5]:
# Show which tables extract_tables() detects in the generated SQL.
# NOTE(review): in the recorded run this returned only 'inputevents', although
# the WHERE clause of the same SQL also names chartevents.
extract_tables(sql)

['inputevents']

In [6]:
# Display the generated SQL string for inspection.
sql

'SELECT DISTINCT inputevents.subject_id FROM inputevents WHERE chartevents.value_numeric < 65'

In [7]:
# Run the bind-style query on the open connection; limit=5 is passed to the
# helper (presumably a row cap for the preview — confirm in db_func).
# NOTE(review): the recorded run failed with ORA-00904 because the generated SQL
# filters on chartevents.value_numeric while its FROM clause lists only inputevents.
make_table_from_sql(conn, bind_query, bind_dict, limit=5)

DatabaseError: ORA-00904: "CHARTEVENTS"."VALUE_NUMERIC": invalid identifier

In [None]:
# Disabled (fully commented-out) helper, not executed: it would collect the
# names of mapped medical terms whose table.column and a value id appear in the
# WHERE / JOIN ON conditions of sql_json.
# import re

# def extract_used_medical_terms(sql_json, medical_word_meta):
#     used_terms = []

#     # Merge all WHERE and JOIN ON conditions into a single string
#     conditions = []

#     conditions.extend(sql_json.get("where", []))

#     for join in sql_json.get("join", []):
#         conditions.append(join.get("on", ""))

#     full_condition_text = " ".join(conditions)

#     for meta in medical_word_meta:
#         table = meta.get("table")
#         column = meta.get("column")
#         values = meta.get("values", "")
#         name = meta.get("name")

#         # Normalise values when it arrives as a string such as "[220052, 225312]"
#         value_list = re.findall(r"\d+", values)

#         for v in value_list:
#             pattern = rf"{table}\.{column}.*{v}"
#             if re.search(pattern, full_condition_text):
#                 used_terms.append(name)
#                 break
    
#     return used_terms

# extract_used_medical_terms(sql_json, medical_word_meta)

In [None]:
# Disabled debug prints: extracted terms, mapping result, and selected tables.
# NOTE(review): the second print calls extract_used_medical_terms, which is itself
# commented out in this notebook — re-enable that helper before uncommenting.
# print(f'추출된 의료 용어 : {medical_short_word + medical_real_term}')
# print(f'매핑 결과 : {extract_used_medical_terms(sql_json, medical_word_meta)}')
# print(f'선택된 테이블 : {selected_tables}')