In [2]:
import os
from langchain_community.utilities import SQLDatabase
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage

import os
import glob

from dotenv import load_dotenv
load_dotenv()


# ===== PATHS =====
# Folder scanned by the main loop; each database lives in its own sub-directory.
BASE_DIR = "bird/dev_databases"
# Destination folder for the per-database schema dumps (<name>.txt).
SCHEMA_OUTPUT_DIR = "schemas"
# Single text file that receives every generated description.
DESCRIPTION_OUTPUT_FILE = "descriptions.txt"

# ===== MODEL SETTINGS =====
LLM_MODEL = "gpt-4o-mini"
# Read from the environment; None when OPENAI_API_KEY is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# One formatted entry is appended here for every database that gets described.
all_descriptions = []

# ===== INIT LLM =====
llm = ChatOpenAI(temperature=0.3, api_key=OPENAI_API_KEY, model=LLM_MODEL)

# Make sure the schema output folder exists before anything is written to it.
os.makedirs(SCHEMA_OUTPUT_DIR, exist_ok=True)

# ===== MAIN LOOP =====
# For every database folder under BASE_DIR: dump its schema to a text file and
# ask the LLM for a short description of it.
# os.listdir() returns entries in arbitrary order, so sort them to keep the
# layout of the descriptions file stable from one run to the next.
for schema_dir in sorted(os.listdir(BASE_DIR)):
    schema_path = os.path.join(BASE_DIR, schema_dir)
    if not os.path.isdir(schema_path):
        # Stray files next to the database folders are ignored.
        continue

    # The database file is looked up at <BASE_DIR>/<name>/<name>.sqlite.
    sqlite_file = os.path.join(schema_path, f"{schema_dir}.sqlite")
    if not os.path.exists(sqlite_file):
        print(f"Not found {schema_dir}")
        continue

    # 1. Read the schema from SQLite
    db = SQLDatabase.from_uri(f"sqlite:///{sqlite_file}")
    schema_str = db.get_table_info()

    # 2. Save the schema to a file
    schema_txt_file = os.path.join(SCHEMA_OUTPUT_DIR, f"{schema_dir}.txt")
    with open(schema_txt_file, "w", encoding="utf-8") as f:
        f.write(schema_str)

    # 3. Generate a description with the LLM
    prompt = (
        f"The following is a database schema:\n\n"
        f"{schema_str}\n\n"
        "Please provide a concise 2-3 sentence description about what this database is about."
    )
    # invoke() replaces the deprecated predict(); it returns a message object
    # whose text is carried in .content.
    response = llm.invoke(prompt).content
    all_descriptions.append(f"### {schema_dir} ###\n{response.strip()}\n")

# 4. Write all collected descriptions to a single file, separated by blank lines
combined_descriptions = "\n\n".join(all_descriptions)
with open(DESCRIPTION_OUTPUT_FILE, mode="w", encoding="utf-8") as out_file:
    out_file.write(combined_descriptions)

print("✅ Done. Schema TXT files and descriptions generated.")


  response = llm.predict(prompt)


✅ Done. Schema TXT files and descriptions generated.
