In [None]:
!pip install google-generativeai langchain langchain-google-genai faiss-cpu sqlalchemy psycopg2-binary pandas

In [None]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment.
load_dotenv()

gemini_key = os.getenv("GEMINI_API_KEY")

# Fail fast with a clear message instead of a confusing error further down.
if not gemini_key:
    raise ValueError("GEMINI_API_KEY not found in .env")

# Mirror the key under GOOGLE_API_KEY (presumably the variable the Google /
# LangChain clients used later read -- verify against the client docs).
os.environ["GOOGLE_API_KEY"] = gemini_key

# Confirm the key is present without echoing any part of it: cell output is
# stored in the notebook file and would leak the secret when shared/committed.
print("Loaded Gemini Key: ***")

In [None]:
import google.generativeai as genai
from sqlalchemy import create_engine, text
import pandas as pd

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA

In [None]:
# Connection string for the tourism database.
# Prefer DATABASE_URL from the environment / .env (already loaded above) so the
# password does not have to live in the notebook; the literal below is only a
# local-development fallback that keeps the notebook runnable as before.
# NOTE(review): move the credentials into .env and drop the fallback before sharing.
DB_URL = os.getenv(
    "DATABASE_URL",
    "postgresql+psycopg2://postgres:abc123@localhost:5432/smart_tourism",
)
engine = create_engine(DB_URL)

# Smoke test: run a trivial query so connection problems surface here rather
# than in a later cell.
with engine.connect() as conn:
    print(conn.execute(text("SELECT 1")).fetchall())

In [None]:
# One row per tourism place; the two correlated sub-selects collect the
# place's activities and tags into array columns.
query = """
SELECT
    p.id,
    p.name,
    p.type,
    p.province,
    p.description,
    p.best_time_to_visit,
    ARRAY(SELECT activity FROM tourism_activities WHERE tourism_id = p.id) AS activities,
    ARRAY(SELECT tag FROM tourism_tags WHERE tourism_id = p.id) AS tags
FROM tourism_places AS p;
"""

# Run the query through the shared engine and preview the first rows.
df_places = pd.read_sql(sql=query, con=engine)
df_places.head()

In [None]:
# Load the full foods and hotels tables.
df_foods = pd.read_sql("SELECT * FROM foods;", engine)
df_hotels = pd.read_sql("SELECT * FROM hotels;", engine)

# Preview both frames with rich (HTML) rendering. A trailing tuple such as
# `a.head(), b.head()` would be shown as a single plain-text tuple repr.
display(df_foods.head(), df_hotels.head())

In [None]:
def place_to_text(row):
    """Render one tourism-place record as a multi-line Vietnamese text block.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing the keys
            'name', 'type', 'province', 'description', 'activities',
            'best_time_to_visit' and 'tags'. 'activities' and 'tags' are
            joined with ', '; a falsy value (None, empty list) is rendered
            as an empty string.

    Returns:
        The formatted text, beginning and ending with a newline.
    """
    lines = [
        "",  # leading newline once joined
        f"Địa điểm: {row['name']}",
        f"Loại hình: {row['type']}",
        f"Tỉnh: {row['province']}",
        f"Mô tả: {row['description']}",
        f"Hoạt động nổi bật: {', '.join(row['activities'] or [])}",
        f"Thời gian lý tưởng: {row['best_time_to_visit']}",
        f"Tags: {', '.join(row['tags'] or [])}",
        "",  # trailing newline once joined
    ]
    return "\n".join(lines)
# One text document per row of df_places, in row order.
places_docs = list(map(place_to_text, (record for _, record in df_places.iterrows())))

In [None]:
def food_to_text(row):
    """Render one food record as a single-line Vietnamese description.

    Reads the 'province', 'name' and 'description' keys of ``row``.
    """
    return f"Đặc sản tỉnh {row['province']}: {row['name']} — {row['description']}"


# One text document per row of df_foods, in row order.
foods_docs = list(map(food_to_text, (record for _, record in df_foods.iterrows())))

In [None]:
def hotel_to_text(row):
    """Render one hotel record as a single-line Vietnamese description.

    Reads the 'name', 'parent_geo', 'latitude' and 'longitude' keys of ``row``.
    """
    return (
        f"Khách sạn: {row['name']} tại {row['parent_geo']} "
        f"(latitude={row['latitude']}, longitude={row['longitude']})"
    )


# One text document per row of df_hotels, in row order.
hotels_docs = list(map(hotel_to_text, (record for _, record in df_hotels.iterrows())))

In [None]:
# Full corpus: places first, then foods, then hotels.
documents = [*places_docs, *foods_docs, *hotels_docs]
len(documents)

In [None]:
# Chunks of at most 500 characters, with 100 characters of overlap between
# consecutive chunks of the same record.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100
)

# Split each record on its own instead of joining the whole corpus into one
# string first: splitting the joined text lets a single chunk mix fragments of
# unrelated places/foods/hotels, which blurs the embeddings and the retrieved
# context. `chunks` remains a flat list of strings.
chunks = [
    chunk
    for doc in documents
    for chunk in splitter.split_text(doc)
]
len(chunks)

In [None]:
# Embedding model used both to build the index and to embed queries.
# The fully-qualified "models/<name>" id is the documented form; a bare model
# name is rejected by some langchain-google-genai / API versions.
embedding = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004"
)

In [None]:
# Embed every chunk and build the FAISS index from the resulting vectors.
vectorstore = FAISS.from_texts(chunks, embedding)

# Persist the index to a local folder so it can be reloaded later.
vectorstore.save_local(folder_path="smart_tourism_faiss")

In [None]:
# Reload the index saved in the previous cell.
# NOTE(review): allow_dangerous_deserialization=True permits pickle loading,
# which can execute arbitrary code -- only load index folders you created
# yourself, never files from an untrusted source.
vectorstore = FAISS.load_local(
    folder_path="smart_tourism_faiss",
    embeddings=embedding,
    allow_dangerous_deserialization=True,
)

In [None]:
# Similarity search returning the 5 closest chunks per query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs=dict(k=5))

In [None]:
# Chat model that writes the final answer. The Gemini model id is
# "gemini-2.0-flash" ("gemini-flash-2.0" is not a valid id and the API call
# fails with a model-not-found error). Low temperature keeps answers close to
# the retrieved context.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0.2
)

In [None]:
# Retrieval-augmented QA chain: the retrieved chunks are "stuffed" into a
# single prompt for the LLM, and the source chunks are returned alongside the
# answer so they can be inspected.
rag_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
)

In [None]:
query = "Tôi muốn đi An Giang thì có điểm du lịch nào đẹp?"
# Use .invoke() with the chain's "query" input key; calling the chain object
# directly (Chain.__call__) is deprecated in LangChain.
result = rag_chain.invoke({"query": query})

print("Câu trả lời:")
print(result["result"])

# Show the retrieved chunks the answer was based on (truncated to 400 chars).
print("\nNguồn được trích dẫn:")
for doc in result["source_documents"]:
    print("-" * 60)
    print(doc.page_content[:400])