In [33]:
import os
from dotenv import load_dotenv

load_dotenv()

gemini_key = os.getenv("GEMINI_API_KEY")
if not gemini_key:
    raise ValueError("GEMINI_API_KEY not found in .env")

os.environ["GOOGLE_API_KEY"] = gemini_key
print("Loaded Gemini Key:", gemini_key[:10], "...")

Loaded Gemini Key: AIzaSyA-Mr ...


In [34]:
import google.generativeai as genai
genai.configure(api_key=gemini_key)

In [35]:
import pandas as pd
from sqlalchemy import create_engine, text
from langchain.vectorstores import FAISS
from langchain.embeddings.base import Embeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.chat_models.base import BaseChatModel

ModuleNotFoundError: No module named 'langchain.chains.retrieval_qa.base'

In [None]:
DB_URL = "postgresql+psycopg2://postgres:abc123@localhost:5432/smart_tourism"
engine = create_engine(DB_URL)
with engine.connect() as conn:
    print("Database connection test:", conn.execute(text("SELECT 1")).fetchall())


In [None]:
df_places = pd.read_sql("""
SELECT
    p.id,
    p.name,
    p.type,
    p.province,
    p.description,
    p.best_time_to_visit,
    ARRAY(SELECT activity FROM tourism_activities WHERE tourism_id = p.id) AS activities,
    ARRAY(SELECT tag FROM tourism_tags WHERE tourism_id = p.id) AS tags
FROM tourism_places AS p;
""", engine)

df_foods = pd.read_sql("SELECT * FROM foods;", engine)
df_hotels = pd.read_sql("SELECT * FROM hotels;", engine)

In [None]:
def place_to_text(row):
    return f"""
Địa điểm: {row['name']}
Loại hình: {row['type']}
Tỉnh: {row['province']}
Mô tả: {row['description']}
Hoạt động nổi bật: {', '.join(row['activities'] or [])}
Thời gian lý tưởng: {row['best_time_to_visit']}
Tags: {', '.join(row['tags'] or [])}
"""

places_docs = [place_to_text(r) for _, r in df_places.iterrows()]
foods_docs = [f"Đặc sản tỉnh {r['province']}: {r['name']} — {r['description']}" for _, r in df_foods.iterrows()]
hotels_docs = [f"Khách sạn: {r['name']} tại {r['parent_geo']} (lat={r['latitude']}, long={r['longitude']})" for _, r in df_hotels.iterrows()]

documents = places_docs + foods_docs + hotels_docs

# Split text thành chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
chunks = splitter.split_text("\n\n".join(documents))
print("Total chunks:", len(chunks))

# Tạo Gemini Embeddings
class GeminiEmbeddings(Embeddings):
    def __init__(self, model: str = "text-embedding-004"):
        self.model = model

    def embed_documents(self, texts):
        return [genai.embed_content(model=self.model, content=t)["embedding"] for t in texts]

    def embed_query(self, text):
        return genai.embed_content(model=self.model, content=text)["embedding"]

embedding = GeminiEmbeddings(model="text-embedding-004")

# Tạo FAISS vectorstore
vectorstore = FAISS.from_texts(chunks, embedding)
vectorstore.save_local("smart_tourism_faiss")
vectorstore = FAISS.load_local("smart_tourism_faiss", embeddings=embedding, allow_dangerous_deserialization=True)
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})

In [None]:
class GeminiChatModel(BaseChatModel):
    def __init__(self, model_name="gemini-flash-2.0", temperature=0.2):
        self.model_name = model_name
        self.temperature = temperature

    def _call(self, messages, stop=None):
        prompt = "\n".join([m.content for m in messages])
        response = genai.chat(model=self.model_name, messages=[{"role":"user","content":prompt}])
        return response.last

    @property
    def _llm_type(self):
        return "gemini-chat"

llm = GeminiChatModel(model_name="gemini-flash-2.0", temperature=0.2)

# Tạo RAG chain
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True
)

In [None]:
query = "Tôi muốn đi An Giang thì có điểm du lịch nào đẹp?"
result = rag_chain(query)

print("Câu trả lời:")
print(result["result"])

print("\nNguồn được trích dẫn:")
for doc in result["source_documents"]:
    print("-"*60)
    print(doc.page_content[:400])