In [None]:
from llama_index.core.program import LLMTextCompletionProgram
from llama_index.core.output_parsers import PydanticOutputParser
from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters
from pydantic import BaseModel, Field
from typing import List
from llama_index.llms.google_genai import GoogleGenAI
import logging
from llama_index.core.node_parser import (
    SemanticSplitterNodeParser,
    HierarchicalNodeParser
)
from llama_index.core import (
    Settings,
    VectorStoreIndex,
    StorageContext,
    SimpleDirectoryReader, load_index_from_storage,
)
from llama_index.core.node_parser import get_leaf_nodes, get_root_nodes
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core import StorageContext
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.embeddings.bedrock import BedrockEmbedding
from llama_index.llms.bedrock import Bedrock
from llama_index.core import Settings

from llama_index.core import Document
from llama_index.core.extractors import (
    SummaryExtractor,
    QuestionsAnsweredExtractor,
    TitleExtractor,
    KeywordExtractor,
    BaseExtractor,
)
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.question_gen import LLMQuestionGenerator
from llama_index.core.question_gen.prompts import (
    DEFAULT_SUB_QUESTION_PROMPT_TMPL,
)
from llama_index.core.retrievers import AutoMergingRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from qdrant_client import QdrantClient
from llama_index.vector_stores.qdrant import QdrantVectorStore

from llama_index.core.ingestion import IngestionPipeline
from dotenv import load_dotenv

import json
import os
import boto3
import docx, jaconv, re

In [30]:
load_dotenv()

llm = GoogleGenAI(
    model_name="gemini-2.0-flash",
    temperature=0.1,
)

embed_model = BedrockEmbedding(
    model_name="amazon.titan-embed-text-v2:0",
    region_name=os.getenv("AWS_REGION"),
)

Settings.llm = llm
Settings.embed_model = embed_model

INFO:httpx:HTTP Request: GET https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash "HTTP/1.1 200 OK"
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials


In [None]:
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Assume you have a function to get valid filters from your DB
def get_valid_filters_from_db():
    # In a real application, this would query your PostgreSQL database
    # SELECT DISTINCT "Category" FROM ...; SELECT DISTINCT "Keywords" FROM ...;
    return {
        "categories": ["Customer Value Analysis", "Loyalty & Retention", "Case Study", "Branding & Positioning"],
        "keywords": ["Customer Lifetime Value (CLV)", "Churn Rate Analysis", "Marketing ROI", "Decile Analysis"]
    }

# Define the Pydantic model for the structured output you want from the LLM
class QueryFilters(BaseModel):
    """Data model for query filters."""
    categories: List[str] = Field(description="List of categories to filter on.")
    keywords: List[str] = Field(description="List of keywords to filter on.")

# --- This function replaces the need for the complex prompt in _create_custom_auto_retriever ---
def generate_filters_from_query(query_str: str, llm):
    """
    Uses an LLM to extract relevant categories and keywords from a user query.
    """
    logger.debug(f"Generating filters for user query: {query_str}")
    
    # Get the latest valid filters from the database
    valid_filters = get_valid_filters_from_db()
    
    # Create a new, simpler prompt focused only on extracting filters
    prompt_template_str = f"""
    Based on the user's query, identify the most relevant categories and keywords to use for filtering documents.
    
    Respond ONLY with a JSON object.
    
    Available Categories: {valid_filters['categories']}
    Available Keywords: {valid_filters['keywords']}
    
    User Query: "{query_str}"
    
    JSON Output:
    """
    
    # Define a program to get structured output from the LLM
    program = LLMTextCompletionProgram.from_defaults(
        output_parser=PydanticOutputParser(output_cls=QueryFilters),
        prompt_template_str=prompt_template_str,
        llm=llm,
        verbose=True,
    )
    
    # Run the program to get the structured filter object
    filter_object = program()
    
    logger.info(f"LLM generated filters: Categories={filter_object.categories}, Keywords={filter_object.keywords}")
    return filter_object

def create_query_engine(self, persist_dir: str = "./storage_testing", similarity_top_k: int = 30):
    """
    Creates a Query Engine that first generates filters and then retrieves data.
    """
    logger.debug("Creating dynamic query engine...")
    
    if not self.index:
        self.build_automerging_index(persist_dir)

    # Let's imagine a user asks a question
    user_query = "How can I analyze Marketing ROI for customer retention campaigns?"

    # 1. Generate filters using the LLM
    # You would need to pass your LLM instance here
    generated_filters = generate_filters_from_query(user_query, self.llm) 

    # 2. Construct the metadata filters dynamically
    dynamic_filters = []
    if generated_filters.categories:
        cat_filters = [ExactMatchFilter(key="Category", value=c) for c in generated_filters.categories]
        dynamic_filters.extend(cat_filters)
    
    if generated_filters.keywords:
        kw_filters = [ExactMatchFilter(key="Keywords", value=k) for k in generated_filters.keywords]
        dynamic_filters.extend(kw_filters)

    # 3. Create the base retriever with the DYNAMIC filters
    base_retriever = self.index.as_retriever(
        similarity_top_k=similarity_top_k,
        filters=MetadataFilters(filters=dynamic_filters)
    )

    # 4. Create the AutoMerging Retriever as before
    retriever = AutoMergingRetriever(base_retriever, 
                                     self.storage_context,
                                     verbose=True)
    
    # 5. Create the final Query Engine
    query_engine = RetrieverQueryEngine.from_args(
        retriever=retriever,
        response_mode=ResponseMode.TREE_SUMMARIZE,
        callback_manager=CallbackManager([LlamaDebugHandler(print_trace_on_end=True)])
    )
    
    logger.debug("Dynamic query engine is ready!")


    return query_engine

In [16]:
a = generate_filters_from_query(query_str="Give me some case study on marketing ROI and decile analysis", llm=llm)

INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC remote call 1 is done.
INFO:__main__:LLM generated filters: Categories=['Case Study'], Keywords=['Marketing ROI', 'Decile Analysis']


In [None]:
import os
import logging
from pathlib import Path
from dotenv import load_dotenv
from typing import List, Optional

from llama_index.core import (
    Settings,
    VectorStoreIndex,
    StorageContext,
    SimpleDirectoryReader, 
    load_index_from_storage,
    PromptTemplate
)
from llama_index.core.program import LLMTextCompletionProgram
from llama_index.core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from llama_index.core.node_parser import HierarchicalNodeParser, get_leaf_nodes
from llama_index.core.response_synthesizers import ResponseMode
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.callbacks import LlamaDebugHandler, CallbackManager
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.embeddings.bedrock import BedrockEmbedding
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core.retrievers import AutoMergingRetriever
from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters

# Assuming 'caller' module and 'MarketingDocs' class exist
# from caller import MarketingDocs 

import qdrant_client

# Load environment variables
load_dotenv()

# Thiết lập logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- Định nghĩa các model và hàm hỗ trợ ---

class QueryFilters(BaseModel):
    """Data model for query filters."""
    categories: List[str] = Field(description="List of categories to filter on.")
    keywords: List[str] = Field(description="List of keywords to filter on.")

def get_valid_filters_from_db():
    # Placeholder: Replace with actual DB query in a real application
    return {
        "categories": ["Customer Value Analysis", "Loyalty & Retention", "Case Study", "Branding & Positioning"],
        "keywords": ["Customer Lifetime Value (CLV)", "Churn Rate Analysis", "Marketing ROI", "Decile Analysis"]
    }

def generate_filters_for_query(query_str: str, llm: any) -> QueryFilters:
    """Uses an LLM to extract relevant categories and keywords from a user query."""
    logger.debug(f"Generating filters for query: {query_str}")
    valid_filters = get_valid_filters_from_db()
    
    prompt = PromptTemplate(
        "Based on the user's query, identify relevant filters from the available options.\n"
        "Respond ONLY with a JSON object. If no filters are relevant, return empty lists.\n\n"
        "Available Categories: {categories}\n"
        "Available Keywords: {keywords}\n\n"
        "User Query: \"{query_str}\""
    )
    
    program = LLMTextCompletionProgram.from_defaults(
        output_parser=PydanticOutputParser(output_cls=QueryFilters),
        prompt=prompt.partial_format(
            categories=valid_filters['categories'],
            keywords=valid_filters['keywords']
        ),
        llm=llm,
    )
    
    # Pass the query_str to the program
    filter_object = program(query_str=query_str)
    logger.info(f"LLM generated filters: {filter_object.dict()}")
    return filter_object

# --- Lớp Wrapper cho Dynamic Query Engine ---

class DynamicQueryWrapper:
    def __init__(self, index, storage_context, llm, similarity_top_k=30):
        self.index = index
        self.storage_context = storage_context
        self.llm = llm
        self.similarity_top_k = similarity_top_k
        self.callback_manager = CallbackManager([LlamaDebugHandler(print_trace_on_end=True)])
        logger.info("DynamicQueryWrapper initialized.")

    def query(self, query_str: str):
        """
        Executes the dynamic retrieval process for the given query.
        """
        logger.info(f"Executing dynamic query: '{query_str}'")
        
        # 1. Generate filters dynamically
        filters = generate_filters_for_query(query_str, self.llm)
        
        # 2. Construct metadata filters list
        metadata_filters_list = []
        if filters.categories:
            metadata_filters_list.extend([ExactMatchFilter(key="Category", value=c) for c in filters.categories])
        if filters.keywords:
            metadata_filters_list.extend([ExactMatchFilter(key="Keywords", value=k) for k in filters.keywords])
        
        # 3. Create the base retriever with DYNAMIC filters
        base_retriever = self.index.as_retriever(
            similarity_top_k=self.similarity_top_k,
            filters=MetadataFilters(filters=metadata_filters_list)
        )
        
        # 4. Wrap with AutoMergingRetriever
        retriever = AutoMergingRetriever(
            base_retriever, 
            self.storage_context,
            verbose=True
        )

        # 5. Create the query engine instance
        query_engine_instance = RetrieverQueryEngine.from_args(
            retriever=retriever,
            llm=self.llm,
            response_mode=ResponseMode.TREE_SUMMARIZE,
            callback_manager=self.callback_manager
        )

        # 6. Execute and return the query result
        return query_engine_instance.query(query_str)

# --- Lớp NodeStorageHandler ---

class NodeStorageHandler:
    def __init__(self, google_api_key: str = None, 
                 qdrant_url: str = None, qdrant_api_key: str = None, collection_name: str = "sailing"):
        # ... (Khởi tạo LLM, Embedding, và Qdrant Client như trong code của bạn) ...
        self.collection_name = collection_name
        
        if google_api_key:
            os.getenv["GOOGLE_API_KEY"] = google_api_key
        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            logger.warning("Chưa có GOOGLE_API_KEY. Vui lòng thiết lập API key để sử dụng Google Gemini models.")

        self.llm = GoogleGenAI(
            model_name="gemini-2.0-flash",
            temperature=0.1,
        )
        self.embed_model = BedrockEmbedding(
            model_name="amazon.titan-embed-text-v2:0",
            aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
            aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
            region_name=os.getenv("AWS_REGION"),
        )

        Settings.llm = self.llm
        Settings.embed_model = self.embed_model

        qdrant_url = qdrant_url or os.getenv("QDRANT_URL")
        qdrant_api_key = qdrant_api_key or os.getenv("QDRANT_API_KEY")
        self.qdrant_client = self._setup_qdrant_client(qdrant_url, qdrant_api_key)
        
        self._check_connections()
        
        self.index = None
        self.storage_context = None

    def _setup_qdrant_client(self, qdrant_url: Optional[str] = None, qdrant_api_key: Optional[str] = None):
        # (Implementazione esistente)
        logger.info("Thiết lập Qdrant client...")
        if qdrant_url and qdrant_api_key:
            client = qdrant_client.QdrantClient(url=qdrant_url, api_key=qdrant_api_key, timeout=60)
            logger.info("Đã kết nối với Qdrant Cloud")
        else:
            client = qdrant_client.QdrantClient(path="./qdrant_storage")
            logger.info("Đã khởi tạo Qdrant local")
        return client

    def _check_connections(self):
        # (Implementazione esistente)
        logger.info("Kiểm tra kết nối...")
        try:
            collections = self.qdrant_client.get_collections()
            logger.info(f"✅ Qdrant connection OK - {len(collections.collections)} collections")
        except Exception as e:
            logger.error(f"❌ Qdrant connection failed: {e}")

    def build_automerging_index(self, nodes: List, persist_dir: str = "./storage", insert_batch_size: int = 20):
        # (Implementazione esistente)
        if not nodes and self.index is None:
            logger.info("Đang thử tải index hiện có...")
            vector_store = QdrantVectorStore(client=self.qdrant_client, collection_name=self.collection_name)
            try:
                self.storage_context = StorageContext.from_defaults(persist_dir=persist_dir, vector_store=vector_store)
                self.index = load_index_from_storage(self.storage_context)
                logger.info("Đã tải index thành công.")
            except Exception as e:
                logger.warning(f"Không thể tải index từ storage: {e}")
                self.index = None
                self.storage_context = StorageContext.from_defaults(vector_store=vector_store)
            return self.index

        # ... (Phần logic còn lại để thêm nodes và persist) ...

        logger.info("Xây dựng hoặc cập nhật index...")
        vector_store = QdrantVectorStore(client=self.qdrant_client, collection_name=self.collection_name)

        if not self.storage_context:
             if os.path.exists(persist_dir):
                self.storage_context = StorageContext.from_defaults(persist_dir=persist_dir, vector_store=vector_store)
             else:
                self.storage_context = StorageContext.from_defaults(vector_store=vector_store)
        
        if not self.index:
            try:
                self.index = load_index_from_storage(self.storage_context)
            except Exception:
                self.index = None
        
        self.storage_context.docstore.add_documents(nodes)
        leaf_nodes = get_leaf_nodes(nodes)

        if self.index is None:
            self.index = VectorStoreIndex(
                nodes=leaf_nodes, storage_context=self.storage_context, show_progress=True
            )
        else:
            self.index.insert_nodes(leaf_nodes, show_progress=True)
            
        self.storage_context.persist(persist_dir=persist_dir)
        logger.info("Đã xây dựng/cập nhật xong AutoMerging Index.")

    def create_query_engine(self, persist_dir: str = "./storage", similarity_top_k: int = 30):
        """
        Tạo và trả về đối tượng DynamicQueryWrapper, là query engine thực tế.
        """
        logger.info("Tạo query engine...")
        
        # Đảm bảo index và storage_context đã được load/tạo
        if not self.index:
            self.build_automerging_index([], persist_dir=persist_dir) # Pass empty list to load existing if it exists
        
        # Trả về một instance của DynamicQueryWrapper
        query_engine = DynamicQueryWrapper(
            index=self.index,
            storage_context=self.storage_context,
            llm=self.llm,
            similarity_top_k=similarity_top_k
        )
        
        logger.debug("Query engine đã sẵn sàng!")
        return query_engine

    def setup_complete_pipeline(self, persist_dir: str = "./storage", similarity_top_k: int = 30):
        """
        Thiết lập pipeline hoàn chỉnh từ nodes đến query engine
        """
        logger.info("Thiết lập complete pipeline...")
        
        all_nodes = [] 
        
        logger.info("Xây dựng AutoMerging Index...")
        self.build_automerging_index(all_nodes, persist_dir)
        
        logger.info("Tạo query engine...")
        query_engine = self.create_query_engine(persist_dir, similarity_top_k)
        
        logger.info("Complete pipeline đã sẵn sàng!")
        return query_engine

def main():
    """
    Hàm main để testing NodeStorageHandler với MarketingDocs
    """
    try:
        print("=== KHỞI TẠO NODE STORAGE HANDLER ===")
        # Thay đổi thông tin Qdrant nếu cần
        storage_handler = NodeStorageHandler(
            collection_name="sailing",
            # qdrant_url="https://<your-qdrant-url>.cloud.qdrant.io",
            # qdrant_api_key="<your-api-key>"
        )

        print("=== THIẾT LẬP COMPLETE PIPELINE ===")
        # query_engine bây giờ là một instance của DynamicQueryWrapper
        query_engine = storage_handler.setup_complete_pipeline()

        print("\n" + "="*50)
        print("=== CHẾ ĐỘ INTERACTIVE QUERY ===")
        print("Nhập câu hỏi của bạn (gõ 'quit' để thoát):")
        print("="*50)
        
        while True:
            user_query = input("\nCâu hỏi: ").strip()
            
            if user_query.lower() in ['quit', 'exit', 'q', 'thoát']:
                print("\nCảm ơn bạn đã sử dụng tui : D !!")
                break
                
            if not user_query:
                print("Vui lòng nhập câu hỏi!")
                continue
                
            try:
                print("\nĐang tìm kiếm...")
                # Lệnh gọi này sẽ hoạt động vì query_engine là đối tượng DynamicQueryWrapper
                response = query_engine.query(user_query) 
                print(f"\n✅ Trả lời:\n{response}")
                print("-" * 80)
                
            except Exception as e:
                logger.error(f"Lỗi khi xử lý câu hỏi: {e}", exc_info=True)
                print(f"Lỗi khi xử lý câu hỏi: {e}")

    except Exception as e:
        logger.error(f"Lỗi trong main: {e}", exc_info=True)
        print(f"Đã xảy ra lỗi nghiêm trọng: {e}")


if __name__ == "__main__":
    main()

=== KHỞI TẠO NODE STORAGE HANDLER ===


INFO:httpx:HTTP Request: GET https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash "HTTP/1.1 200 OK"
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:__main__:Thiết lập Qdrant client...
INFO:httpx:HTTP Request: GET https://76cc3790-f85c-4c86-9b1e-6e9864e43843.us-east4-0.gcp.cloud.qdrant.io:6333 "HTTP/1.1 200 OK"
INFO:__main__:Đã kết nối với Qdrant Cloud
INFO:__main__:Kiểm tra kết nối...
INFO:httpx:HTTP Request: GET https://76cc3790-f85c-4c86-9b1e-6e9864e43843.us-east4-0.gcp.cloud.qdrant.io:6333/collections "HTTP/1.1 200 OK"
INFO:__main__:✅ Qdrant connection OK - 3 collections
INFO:__main__:Thiết lập complete pipeline...
INFO:__main__:Xây dựng AutoMerging Index...
INFO:__main__:Đang thử tải index hiện có...


=== THIẾT LẬP COMPLETE PIPELINE ===


INFO:httpx:HTTP Request: GET https://76cc3790-f85c-4c86-9b1e-6e9864e43843.us-east4-0.gcp.cloud.qdrant.io:6333/collections/sailing/exists "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://76cc3790-f85c-4c86-9b1e-6e9864e43843.us-east4-0.gcp.cloud.qdrant.io:6333/collections/sailing "HTTP/1.1 200 OK"
INFO:llama_index.core.indices.loading:Loading all indices.
INFO:__main__:Đã tải index thành công.
INFO:__main__:Tạo query engine...
INFO:__main__:Tạo query engine...
INFO:__main__:DynamicQueryWrapper initialized.
INFO:__main__:Complete pipeline đã sẵn sàng!


Loading llama_index.core.storage.kvstore.simple_kvstore from ./storage\docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from ./storage\index_store.json.

=== CHẾ ĐỘ INTERACTIVE QUERY ===
Nhập câu hỏi của bạn (gõ 'quit' để thoát):


INFO:__main__:Executing dynamic query: 'Tell me something about decile analysis'
INFO:google_genai.models:AFC is enabled with max remote calls: 10.



Đang tìm kiếm...


INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC remote call 1 is done.
C:\Users\Admin\AppData\Local\Temp\ipykernel_6632\3121455676.py:78: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  logger.info(f"LLM generated filters: {filter_object.dict()}")
INFO:__main__:LLM generated filters: {'categories': [], 'keywords': ['Decile Analysis']}
INFO:httpx:HTTP Request: POST https://76cc3790-f85c-4c86-9b1e-6e9864e43843.us-east4-0.gcp.cloud.qdrant.io:6333/collections/sailing/points/search "HTTP/1.1 400 Bad Request"


**********
Trace: query
    |_CBEventType.QUERY -> 2.744161 seconds
**********


ERROR:__main__:Lỗi khi xử lý câu hỏi: Unexpected Response: 400 (Bad Request)
Raw response content:
b'{"status":{"error":"Bad request: Index required but not found for \\"Keywords\\" of one of the following types: [keyword]. Help: Create an index for this key or use a different filter."},"time":3.639 ...'
Traceback (most recent call last):
  File "C:\Users\Admin\AppData\Local\Temp\ipykernel_6632\3121455676.py", line 315, in main
    response = query_engine.query(user_query)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Admin\AppData\Local\Temp\ipykernel_6632\3121455676.py", line 130, in query
    return query_engine_instance.query(query_str)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Admin\Desktop\Advanced-RAG\env\Lib\site-packages\llama_index_instrumentation\dispatcher.py", line 317, in wrapper
    result = func(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Admin\Desktop\Advanced-RAG\env\Lib\site-packages\llama_index\core\b

Lỗi khi xử lý câu hỏi: Unexpected Response: 400 (Bad Request)
Raw response content:
b'{"status":{"error":"Bad request: Index required but not found for \\"Keywords\\" of one of the following types: [keyword]. Help: Create an index for this key or use a different filter."},"time":3.639 ...'


KeyboardInterrupt: Interrupted by user