In [1]:
import os

In [2]:
os.chdir("../")
%pwd

'f:\\ProjectAI\\ChatSystem'

In [3]:
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["GEMINI_API_KEY"] = os.getenv("GEMINI_API_KEY")

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class RetrievalConfig:
    top_k: int 
    model_name: str
    embeddings_file: str

In [6]:
from ChatBoxSystem.constants import *
from ChatBoxSystem.utils.helper import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath: Path = CONFIG_FILE_PATH,
        params_filepath: Path = PARAMS_FILE_PATH,
    ):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])

    def get_retrieval_config(self) -> RetrievalConfig:
        retrieval_config = self.config.retrieval
        retrieval_config = RetrievalConfig(
            top_k=retrieval_config.top_k,
            model_name=self.config.embeddings.model_name,
            embeddings_file=self.config.embeddings.embeddings_file
        )
        return retrieval_config

In [8]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from sklearn.preprocessing import normalize

  from .autonotebook import tqdm as notebook_tqdm


[2025-10-02 10:44:52,879: INFO: loader]: Loading faiss with AVX512 support.
[2025-10-02 10:44:52,879: INFO: loader]: Could not load library with AVX512 support due to:
ModuleNotFoundError("No module named 'faiss.swigfaiss_avx512'")
[2025-10-02 10:44:52,879: INFO: loader]: Loading faiss with AVX2 support.
[2025-10-02 10:44:52,992: INFO: loader]: Successfully loaded faiss with AVX2 support.


In [9]:
class Retrieval:
    def __init__(self, config: RetrievalConfig):
        self.config = config
        self.model = SentenceTransformer(self.config.model_name)
        self.index = faiss.read_index(self.config.embeddings_file)
        embeddings_file = Path(self.config.embeddings_file)
        self.texts = np.load(embeddings_file.with_suffix('.npy'), allow_pickle=True)

    def search(self, query: str):
        # Implement search logic here
        query_emb = self.model.encode([query], convert_to_numpy=True)
        query_emb = normalize(query_emb)
        distances, indices = self.index.search(query_emb.astype("float32"), self.config.top_k)
        results = [self.texts[i] for i in indices[0]]
        return results, distances

In [10]:
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.indices.postprocessor import SimilarityPostprocessor
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

In [11]:
class Retrieval:
    def __init__(self, config: RetrievalConfig):
        self.config = config
        storage_context = StorageContext.from_defaults(persist_dir="./storage")
        self.index = load_index_from_storage(storage_context)

    def search(self, query: str):
        retrievel = VectorIndexRetriever(index=self.index, similarity_top_k=self.config.top_k)
        # top_nodes = retrievel.retrieve(query)
        print(type(self.index._embed_model))
        print(f"Top-{self.config.top_k} documents for query: '{query}'\n")
        # for i, node in enumerate(top_nodes, 1):
        #     print(f"{i}. {node.node.get_text()}\n")
        # query_engine = RetrieverQueryEngine(retriever=retrievel)
        # response = query_engine.query(query)
        # if hasattr(response, "source_nodes"):
        #     print(f"Top-{self.config.top_k} results:")
        #     for i, node in enumerate(response.source_nodes, 1):
        #         print(f"{i}. {node.node.get_text()}\n")

        return None

In [12]:
from ChatBoxSystem import logger

In [13]:
# query = "Do you like working in teams?"

# try:
#     config = ConfigurationManager()
#     retrieval_config = config.get_retrieval_config()

#     retrieval = Retrieval(config=retrieval_config)
#     results, distances = retrieval.search(query)

#     for i, (res, dist) in enumerate(zip(results, distances[0])):
#         print(f"Result {i+1}: {res} (Distance: {dist})")
#     logger.info("Retrieval completed successfully.")
# except Exception as e:
#     logger.exception(f"Error occurred: {e}")
#     raise

In [14]:
query = "Do you like working in teams?"
try:
    config = ConfigurationManager()
    retrieval_config = config.get_retrieval_config()

    retrieval = Retrieval(config=retrieval_config)
    response = retrieval.search(query)
    print(response)
    logger.info("Retrieval completed successfully.")
except Exception as e:
    logger.exception(f"Error occurred: {e}")
    raise

[2025-10-02 10:44:54,383: INFO: helper]: YAML file config\config.yaml loaded successfully.
[2025-10-02 10:44:54,391: INFO: helper]: YAML file params.yaml loaded successfully.
[2025-10-02 10:44:54,393: INFO: helper]: Directory created at: artifacts
Loading llama_index.core.storage.kvstore.simple_kvstore from ./storage\docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from ./storage\index_store.json.
[2025-10-02 10:44:56,479: INFO: loading]: Loading all indices.
<class 'llama_index.embeddings.openai.base.OpenAIEmbedding'>
Top-3 documents for query: 'Do you like working in teams?'

None
[2025-10-02 10:44:57,064: INFO: 2128669561]: Retrieval completed successfully.
