In [1]:
import os

In [2]:
# Move the kernel's working directory one level up, then echo it.
# NOTE(review): the path is relative, so this cell is not idempotent —
# re-running it without restarting the kernel climbs one more directory.
os.chdir("../")
%pwd

'f:\\ProjectAI\\ChatSystem'

In [3]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class RetrievalConfig:
    """Settings bundle for the retrieval step.

    Attributes:
        top_k: Number of neighbours requested from the index per query.
        model_name: Model identifier handed to ``SentenceTransformer``.
        embeddings_file: Path of the FAISS index file; the indexed texts are
            read from the same path with a ``.npy`` suffix.
    """

    top_k: int
    model_name: str
    embeddings_file: str

In [4]:
from ChatBoxSystem.constants import *
from ChatBoxSystem.utils.helper import read_yaml, create_directories

In [5]:
class ConfigurationManager:
    """Reads the project's YAML settings and hands out typed config objects."""

    def __init__(
        self,
        config_filepath: Path = CONFIG_FILE_PATH,
        params_filepath: Path = PARAMS_FILE_PATH,
    ):
        """Load both YAML files and prepare the artifacts root.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # The artifacts root named in the config is passed to
        # create_directories as a single-element list.
        artifact_dirs = [self.config.artifacts_root]
        create_directories(artifact_dirs)

    def get_retrieval_config(self) -> RetrievalConfig:
        """Assemble the retrieval settings from the loaded configuration.

        ``top_k`` is read from the ``retrieval`` section; the model name and
        the embeddings file are read from the ``embeddings`` section.

        Returns:
            A populated ``RetrievalConfig``.
        """
        retrieval_section = self.config.retrieval
        top_k = retrieval_section.top_k

        embeddings_section = self.config.embeddings
        model_name = embeddings_section.model_name
        embeddings_file = embeddings_section.embeddings_file

        return RetrievalConfig(
            top_k=top_k,
            model_name=model_name,
            embeddings_file=embeddings_file,
        )

In [6]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from sklearn.preprocessing import normalize

  from .autonotebook import tqdm as notebook_tqdm


[2025-10-01 23:00:12,003: INFO: loader]: Loading faiss with AVX512 support.
[2025-10-01 23:00:12,003: INFO: loader]: Could not load library with AVX512 support due to:
ModuleNotFoundError("No module named 'faiss.swigfaiss_avx512'")
[2025-10-01 23:00:12,019: INFO: loader]: Loading faiss with AVX2 support.
[2025-10-01 23:00:12,139: INFO: loader]: Successfully loaded faiss with AVX2 support.


In [7]:
class Retrieval:
    """Nearest-neighbour text search backed by a FAISS index.

    The query is embedded with a SentenceTransformer model, looked up in the
    FAISS index, and the returned ids are used as positions into the array of
    texts loaded alongside the index.
    """

    def __init__(self, config: RetrievalConfig):
        """Load the encoder, the FAISS index and the indexed texts.

        Args:
            config: Retrieval settings. ``model_name`` selects the encoder,
                ``embeddings_file`` is the FAISS index path, and ``top_k`` is
                the number of neighbours requested per query.
        """
        self.config = config
        self.model = SentenceTransformer(self.config.model_name)
        self.index = faiss.read_index(self.config.embeddings_file)

        # The texts are stored next to the index: same path, ``.npy`` suffix.
        texts_path = Path(self.config.embeddings_file).with_suffix('.npy')
        # SECURITY: allow_pickle=True lets np.load unpickle arbitrary objects,
        # which can execute code. Only point this at files you produced
        # yourself; never at untrusted input.
        self.texts = np.load(texts_path, allow_pickle=True)

    def search(self, query: str):
        """Return the texts closest to ``query`` and their distances.

        Args:
            query: Free-text query to embed and look up.

        Returns:
            A ``(results, distances)`` pair. ``results`` is a list of the
            matching texts in the order FAISS returned them. ``distances`` is
            a 2-D array with one row (for the single query) whose columns line
            up with ``results``. When the index yields fewer than ``top_k``
            hits, both are shortened accordingly.
        """
        query_emb = self.model.encode([query], convert_to_numpy=True)
        # sklearn's normalize scales each row to unit L2 norm by default.
        query_emb = normalize(query_emb)
        distances, indices = self.index.search(query_emb.astype("float32"), self.config.top_k)

        # FAISS fills missing neighbours with label -1 (e.g. top_k larger than
        # the number of indexed vectors). Indexing texts with -1 would quietly
        # return the *last* text as a fake hit, so drop those slots and trim
        # the distances with the same mask to keep both outputs aligned.
        found = indices[0] >= 0
        results = [self.texts[i] for i in indices[0][found]]
        return results, distances[:, found]

In [8]:
from ChatBoxSystem import logger

In [10]:
# Sample question used to exercise the retrieval pipeline end to end.
query = "Which country do you live in?"

try:
    # Build the typed retrieval settings from the YAML configuration.
    config = ConfigurationManager()
    retrieval_config = config.get_retrieval_config()

    # Constructing Retrieval loads the encoder, the FAISS index and the texts.
    retrieval = Retrieval(config=retrieval_config)
    results, distances = retrieval.search(query)

    # distances is 2-D; row 0 belongs to the single query issued above.
    for i, (res, dist) in enumerate(zip(results, distances[0])):
        print(f"Result {i+1}: {res} (Distance: {dist})")
    logger.info("Retrieval completed successfully.")
except Exception as e:
    # Record the failure, then re-raise so the cell still fails visibly.
    logger.exception(f"Error occurred: {e}")
    raise

[2025-10-01 23:00:48,685: INFO: helper]: YAML file config\config.yaml loaded successfully.
[2025-10-01 23:00:48,685: INFO: helper]: YAML file params.yaml loaded successfully.
[2025-10-01 23:00:48,685: INFO: helper]: Directory created at: artifacts
[2025-10-01 23:00:48,690: INFO: SentenceTransformer]: Use pytorch device_name: cpu
[2025-10-01 23:00:48,690: INFO: SentenceTransformer]: Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Batches: 100%|██████████| 1/1 [00:00<00:00, 64.72it/s]

Result 1: 2.  My country of origin is Vietnam. (Distance: 0.9120471477508545)
Result 2: 4.  Vietnam is my home country. (Distance: 0.9571073651313782)
Result 3: 1.  I reside with my relatives. (Distance: 0.9991469383239746)
[2025-10-01 23:00:52,310: INFO: 2634134274]: Retrieval completed successfully.



