In [3]:
import os

In [4]:
# Show the kernel's current working directory before changing it
# (the recorded output shows the notebook/ folder).
%pwd

'd:\\infosys\\notebook'

In [5]:
os.chdir("../")
%pwd

'd:\\infosys'

In [6]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class faiss_data:
    """Settings for the FAISS vector store used by the hybrid retriever.

    ``ConfigurationManager`` fills these from the ``rag.faiss`` section of the
    YAML config without any conversion, so the ``Path`` annotations may hold
    plain strings at runtime (TODO confirm against ``read_yaml``).
    """
    # File handed to ``faiss.read_index`` when the chain is built.
    index_path: Path
    # JSON file that is opened and parsed into the retriever's metadata list.
    metadata_path: Path
    # Forwarded to the retriever as ``top_k_vector``.
    top_k: int

@dataclass
class llmconfig:
    """Chat-model settings taken from the ``rag.llm`` section of the config."""
    # Provider name from the config; not read by the code visible in this notebook.
    provider: str
    # Passed to ``ChatGroq(model=...)``.
    model: str
    # Passed to ``ChatGroq(temperature=...)``.
    temperature: float
    # Passed to ``ChatGroq(max_tokens=...)``.
    max_tokens: int

@dataclass
class neo4j_config:
    """Connection settings for Neo4j, taken from the ``rag.neo4j`` config section."""
    # First positional argument to ``GraphDatabase.driver``.
    uri: str
    # ``username`` and ``password`` are passed together as ``auth=(username, password)``.
    username: str
    # NOTE(review): read from the YAML config like every other field — confirm the
    # config file is not committed with a real credential in it.
    password: str

@dataclass
class Ragpipelineconfig:
    """Top-level RAG pipeline configuration built by ``ConfigurationManager``.

    Groups the three component configs together with the input file path.
    """
    # Copied from ``rag.input_json``; not read by the code visible in this notebook.
    input_json: Path
    # Vector-store settings (index file, metadata file, top-k).
    faiss: faiss_data
    # Graph database connection settings.
    neo4j: neo4j_config
    # Chat-model settings.
    llm: llmconfig

In [7]:
from src.knowledge_graph.utils.common import read_yaml
from src.knowledge_graph.constants import *

In [8]:
class ConfigurationManager:
    """Reads the YAML configuration once and builds typed config objects from it."""

    def __init__(self, config_path=CONFIG_FILE_PATH):
        """Parse ``config_path`` with ``read_yaml`` and keep the result on ``self.config``."""
        self.config = read_yaml(config_path)

    def get_rag_pipeline_config(self) -> Ragpipelineconfig:
        """Assemble a ``Ragpipelineconfig`` from the ``rag`` section of the config.

        Values are forwarded unchanged; the nested ``faiss``, ``neo4j`` and
        ``llm`` sub-sections are wrapped in their respective dataclasses.
        """
        rag = self.config.rag
        input_json = rag.input_json

        faiss_section = rag.faiss
        faiss_settings = faiss_data(
            index_path=faiss_section.index_path,
            metadata_path=faiss_section.metadata_path,
            top_k=faiss_section.top_k,
        )

        neo4j_section = rag.neo4j
        neo4j_settings = neo4j_config(
            uri=neo4j_section.uri,
            username=neo4j_section.username,
            password=neo4j_section.password,
        )

        llm_section = rag.llm
        llm_settings = llmconfig(
            provider=llm_section.provider,
            model=llm_section.model,
            temperature=llm_section.temperature,
            max_tokens=llm_section.max_tokens,
        )

        return Ragpipelineconfig(
            input_json=input_json,
            faiss=faiss_settings,
            neo4j=neo4j_settings,
            llm=llm_settings,
        )

In [None]:
import json
import faiss
from langchain_core.retrievers import BaseRetriever
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from neo4j import GraphDatabase
from sentence_transformers import SentenceTransformer

from langchain_groq import ChatGroq

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from typing import List, Any
import spacy
from dotenv import load_dotenv
# python-dotenv: presumably loads a local .env file into the process environment
# so that os.getenv("GROQ_API_KEY") in get_rag_chain() can find the key —
# verify a .env file exists at the project root.
load_dotenv()
class HybridRetriever(BaseRetriever):
    """Retriever that combines FAISS vector hits with Neo4j graph facts.

    For each query it returns, in order:

    1. one ``Document`` per valid nearest-neighbour hit in ``vector_index``
       (metadata ``type="vector"``), then
    2. one ``Document`` per relationship row returned by Neo4j for every named
       entity that ``nlp`` finds in the query (metadata ``type="graph"``).

    Graph lookup is best-effort: any exception raised while querying Neo4j is
    printed and the documents collected so far are still returned.
    """
    # Index object exposing ``search(vectors, k) -> (distances, indices)``.
    vector_index: Any
    # Per-vector metadata; position ``i`` describes the vector with index ``i``.
    # The keys read here are ``source_name`` and ``text``.
    vector_metadata: List[dict]
    # Object exposing ``encode(list_of_str)``; its output is fed to ``vector_index.search``.
    embedder: Any
    # Neo4j driver; only ``graph.session()`` is used.
    graph: Any
    # Callable returning an object with ``.ents`` (a spaCy pipeline in this notebook).
    nlp: Any
    # Number of neighbours requested from the vector index.
    top_k_vector: int = 5
    # ``LIMIT`` applied to the graph query, per entity.
    top_k_graph: int = 5

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Return vector-search documents followed by graph-fact documents.

        Args:
            query: The user question.
            run_manager: Callback manager supplied by LangChain (unused here).

        Returns:
            Vector hits first, then graph facts; either part may be empty.
        """
        docs = self._vector_documents(query)
        docs.extend(self._graph_documents(query))
        return docs

    def _vector_documents(self, query: str) -> List[Document]:
        """Embed ``query`` and return one Document per valid nearest-neighbour hit."""
        query_vector = self.embedder.encode([query])
        _distances, indices = self.vector_index.search(query_vector, self.top_k_vector)

        docs = []
        for idx in indices[0]:
            # FAISS fills unused result slots with the label -1 when it finds
            # fewer than k neighbours. A bare `idx < len(...)` check lets -1
            # through, and Python then indexes the LAST metadata entry, which
            # yields a bogus "hit". Require a non-negative, in-range index.
            if not 0 <= idx < len(self.vector_metadata):
                continue
            meta = self.vector_metadata[idx]
            content = f"[Source: {meta.get('source_name', 'Unknown')}] {meta.get('text', '')}"
            docs.append(Document(
                page_content=content,
                metadata={"type": "vector", "source": meta.get('source_name')}
            ))
        return docs

    def _graph_documents(self, query: str) -> List[Document]:
        """Return one Document per relationship found for each entity in ``query``."""
        entities = [ent.text for ent in self.nlp(query).ents]

        docs = []
        if entities:
            try:
                # One session for all entities; closed by the context manager.
                with self.graph.session() as session:
                    for entity in entities:
                        # Case-insensitive substring match on the entity name.
                        # NOTE(review): the pattern is undirected (`-[r]-`), so the
                        # `-->` arrow in the rendered fact does not necessarily
                        # reflect the stored relationship direction.
                        cypher = """
                        MATCH (n:Entity)-[r]-(m:Entity)
                        WHERE toLower(n.name) CONTAINS toLower($name)
                        RETURN n.name, type(r) AS rel, m.name
                        LIMIT $limit
                        """
                        # Query parameters (not string formatting) keep the entity
                        # text out of the Cypher source.
                        result = session.run(cypher, name=entity, limit=self.top_k_graph)

                        for record in result:
                            fact = f"{record['n.name']} --[{record['rel']}]--> {record['m.name']}"
                            docs.append(Document(
                                page_content=fact,
                                metadata={"type": "graph", "entity": entity}
                            ))
            except Exception as e:
                # Deliberately best-effort: a graph outage must not prevent the
                # vector results from being returned.
                print(f"Graph query error: {e}")

        return docs

def get_rag_chain(top_k_graph: int = 5):
    """Build the hybrid (FAISS + Neo4j) retrieval-augmented generation chain.

    Loads the pipeline config, the sentence-embedding model, the FAISS index
    with its JSON metadata, a Neo4j driver and a spaCy pipeline, wires them
    into a ``HybridRetriever`` and composes retriever -> prompt -> Groq chat
    model -> string output.

    Args:
        top_k_graph: Maximum number of graph relationships fetched per entity.
            Defaults to 5, the value that was previously hard-coded.

    Returns:
        A runnable chain; ``chain.invoke(question_str)`` returns the answer text.

    Note:
        Every call reloads all resources and opens a new Neo4j driver that is
        never closed here, because the returned chain needs it for its whole
        lifetime. Build the chain once and reuse it.
    """
    # 1. Load Config
    config = ConfigurationManager().get_rag_pipeline_config()

    # 2. Load Resources (Embeddings, FAISS, Graph)
    # SentenceTransformer runs locally; the model name is hard-coded here and is
    # expected to be the one the FAISS index was built with.
    embedder = SentenceTransformer('all-MiniLM-L6-v2')

    # str() is a no-op for string paths and keeps the call working if the
    # config ever supplies a pathlib.Path, as the dataclass annotation declares.
    index = faiss.read_index(str(config.faiss.index_path))
    # Explicit UTF-8: the platform default encoding is locale-dependent and can
    # mis-decode non-ASCII text in the metadata file.
    with open(config.faiss.metadata_path, "r", encoding="utf-8") as f:
        metadata = json.load(f)

    graph = GraphDatabase.driver(
        config.neo4j.uri,
        auth=(config.neo4j.username, config.neo4j.password)
    )

    nlp = spacy.load("en_core_web_sm")

    # 3. Initialize Retriever
    retriever = HybridRetriever(
        vector_index=index,
        vector_metadata=metadata,
        embedder=embedder,
        graph=graph,
        nlp=nlp,
        top_k_vector=config.faiss.top_k,
        top_k_graph=top_k_graph
    )

    # 4. Chat model
    llm = ChatGroq(
        model=config.llm.model,
        groq_api_key=os.getenv("GROQ_API_KEY"),
        temperature=config.llm.temperature,
        max_tokens=config.llm.max_tokens
        )

    # 5. Prompt & Chain
    template = """You are a helpful assistant. You firstly greet the user with HI .Then, if you get any context
    use that to provide answer to the question with full confidence. If you do not have any context,provide an overview of the 
    question based on your general knowledge and end with "As an AI language model, I do not have the information in my Database".
    context given below  
    
    Context:
    {context}
    
    Question: {question}
    """
    prompt = ChatPromptTemplate.from_template(template)

    def _format_docs(docs):
        # Join the retrieved page contents into plain text. Without this the
        # prompt receives the Python repr of a list of Document objects, and
        # an empty result shows up as "[]" instead of an empty context.
        return "\n\n".join(doc.page_content for doc in docs)

    chain = (
        {"context": retriever | _format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    return chain

In [12]:
# 1. The question to ask; passed to the chain as a plain string.
question = "What is Shopcart?"

# 2. Build the chain and run it.
# NOTE: get_rag_chain() reloads the embedding model, FAISS index and spaCy
# pipeline and opens a new Neo4j driver on every call — build the chain once
# and reuse `chain` when asking several questions.
chain = get_rag_chain()
response = chain.invoke(question)

# 3. Show the model's answer (a string produced by the chain's output parser).
print(f"Answer: {response}")

[2026-01-31 01:06:06,225: INFO: common: YAML file: config\config.yaml loaded successfully]
[2026-01-31 01:06:06,333: INFO: SentenceTransformer: Use pytorch device_name: cpu]
[2026-01-31 01:06:06,335: INFO: SentenceTransformer: Load pretrained SentenceTransformer: all-MiniLM-L6-v2]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[2026-01-31 01:06:28,757: INFO: _client: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"]


Answer: HI and how can I help you today?

Based on the context you provided, it appears that ShopCart is a company that sends emails to its customers or teams. The emails are from different teams within ShopCart, such as Support, Orders, Finance, and Security. This suggests that ShopCart is an e-commerce platform or a company that provides online shopping services.

From the emails, it seems that ShopCart has different teams that handle various aspects of the business, including customer support, orders, finance, and security. This implies that ShopCart is a company that operates online and has a structured organizational setup.

Based on this information, I can confidently say that ShopCart is an e-commerce platform or a company that provides online shopping services.
