In [2]:
import os
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.document_loaders import PyPDFDirectoryLoader
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings


In [4]:
from sentence_transformers import CrossEncoder

cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [3]:
import sys
import logging

In [69]:
# Set up structured logging
logger = logging.getLogger("rag_logger")
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter(
    '{"timestamp": "%(asctime)s", "level": "%(levelname)s", "message": %(message)s}'
)
handler.setFormatter(formatter)
logger.addHandler(handler)

In [49]:
from langchain_community.vectorstores import FAISS

In [50]:
from langchain.chains import create_retrieval_chain

In [66]:

load_dotenv()
## load the GROQ API Key
os.environ['GROQ_API_KEY']=os.getenv("GROQ_API_KEY")
groq_api_key=os.getenv("GROQ_API_KEY")

## If you do not have open AI key use the below Huggingface embedding
os.environ['HF_TOKEN']=os.getenv("HF_TOKEN")


In [63]:
os.environ["LANGCHAIN_API_KEY"]=os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"]="true"
os.environ["LANGCHAIN_PROJECT"]=os.getenv("LANGCHAIN_PROJECT")

In [6]:
embeddings=HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")

  from .autonotebook import tqdm as notebook_tqdm


In [20]:
import chromadb
print(chromadb.__version__)


1.0.15


In [53]:
loader=PyPDFDirectoryLoader("research_papers") ## Data Ingestion step
docs=loader.load() ## Document Loading
text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = FAISS.from_documents(documents=splits, embedding=embeddings)
retriever = vectorstore.as_retriever()

In [54]:
llm=ChatGroq(groq_api_key=groq_api_key,model_name="llama-3.1-8b-instant")

In [None]:
# llm=ChatGroq(groq_api_key=groq_api_key,model_name="Mistral 7B")

# prompt=ChatPromptTemplate.from_template(
#     """
#     Answer the questions based on the provided context only.
#     Please provide the most accurate respone based on the question
#     <context>
#     {context}
#     <context>
#     Question:{input}

#     """

# )

In [55]:
prompt_template="""
    Answer the questions based on the provided context only.
    Please provide the most accurate respone based on the question

    <context>
    {context}
    <context>

    Question:{input}
    """

In [56]:
prompt = ChatPromptTemplate.from_template(prompt_template)

In [57]:
document_chain=create_stuff_documents_chain(llm,prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\n    Answer the questions based on the provided context only.\n    Please provide the most accurate respone based on the question\n\n    <context>\n    {context}\n    <context>\n\n    Question:{input}\n    '), additional_kwargs={})])
| ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000001DBC1013390>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001DBC10139D0>, model_name='llama-3.1-8b-instant', model_kwargs={}, groq_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_docum

In [58]:
retrieval_chain=create_retrieval_chain(retriever,document_chain)
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001DBC1012C10>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\n    Answer the questions based on the provided context only.\n    Please provide the most accurate respone based on the ques

In [67]:
response=retrieval_chain.invoke({"input":"what are transfomers"})
response

{'input': 'what are transfomers',
 'context': [Document(id='3b647930-625c-4e92-9d38-efe707957ae0', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-04-10T21:11:43+00:00', 'author': '', 'keywords': '', 'moddate': '2024-04-10T21:11:43+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'research_papers\\Attention.pdf', 'total_pages': 15, 'page': 7, 'page_label': '8'}, page_content='inference to input length + 50, but terminate early when possible [38].\nTable 2 summarizes our results and compares our translation quality and training costs to other model\narchitectures from the literature. We estimate the number of floating point operations used to train a\nmodel by multiplying the training time, the number of GPUs used, and an estimate of the sustained\nsingle-precision floating-point capacity of each GPU 5.\n6.2 Model 

In [27]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List, Any, Dict

In [28]:
app = FastAPI()

# ------------------------------
# Request & Response Models
# ------------------------------

class QueryRequest(BaseModel):
    query: str

class ContextDoc(BaseModel):
    id: str
    score: float
    metadata: Dict[str, Any]
    summary: str

class QueryResponse(BaseModel):
    query: str
    answer: str
    embedding_model: str
    prompt_used: str
    context_documents: List[ContextDoc]

# ------------------------------
# Define endpoint
# ------------------------------

@app.post("/query", response_model=QueryResponse)
def query_endpoint(request: QueryRequest):

    try:
        # Run your retrieval chain
        rag_response = retrieval_chain.invoke({"input": request.query})
        answer_text = rag_response.get("answer", "")

        # Retrieve similarity scores
        retriever_results = retriever.vectorstore.similarity_search_with_score(
            request.query,
            k=4
        )

        context_documents = []
        for doc, score in retriever_results:
            context_documents.append(
                ContextDoc(
                    id=doc.metadata.get("id", ""),
                    score=float(score),
                    metadata=doc.metadata,
                    summary=doc.page_content[:300] + "..."  # simple snippet as summary
                )
            )

        return QueryResponse(
            query=request.query,
            answer=answer_text,
            embedding_model="BAAI/bge-base-en-v1.5",
            prompt_used=prompt_template.strip(),
            context_documents=context_documents
        )

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))