In [13]:
import os
from dotenv import load_dotenv
load_dotenv()

# Key for the NVIDIA-hosted LLM. In this notebook it is only referenced by the
# commented-out ChatNVIDIA cell, so it is allowed to be None.
api_key_llm = os.getenv("NVIDIA_LLM_API_KEY")

# ChatGroq and OpenAIEmbeddings are constructed further down without an explicit
# key argument, so they depend on these variables being present in the environment.
# os.getenv() reads from os.environ, so writing the value back (as this cell used
# to do) changed nothing when the key was set and raised an opaque TypeError when
# it was not, because os.environ rejects None. Report missing keys by name instead.
_missing_api_keys = [
    name for name in ("GROQ_API_KEY", "OPENAI_API_KEY") if not os.getenv(name)
]
if _missing_api_keys:
    print(f"Warning: missing environment variable(s): {', '.join(_missing_api_keys)}")

In [14]:
from langchain_chroma import Chroma
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
from langchain_openai import OpenAIEmbeddings

In [15]:
def create_retriever(
    chroma_dir=None,
    collection_name="chroma_indexes",
    embedding_model_name="text-embedding-3-large",
    k=3,
):
    """Build a similarity retriever over the persisted Chroma index.

    Args:
        chroma_dir: Directory holding the persisted Chroma data. When None,
            defaults to ``<parent of the current working directory>/data/chromadb``.
        collection_name: Name of the Chroma collection to open.
        embedding_model_name: OpenAI embedding model name passed to
            ``OpenAIEmbeddings``.
            NOTE(review): presumably this must match the model used when the
            index was built -- confirm against the indexing code.
        k: Number of documents requested per query.

    Returns:
        The retriever returned by ``Chroma.as_retriever``.

    Raises:
        FileNotFoundError: If ``chroma_dir`` is not an existing directory.
    """
    if chroma_dir is None:
        # The default depends on where the kernel was started: it is resolved
        # from the parent of the current working directory.
        base_dir = os.path.dirname(os.getcwd())
        chroma_dir = os.path.join(base_dir, "data", "chromadb")

    if not os.path.isdir(chroma_dir):
        raise FileNotFoundError(f"Indexes not found: {chroma_dir}")

    # Deliberately no catch-all handler here: printing the error and returning
    # None only postpones the failure to whichever later cell uses the retriever.
    embedding_model = OpenAIEmbeddings(model=embedding_model_name)

    vectorstoredb = Chroma(
        collection_name=collection_name,
        embedding_function=embedding_model,
        persist_directory=chroma_dir,
    )

    return vectorstoredb.as_retriever(
        search_type="similarity",
        search_kwargs={"k": k},
    )

In [16]:
# Build the retriever once; later cells reuse this module-level object
# (history-aware retriever construction and the direct retrieval check).
retriever = create_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000001EEB908C8B0>, search_kwargs={'k': 3})

In [17]:
# Alternative LLM backend (currently disabled): ChatNVIDIA with meta/llama-3.1-70b-instruct.
# from langchain_nvidia_ai_endpoints import ChatNVIDIA
# llm = ChatNVIDIA(
#     model="meta/llama-3.1-70b-instruct",
#     api_key=api_key_llm,
#     temperature=0.2,
#     top_p=0.7,
#     max_tokens=5000,
# )
# llm

In [18]:
from langchain_groq import ChatGroq
# Chat model shared by the question-rewriting step and the answering step below.
# NOTE(review): confirm this model ID is still served by Groq -- TODO verify
# against the provider's current model list.
# NOTE(review): temperature=0.7 looks high for answers that should stay grounded
# in retrieved context -- confirm this is intentional.
llm = ChatGroq(
    model="Llama-3.3-70b-Specdec",
    temperature=0.7,
    max_tokens=None,
    max_retries=2,
    timeout=None
)
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000001EEB908CF40>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001EEB908E680>, model_name='Llama-3.3-70b-Specdec', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [27]:
# Conversation history passed to the chain under the "chat_history" key; starts empty.
chat_history = []

In [20]:
from langchain.chains import create_retrieval_chain, create_history_aware_retriever
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder

# System instruction for the rewriting step: turn the latest user question into a
# standalone question using the chat history, without answering it.
contextualize_q_system_prompt = """
    Given a chat history and the latest user question
    which might refer context in the chat history,
    formulate a standalone question which can be understood
    without the chat history. Do NOT answer the question,
    just reformulate it if needed otherwise return it as it is.
"""

# Message layout: system instruction, then the prior turns, then the new user input.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ('system', contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ('user', '{input}')
    ]
)

# Wrap the plain retriever with the rewriting prompt and `llm`. Per the repr shown
# in the cell output, an empty/missing chat_history sends the raw input straight
# to the retriever; otherwise the prompt + llm branch is used.
history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_prompt)
history_aware_retriever

RunnableBinding(bound=RunnableBranch(branches=[(RunnableLambda(lambda x: not x.get('chat_history', False)), RunnableLambda(lambda x: x['input'])
| VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000001EEB908C8B0>, search_kwargs={'k': 3}))], default=ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessa

In [21]:
# System instruction for the answering step. {context} is the slot where the
# retrieved document content is inserted.
system_prompt = """
    You are a powerful assistant for question-answering tasks.
    Use the following pieces of retrieved context to answer
    the question in full detail. If you don't know the answer, say that you
    don't know, but do not ask the user for more information. Use only the content 
    from the uploaded documents to answer this question. Provide a detailed
    explanation and a comprehensive response unless explicitly stated otherwise.
    \n\n
    {context}
"""

# Message layout: system instruction (with context), prior turns, then the user input.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ('system', system_prompt),
        MessagesPlaceholder("chat_history"),
        ('user', '{input}')
    ]
)

from langchain.chains.combine_documents import create_stuff_documents_chain
# Answering chain: feeds the retrieved documents and the question to `llm` via qa_prompt.
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
# Full pipeline: history-aware retrieval followed by the answering chain.
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
rag_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableBranch(branches=[(RunnableLambda(lambda x: not x.get('chat_history', False)), RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000001EEB908C8B0>, search_kwargs={'k': 3}))], default=ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.A

In [28]:
# Example question used to exercise the full RAG chain.
user_question = "What is sqa?"

# Run retrieval + answering. The returned dict echoes "input" and "chat_history"
# and adds the retrieved "context" documents and the generated "answer".
response = rag_chain.invoke({
    "input": user_question,
    "chat_history": chat_history
})

# NOTE(review): chat_history is not updated with this exchange in this cell, so a
# follow-up question would still see an empty history unless another cell appends
# to it -- confirm whether that is intended.
response

Number of requested results 3 is greater than number of elements in index 1, updating n_results = 1


{'input': 'What is sqa?',
 'chat_history': [],
 'context': [Document(id='65689b25-2a99-4ab0-a2f4-aa4671da33d0', metadata={'page': 0, 'source': 'C:\\Users\\Paras Dhoon\\Desktop\\Placement 2025\\RAG Document Q&A\\data\\raw\\SQA Vs Software Testing.pdf'}, page_content='DIFFERENCE BETWEEN SOFTWARE  \nQUALITY ASSURANCE  \n&  \nSOFTWARE TESTING\nDr Lokesh Sharma')],
 'answer': 'Software Quality Assurance (SQA) refers to the systematic process of evaluating and improving the quality of software products and processes. It involves a set of activities, methods, and techniques aimed at ensuring that software development and maintenance processes are carried out in a way that meets the required standards, regulations, and customer expectations.\n\nThe primary goal of SQA is to provide confidence that the software product or system meets the specified requirements, is reliable, stable, and performs as expected. SQA involves a proactive approach to identifying and mitigating potential quality risks

In [23]:
# Quick sanity check that the chat model responds, independent of retrieval.
llm.invoke("2+1")

AIMessage(content='2 + 1 = 3', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 8, 'prompt_tokens': 37, 'total_tokens': 45, 'completion_time': 0.002648286, 'prompt_time': 0.004367593, 'queue_time': 0.053328486999999994, 'total_time': 0.007015879}, 'model_name': 'Llama-3.3-70b-Specdec', 'system_fingerprint': 'fp_74379b522c', 'finish_reason': 'stop', 'logprobs': None}, id='run-d499017b-6c7a-4a69-aae5-ff902c957aef-0', usage_metadata={'input_tokens': 37, 'output_tokens': 8, 'total_tokens': 45})

In [24]:
# Query the retriever directly to inspect which documents it returns for a query.
retriever.invoke("Gradient Ascent")

Number of requested results 3 is greater than number of elements in index 1, updating n_results = 1


[Document(id='65689b25-2a99-4ab0-a2f4-aa4671da33d0', metadata={'page': 0, 'source': 'C:\\Users\\Paras Dhoon\\Desktop\\Placement 2025\\RAG Document Q&A\\data\\raw\\SQA Vs Software Testing.pdf'}, page_content='DIFFERENCE BETWEEN SOFTWARE  \nQUALITY ASSURANCE  \n&  \nSOFTWARE TESTING\nDr Lokesh Sharma')]

In [2]:
import multiprocessing
# NOTE: cpu_count() reports the CPUs in the system; that is not necessarily the
# number of physical cores, nor the number of CPUs this process is allowed to use.
print("Max concurrency:", multiprocessing.cpu_count())  # Number of CPUs reported by the OS

Max concurrency: 20
