In [15]:
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents.stuff import create_stuff_documents_chain
import os
from langchain_groq import ChatGroq
from langchain.prompts import MessagesPlaceholder
from dotenv import load_dotenv
from langchain_core.output_parsers.string import StrOutputParser

# Load environment variables (python-dotenv reads them from a local .env file)
load_dotenv()

# os.environ only accepts str values, so assigning os.getenv(...) directly
# raises TypeError when HF_TOKEN is not configured. Re-export the token only
# when it is actually present.
hf_token = os.getenv('HF_TOKEN')
if hf_token is not None:
    os.environ['HF_TOKEN'] = hf_token

# Set the LANGCHAIN_TRACING_V2 flag for this kernel session
os.environ['LANGCHAIN_TRACING_V2'] = 'true'

# Chat model shared by the chains in this notebook.
# The Groq API key is looked up in the GROQ_API_KEY environment variable.
model = ChatGroq(
    model="llama-3.1-8b-instant",
    streaming=False,
    api_key=os.environ.get("GROQ_API_KEY"),
)

# Prompt layout: a fixed system instruction, a human turn that carries the
# documents through the {context} variable, then the caller-supplied chat
# history under the "messages" key.
prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        "You are a helpful assistant developed by Mitesh. Use the provided context to answer accurately.",
    ),
    ("human", "Context: {context}"),
    MessagesPlaceholder(variable_name="messages"),
])

# Parser handed to the chain so that invoking it yields the parsed model output
output_parser = StrOutputParser()

# Combine-documents chain wiring together the model, the prompt and the parser
chain = create_stuff_documents_chain(model, prompt, output_parser=output_parser)

# Two tiny in-memory documents used as a smoke test for the chain
docs = [
    Document(page_content=text)
    for text in (
        "Jesse loves red but not yellow",
        "Jamal loves green but not as much as he loves orange",
    )
]

# Question asked about the example documents
query = "What is the document about?"

# Run the chain: documents go in under "context", the question as a human message
response = chain.invoke(
    {
        "context": docs,
        "messages": [{"role": "human", "content": query}],
    }
)

# Show the answer produced by the chain
print(response)


It appears that the document is about the preferences of two individuals, Jesse and Jamal, when it comes to certain colors.


In [16]:
# NOTE(review): PyMuPDFLoader, RecursiveCharacterTextSplitter,
# HuggingFaceEmbeddings and FAISS are not imported in the cells visible here —
# confirm an earlier cell brings them into scope.

# Load the PDF into LangChain documents
loader = PyMuPDFLoader("Research_paper.pdf")
documents = loader.load()

# Split the loaded documents into chunks (size 500, overlap 50).
# NOTE(review): this rebinds `docs`, replacing the example list built in the
# earlier cell — confirm nothing later relies on the example documents.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
docs = text_splitter.split_documents(documents)

# Embed the chunks, index them in FAISS, and expose the index as a retriever
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
db = FAISS.from_documents(documents=docs, embedding=embeddings)
retriever = db.as_retriever()


In [18]:
query = "What is the document about?"

# Fetch the chunks most relevant to the query. Retrievers are Runnables, so
# `invoke` is the supported entry point; `get_relevant_documents` is deprecated
# in langchain-core. This also matches the `chain.invoke` call below.
context = retriever.invoke(query)

# Invoke the chain with the retrieved documents as "context" and the question
# as a single human message
response = chain.invoke({
    "context": context,
    "messages": [{"role": "human", "content": query}]
})

# Display the response
print(response)


<think> The document appears to be discussing the training process of a model called DeepSeek-R1-Zero. It mentions designing a template to guide the model to produce a reasoning process followed by a final answer. The goal is to avoid content-specific biases and focus on a structural format. This suggests that the document is about improving the training of language models, specifically in the context of generating answers to questions while providing reasoning processes. </think>
<answer> The document is about improving the training of language models, specifically for generating answers to questions while providing reasoning processes. </answer>
