In [64]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
import ollama

In [65]:
# 1. Load documents
# Read the manual text file through TextLoader. The path is relative, so it is
# resolved against the kernel's current working directory.
documents = TextLoader("../data/manual.txt").load()


In [66]:
# 2. Split into chunks
# Chunks are capped at 1000 characters; neighbouring chunks share up to 200
# characters of overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)


In [67]:
# 3. Create embeddings and vector store
# Embed every chunk with the MiniLM sentence-transformer model and index the
# resulting vectors in a FAISS store.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(chunks, embeddings)
print(vectorstore)


<langchain_community.vectorstores.faiss.FAISS object at 0x37d879090>


In [68]:
# 4. Create a retriever
# Wrap the vector store in a retriever using the default search settings.
# NOTE(review): query_document1 below calls vectorstore.similarity_search
# directly, so this retriever is not used by the cells visible here.
retriever = vectorstore.as_retriever()
print(retriever)

tags=['FAISS', 'HuggingFaceEmbeddings'] vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x37d879090> search_kwargs={}


In [69]:
def query_document1(question, temp=0.5):
    """Answer a question about the indexed document with DeepSeek-R1 via Ollama.

    The three chunks most similar to ``question`` are fetched from the
    module-level ``vectorstore``, joined into a context block, and sent to the
    ``deepseek-r1:7b`` model together with the question.

    Args:
        question: Natural-language question to ask about the document.
        temp: Sampling temperature forwarded to Ollama in ``options``.

    Returns:
        A dict with two string entries:
        ``"thinking"`` - the model's reasoning (the text before ``</think>``,
        minus a leading ``<think>`` tag if present), or ``""`` when the reply
        has no closing tag;
        ``"answer"`` - the text after ``</think>``, or the whole reply when no
        closing tag is present. Both values are whitespace-stripped.
    """
    # Get relevant chunks
    docs = vectorstore.similarity_search(question, k=3)
    context = "\n\n".join(doc.page_content for doc in docs)

    # Format prompt for DeepSeek
    prompt = f"""Answer the following question based only on the provided context.
    If you cannot answer from the context, say "I don't have enough information."
    Think step by step before answering.
    
    Context:
    {context}
    
    Question: {question}
    
    <think>
    Think through the reasoning process step by step here. Explore the context and determine the best answer.
    </think>
    
    Answer:"""

    # Generate response using DeepSeek-R1
    response = ollama.generate(
        model="deepseek-r1:7b",
        prompt=prompt,
        options={"temperature": temp}
    )
    full_response = response["response"]

    # Split on the first closing tag: reasoning comes before it, the answer after.
    reasoning, closing_tag, remainder = full_response.partition("</think>")
    if not closing_tag:
        # No thinking section at all: the whole reply is the answer.
        return {"thinking": "", "answer": full_response.strip()}

    # The opening tag is optional here. A reply may carry only "</think>", and
    # requiring both tags would silently discard the reasoning in that case.
    _, opening_tag, after_opening = reasoning.partition("<think>")
    if opening_tag:
        reasoning = after_opening

    return {"thinking": reasoning.strip(), "answer": remainder.strip()}



In [70]:
# Usage: ask one question at the default-like temperature and show the answer.
# The model's reasoning is also available under result["thinking"].
question = "Summarize the document in 1 sentence"
result = query_document1(question, temp=0.5)

print(f"\nQuestion: {question}")
print(result["answer"])



Question: Summarize the document in 1 sentence
The document outlines warranty exclusions for physical damage, water damage, unauthorized modifications, normal wear and tear, and altered serial numbers, provides steps for obtaining warranty service including contacting support, providing proof of purchase, describing issues, following troubleshooting, and obtaining an RMA. Additionally, it details package contents (router, power adapter, Ethernet cable, quick start guide, mounting brackets) and installation steps such as positioning the router, connecting cables, waiting for power, connecting devices, accessing settings via a web browser using default credentials, and following a setup wizard.

Summary: The document explains warranty exclusions, how to obtain warranty service with specific steps, details package contents, and provides an installation guide with procedure steps.


In [71]:
# Usage: run the same question at a low and a high temperature
# (0.1 first, then 1.0) so the two answers can be compared.
factual_answer, creative_answer = (
    query_document1("Summarize the document in 1 sentence", temp=temperature)
    for temperature in (0.1, 1.0)
)


In [72]:
# Show the low-temperature answer, then the high-temperature answer, one per line.
print(factual_answer["answer"], creative_answer["answer"], sep="\n")

The document outlines warranty exclusions for XYZ-company products and provides instructions for obtaining warranty service, along with a detailed installation guide for their router package.
The document outlines the conditions under which warranty is not valid, such as physical damage from accidents or misuse, water damage, unauthorized modifications, normal wear and tear, and altered serial numbers; provides steps to obtain warranty service through customer support with proof of purchase and serial number; mentions extended warranties available within 30 days of purchase; lists product contents including router, power adapter, Ethernet cable, setup guide, and mounting brackets; and offers an installation guide with detailed setup instructions.


In [73]:
# Print the whole result dict (both "thinking" and "answer").
# NOTE(review): no temp is passed, so this call runs at the function default
# (temp=0.5), and it rebinds creative_answer, replacing the temp=1.0 result
# assigned in an earlier cell.
creative_answer = query_document1("Summarize the document in 1 sentence")
print(creative_answer)

{'thinking': "Alright, I'm trying to summarize this document based on the provided context. Let me read through it carefully.\n\nThe context starts with Warranty Exclusions, which list several issues that won't be covered under warranty. These include physical damage from accidents or misuse, water or liquid damage, unauthorized modifications, normal wear and tear, and products with altered serial numbers.\n\nNext, there's a section on How to Obtain Warranty Service. It outlines the steps: contacting customer support via email or phone, providing proof of purchase and the product serial number, describing the issue in detail, following troubleshooting steps from support, obtaining an RMA number if needed, and shipping the product with the RMA clearly marked.\n\nThen there's information about Extended warranty plans available within 30 days of purchase. So that's a key point about additional coverage options beyond the standard warranty.\n\nAfter that comes an Installation Guide section