In [1]:
import os
# Switch to the project folder so relative paths used later (e.g. "Contracts")
# resolve. The location can be overridden with the DOCUMENT_CHATBOT_DIR
# environment variable; the original machine-specific path stays the default.
os.chdir(os.environ.get("DOCUMENT_CHATBOT_DIR", 'E:/Manohar/Document_Chatbot'))

In [None]:
%pip install sentence-transformers faiss-cpu llama-cpp-python


In [37]:
import os
from sentence_transformers import SentenceTransformer
import faiss
from ollama import Client, Options   # <-- import Options


In [38]:
def load_and_chunk(root_dir, chunk_size=500, overlap=100):
    """Load every ``.txt`` file under ``root_dir`` and split it into word chunks.

    Each file is split on whitespace and cut into windows of up to
    ``chunk_size`` words; consecutive windows share ``overlap`` words.

    Args:
        root_dir: Directory that is walked recursively for ``.txt`` files
            (the extension check is case-insensitive).
        chunk_size: Maximum number of words per chunk. Must be positive.
        overlap: Number of words shared between consecutive chunks. Must
            satisfy ``0 <= overlap < chunk_size``.

    Returns:
        A ``(texts, metas)`` tuple of equal-length lists. ``texts[j]`` is the
        chunk text (words joined by single spaces) and ``metas[j]`` is
        ``{"source": <file path>, "pos": <index of the chunk's first word>}``.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        # A zero or negative stride would either crash range() or silently
        # produce no chunks; a negative overlap would skip words entirely.
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    step = chunk_size - overlap
    texts, metas = [], []
    for dirpath, dirnames, files in os.walk(root_dir):
        # Sort in place so the traversal (and therefore chunk order) does not
        # depend on the order the filesystem happens to return entries in.
        dirnames.sort()
        for fn in sorted(files):
            if not fn.lower().endswith(".txt"):
                continue
            full_path = os.path.join(dirpath, fn)
            # Undecodable bytes are dropped rather than aborting the load.
            with open(full_path, encoding="utf8", errors="ignore") as f:
                toks = f.read().split()
            for i in range(0, len(toks), step):
                texts.append(" ".join(toks[i : i + chunk_size]))
                metas.append({"source": full_path, "pos": i})
                # This window already reaches the end of the file; any further
                # window would only repeat words that are already covered.
                if i + chunk_size >= len(toks):
                    break
    return texts, metas

# Load & chunk: walk the relative "Contracts" folder (resolved against the
# current working directory) with the default chunk size and overlap.
# `chunks` holds the chunk texts; `metadata` holds the matching source/position
# records returned alongside them.
chunks, metadata = load_and_chunk("Contracts")
print(f"Loaded {len(chunks)} chunks from 'Contracts/' directory.")


Loaded 3 chunks from 'Contracts/' directory.


In [39]:
# Embed chunks and build FAISS index
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Fail early with a clear message: an empty chunk list would otherwise surface
# below as an obscure shape/indexing error when the index is created.
if not chunks:
    raise ValueError("No chunks to embed; check that 'Contracts/' contains .txt files.")

embs = embedder.encode(chunks, show_progress_bar=True)

# Normalize and index.
# normalize_L2 rescales the rows of `embs` in place; on unit-length vectors the
# inner product used by IndexFlatIP is the cosine similarity.
faiss.normalize_L2(embs)
index = faiss.IndexFlatIP(embs.shape[1])  # dimension = embedding width
index.add(embs)

print(f"Built FAISS index with {index.ntotal} vectors.")


Batches: 100%|██████████| 1/1 [00:00<00:00,  7.90it/s]

Built FAISS index with 3 vectors.





In [40]:
# Initialize the Ollama client with its default settings.
# The local Ollama server must already be running (`ollama serve`) before any
# request is sent through this client.
client = Client()

def llm_generate(prompt, max_tokens=256, model="llama3"):
    """Send a single-turn user prompt to the Ollama server and return the reply text.

    Uses the module-level ``client``.

    Args:
        prompt: Text sent as the only ``user`` message of the chat.
        max_tokens: Passed to Ollama as the ``num_predict`` option, which caps
            the number of tokens generated. Replies longer than this are cut off.
        model: Name of the Ollama model to query. Defaults to ``"llama3"``,
            the model this notebook was written against.

    Returns:
        The ``content`` string of the response's ``message``.
    """
    generation_options = Options(num_predict=max_tokens)
    resp = client.chat(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        options=generation_options,
    )
    return resp["message"]["content"]

def answer(query, k=5, max_tokens=256):
    """Answer ``query`` using retrieval-augmented generation over the indexed chunks.

    Uses the module-level ``embedder``, ``index`` and ``chunks`` for retrieval
    and ``llm_generate`` for the final completion.

    Args:
        query: The user's question.
        k: Maximum number of chunks to retrieve as context. Clamped to the
            number of vectors actually stored in the index.
        max_tokens: Token limit forwarded to ``llm_generate``.

    Returns:
        The text generated by the LLM.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    # 1) Embed & retrieve top-k
    # Never ask for more neighbours than the index holds: FAISS pads missing
    # results with -1, and chunks[-1] would silently repeat the last chunk.
    top_k = min(k, index.ntotal)
    hit_ids = []
    if top_k > 0:
        q_emb = embedder.encode([query])
        faiss.normalize_L2(q_emb)  # in place, to match the normalized index vectors
        _, I = index.search(q_emb, top_k)
        # Defensive filter in case any padding ids are still returned.
        hit_ids = [i for i in I[0] if i >= 0]

    # 2) Build context
    context = "\n\n".join(chunks[i] for i in hit_ids)
    prompt = (
        "You are an assistant. Use the following context to answer the question.\n\n"
        f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
    )

    # 3) Generate answer
    return llm_generate(prompt, max_tokens=max_tokens)


# Marker that this cell finished running. The cell only creates the client and
# defines the helper functions; no model call is made here.
print("Generated LLM")

Generated LLM


In [41]:
# Quick test: run one question through the full retrieve-then-generate
# pipeline and print the model's reply.
print(answer("What is this contract about?"))


This contract, titled "Master Services and Licensing Agreement", is an agreement between Microsoft Corporation and LlamaLLC (Llama). The agreement outlines the terms and conditions for a licensing arrangement where Microsoft provides professional services and deliverables to Llama. The contract covers aspects such as confidentiality, intellectual property rights, data protection, indemnification, warranties, term and termination, and miscellaneous provisions.


In [42]:
# Quick test: ask a factual question whose answer should come from the
# retrieved contract text (the signatories).
print(answer("Who signed this contract?"))


According to the contract, the following individuals signed it:

* Sai Manohar, Data Scientist at Microsoft Corporation (dated June 5, 2025)
* Tejas Gaikwad, AI Engineer at LlamaLLC (dated August 5, 2025)


In [43]:
# Quick test: open-ended summarization request, answered with the default
# retrieval depth and token limit of answer().
print(answer("Highlight key points and list them in order of importance"))


Based on the Master Services and Licensing Agreement, I've highlighted the key points and listed them in order of importance:

**Highly Important (1-3)**

1. **Termination**: Either Party may terminate this Agreement without cause upon thirty (30) days written notice to the other Party. (§10.2)
2. **Ownership**: Microsoft retains ownership of Intellectual Property Rights developed in connection with the Services or Deliverables. (§6.1)
3. **Confidentiality**: Each Party agrees to protect Confidential Information disclosed by the other Party with the same degree of care it uses to protect its own confidential information. (§5.1)

**Important (4-6)**

4. **License Grant**: Microsoft grants Llama a limited, non-exclusive, non-transferable, and revocable license to use the Software identified in Exhibit B solely for Llama's internal business purposes. (§3.1)
5. **Fees and Payment Terms**: Llama agrees to pay Microsoft the fees outlined in Exhibit C, with interest accruing at a rate of 1.5%