In [3]:
# Downgrade langchain to the last stable version (v0.3) to support RetrievalQA
%pip install "langchain<1.0.0" "langchain-community<1.0.0" "langchain-core<1.0.0" "langchain-openai<1.0.0" "langchain-text-splitters<1.0.0"

Collecting langchain<1.0.0
  Downloading langchain-0.3.27-py3-none-any.whl.metadata (7.8 kB)
Collecting langchain-core<1.0.0
  Downloading langchain_core-0.3.80-py3-none-any.whl.metadata (3.2 kB)
Collecting langchain-openai<1.0.0
  Downloading langchain_openai-0.3.35-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-text-splitters<1.0.0
  Downloading langchain_text_splitters-0.3.11-py3-none-any.whl.metadata (1.8 kB)
INFO: pip is looking at multiple versions of langchain-community to determine which version is compatible with other requirements. This could take a while.
Collecting langchain-community<1.0.0
  Downloading langchain_community-0.4-py3-none-any.whl.metadata (3.0 kB)
  Downloading langchain_community-0.3.31-py3-none-any.whl.metadata (3.0 kB)
Downloading langchain-0.3.27-py3-none-any.whl (1.0 MB)
   ---------------------------------------- 0.0/1.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.0 MB ? eta -:--:--
   ---------- -------------------------

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-classic 1.0.0 requires langchain-core<2.0.0,>=1.0.0, but you have langchain-core 0.3.80 which is incompatible.
langchain-classic 1.0.0 requires langchain-text-splitters<2.0.0,>=1.0.0, but you have langchain-text-splitters 0.3.11 which is incompatible.
langchain-huggingface 1.0.1 requires langchain-core<2.0.0,>=1.0.3, but you have langchain-core 0.3.80 which is incompatible.
langgraph-prebuilt 1.0.4 requires langchain-core>=1.0.0, but you have langchain-core 0.3.80 which is incompatible.


In [1]:
# ------------------------------------------------------------------
# CELL 1: SETUP, IMPORTS, AND CONFIGURATION
# ------------------------------------------------------------------

import os
import time
from dotenv import load_dotenv

# Import necessary LangChain components
from langchain_pinecone import PineconeVectorStore
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# 1. Load API keys from the .env file into the process environment
load_dotenv()

# --- CONFIGURATION ---
# Must match the Index Name used in Milestone 1
INDEX_NAME = "legal-assistant"
# Must match the Embedding Model used in Milestone 1
EMBEDDING_MODEL = "all-MiniLM-L6-v2"

# Check every credential the notebook needs, not just OpenAI's: the Pinecone
# vector store is built without an explicit key, so it relies on
# PINECONE_API_KEY being present in the environment too.
REQUIRED_ENV_VARS = ("OPENAI_API_KEY", "PINECONE_API_KEY")
missing_keys = [name for name in REQUIRED_ENV_VARS if not os.environ.get(name)]

if missing_keys:
    print(f"❌ Error: {', '.join(missing_keys)} not found in .env file.")
else:
    print("✅ API Keys loaded successfully.")

  from .autonotebook import tqdm as notebook_tqdm


✅ API Keys loaded successfully.


In [2]:
# ------------------------------------------------------------------
# CELL 2: CONNECT TO PINECONE DATABASE
# ------------------------------------------------------------------
# Creates `embeddings` and `vectorstore` from the constants set in Cell 1.

print(f"⏳ Loading embedding model: {EMBEDDING_MODEL}...")
# Use the same embedding model as Milestone 1 so query vectors match the
# vectors already stored in the index.
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

print(f"⏳ Connecting to Pinecone Index: {INDEX_NAME}...")
vectorstore = PineconeVectorStore(
    index_name=INDEX_NAME,
    embedding=embeddings,
)

print("✅ Successfully connected to Pinecone VectorStore.")

⏳ Loading embedding model: all-MiniLM-L6-v2...
⏳ Connecting to Pinecone Index: legal-assistant...
✅ Successfully connected to Pinecone VectorStore.


In [3]:
# ------------------------------------------------------------------
# CELL 3: DATABASE VERIFICATION (Human Check)
# ------------------------------------------------------------------
# Runs one similarity search and prints the top hit so a human can confirm
# that the index actually contains the ingested documents.
test_query = "fundamental rights"
print(f"🔎 Running Test Query: '{test_query}'")

try:
    # A single document is enough to prove the ingestion worked
    raw_results = vectorstore.similarity_search(test_query, k=1)

    if raw_results:
        print("✅ Database Check Passed!")
        doc = raw_results[0]
        print(f"   • Found Document: {doc.metadata.get('document_title', 'Untitled')}")
        print(f"   • Source File:    {doc.metadata.get('source_file')}")
        # Only the first 100 characters are shown to keep the output short
        print(f"   • Content Snippet: {doc.page_content[:100]}...")
    else:
        print("❌ Database Check Failed. No results found.")
        print("   STOP: Please check if Milestone 1 ingestion was successful.")

except Exception as e:
    # Best-effort check: report the failure instead of aborting the notebook
    print(f"❌ Connection Error: {e}")

🔎 Running Test Query: 'fundamental rights'
✅ Database Check Passed!
   • Found Document: Kesavananda Bharati v. State of Kerala - Wikipedia
   • Source File:    Kesavananda Bharati v. State of Kerala - Wikipedia.html
   • Content Snippet: Preamble and the Constitution to carry out the objectives in the Preamble and the Directive Principl...


In [4]:
# ------------------------------------------------------------------
# CELL 4: BUILD LEGABOT RAG CHAIN
# ------------------------------------------------------------------
# Wires the vector store, the prompt and the chat model into `qa_chain`.

# 1. Setup Retriever (The Tool for the AI)
# Expose the vectorstore through the retriever interface the chain expects
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},  # Retrieve top 5 most relevant chunks
)

# 2. Initialize OpenAI (The Brain)
print("⏳ Initializing ChatOpenAI (gpt-3.5-turbo)...")
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.1,  # Low temperature = Precise, Factual Legal Answers
)

# 3. Define the System Prompt (Based on Project Doc Section 5)
# {context} and {question} are the two placeholders declared in PROMPT below.
legal_system_prompt = """
You are LegaBot, a precise legal research assistant. 
For any user query, use the following context to provide an answer.

GUIDELINES:
1. Retrieve and list the most relevant statute sections/judgment excerpts.
2. Provide a concise legal summary (3–5 sentences).
3. If the answer requires legal interpretation or could affect rights, clearly say you are not a lawyer and recommend consulting a qualified attorney.
4. Always prioritize citing the exact statutory language or judgment excerpt used.
5. If you don't know the answer based on the context, say "I don't have enough information in my legal database."

CONTEXT:
{context}

QUESTION:
{question}

YOUR ANSWER:
"""

PROMPT = PromptTemplate(
    template=legal_system_prompt,
    input_variables=["context", "question"],
)

# 4. Construct the Chain
# Connects: Retriever -> Prompt -> LLM
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,  # Required to show sources
    chain_type_kwargs={"prompt": PROMPT},
)

print("✅ LegaBot Chain built successfully. Ready to answer.")

⏳ Initializing ChatOpenAI (gpt-3.5-turbo)...
✅ LegaBot Chain built successfully. Ready to answer.


In [9]:
# ------------------------------------------------------------------
# CELL 5: RUN LEGABOT (With Detailed Top 5 Text Answers)
# ------------------------------------------------------------------
# Sends one question through `qa_chain`, then prints the generated answer
# followed by the text of every retrieved source document.

# Define your question
user_query = "What is the punishment for theft under Indian law?"

print(f"❓ USER QUESTION: '{user_query}'")
print("🤖 LEGABOT IS THINKING...")

# Run the chain.
# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way time.time() can.
start_time = time.perf_counter()
response = qa_chain.invoke({"query": user_query})
end_time = time.perf_counter()

# --- 1. DISPLAY THE AI'S MAIN SUMMARY ---
print("\n" + "="*60)
print(f"📝 LEGABOT FINAL ANSWER ({end_time - start_time:.2f}s):")
print("="*60)
print(response["result"].strip())

# --- 2. DISPLAY THE TOP 5 RETRIEVED TEXTS (THE "5 RESPONSES") ---
print("\n" + "="*60)
print("🔍 TOP 5 RETRIEVED SOURCES (The Exact Text Found):")
print("="*60)

# Loop through the source docs and print the ACTUAL TEXT content
for rank, doc in enumerate(response["source_documents"], start=1):
    title = doc.metadata.get('document_title', 'Unknown Title')
    source = doc.metadata.get('source_file', 'Unknown File')

    print(f"\n[{rank}] SOURCE: {title}")
    print(f"    File: {source}")
    # Collapse all runs of whitespace so each chunk prints as one line
    clean_text = " ".join(doc.page_content.split())
    print(f"    📖 EXCERPT: \"{clean_text}...\"")
    print("-" * 60)

❓ USER QUESTION: 'What is the punishment for theft under Indian law?'
🤖 LEGABOT IS THINKING...

📝 LEGABOT FINAL ANSWER (6.35s):
Under Indian law, the punishment for theft is outlined in Section 379 of the Indian Penal Code (45 of 1860). According to Section 379, whoever commits theft shall be punished with imprisonment for a term which may extend to three years, or with fine, or with both. This is specified in the statutory language of Section 379 of the Indian Penal Code.

🔍 TOP 5 RETRIEVED SOURCES (The Exact Text Found):

[1] SOURCE: Criminal Procedure Code
    File: Criminal Procedure Code.pdf
    üìñ EXCERPT: "CHAPTER XXI SUMMARY TRIALS 260. Power to try summarily.‚Äî(1) Notwithstanding anything contained in this Code‚Äî (a) any Chief Judicial Magistrate; (b) any Metropolitan Magistrate; (c) any Magistrate of the first class specially empowered in this behalf by the High Court, may, if he thinks fit, try in a summary way all or any of the following offences: ‚Äî (i) off