<a href="https://colab.research.google.com/github/prem-cre/Multirag/blob/main/Copy_of_work.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Cell 1: Install Required Libraries
!pip install -qU langchain
!pip install -qU langchain-community langchain-google-community
!pip install -qU tavily-python wikipedia-api beautifulsoup4 requests
# !pip install -qU tiktoken
!pip install -qU lxml[html_clean]
!pip install -qU faiss-cpu pypdf tiktoken tavily-python
!pip install -qU wikipedia
!pip install langchain-google-genai

In [None]:
# Cell 2: Core Imports and Configuration
import os
import re
import json
import hashlib
from datetime import datetime
from typing import List, Dict, Any, Optional
from dataclasses import dataclass, field
from enum import Enum


# from langchain_groq import ChatGroq # Commented out as we are not using Groq
# from langchain_huggingface import HuggingFaceEmbeddings
from langchain.agents import create_react_agent, AgentExecutor
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.documents import Document
from langchain_core.tools import tool
from langchain_google_community import GoogleSearchAPIWrapper
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain.memory import ConversationBufferWindowMemory
from tavily import TavilyClient
from google.colab import userdata
from langchain_google_genai import ChatGoogleGenerativeAI

# Configure API Keys
# Copy the Colab secrets into the environment variables read further down.
# (No Groq key is loaded: the Groq import above is commented out as unused.)
os.environ.update({
    "TAVILY_API_KEY": userdata.get('tavily'),
    "GOOGLE_API_KEY": userdata.get('GOOGLE_API_KEY'),
    "GOOGLE_CSE_ID": userdata.get('GOOGLE_CSE_ID'),
})

# Initialize services
# Chat model shared by the research agent and the fact checker.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.1)

# Search back-ends used by the tool functions defined later in the notebook.
tavily_client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
google_search_wrapper = GoogleSearchAPIWrapper(k=7)
wikipedia_wrapper = WikipediaAPIWrapper(
    top_k_results=3,
    doc_content_chars_max=1000,
)
wikipedia_tool = WikipediaQueryRun(api_wrapper=wikipedia_wrapper)

print("✅ All services initialized successfully!")

✅ All services initialized successfully!


In [None]:
# Cell 3: Data Models
class SourceCredibility(Enum):
    """Credibility tiers that can be assigned to a legal source.

    Each member's value is the lowercase string label for that tier.
    """

    # NOTE(review): the names suggest a ranking from most to least trusted,
    # but no ordering between members is defined or enforced in code.
    OFFICIAL = "official"
    ACADEMIC = "academic"
    REPUTABLE = "reputable"
    GENERAL = "general"
    UNVERIFIED = "unverified"

@dataclass
class LegalSource:
    """One retrieved source document plus its descriptive metadata.

    The ``hash`` field is always recomputed from ``content`` after
    construction, so any value passed for it by the caller is replaced.
    """

    url: str
    title: str
    content: str
    credibility: SourceCredibility
    author: Optional[str] = None
    jurisdiction: Optional[str] = None
    citation: Optional[str] = None
    hash: Optional[str] = None
    relevance_score: float = 0.0

    def __post_init__(self):
        """Derive the short content fingerprint stored in ``hash``."""
        content_digest = hashlib.md5(self.content.encode())
        # Only the first 8 hex characters of the MD5 digest are kept.
        self.hash = content_digest.hexdigest()[:8]

@dataclass
class EvidenceItem:
    """A single claim together with the sources and reasoning attached to it."""

    # The statement this evidence item is about.
    claim: str
    # Sources recorded for the claim.
    supporting_sources: List[LegalSource]
    # NOTE(review): the scale is not established in this file — presumably 0.0-1.0; confirm.
    confidence_score: float
    reasoning: str
    # default_factory gives every instance its own empty list.
    contradictions: List[Dict[str, Any]] = field(default_factory=list)
    # New items start out as "pending".
    verification_status: str = "pending"
    legal_basis: Optional[str] = None

@dataclass
class LegalResearchResult:
    """Container for the outcome of one legal research query.

    All fields are required; none has a default value.
    """

    # The research question as submitted.
    query: str
    # Free-text summary of the findings.
    summary: str
    # Evidence items collected for the query.
    evidence_chain: List[EvidenceItem]
    legal_precedents: List[Dict[str, Any]]
    # NOTE(review): key/value meaning is not shown here — presumably jurisdiction name -> note; confirm.
    jurisdictional_notes: Dict[str, str]
    # Named confidence figures (string label -> float).
    confidence_assessment: Dict[str, float]
    citations: List[str]

In [None]:
# Cell 4: Define Tools


@tool
def legal_document_search(query: str, jurisdiction: str = "Indian") -> str:
    """Search legal documents, cases, and statutes with enhanced Indian law focus."""
    # The docstring above is kept verbatim: the @tool decorator is expected to
    # use it as the tool description exposed to the agent (TODO confirm).
    #
    # Returns a JSON string: the list of Tavily hits on success, or an
    # {"error": ...} object if anything goes wrong.
    try:
        search_kwargs = {
            # Pad the query with legal keywords for the chosen jurisdiction.
            "query": f"{jurisdiction} law legal {query} case judgment statute act",
            "search_depth": "advanced",
            "max_results": 12,
            # Restrict results to these legal databases / government sites.
            "include_domains": [
                "indiankanoon.org",
                "scconline.com",
                "lawmin.gov.in",
                "legislative.gov.in",
            ],
        }
        response = tavily_client.search(**search_kwargs)
        hits = response.get('results', [])
        return json.dumps(hits, indent=2)
    except Exception as e:
        failure = {"error": f"Error searching legal documents: {str(e)}"}
        return json.dumps(failure)

@tool
def google_legal_search(query: str) -> str:
    """Calls Google Search API for specialized legal search focusing on Indian legal databases."""
    # The docstring above is kept verbatim: the @tool decorator is expected to
    # use it as the tool description exposed to the agent (TODO confirm).
    #
    # Returns a JSON string in every case: the search results on success, or
    # an {"error": ...} object on failure. The error path previously returned
    # a bare string, which broke callers that parse the tool output as JSON
    # and was inconsistent with the Tavily-based tools in this notebook.
    try:
        # Limit the search to the two legal databases via site: operators.
        search_query = f"{query} site:indiankanoon.org OR site:scconline.com"
        search_results = google_search_wrapper.results(search_query, num_results=10)
        return json.dumps(search_results, indent=2)
    except Exception as e:
        return json.dumps({"error": f"Error calling Google Search: {str(e)}"})

@tool
def wikipedia_legal_concepts(query: str) -> str:
    """Searches Wikipedia for legal concepts, landmark cases, and constitutional matters."""
    # The docstring above is kept verbatim: the @tool decorator is expected to
    # use it as the tool description exposed to the agent (TODO confirm).
    #
    # Returns whatever the Wikipedia tool yields, or a plain error message.
    try:
        return wikipedia_tool.invoke(query)
    except Exception as e:
        return f"Error calling Wikipedia: {e!s}"

@tool
def fact_check_legal_claim(claim: str) -> str:
    """Comprehensive fact-checking of legal claims with Indian law focus."""
    # The docstring above is kept verbatim: the @tool decorator is expected to
    # use it as the tool description exposed to the agent (TODO confirm).
    #
    # Runs two Tavily searches for the quoted claim — one phrased to surface
    # support, one phrased to surface contradiction — and returns both raw
    # result lists as a JSON string (or an {"error": ...} object on failure).
    try:
        pro_query, con_query = (
            f'"{claim}" Indian law legal valid true correct Supreme Court High Court',
            f'"{claim}" Indian law legal invalid false incorrect exception limitation',
        )

        pro_response = tavily_client.search(pro_query, max_results=6)
        con_response = tavily_client.search(con_query, max_results=4)

        pro_hits = pro_response.get('results', [])
        con_hits = con_response.get('results', [])

        summary = (
            f"Found {len(pro_hits)} potential supporting and "
            f"{len(con_hits)} potential contradicting sources"
        )
        report = {
            'claim': claim,
            'supporting_evidence_raw': pro_hits,
            'contradicting_evidence_raw': con_hits,
            'verification_summary': summary,
        }
        return json.dumps(report, indent=2)
    except Exception as e:
        return json.dumps({"error": f"Error fact-checking claim: {str(e)}"})

In [None]:
# Cell 5: Fixed Legal Research Agent
class EnhancedLegalResearchAgent:
    """Advanced legal research agent with structured analysis and strong prompting.

    Wraps a LangChain ReAct agent (``create_react_agent`` + ``AgentExecutor``)
    with a windowed conversation memory and a single system prompt that
    defines both the ReAct reasoning format and the required layout of the
    final answer.
    """

    # System prompt. ``{tools}`` and ``{tool_names}`` are left as template
    # variables on purpose: ``create_react_agent`` fills them in from the tool
    # list it is given.
    # NOTE: the "# ..." lines near the end are part of the string literal and
    # are sent to the model verbatim; they are not Python comments.
    _SYSTEM_PROMPT = """You are an expert Indian legal research assistant with comprehensive knowledge of Indian law, statutes, and legal procedures. You provide thorough, well-structured legal analysis. Use the tools to research the query comprehensively.

## YOUR APPROACH TO LEGAL RESEARCH:

1. **Query Analysis**: Identify legal issues, applicable laws, jurisdiction, and key legal concepts
2. **Comprehensive Research**: Search relevant cases, statutes, acts, and legal documents
3. **Source Verification**: Verify credibility and authenticity of all sources
4. **Precedent Analysis**: Extract and analyze relevant legal precedents and landmark cases
5. **Fact Verification**: Cross-check all legal claims against multiple authoritative sources
6. **Evidence Synthesis**: Build a logical chain of evidence with proper legal reasoning
7. **Confidence Assessment**: Evaluate the strength and reliability of findings

## AVAILABLE TOOLS:
{tools}

## REASONING FORMAT:

Use this exact format:
Question: the legal query you must research
Thought: I need to search for information about this legal query
Action: the tool to use, exactly one of [{tool_names}]
Action Input: the input for the tool
Observation: the result of the action
... (repeat Thought/Action/Action Input/Observation as needed)
Thought: I now have enough information to provide a comprehensive answer
Final Answer: [Your detailed legal analysis here]

## STRUCTURED OUTPUT FORMAT:

Your Final Answer MUST be structured with these EXACT headings:

### 1. LEGAL ISSUE IDENTIFICATION
- Primary legal question(s)
- Applicable areas of law
- Relevant jurisdiction(s)
- Key legal concepts involved

### 2. APPLICABLE LAWS AND STATUTES
- Relevant Acts and Sections
- Constitutional provisions (if applicable)
- Regulatory frameworks
- State-specific laws (if applicable)

### 3. JUDICIAL PRECEDENTS AND CASE LAW
- Landmark cases
- Supreme Court judgments
- High Court decisions
- Foreign precedents (if persuasive)

### 4. LEGAL ANALYSIS AND INTERPRETATION
- Detailed legal reasoning
- Application of law to facts
- Conflicting interpretations (if any)
- Expert opinions and commentaries

### 5. CONCLUSIONS AND RECOMMENDATIONS
- Summary of findings
- Legal position clarity
- Risk assessment
- Recommended course of action

### 6. CITATIONS AND REFERENCES
- List ALL citations you found
- Minimum 5-6 authoritative citations
- Format: Case name, Citation, Court, Year
- Include statutory references
- Academic sources (if used)

# ## IMPORTANT INSTRUCTIONS:
# - Always cite specific sections of Acts
# - Verify every citation before including
# - Distinguish between binding and persuasive precedents
# - Note any recent amendments or changes in law
# - Highlight any conflicting judgments
# - Provide confidence level for each conclusion

Use the tools systematically to gather comprehensive information before providing your structured analysis."""

    def __init__(self, llm, tools):
        """Build the prompt, memory, ReAct agent and executor.

        Args:
            llm: Chat model used to drive the agent.
            tools: Tools the agent is allowed to call.
        """
        self.llm = llm
        self.tools = tools

        # The executor below returns both "output" and "intermediate_steps",
        # so the memory must be told which key holds the text to remember;
        # without output_key, saving the context fails on the extra key.
        self.memory = ConversationBufferWindowMemory(
            memory_key="chat_history",
            return_messages=True,
            k=10,
            input_key="input",
            output_key="output",
        )

        # A ReAct agent renders its scratchpad as a plain string, so
        # agent_scratchpad must be a string template variable. Using a
        # MessagesPlaceholder for it raises on the very first step.
        self.prompt = ChatPromptTemplate.from_messages([
            ("system", self._SYSTEM_PROMPT),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "Question: {input}\nThought:{agent_scratchpad}"),
        ])

        self.agent = create_react_agent(
            llm=self.llm,
            tools=self.tools,
            prompt=self.prompt,
        )

        self.executor = AgentExecutor(
            agent=self.agent,
            tools=self.tools,
            memory=self.memory,
            verbose=True,
            max_iterations=10,
            handle_parsing_errors=True,
            return_intermediate_steps=True,
        )

    @staticmethod
    def _build_result(query: str, summary: str, overall_confidence: float) -> Dict[str, Any]:
        """Assemble the result dict shared by the success and error paths."""
        return {
            "query": query,
            "summary": summary,
            "evidence_chain": [],
            "legal_precedents": [],
            "jurisdictional_notes": {},
            "confidence_assessment": {"overall": overall_confidence},
            "citations": [],
        }

    def research(self, query: str) -> Dict[str, Any]:
        """Conduct comprehensive legal research.

        Args:
            query: The legal question or claim to research.

        Returns:
            A dict with the keys ``query``, ``summary``, ``evidence_chain``,
            ``legal_precedents``, ``jurisdictional_notes``,
            ``confidence_assessment`` and ``citations``. Only ``query``,
            ``summary`` and the ``overall`` confidence are populated; the
            other collections are always empty. Failures are never raised:
            they are printed and reported in ``summary`` with an overall
            confidence of 0.0.
        """
        try:
            result = self.executor.invoke({"input": query})
            output_text = result.get('output', '')
        except Exception as e:
            print(f"Error in legal research: {str(e)}")
            return self._build_result(
                query, f"Error conducting research: {str(e)}", 0.0
            )
        # NOTE: 0.85 is a fixed placeholder, not a figure derived from the
        # evidence gathered during the run.
        return self._build_result(query, output_text, 0.85)

In [None]:
# Cell 6: Simplified Fact Checker
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate

class EnhancedLegalFactChecker:
    """Four-step claim verifier: analyse, fact-check, find precedents, report.

    Two LLM chains (claim analysis and final report) are combined with two of
    the supplied tools, which are looked up by name at verification time.
    """

    def __init__(self, llm, tools):
        """Keep the model and tool list, then build the two LLM chains."""
        self.llm = llm
        self.tools = tools
        self._create_analysis_chains()

    def _create_analysis_chains(self):
        """Create the claim-analysis chain and the final-report chain."""
        analysis_prompt = PromptTemplate(
            input_variables=["claim"],
            template="""Analyze this legal claim:

Claim: {claim}

Provide analysis of:
1. Main legal assertion
2. Jurisdiction
3. Legal concepts involved
4. Key facts claimed
5. Potential issues""",
        )
        self.claim_analysis_chain = LLMChain(llm=self.llm, prompt=analysis_prompt)

        report_prompt = PromptTemplate(
            input_variables=["claim", "analysis", "evidence"],
            template="""Generate a legal fact-checking report:

CLAIM: {claim}

ANALYSIS: {analysis}

EVIDENCE: {evidence}


""",
        )
        self.final_report_chain = LLMChain(llm=self.llm, prompt=report_prompt)

    def _tool_named(self, wanted_name):
        """Return the first tool whose ``name`` matches, or None."""
        for candidate in self.tools:
            if candidate.name == wanted_name:
                return candidate
        return None

    def verify_claim(self, claim: str) -> Dict[str, Any]:
        """Run the fact-checking pipeline for one claim.

        Returns a dict holding the claim, a timestamp and the list of
        completed steps. On success it also carries ``final_report`` and
        ``verification_complete=True``; on any exception it carries ``error``
        (the exception text) and ``verification_complete=False``.
        """
        results = {
            "claim": claim,
            "timestamp": datetime.now(),
            "steps": [],
        }

        def record(step_name, step_output):
            # Append one finished step to the running log.
            results["steps"].append({"step": step_name, "output": step_output})

        try:
            print("Step 1: Analyzing claim...")
            claim_analysis = self.claim_analysis_chain.run(claim=claim)
            record("claim_analysis", claim_analysis)

            # Both tools are resolved before either is validated.
            fact_check_tool = self._tool_named("fact_check_legal_claim")
            legal_search_tool = self._tool_named("legal_document_search")
            if not fact_check_tool:
                raise ValueError("Fact check tool not found.")
            if not legal_search_tool:
                raise ValueError("Legal document search tool not found.")

            print("Step 2: Fact-checking claim...")
            fact_check_result = fact_check_tool.invoke(claim)
            record("fact_checking", fact_check_result)

            print("Step 3: Searching for legal precedents...")
            precedent_search = legal_search_tool.invoke(claim)
            record("precedent_search", precedent_search)

            print("Step 4: Generating final report...")
            combined_evidence = (
                f"Fact check: {fact_check_result}\nPrecedents: {precedent_search}"
            )
            results["final_report"] = self.final_report_chain.run(
                claim=claim,
                analysis=claim_analysis,
                evidence=combined_evidence,
            )
            results["verification_complete"] = True

        except Exception as e:
            results["error"] = str(e)
            results["verification_complete"] = False

        return results

In [None]:
# Cell 7: Main Execution
# Initialize tools
# The same list is handed to both the fact checker and the research agent.
# The fact checker looks up "fact_check_legal_claim" and
# "legal_document_search" in it by name.
all_tools = [
    legal_document_search,
    google_legal_search,
    fact_check_legal_claim,
    wikipedia_legal_concepts
]

# Initialize agents
# Both components share the module-level llm.
print("Initializing Legal Research System...")
fact_checker = EnhancedLegalFactChecker(llm=llm, tools=all_tools)
legal_research_agent = EnhancedLegalResearchAgent(llm=llm, tools=all_tools)
print("System initialized successfully!")

# Test claim
# Sample input passed to execute_legal_research at the end of this cell.
test_claim = "The Supreme Court of India in Kesavananda Bharati case established that the basic structure of the Constitution cannot be amended by Parliament."

# Execute research
def execute_legal_research(claim, agent, fact_checker):
    """Fact-check and research a claim, printing a plain-text report.

    The fact checker's ``verify_claim`` runs first and its final report is
    printed when present; then the agent's ``research`` runs and its summary
    is printed. Returns a dict with the claim and both raw results under
    ``fact_check_result`` and ``research_result``.
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 80

    # Report header.
    print(f"\n{heavy_rule}")
    print("LEGAL RESEARCH REPORT")
    print(heavy_rule)
    print(f"\nCLAIM: {claim}")
    print(f"\n{light_rule}")

    # Fact-checking
    print("\nFACT-CHECKING PROCESS:")
    fact_result = fact_checker.verify_claim(claim)

    # The report is printed only when it exists and is non-empty.
    if fact_result.get("final_report"):
        print("\nFact Check Report:")
        print(fact_result["final_report"])

    # Legal Research
    print(f"\n{light_rule}")
    print("LEGAL RESEARCH:")
    research_result = agent.research(claim)

    print("\nResearch Summary:")
    print(research_result["summary"])

    return dict(
        claim=claim,
        fact_check_result=fact_result,
        research_result=research_result,
    )

# Run the analysis
# Feed the sample claim through the fact checker and the research agent.
print("\nStarting Legal Analysis...")
results = execute_legal_research(
    claim=test_claim,
    agent=legal_research_agent,
    fact_checker=fact_checker,
)

# Closing banner: blank line, rule, title, rule.
print("\n" + "=" * 80, "ANALYSIS COMPLETE", "=" * 80, sep="\n")

Initializing Legal Research System...
System initialized successfully!

Starting Legal Analysis...

LEGAL RESEARCH REPORT

CLAIM: The Supreme Court of India in Kesavananda Bharati case established that the basic structure of the Constitution cannot be amended by Parliament.

--------------------------------------------------------------------------------

FACT-CHECKING PROCESS:
Step 1: Analyzing claim...
Step 2: Fact-checking claim...
Step 3: Searching for legal precedents...
Step 4: Generating final report...

Fact Check Report:
## Legal Fact-Check Report: Kesavananda Bharati and the Basic Structure Doctrine

**Claim:** The Supreme Court of India in the *Kesavananda Bharati* case established that the basic structure of the Constitution cannot be amended by Parliament.

**Rating:** Mostly True

**Analysis:**

The Supreme Court of India's decision in *Kesavananda Bharati v. State of Kerala* (1973) is a landmark judgment that introduced the "basic structure doctrine" into Indian constitu