In [None]:
# Cell 1: Install Required Libraries
!pip install -qU langchain
!pip install -qU langchain-community
!pip install -qU tavily-python wikipedia-api  requests
# !pip install -qU tiktoken  faiss-cpu pypdf tiktoken
!pip install -qU lxml[html_clean]
!pip install -qU tavily-python
!pip install -qU wikipedia
!pip install langchain-google-genai
!pip install langchain_groq

In [29]:
# Cell 2: Core Imports and Configuration
import os
import re
import json
import hashlib
from datetime import datetime
from typing import List, Dict, Any, Optional
from dataclasses import dataclass, field
from enum import Enum


from langchain_groq import ChatGroq
# from langchain_huggingface import HuggingFaceEmbeddings
from langchain.agents import create_react_agent, AgentExecutor
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.documents import Document
from langchain_core.tools import tool
from langchain_google_community import GoogleSearchAPIWrapper
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain.memory import ConversationBufferWindowMemory
from tavily import TavilyClient
from google.colab import userdata
from langchain_google_genai import ChatGoogleGenerativeAI

# Configure API keys: copy each Colab secret into an environment variable.
# TAVILY_API_KEY is read back explicitly below; the other three are presumably
# picked up from the environment by their client libraries — TODO confirm.
for env_var, secret_name in (
    ("GROQ_API_KEY", 'groq_api_key'),
    ("TAVILY_API_KEY", 'tavily'),
    ("GOOGLE_API_KEY", 'GOOGLE_API_KEY'),
    ("GOOGLE_CSE_ID", 'GOOGLE_CSE_ID'),
):
    os.environ[env_var] = userdata.get(secret_name)

# Initialize services (these module-level names are used by the cells below).
llm = ChatGroq(temperature=0.1, model_name="llama-3.1-8b-instant")

tavily_client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
google_search_wrapper = GoogleSearchAPIWrapper(k=7)
wikipedia_wrapper = WikipediaAPIWrapper(
    top_k_results=3,
    doc_content_chars_max=1000,
)
wikipedia_tool = WikipediaQueryRun(api_wrapper=wikipedia_wrapper)

print("✅ All services initialized successfully!")

✅ All services initialized successfully!


In [41]:
# Cell 3: Data Models
class SourceCredibility(Enum):
    """Credibility tiers used to label a legal source.

    Used as the type of the ``credibility`` field of ``LegalSource``. Each member
    carries a plain lowercase string value. Nothing in the visible notebook cells
    assigns or ranks these tiers, so the criteria for each one are not defined here.
    """
    OFFICIAL = "official"
    ACADEMIC = "academic"
    REPUTABLE = "reputable"
    GENERAL = "general"
    UNVERIFIED = "unverified"

@dataclass
class LegalSource:
    """One retrieved source plus a short fingerprint of its text.

    ``hash`` is recomputed from ``content`` every time an instance is created, so a
    value passed in by the caller is always overwritten.
    """
    # Required fields.
    url: str
    title: str
    content: str
    credibility: SourceCredibility
    # Optional metadata.
    author: Optional[str] = None
    jurisdiction: Optional[str] = None
    citation: Optional[str] = None
    hash: Optional[str] = None  # populated by __post_init__
    relevance_score: float = 0.0

    def __post_init__(self):
        # Fingerprint: first 8 hex characters of the MD5 digest of the content,
        # encoded with str.encode()'s default encoding.
        content_digest = hashlib.md5(self.content.encode())
        self.hash = content_digest.hexdigest()[:8]

@dataclass
class EvidenceItem:
    """A single claim together with the sources, score and reasoning recorded for it.

    NOTE(review): no visible notebook cell constructs this class, so the expected
    range of ``confidence_score`` and the set of ``verification_status`` values
    (other than the "pending" default) cannot be confirmed from here.
    """
    claim: str
    supporting_sources: List[LegalSource]
    confidence_score: float
    reasoning: str
    # default_factory gives each instance its own list rather than a shared one.
    contradictions: List[Dict[str, Any]] = field(default_factory=list)
    verification_status: str = "pending"
    legal_basis: Optional[str] = None

@dataclass
class LegalResearchResult:
    """Structured container for the outcome of one legal research query.

    The field names match the keys of the plain dict returned by
    ``EnhancedLegalResearchAgent.research``; in the visible cells that method builds
    a dict directly and does not instantiate this class.
    """
    query: str
    summary: str
    evidence_chain: List[EvidenceItem]
    legal_precedents: List[Dict[str, Any]]
    jurisdictional_notes: Dict[str, str]
    confidence_assessment: Dict[str, float]
    citations: List[str]

In [42]:
# Cell 4: Define Tools


@tool
def legal_document_search(query: str, jurisdiction: str = "Indian") -> str:
    """Search legal documents, cases, and statutes with enhanced Indian law focus."""
    # The docstring above is kept verbatim because the @tool decorator exposes it as
    # the tool description.
    #
    # Returns a JSON string: the list under the response's 'results' key (empty list
    # if the key is absent) on success, or {"error": "..."} if anything raises.
    allowed_domains = ["indiankanoon.org", "scconline.com", "lawmin.gov.in", "legislative.gov.in"]
    try:
        # Wrap the user's query in legal keywords, prefixed by the jurisdiction.
        response = tavily_client.search(
            query=f"{jurisdiction} law legal {query} case judgment statute act",
            search_depth="advanced",
            max_results=7,
            include_domains=allowed_domains,
        )
        hits = response.get('results', [])
        return json.dumps(hits, indent=2)
    except Exception as exc:
        # Best effort: report the failure as data instead of raising.
        return json.dumps({"error": f"Error searching legal documents: {exc!s}"})

@tool
def google_legal_search(query: str) -> str:
    """Calls Google Search API for specialized legal search focusing on Indian legal databases."""
    # The docstring above is kept verbatim because the @tool decorator exposes it as
    # the tool description.
    #
    # Returns a JSON string in both outcomes: the value returned by
    # google_search_wrapper.results() on success, or {"error": "..."} on failure.
    # The failure case previously returned a bare sentence, unlike
    # legal_document_search and fact_check_legal_claim; it now uses the same JSON
    # error envelope so every tool's output can be parsed uniformly.
    try:
        # Restrict hits to the two case-law sites with site: operators.
        search_query = f"{query} site:indiankanoon.org OR site:scconline.com"
        search_results = google_search_wrapper.results(search_query, num_results=5)
        return json.dumps(search_results, indent=2)
    except Exception as e:
        # Best effort: report the failure as data instead of raising.
        return json.dumps({"error": f"Error calling Google Search: {str(e)}"})


# def wikipedia_legal_concepts(query: str) -> str:
#     """Searches Wikipedia for legal concepts, landmark cases, and constitutional matters."""
#     try:
#         wiki_result = wikipedia_tool.invoke(query)
#         return wiki_result
#     except Exception as e:
#         return f"Error calling Wikipedia: {str(e)}"

@tool
def fact_check_legal_claim(claim: str) -> str:
    """Comprehensive fact-checking of legal claims with Indian law focus."""
    # The docstring above is kept verbatim because the @tool decorator exposes it as
    # the tool description.
    #
    # Runs two searches for the quoted claim — one padded with affirming keywords,
    # one with negating keywords — and returns a JSON string holding the claim, both
    # raw result lists and a one-line count summary. Any exception is reported as
    # {"error": "..."} instead of being raised.
    try:
        pro_query = f'"{claim}" Indian law legal valid true correct Supreme Court High Court'
        con_query = f'"{claim}" Indian law legal invalid false incorrect exception limitation'

        pro_response = tavily_client.search(pro_query, max_results=5)
        con_response = tavily_client.search(con_query, max_results=4)

        pro_hits = pro_response.get('results', [])
        con_hits = con_response.get('results', [])
        summary_line = (
            f"Found {len(pro_hits)} potential supporting and "
            f"{len(con_hits)} potential contradicting sources"
        )

        payload = {
            'claim': claim,
            'supporting_evidence_raw': pro_hits,
            'contradicting_evidence_raw': con_hits,
            'verification_summary': summary_line,
        }
        return json.dumps(payload, indent=2)
    except Exception as exc:
        return json.dumps({"error": f"Error fact-checking claim: {exc!s}"})

In [43]:
# Cell 5: Fixed Legal Research Agent
import asyncio
import concurrent.futures
from functools import partial

class EnhancedLegalResearchAgent:
    """Legal research helper that calls its search tools directly and lets the LLM write the analysis.

    No agent loop is involved: the two search tools are invoked concurrently, their
    outputs are pasted into one fixed prompt, and the LLM's streamed answer becomes
    the report summary.
    """

    def __init__(self, llm, tools):
        """Store the LLM and tools and index the tools by their ``name`` attribute."""
        self.llm = llm
        self.tools = tools
        self.tool_dict = {entry.name: entry for entry in tools}

        # Direct tool calling approach to avoid agent_scratchpad issues
        self.use_agent = False

    def research(self, query: str) -> Dict[str, Any]:
        """Research ``query`` and return a report dict; never raises.

        On success ``summary`` holds the LLM analysis and the overall confidence is
        the fixed value 0.85. On any exception the error is printed, ``summary``
        holds the error text and the overall confidence is 0.0.

        NOTE(review): 0.85 is a hard-coded constant, not a computed score.
        """
        try:
            # Direct tool calling approach
            summary_text, overall_confidence = self._direct_research(query), 0.85
        except Exception as exc:
            print(f"Error in legal research: {str(exc)}")
            summary_text, overall_confidence = f"Error conducting research: {str(exc)}", 0.0

        # Only query, summary and the overall confidence are ever populated here.
        return {
            "query": query,
            "summary": summary_text,
            "evidence_chain": [],
            "legal_precedents": [],
            "jurisdictional_notes": {},
            "confidence_assessment": {"overall": overall_confidence},
            "citations": []
        }

    def _build_analysis_prompt(self, query: str, combined: str) -> str:
        """Return the analysis prompt with the query and combined search results filled in."""
        return f"""You are an expert Indian legal research assistant. Based on the following search results, provide a comprehensive legal analysis.

Query: {query}

Search Results:
{combined}

Your analysis MUST be structured with these EXACT professional headings:

### 📋 CASE OVERVIEW & LEGAL ISSUES
- **Primary Legal Question(s):** What are the core legal issues at stake?
- **Area(s) of Law:** Constitutional, Criminal, Civil, Corporate, etc.
- **Jurisdictional Scope:** Supreme Court, High Court, District Court jurisdiction
- **Key Legal Principles:** Fundamental concepts and doctrines involved

### ⚖️ STATUTORY FRAMEWORK & LEGISLATION
- **Primary Legislation:** Relevant Acts with specific Sections
- **Constitutional Provisions:** Articles of the Constitution (if applicable)
- **Rules & Regulations:** Subordinate legislation and notifications
- **State Enactments:** State-specific laws and amendments
- **Recent Amendments:** Latest changes to applicable laws

### 🏛️ JUDICIAL PRECEDENTS & CASE LAW
- **Landmark Judgments:** Seminal cases establishing principles
- **Supreme Court Rulings:** Binding precedents from apex court
- **High Court Decisions:** Relevant HC judgments with jurisdiction
- **Foreign Jurisprudence:** Persuasive international precedents
- **Case Evolution:** How the law has developed through cases

### 🔍 LEGAL ANALYSIS & INTERPRETATION
- **Ratio Decidendi:** The binding principle of law
- **Application to Facts:** How the law applies to current scenario
- **Judicial Reasoning:** Court's interpretation methodology
- **Conflicting Views:** Different judicial interpretations (if any)
- **Academic Commentary:** Expert opinions from legal scholars

### 💼 STRATEGIC RECOMMENDATIONS
- **Executive Summary:** Key findings in brief
- **Legal Position:** Clear statement of current legal status
- **Risk Assessment:** Potential legal risks and challenges
- **Action Plan:** Step-by-step recommended course of action
- **Alternative Strategies:** Other viable legal approaches

### 📚 CITATIONS & LEGAL AUTHORITIES
**IMPORTANT: You MUST provide 8-10 authoritative citations with clickable links**

Format each citation as:
- **Case Name:** [Full Case Title](actual_url_from_search_results)
  - *Citation:* AIR/SCC/Other Reporter
  - *Court:* Supreme Court/High Court
  - *Year:* Decision year
  - *Brief:* One-line description of the legal principle established

Example(this is just an example not ans for every query):
- **Kesavananda Bharati v. State of Kerala:** [View Full Judgment](https://indiankanoon.org/doc/257876/)
  - *Citation:* AIR 1973 SC 1461, (1973) 4 SCC 225
  - *Court:* Supreme Court of India (13-judge bench)
  - *Year:* 1973
  - *Brief:* Established the basic structure doctrine limiting Parliament's amending power

**Statutory References:**
- Include Act name with clickable link to full text
- Specific sections cited with brief description

**MANDATORY REQUIREMENTS:**
1. Extract ALL URLs from the search results for citations
2. Minimum 8 citations, maximum 10 citations
3. Each citation MUST have a clickable link from the actual search results
4. Include brief description of what each case/statute establishes
5. Prioritize Supreme Court and High Court judgments
6. Include at least 2-3 statutory references with links

Remember: Legal professionals rely on these citations for their practice. Accuracy and accessibility through clickable links is paramount."""

    def _direct_research(self, query: str) -> str:
        """Run both search tools concurrently, then stream and return the LLM analysis.

        Each streamed chunk is echoed to stdout as it arrives; the concatenation of
        all chunks is the return value. A missing tool name raises KeyError.
        """
        # Tools to query, in the order their outputs appear in the prompt.
        # (The Wikipedia tool is currently disabled, so it is not listed.)
        search_tool_names = ('legal_document_search', 'google_legal_search')

        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as pool:
            print("Executing parallel searches...")

            # Submit everything first, then wait on the futures in submission order.
            pending = [
                pool.submit(self.tool_dict[tool_name].invoke, query)
                for tool_name in search_tool_names
            ]
            legal_docs, google_results = [job.result() for job in pending]

        combined = f"Legal Documents: {legal_docs}\nGoogle Results: {google_results}"

        print("\nGenerating analysis...")
        analysis_prompt = self._build_analysis_prompt(query, combined)

        full_response = ""
        for chunk in self.llm.stream(analysis_prompt):
            piece = chunk.content
            print(piece, end='', flush=True)
            full_response += piece

        return full_response

In [44]:
# Cell 6: Simplified Fact Checker
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableSequence

class EnhancedLegalFactChecker:
    """Four-step claim verifier: analyse, fact-check, search precedents, write a report.

    Steps 1 and 4 go through two prompt-plus-LLM chains built at construction time;
    steps 2 and 3 call tools looked up by name in ``tools``.
    """

    def __init__(self, llm, tools):
        """Keep the LLM and tool list, then build the two prompt chains."""
        self.llm = llm
        self.tools = tools  # searched by name in verify_claim
        self._create_analysis_chains()

    def _create_analysis_chains(self):
        """Build ``claim_analysis_chain`` and ``final_report_chain`` (prompt piped into the LLM)."""

        # Prompt for the initial breakdown of the claim.
        claim_prompt = PromptTemplate(
            input_variables=["claim"],
            template="""Analyze this legal claim:

Claim: {claim}

Provide analysis of:
1. Main legal assertion
2. Jurisdiction
3. Legal concepts involved
4. Key facts claimed
5. Potential issues"""
        )
        self.claim_analysis_chain = claim_prompt | self.llm

        # Prompt for the closing report; it only lays out the three inputs.
        report_prompt = PromptTemplate(
            input_variables=["claim", "analysis", "evidence"],
            template="""Generate a legal fact-checking report:

CLAIM: {claim}

ANALYSIS: {analysis}

EVIDENCE: {evidence}


"""
        )
        self.final_report_chain = report_prompt | self.llm

    def _tool_named(self, name: str):
        """Return the first tool in ``self.tools`` whose ``name`` matches, else None."""
        for candidate in self.tools:
            if candidate.name == name:
                return candidate
        return None

    def verify_claim(self, claim: str) -> Dict[str, Any]:
        """Run the four verification steps for ``claim`` and return a results dict.

        The dict always has ``claim``, ``timestamp``, ``steps`` and
        ``verification_complete``. On success it also has ``final_report``; if any
        step raises, it has ``error`` (the exception text) and ``steps`` keeps only
        the steps that finished. Nothing is re-raised.
        """
        results = {"claim": claim, "timestamp": datetime.now(), "steps": []}
        completed_steps = results["steps"]

        try:
            # Step 1: Analyze claim
            print("Step 1: Analyzing claim...")
            claim_analysis = self.claim_analysis_chain.invoke({"claim": claim})
            analysis_text = claim_analysis.content
            completed_steps.append({"step": "claim_analysis", "output": analysis_text})

            # Both tools are resolved before either is used; a missing one aborts the run.
            fact_check_tool = self._tool_named("fact_check_legal_claim")
            legal_search_tool = self._tool_named("legal_document_search")

            if not fact_check_tool:
                raise ValueError("Fact check tool not found.")
            if not legal_search_tool:
                raise ValueError("Legal document search tool not found.")

            # Step 2: Fact-check
            print("Step 2: Fact-checking claim...")
            fact_check_result = fact_check_tool.invoke(claim)
            completed_steps.append({"step": "fact_checking", "output": fact_check_result})

            # Step 3: Search for precedents
            print("Step 3: Searching for legal precedents...")
            precedent_search = legal_search_tool.invoke(claim)
            completed_steps.append({"step": "precedent_search", "output": precedent_search})

            # Step 4: Generate final report
            print("Step 4: Generating final report...")
            report_inputs = {
                "claim": claim,
                "analysis": analysis_text,
                "evidence": f"Fact check: {fact_check_result}\nPrecedents: {precedent_search}",
            }
            final_report = self.final_report_chain.invoke(report_inputs)

            results["final_report"] = final_report.content
            results["verification_complete"] = True

        except Exception as exc:
            results["error"] = str(exc)
            results["verification_complete"] = False

        return results

In [45]:
# Cell 7: Main Execution
# Tools shared by the fact checker and the research agent.
# (wikipedia_legal_concepts is commented out above, so it is not listed.)
all_tools = [legal_document_search, google_legal_search, fact_check_legal_claim]

# Build both components on the same LLM and the same tool list.
print("Initializing Legal Research System...")
fact_checker = EnhancedLegalFactChecker(tools=all_tools, llm=llm)
legal_research_agent = EnhancedLegalResearchAgent(tools=all_tools, llm=llm)
print("System initialized successfully!")

# Sample claim used for the demo run.
test_claim = (
    "The Supreme Court of India in Kesavananda Bharati case established that "
    "the basic structure of the Constitution cannot be amended by Parliament."
)

# Execute research
def execute_legal_research(claim, agent, fact_checker):
    """Fact-check a claim, then research it, printing a plain-text report as it goes.

    Args:
        claim: Claim text handed to both components.
        agent: Object exposing ``research(claim)``; the mapping it returns must
            contain a ``"summary"`` entry.
        fact_checker: Object exposing ``verify_claim(claim)``; the mapping it returns
            may contain ``"final_report"`` and/or ``"error"``.

    Returns:
        dict with the keys ``"claim"``, ``"fact_check_result"`` and
        ``"research_result"`` (the latter two are the raw component results).
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 80

    print("\n" + heavy_rule)
    print("LEGAL RESEARCH REPORT")
    print(heavy_rule)
    print(f"\nCLAIM: {claim}")
    print("\n" + light_rule)

    # Fact-checking
    print("\nFACT-CHECKING PROCESS:")
    fact_result = fact_checker.verify_claim(claim)

    if fact_result.get("final_report"):
        print("\nFact Check Report:")
        print(fact_result["final_report"])
    elif fact_result.get("error"):
        # verify_claim reports failures through an "error" entry instead of raising.
        # Print it so a failed fact check is visible rather than silently skipped.
        print(f"\nFact check failed: {fact_result['error']}")

    # Legal Research
    print("\n" + light_rule)
    print("LEGAL RESEARCH:")
    research_result = agent.research(claim)

    # NOTE(review): EnhancedLegalResearchAgent already echoes the analysis to stdout
    # while streaming it, so with that agent the summary is printed a second time here.
    print("\nResearch Summary:")
    print(research_result["summary"])

    return {
        "claim": claim,
        "fact_check_result": fact_result,
        "research_result": research_result
    }

# Run the end-to-end demo on the sample claim; the returned dict is kept in `results`.
print("\nStarting Legal Analysis...")
results = execute_legal_research(
    claim=test_claim,
    agent=legal_research_agent,
    fact_checker=fact_checker,
)

# Closing banner.
closing_rule = "=" * 80
print("\n" + closing_rule)
print("ANALYSIS COMPLETE")
print(closing_rule)

Initializing Legal Research System...
System initialized successfully!

Starting Legal Analysis...

LEGAL RESEARCH REPORT

CLAIM: The Supreme Court of India in Kesavananda Bharati case established that the basic structure of the Constitution cannot be amended by Parliament.

--------------------------------------------------------------------------------

FACT-CHECKING PROCESS:
Step 1: Analyzing claim...
Step 2: Fact-checking claim...
Step 3: Searching for legal precedents...
Step 4: Generating final report...

Fact Check Report:
**Legal Fact-Checking Report:**

**Claim:** The Supreme Court of India in Kesavananda Bharati case established that the basic structure of the Constitution cannot be amended by Parliament.

**Analysis:**

The claim is analyzed based on the main legal assertion, jurisdiction, legal concepts involved, key facts claimed, and potential issues.

**Main Legal Assertion:** The main legal assertion is that the Supreme Court of India, in the Kesavananda Bharati case, e