<a href="https://colab.research.google.com/github/prem-cre/Multirag/blob/main/MultimodalAgents.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Cell 1: Install Required Libraries
!pip install -qU langchain langchain_groq langchain_huggingface
!pip install -qU langchain-community langchain-google-community
!pip install -qU tavily-python wikipedia-api beautifulsoup4 requests
!pip install -qU tiktoken
!pip install -qU lxml[html_clean]
!pip install -qU faiss-cpu pypdf tiktoken tavily-python
!pip install -qU wikipedia # Added wikipedia package

In [None]:

# Cell 2: Core Imports and Configuration
import hashlib
import json
import os
import re
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import List, Dict, Any, Optional

from google.colab import userdata
from langchain.agents import create_react_agent, AgentExecutor
from langchain.memory import ConversationBufferWindowMemory
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.tools import tool
from langchain_google_community import GoogleSearchAPIWrapper
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from tavily import TavilyClient

# Copy the API keys from Colab's `userdata` store into environment variables.
# Each pair is (environment variable name, userdata secret name).
for env_var, secret_name in (
    ("GROQ_API_KEY", 'groq_api_key'),
    ("TAVILY_API_KEY", 'tavily'),
    ("GOOGLE_API_KEY", 'GOOGLE_API_KEY'),
    ("GOOGLE_CSE_ID", 'GOOGLE_CSE_ID'),
):
    os.environ[env_var] = userdata.get(secret_name)

# Module-level service objects shared by the cells below.
llm = ChatGroq(model_name="llama-3.1-8b-instant", temperature=0.1)
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
tavily_client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
google_search_wrapper = GoogleSearchAPIWrapper(k=7)
wikipedia_wrapper = WikipediaAPIWrapper(top_k_results=3, doc_content_chars_max=1000)
wikipedia_tool = WikipediaQueryRun(api_wrapper=wikipedia_wrapper)

print("✅ All services initialized successfully!")

In [None]:
# @title Enhanced Data Models for Legal Research

class SourceCredibility(Enum):
    """Credibility tier assigned to a source.

    Each member's value is the lowercase form of its name. ``LegalSource``
    stores one of these members in its ``credibility`` field.
    """
    OFFICIAL = "official"
    ACADEMIC = "academic"
    REPUTABLE = "reputable"
    GENERAL = "general"
    UNVERIFIED = "unverified"

@dataclass
class LegalSource:
    """A single retrieved source together with its descriptive metadata.

    ``hash`` is a short fingerprint of ``content``. Unless the caller passes
    one explicitly, ``__post_init__`` sets it to the first 8 hex characters of
    the MD5 digest of the UTF-8 encoded content.
    """
    url: str
    title: str
    content: str
    credibility: SourceCredibility
    # date_accessed: datetime = field(default_factory=datetime.now)
    # date_published: Optional[str] = None
    author: Optional[str] = None
    jurisdiction: Optional[str] = None
    citation: Optional[str] = None
    hash: Optional[str] = None
    relevance_score: float = 0.0

    def __post_init__(self):
        """Fill in the content fingerprint when the caller did not supply one."""
        if self.hash is None:
            # MD5 serves only as a cheap fingerprint here, not for security.
            # `or ""` keeps a missing/None body from raising AttributeError.
            digest = hashlib.md5((self.content or "").encode("utf-8")).hexdigest()
            self.hash = digest[:8]

@dataclass
class EvidenceItem:
    """One entry in the evidence chain: a claim plus the sources backing it.

    The ``contradictions`` default relies on ``dataclasses.field`` being
    imported so that every instance gets its own empty list.
    """
    claim: str  # the statement this evidence item is about
    supporting_sources: List[LegalSource]  # sources offered in support of the claim
    confidence_score: float  # NOTE(review): presumably in [0, 1] - confirm against producers
    reasoning: str  # free-text explanation of how the evidence was obtained
    contradictions: List[Dict[str, Any]] = field(default_factory=list)  # fresh list per instance
    verification_status: str = "pending"  # starts as "pending"
    legal_basis: Optional[str] = None

@dataclass
class LegalResearchResult:
    """Structured outcome of one research query.

    Bundles the original query, the narrative summary, the evidence chain and
    the supporting lists (precedents, jurisdiction notes, confidence figures,
    citations) into one object. All fields are required.
    """
    query: str  # the research question as submitted
    summary: str  # narrative answer text
    evidence_chain: List[EvidenceItem]  # evidence items collected during research
    legal_precedents: List[Dict[str, Any]]  # one dict per precedent
    jurisdictional_notes: Dict[str, str]  # note label -> note text
    confidence_assessment: Dict[str, float]  # metric name -> score
    citations: List[str]  # citation strings
    # timestamp: datetime = field(default_factory=datetime.now)

In [None]:
# Cell 3: Define Enhanced Research Tools

@tool
def legal_document_search(query: str, jurisdiction: str = "Indian") -> str:
    """
    Search legal documents, cases, and statutes with enhanced Indian law focus.
    Returns relevant legal information with proper citations.
    """
    # The docstring above is left untouched because LangChain's @tool uses it
    # as the model-facing tool description.
    #
    # Returns a JSON string: a list of ranked result dicts (rank, title, url,
    # content, score, snippet) on success, or an {"error": ...} object if the
    # search or formatting fails.
    try:
        # Enhanced search query for Indian legal context
        legal_query = f"{jurisdiction} law legal {query} case judgment statute act"

        results = tavily_client.search(
            query=legal_query,
            search_depth="advanced",
            max_results=8,
            include_domains=["indiankanoon.org", "scconline.com", "lawmin.gov.in", "legislative.gov.in"],
        )

        formatted_results = []
        for rank, r in enumerate(results.get('results', []), start=1):
            # A hit whose 'content' is null or missing must not abort the whole
            # search: normalise to '' before slicing the snippet.
            content = r.get('content') or ''
            formatted_results.append({
                'rank': rank,
                'title': r.get('title'),
                'url': r.get('url'),
                'content': content,
                'score': r.get('score', 0),
                'snippet': content[:200] + "..."
            })

        return json.dumps(formatted_results, indent=2)
    except Exception as e:
        return json.dumps({"error": f"Error searching legal documents: {str(e)}"})

@tool
def google_legal_search(query: str) -> str:
    """
    Calls Google Search API for specialized legal search focusing on Indian legal databases.
    Searches specifically in indiankanoon.org and scconline.com domains.
    """
    # The docstring above is left untouched because LangChain's @tool uses it
    # as the model-facing tool description.
    #
    # Returns a JSON string: a list of {title, snippet, link, source} dicts on
    # success, or an {"error": ...} object on failure. The error is JSON too,
    # so callers can always json.loads() the result, as with the sibling tools.
    try:
        # Add site restrictions for Indian legal databases
        search_query = f"{query} site:indiankanoon.org OR site:scconline.com"
        search_results = google_search_wrapper.results(search_query, num_results=10)

        formatted_results = [
            {
                'title': res.get('title', 'N/A'),
                'snippet': res.get('snippet', 'N/A'),
                'link': res.get('link', 'N/A'),
                'source': 'Google Search - Indian Legal Databases'
            }
            for res in search_results
        ]

        return json.dumps(formatted_results, indent=2)
    except Exception as e:
        return json.dumps({"error": f"Error calling Google Search: {str(e)}"})

@tool
def wikipedia_legal_concepts(query: str) -> str:
    """
    Searches Wikipedia for legal concepts, landmark cases, and constitutional matters.
    Provides background information and case summaries.
    """
    # The docstring above is left untouched because LangChain's @tool uses it
    # as the model-facing tool description.
    #
    # Returns a JSON string: {query, content, source, type} on success, or an
    # {"error": ...} object on failure. The error is JSON too, so callers can
    # always json.loads() the result, as with the sibling tools.
    try:
        wiki_result = wikipedia_tool.invoke(query)

        # Cap the body at 1000 characters and mark the truncation.
        if len(wiki_result) > 1000:
            content = wiki_result[:1000] + "..."
        else:
            content = wiki_result

        result = {
            'query': query,
            'content': content,
            'source': 'Wikipedia',
            'type': 'Background Information'
        }

        return json.dumps(result, indent=2)
    except Exception as e:
        return json.dumps({"error": f"Error calling Wikipedia: {str(e)}"})

@tool
def verify_legal_citation(citation: str) -> str:
    """
    Verify Indian legal citations and retrieve case details.
    Supports formats like: AIR 2020 SC 123, (2020) 5 SCC 456, etc.
    """
    # The docstring above is left untouched because LangChain's @tool uses it
    # as the model-facing tool description.
    #
    # Returns a JSON string. 'verified' is True only when the citation matches
    # a known format AND the search returns at least one real link. Otherwise
    # 'verified' is False and 'error' says which of the two checks failed.
    try:
        # Indian citation patterns. Parentheses are escaped so "(2020)" is
        # matched literally rather than treated as a regex group.
        indian_patterns = [
            r'AIR\s+\d{4}\s+\w+\s+\d+',  # AIR citations
            r'\(\d{4}\)\s+\d+\s+SCC\s+\d+',  # SCC citations
            r'\d{4}\s+\(\d+\)\s+\w+\s+\d+',  # Other law reports
            r'\w+\s+v\.\s+\w+.*\d{4}',  # Case name with year
        ]

        format_recognised = any(
            re.search(pattern, citation, re.IGNORECASE) for pattern in indian_patterns
        )
        if not format_recognised:
            return json.dumps({
                'citation': citation,
                'verified': False,
                'error': 'Citation format not recognized for Indian legal system'
            })

        # Use Google Search to verify the citation
        search_query = f'"{citation}" site:indiankanoon.org OR site:scconline.com'
        search_results = google_search_wrapper.results(search_query, num_results=3)

        # Keep only rows that carry a link. The wrapper can return a
        # placeholder row without one when nothing was found, and that must
        # not count as verification.
        hits = [res for res in (search_results or []) if res.get('link')]
        if not hits:
            return json.dumps({
                'citation': citation,
                'verified': False,
                'error': 'No matching results found in Indian legal databases'
            })

        year_match = re.search(r'\d{4}', citation)
        case_info = {
            'citation': citation,
            'verified': True,
            'sources': [res.get('link') for res in hits],
            'case_name': hits[0].get('title', ''),
            'summary': hits[0].get('snippet', ''),
            'court': 'Indian Court',
            'year': year_match.group(0) if year_match else 'Unknown'
        }
        return json.dumps(case_info, indent=2)
    except Exception as e:
        return json.dumps({"error": f"Error verifying citation: {str(e)}"})

@tool
def fact_check_legal_claim(claim: str) -> str:
    """
    Comprehensive fact-checking of legal claims with Indian law focus.
    Returns detailed verification with multiple sources.
    """
    # The docstring above is left untouched because LangChain's @tool uses it
    # as the model-facing tool description.
    #
    # Returns a JSON string with the claim, supporting and contradicting
    # evidence lists, a coarse confidence label ('high'/'medium'/'low') and a
    # count summary, or an {"error": ...} object on failure.

    def summarize_hits(hits):
        # One compact dict per search hit. `or ''` guards against a null or
        # missing 'content', which would otherwise raise on slicing and
        # discard the whole fact-check.
        return [
            {
                'source': r.get('url'),
                'title': r.get('title', ''),
                'excerpt': (r.get('content') or '')[:200],
                'relevance': r.get('score', 0)
            }
            for r in hits
        ]

    try:
        # Enhanced queries for Indian legal context
        supporting_query = f'"{claim}" Indian law legal valid true correct Supreme Court High Court'
        contradicting_query = f'"{claim}" Indian law legal invalid false incorrect exception limitation'

        supporting = tavily_client.search(supporting_query, max_results=6)
        contradicting = tavily_client.search(contradicting_query, max_results=4)

        supporting_hits = supporting.get('results', [])
        contradicting_hits = contradicting.get('results', [])

        # Confidence is derived purely from the number of hits on each side.
        support_count = len(supporting_hits)
        contradict_count = len(contradicting_hits)

        if support_count > contradict_count * 2:
            confidence = 'high'
        elif support_count > contradict_count:
            confidence = 'medium'
        else:
            confidence = 'low'

        result = {
            'claim': claim,
            'supporting_evidence': summarize_hits(supporting_hits),
            'contradicting_evidence': summarize_hits(contradicting_hits),
            'confidence': confidence,
            'verification_summary': f"Found {support_count} supporting and {contradict_count} contradicting sources"
        }

        return json.dumps(result, indent=2)
    except Exception as e:
        return json.dumps({"error": f"Error fact-checking claim: {str(e)}"})

@tool
def extract_legal_precedents(case_text: str) -> str:
    """
    Extract Indian legal precedents and cited cases from text.
    Identifies AIR, SCC, and other Indian law report citations.
    """
    # The docstring above is left untouched because LangChain's @tool uses it
    # as the model-facing tool description.
    #
    # Returns a JSON string with the number of distinct citations found, up to
    # 15 citations, up to 10 "X v. Y" case names and a fixed jurisdiction
    # label, or an {"error": ...} object on failure.
    try:
        # Indian legal citation patterns. Parentheses are escaped so "(2020)"
        # is matched literally rather than treated as a regex group.
        citation_patterns = [
            r'AIR\s+\d{4}\s+\w+\s+\d+',  # AIR citations
            r'\(\d{4}\)\s+\d+\s+SCC\s+\d+',  # SCC citations
            r'\d{4}\s+\(\d+\)\s+\w+\s+\d+',  # Other reports
            r'[\w\s]+v\.\s+[\w\s]+,?\s*\(\d{4}\)',  # Case names with year
            r'[\w\s]+vs\.\s+[\w\s]+,?\s*\(\d{4}\)',  # Alternative format
        ]

        precedents = []
        for pattern in citation_patterns:
            precedents.extend(re.findall(pattern, case_text, re.IGNORECASE))

        # Extract case names separately
        case_name_pattern = r'([\w\s]+)\s+v[s]?\.\s+([\w\s]+)'
        case_names = [
            f"{left.strip()} v. {right.strip()}"
            for left, right in re.findall(case_name_pattern, case_text, re.IGNORECASE)
        ]

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # truncated lists are the same on every run (set order is not stable).
        unique_precedents = list(dict.fromkeys(precedents))
        unique_cases = list(dict.fromkeys(case_names))[:10]

        return json.dumps({
            'precedents_found': len(unique_precedents),
            'citations': unique_precedents[:15],
            'case_names': unique_cases,
            'jurisdiction': 'Indian Legal System'
        }, indent=2)
    except Exception as e:
        return json.dumps({"error": f"Error extracting precedents: {str(e)}"})

@tool
def analyze_source_credibility(url: str) -> str:
    """
    Analyze credibility of legal sources with Indian law website recognition.
    """
    # The docstring above is left untouched because LangChain's @tool uses it
    # as the model-facing tool description.
    #
    # Returns a JSON string. For a host that equals a known domain or is a
    # subdomain of one: credibility tier, description, trusted=True and
    # jurisdiction. For any other host: a 'GENERAL', trusted=False assessment.
    # On failure: an {"error": ...} object.
    try:
        domain_credibility = {
            # Indian Legal Sources
            'indiankanoon.org': ('OFFICIAL', 'Indian Kanoon - Comprehensive Indian Case Law Database'),
            'scconline.com': ('OFFICIAL', 'Supreme Court Cases Online - Authoritative Legal Database'),
            'lawmin.gov.in': ('OFFICIAL', 'Ministry of Law and Justice, Government of India'),
            'legislative.gov.in': ('OFFICIAL', 'Indian Legislative Department'),
            'supremecourtofindia.nic.in': ('OFFICIAL', 'Supreme Court of India Official Website'),
            'doj.gov.in': ('OFFICIAL', 'Department of Justice, India'),
            'mca.gov.in': ('OFFICIAL', 'Ministry of Corporate Affairs'),
            'incometaxindia.gov.in': ('OFFICIAL', 'Income Tax Department of India'),

            # Academic Sources
            'nluj.ac.in': ('ACADEMIC', 'National Law University'),
            'nls.ac.in': ('ACADEMIC', 'National Law School of India University'),

            # International Legal Sources
            'law.cornell.edu': ('REPUTABLE', 'Cornell Law School - US Legal Information'),
            'justia.com': ('REPUTABLE', 'Justia - Free Law & Legal Information'),

            # News Sources
            'livelaw.in': ('REPUTABLE', 'Live Law - Indian Legal News'),
            'barandbench.com': ('REPUTABLE', 'Bar and Bench - Legal News India'),
        }
        # Jurisdiction is stated explicitly rather than guessed from the
        # domain text: only the entries listed under "International Legal
        # Sources" above are international, every other entry is Indian.
        international_domains = {'law.cornell.edu', 'justia.com'}

        from urllib.parse import urlparse
        # Accept bare "example.org/path" input: without a scheme urlparse
        # would put the host into the path and leave the host empty.
        parsed = urlparse(url if '://' in url else f'//{url}')
        domain = (parsed.hostname or '').lower()

        # Match the host exactly or as a subdomain. A plain substring test
        # would also trust look-alike hosts such as
        # "indiankanoon.org.evil.com" or "notindiankanoon.org".
        for known_domain, (cred_level, description) in domain_credibility.items():
            if domain == known_domain or domain.endswith('.' + known_domain):
                return json.dumps({
                    'url': url,
                    'domain': domain,
                    'credibility': cred_level,
                    'description': description,
                    'trusted': True,
                    'jurisdiction': 'International' if known_domain in international_domains else 'Indian'
                }, indent=2)

        # Default assessment
        return json.dumps({
            'url': url,
            'domain': domain,
            'credibility': 'GENERAL',
            'trusted': False,
            'note': 'Unknown source - requires independent verification',
            'recommendation': 'Cross-reference with official Indian legal databases'
        }, indent=2)
    except Exception as e:
        return json.dumps({"error": f"Error analyzing source: {str(e)}"})

In [None]:
# @title Enhanced Legal Research Agent with Structured Output (Fixed)

class EnhancedLegalResearchAgent:
    """Tool-using legal research agent that returns a structured result.

    Wraps an LLM and a list of LangChain tools in an ``AgentExecutor`` with a
    sliding-window conversation memory, then converts the executor's raw
    output (final answer plus intermediate tool calls) into a
    ``LegalResearchResult``.
    """

    # Citation shapes pulled out of the agent's final answer, e.g.
    # "AIR 2020 SC 123", "(2020) 5 SCC 456" and "X v. Y ... (2020)".
    # Parentheses are escaped so they match literally.
    _CITATION_PATTERNS = (
        r'AIR\s+\d{4}\s+\w+\s+\d+',
        r'\(\d{4}\)\s+\d+\s+SCC\s+\d+',
        r'\w+\s+v\.\s+\w+.*?\(\d{4}\)',
    )

    def __init__(self, llm, tools):
        """Build the prompt, the agent and its executor.

        Args:
            llm: chat model used by the agent; it must support tool calling.
            tools: LangChain tools the agent is allowed to call.
        """
        # Imported here so this cell is self-contained with respect to the
        # notebook's import cell.
        from langchain.agents import create_tool_calling_agent

        self.llm = llm
        self.tools = tools
        # The executor returns both 'output' and 'intermediate_steps', so the
        # memory must be told explicitly which key holds the answer to store.
        self.memory = ConversationBufferWindowMemory(
            memory_key="chat_history",
            input_key="input",
            output_key="output",
            return_messages=True,
            k=10
        )

        # Get tool names for the prompt
        tool_names = [t.name for t in tools]
        tool_descriptions = "\n".join([f"- {t.name}: {t.description}" for t in tools])

        # Enhanced comprehensive prompt with structured output requirements
        self.prompt = ChatPromptTemplate.from_messages([
            ("system", """You are an expert Indian legal research assistant with comprehensive knowledge of Indian law, statutes, and legal procedures. You provide thorough, well-structured legal analysis.

## YOUR APPROACH TO LEGAL RESEARCH:

1. **Query Analysis**: Identify legal issues, applicable laws, jurisdiction, and key legal concepts
2. **Comprehensive Research**: Search relevant cases, statutes, acts, and legal documents
3. **Source Verification**: Verify credibility and authenticity of all sources
4. **Precedent Analysis**: Extract and analyze relevant legal precedents and landmark cases
5. **Fact Verification**: Cross-check all legal claims against multiple authoritative sources
6. **Evidence Synthesis**: Build a logical chain of evidence with proper legal reasoning
7. **Confidence Assessment**: Evaluate the strength and reliability of findings

## AVAILABLE TOOLS:
{tool_descriptions}

You have access to the following tools: {tool_names}

## STRUCTURED OUTPUT FORMAT:

Your final analysis MUST be structured with these EXACT headings:

### 1. LEGAL ISSUE IDENTIFICATION
- Primary legal question(s)
- Applicable areas of law
- Relevant jurisdiction(s)
- Key legal concepts involved

### 2. APPLICABLE LAWS AND STATUTES
- Relevant Acts and Sections
- Constitutional provisions (if applicable)
- Regulatory frameworks
- State-specific laws (if applicable)

### 3. JUDICIAL PRECEDENTS AND CASE LAW
- Landmark cases
- Supreme Court judgments
- High Court decisions
- Foreign precedents (if persuasive)

### 4. LEGAL ANALYSIS AND INTERPRETATION
- Detailed legal reasoning
- Application of law to facts
- Conflicting interpretations (if any)
- Expert opinions and commentaries

### 5. CONCLUSIONS AND RECOMMENDATIONS
- Summary of findings
- Legal position clarity
- Risk assessment
- Recommended course of action

### 6. CITATIONS AND REFERENCES
- Minimum 5-6 authoritative citations
- Format: Case name, Citation, Court, Year
- Include statutory references
- Academic sources (if used)

## IMPORTANT INSTRUCTIONS:
- Always cite specific sections of Acts
- Verify every citation before including
- Distinguish between binding and persuasive precedents
- Note any recent amendments or changes in law
- Highlight any conflicting judgments
- Provide confidence level for each conclusion

Use the tools systematically to gather comprehensive information before providing your structured analysis."""),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{input}"),
            MessagesPlaceholder(variable_name="agent_scratchpad")
        ])

        # Partial the prompt with tool information
        self.prompt = self.prompt.partial(
            tool_names=", ".join(tool_names),
            tool_descriptions=tool_descriptions,
            tools=tool_descriptions  # For backward compatibility
        )

        # The prompt carries `agent_scratchpad` as a MessagesPlaceholder, which
        # needs a list of messages. A ReAct agent supplies the scratchpad as a
        # plain string and fails on this prompt, so a tool-calling agent, which
        # supplies messages, is used instead.
        self.agent = create_tool_calling_agent(
            llm=self.llm,
            tools=self.tools,
            prompt=self.prompt
        )

        self.executor = AgentExecutor(
            agent=self.agent,
            tools=self.tools,
            memory=self.memory,
            verbose=True,
            max_iterations=20,
            handle_parsing_errors=True,
            return_intermediate_steps=True  # needed by _parse_research_results
        )

    def research(self, query: str) -> LegalResearchResult:
        """Run the agent on ``query`` and return a structured result.

        Any exception raised by the executor or the parser is printed and
        turned into a ``LegalResearchResult`` whose summary carries the error
        text and whose overall confidence is 0.0, so this method never raises.
        """
        try:
            # The executor expects its input as a dictionary.
            result = self.executor.invoke({"input": query})

            # Parse and structure the results
            return self._parse_research_results(query, result)
        except Exception as e:
            print(f"Error in legal research: {str(e)}")
            return LegalResearchResult(
                query=query,
                summary=f"Error conducting research: {str(e)}",
                evidence_chain=[],
                legal_precedents=[],
                jurisdictional_notes={},
                confidence_assessment={"overall": 0.0},
                citations=[]
            )

    @staticmethod
    def _load_observation(tool_name, observation):
        """Decode a tool observation as JSON; log and return None if it is not JSON."""
        try:
            return json.loads(observation)
        except (json.JSONDecodeError, TypeError) as e:
            # TypeError covers observations that are not str/bytes at all.
            print(f"Error parsing {tool_name} observation: {e} - Observation: {observation}")
            return None

    def _parse_research_results(self, query: str, raw_result: Dict) -> LegalResearchResult:
        """Turn the executor's raw output into a ``LegalResearchResult``.

        Args:
            query: the original research question.
            raw_result: executor output; 'output' (final answer) and
                'intermediate_steps' ((action, observation) pairs) are read.

        Returns:
            A result whose evidence chain, precedents and citations are
            rebuilt from the tool observations, plus citations matched by
            regex in the final answer. At most 10 distinct citations are kept,
            in first-seen order.
        """
        output = raw_result.get('output', '')
        intermediate_steps = raw_result.get('intermediate_steps', [])

        evidence_chain = []
        legal_precedents = []
        citations = []

        for action, observation in intermediate_steps:
            # Steps whose action has no `tool` attribute are ignored.
            tool_name = getattr(action, 'tool', None)

            if tool_name == 'legal_document_search':
                results = self._load_observation(tool_name, observation)
                # An error payload is a dict, not a list: nothing to collect.
                if not isinstance(results, list):
                    continue
                for r in results:
                    if not isinstance(r, dict):
                        continue
                    evidence_chain.append(EvidenceItem(
                        claim=f"Found: {r.get('title', 'Unknown')}",
                        supporting_sources=[LegalSource(
                            url=r.get('url', ''),
                            title=r.get('title', ''),
                            # `or ''` keeps a null body from breaking hashing.
                            content=r.get('content') or '',
                            credibility=SourceCredibility.GENERAL,
                            relevance_score=r.get('score', 0.5)
                        )],
                        confidence_score=r.get('score', 0.5),
                        reasoning="Legal document search result",
                        legal_basis=r.get('snippet', '')
                    ))

            elif tool_name == 'extract_legal_precedents':
                precedents_data = self._load_observation(tool_name, observation)
                if not isinstance(precedents_data, dict):
                    continue
                extracted = precedents_data.get('citations', [])
                citations.extend(extracted)
                # NOTE(review): these citations are only regex-extracted, yet
                # they are flagged 'verified': True - confirm this is intended.
                legal_precedents.extend(
                    {
                        'citation': citation,
                        'verified': True,
                        'jurisdiction': 'Indian'
                    }
                    for citation in extracted
                )

            elif tool_name == 'verify_legal_citation':
                citation_data = self._load_observation(tool_name, observation)
                if isinstance(citation_data, dict) and citation_data.get('verified'):
                    citations.append(citation_data.get('citation'))

        # Extract citations from the final output using regex
        for pattern in self._CITATION_PATTERNS:
            citations.extend(re.findall(pattern, output, re.IGNORECASE))

        # Drop empty entries and duplicates while keeping first-seen order, so
        # the 10 citations kept are the same on every run.
        citations = list(dict.fromkeys(c for c in citations if c))[:10]

        return LegalResearchResult(
            query=query,
            summary=output,
            evidence_chain=evidence_chain,
            legal_precedents=legal_precedents,
            jurisdictional_notes={
                'primary': 'Indian Legal System',
                'applicable': 'Federal and State laws as applicable',
                'limitations': 'Analysis based on available public legal databases'
            },
            # NOTE(review): fixed placeholder figures, not computed from the
            # evidence gathered above.
            confidence_assessment={
                'overall': 0.85,
                'source_quality': 0.9,
                'completeness': 0.8
            },
            citations=citations
        )

In [None]:
# @title Enhanced Multi-Step Fact-Checking Process with LangChain Chains

from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate

class EnhancedLegalFactChecker:
    """Multi-step fact-checking pipeline for a single legal claim.

    Combines three LLM chains (claim analysis, evidence synthesis, final
    report) with the module-level search and credibility tools, and records
    every step's output in the returned dictionary.
    """

    def __init__(self, llm, tools):
        """Store the model and tool list and build the analysis chains.

        Args:
            llm: language model used by all three chains.
            tools: tool list kept on the instance; the pipeline itself calls
                the module-level tool functions directly.
        """
        self.llm = llm
        self.tools = tools

        # Create specialized chains for different steps
        self._create_analysis_chains()

    def _create_analysis_chains(self):
        """Create the three LLM chains used by ``verify_claim``."""

        # Chain for initial claim analysis
        self.claim_analysis_chain = LLMChain(
            llm=self.llm,
            prompt=PromptTemplate(
                input_variables=["claim"],
                template="""Analyze this legal claim in detail:

Claim: {claim}

Provide a structured analysis with:
1. **Main Legal Assertion**: What is the core legal claim?
2. **Jurisdiction**: Which legal system/jurisdiction applies?
3. **Legal Concepts**: What legal principles are involved?
4. **Factual Elements**: What specific facts are claimed?
5. **Potential Issues**: Any ambiguities or concerns?

Format your response as a detailed legal analysis."""
            )
        )

        # Chain for synthesizing evidence
        self.evidence_synthesis_chain = LLMChain(
            llm=self.llm,
            prompt=PromptTemplate(
                input_variables=["evidence", "claim"],
                template="""Synthesize the following evidence for the legal claim:

Claim: {claim}

Evidence Found:
{evidence}

Provide:
1. **Strength of Evidence**: How strong is the supporting evidence?
2. **Contradictions**: Any conflicting information?
3. **Gaps**: What information is missing?
4. **Overall Assessment**: Your professional legal opinion

Be thorough and cite specific sources."""
            )
        )

        # Chain for final verification report
        self.final_report_chain = LLMChain(
            llm=self.llm,
            prompt=PromptTemplate(
                input_variables=["claim", "analysis", "evidence", "confidence"],
                template="""Generate a comprehensive legal fact-checking report:

**CLAIM UNDER REVIEW**: {claim}

**INITIAL ANALYSIS**: {analysis}

**EVIDENCE SUMMARY**: {evidence}

**CONFIDENCE LEVEL**: {confidence}

Structure your report with these sections:

## 1. EXECUTIVE SUMMARY
- Brief overview of findings
- Verification status (Verified/Partially Verified/Unverified/False)

## 2. DETAILED LEGAL ANALYSIS
- Applicable laws and statutes
- Relevant case law
- Legal principles involved

## 3. EVIDENCE EVALUATION
- Supporting evidence strength
- Contradicting evidence analysis
- Source credibility assessment

## 4. LEGAL CITATIONS
- List all relevant citations found
- Include case names, citations, and years

## 5. CONCLUSION AND CONFIDENCE ASSESSMENT
- Final determination
- Confidence percentage with reasoning
- Recommendations for further verification if needed

Ensure all citations follow proper legal citation format."""
            )
        )

    def verify_claim(self, claim: str) -> Dict[str, Any]:
        """Run the full fact-checking pipeline for ``claim``.

        Returns:
            A dict with 'claim', 'timestamp' and 'steps' (one entry per
            completed step). On success it also holds 'final_report',
            'confidence_score' and 'verification_complete' = True. If any step
            raises, or the fact-check tool reports an error, the dict holds
            'error', 'verification_complete' = False and
            'confidence_score' = 0.0 instead. This method never raises.
        """
        results = {
            "claim": claim,
            "timestamp": datetime.now(),
            "steps": []
        }

        try:
            # Step 1: Initial Claim Analysis using LangChain
            print("Step 1: Analyzing claim structure...")
            claim_analysis = self.claim_analysis_chain.run(claim=claim)
            results["steps"].append({
                "step": "claim_analysis",
                "output": claim_analysis,
                "timestamp": datetime.now()
            })

            # Step 2: Fact-check the claim using tool
            print("Step 2: Fact-checking claim...")
            fact_check_result = fact_check_legal_claim.invoke(claim)
            fact_check_data = json.loads(fact_check_result)
            results["steps"].append({
                "step": "fact_checking",
                "output": fact_check_data,
                "timestamp": datetime.now()
            })
            # A failed search must not be scored as "no evidence either way"
            # (0.5 confidence): surface the tool's error instead.
            if isinstance(fact_check_data, dict) and "error" in fact_check_data:
                raise RuntimeError(fact_check_data["error"])

            # Step 3: Verify sources credibility
            print("Step 3: Verifying source credibility...")
            credible_sources = []
            all_sources = (
                fact_check_data.get('supporting_evidence', []) +
                fact_check_data.get('contradicting_evidence', [])
            )

            for source in all_sources[:10]:  # Limit to 10 sources
                if source.get('source'):
                    cred_result = analyze_source_credibility.invoke(source['source'])
                    credible_sources.append(json.loads(cred_result))

            results["steps"].append({
                "step": "source_verification",
                "output": credible_sources,
                "timestamp": datetime.now()
            })

            # Step 4: Search for legal precedents
            print("Step 4: Searching for legal precedents...")
            # Multi-argument tools take their arguments as one dict; a second
            # positional argument to invoke() is the runnable config, not a
            # tool argument.
            precedent_search = legal_document_search.invoke(
                {"query": claim, "jurisdiction": "Indian"}
            )
            precedent_data = json.loads(precedent_search)
            results["steps"].append({
                "step": "precedent_search",
                "output": precedent_data,
                "timestamp": datetime.now()
            })

            # Step 5: Synthesize evidence
            print("Step 5: Synthesizing evidence...")
            evidence_summary = json.dumps({
                "supporting": len(fact_check_data.get('supporting_evidence', [])),
                "contradicting": len(fact_check_data.get('contradicting_evidence', [])),
                "credible_sources": len([s for s in credible_sources if s.get('trusted', False)]),
                # An error payload is a dict rather than a list; count it as 0.
                "precedents_found": len(precedent_data) if isinstance(precedent_data, list) else 0
            })

            synthesis = self.evidence_synthesis_chain.run(
                evidence=evidence_summary,
                claim=claim
            )
            results["steps"].append({
                "step": "evidence_synthesis",
                "output": synthesis,
                "timestamp": datetime.now()
            })

            # Calculate confidence (no progress message is printed for this).
            confidence = self._calculate_confidence(fact_check_data, credible_sources)

            # Generate final report
            print("Step 6: Generating final report...")
            final_report = self.final_report_chain.run(
                claim=claim,
                analysis=claim_analysis,
                evidence=synthesis,
                confidence=f"{confidence:.2%}"
            )

            results["final_report"] = final_report
            results["confidence_score"] = confidence
            results["verification_complete"] = True

        except Exception as e:
            results["error"] = str(e)
            results["verification_complete"] = False
            results["confidence_score"] = 0.0

        return results

    def _calculate_confidence(self, fact_check_data: Dict, credible_sources: List[Dict]) -> float:
        """Score confidence from evidence counts and trusted-source count.

        The base score is the share of supporting items among all evidence
        items. Each trusted source adds 0.02, up to a total boost of 0.2, and
        the final score is capped at 0.95. With no evidence at all the score
        is 0.5.
        """
        supporting = len(fact_check_data.get('supporting_evidence', []))
        contradicting = len(fact_check_data.get('contradicting_evidence', []))
        credible_count = len([s for s in credible_sources if s.get('trusted', False)])

        if supporting + contradicting == 0:
            return 0.5

        # Weighted confidence calculation
        base_confidence = supporting / (supporting + contradicting)
        credibility_boost = min(0.2, credible_count * 0.02)

        return min(0.95, base_confidence + credibility_boost)

In [None]:
# @title Create Summary Chain for Final Output
from langchain.chains.summarize import load_summarize_chain
from langchain_core.prompts import PromptTemplate

def create_summary_chain(llm):
    """Build a ``map_reduce`` summarization chain with legal-focused prompts.

    One prompt is supplied for the map stage and another for the combine
    stage; the chain is also asked to return its intermediate steps.

    Args:
        llm: The language model handed to ``load_summarize_chain``.

    Returns:
        Whatever ``load_summarize_chain`` returns for this configuration.
    """
    # Prompt passed as `map_prompt`.
    per_document_prompt = PromptTemplate(
        input_variables=["text"],
        template="""Summarize the following legal information:
{text}

Focus on:
- Key legal points
- Important citations
- Relevant precedents""",
    )

    # Prompt passed as `combine_prompt`.
    merge_prompt = PromptTemplate(
        input_variables=["text"],
        template="""Combine these legal summaries into a comprehensive overview:
{text}

Provide:
1. Main legal findings
2. Critical citations
3. Overall conclusion""",
    )

    chain_options = {
        "chain_type": "map_reduce",
        "return_intermediate_steps": True,
        "map_prompt": per_document_prompt,
        "combine_prompt": merge_prompt,
    }
    return load_summarize_chain(llm, **chain_options)

In [52]:
# @title Main Execution with Enhanced Output

# Collect all tools
# The same tool list is handed to both the research agent and the fact
# checker below. The tool callables themselves are defined elsewhere in the
# notebook, so those cells must have been run first.
all_tools = [
    legal_document_search,
    verify_legal_citation,
    fact_check_legal_claim,
    extract_legal_precedents,
    analyze_source_credibility
]

# @title Initialize Enhanced Legal Research System

print("🏛️ Initializing Enhanced Legal Research System...")
print("-" * 50)

# Create the enhanced legal research agent
# (`llm` is the module-level model created in the configuration cell.)
legal_research_agent = EnhancedLegalResearchAgent(llm=llm, tools=all_tools)
print("✅ Legal Research Agent initialized")

# Create the enhanced fact checker
fact_checker = EnhancedLegalFactChecker(llm=llm, tools=all_tools)
print("✅ Legal Fact Checker initialized")

# Create summary chain
# NOTE(review): `summary_chain` is not referenced again in this cell; confirm
# whether a later cell uses it.
summary_chain = create_summary_chain(llm)
print("✅ Summary Chain initialized")

print("-" * 50)
print("🚀 System ready for legal research and fact-checking\n")

# @title Define Test Cases for Legal Research

# Indian law specific test cases
# Maps a short identifier to the claim text that is fed to the fact checker
# and research agent. Only the entry selected further down in this cell is
# actually analysed in a given run.
test_claims = {
    "claim_1": "Under Section 498A of the Indian Penal Code, mental cruelty by husband or his relatives is a cognizable and non-bailable offense.",

    "claim_2": "In India, a registered will always supersedes an unregistered will regardless of the date of execution.",

    "claim_3": "The Supreme Court of India in Kesavananda Bharati case established that the basic structure of the Constitution cannot be amended by Parliament.",

    "claim_4": "Under the Indian Contract Act 1872, an agreement without consideration is void except in certain circumstances specified in Section 25.",

    "claim_5": "The Right to Information Act, 2005 mandates that all government information must be provided within 30 days of request without any exceptions."
}

# @title Execute Comprehensive Legal Research

def _print_fact_check_steps(steps):
    """Print a short digest for every recorded fact-checking step."""
    for i, step in enumerate(steps, 1):
        step_kind = step["step"]
        output = step.get("output")
        step_name = step_kind.replace("_", " ").title()
        print(f"\n📌 Step {i}: {step_name}")
        print("-" * 50)

        if step_kind == "claim_analysis":
            text = str(output)
            print(text[:500] + "..." if len(text) > 500 else text)
        elif step_kind == "fact_checking":
            if isinstance(output, dict):
                print(f"✓ Supporting Evidence: {len(output.get('supporting_evidence') or [])} sources")
                print(f"✗ Contradicting Evidence: {len(output.get('contradicting_evidence') or [])} sources")
                print(f"📊 Initial Confidence: {output.get('confidence', 'N/A')}")
            else:
                # A step may carry something other than the expected dict
                # (e.g. a raw message); show it rather than crash on `.get`.
                print(output)
        elif step_kind == "source_verification":
            if isinstance(output, list):
                credible = sum(
                    1 for s in output
                    if isinstance(s, dict) and s.get('trusted', False)
                )
                print(f"🔐 Credible Sources Verified: {credible}/{len(output)}")
            else:
                print(output)
        elif step_kind == "precedent_search":
            if isinstance(output, list):
                print(f"📚 Legal Precedents Found: {len(output)}")
            else:
                print("📚 Searching for legal precedents...")


def _print_structured_sections(output_text):
    """Print each expected '### N. TITLE' section found in the report text."""
    section_titles = [
        "1. LEGAL ISSUE IDENTIFICATION",
        "2. APPLICABLE LAWS AND STATUTES",
        "3. JUDICIAL PRECEDENTS AND CASE LAW",
        "4. LEGAL ANALYSIS AND INTERPRETATION",
        "5. CONCLUSIONS AND RECOMMENDATIONS",
        "6. CITATIONS AND REFERENCES",
    ]

    any_section_found = False
    for section_title in section_titles:
        # A section runs from its heading up to the next '###' or end of text.
        pattern = rf"### {re.escape(section_title)}(.*?)(?=###|$)"
        match = re.search(pattern, output_text, re.DOTALL | re.IGNORECASE)
        print(f"\n### {section_title}")
        print("-" * 50)
        if match:
            any_section_found = True
            content = match.group(1).strip()
            print(content if content else "No specific information found for this section.")
        else:
            print("See comprehensive analysis below.")

    # When nothing could be parsed, show the raw text so the
    # "See comprehensive analysis below." pointer has something to point at.
    if not any_section_found and output_text.strip():
        print("\n### COMPREHENSIVE ANALYSIS")
        print("-" * 50)
        print(output_text.strip())


def _extract_citations_from_text(text):
    """Return citation-like substrings of *text*, de-duplicated in first-seen order."""
    citation_patterns = [
        r'AIR\s+\d{4}\s+\w+\s+\d+',          # AIR 1973 SC 1461
        r'\(\d{4}\)\s+\d+\s+SCC\s+\d+',       # (1973) 4 SCC 225
        r'\w+\s+v\.\s+\w+.*?\(\d{4}\)',       # Party v. Party ... (1973)
        r'\w+\s+vs\.\s+\w+.*?\(\d{4}\)',      # Party vs. Party ... (1973)
    ]
    found = []
    for pattern in citation_patterns:
        found.extend(re.findall(pattern, text, re.IGNORECASE))
    # dict.fromkeys drops repeats while keeping a stable order, so the
    # numbered list is the same on every run.
    return list(dict.fromkeys(found))


def _print_citations(research_result, output_text):
    """Print the agent's citations, or ones recovered from the report text."""
    if research_result.citations:
        print("\n**Verified Legal Citations:**")
        for i, citation in enumerate(research_result.citations[:10], 1):
            print(f"{i}. {citation}")
        return

    # No structured citations: fall back to pattern matching on the text.
    extracted = _extract_citations_from_text(output_text)
    if extracted:
        print("\n**Extracted Legal Citations:**")
        for i, citation in enumerate(extracted[:10], 1):
            print(f"{i}. {citation}")
    else:
        print("\nNo specific citations found in the analysis.")


def execute_legal_research(claim, agent, fact_checker):
    """Fact-check *claim*, research it, and print a structured report.

    The function first calls ``fact_checker.verify_claim(claim)`` and prints a
    digest of its steps, then calls ``agent.research(claim)`` and prints the
    report sections, citations, evidence sources and a final assessment.

    Args:
        claim: The legal claim text to examine.
        agent: Object exposing ``research(claim)``. The result is read for
            ``summary``, ``citations``, ``evidence_chain`` and
            ``confidence_assessment``.
        fact_checker: Object exposing ``verify_claim(claim)`` returning a
            dict; the keys ``steps``, ``error``, ``confidence_score`` and
            ``final_report`` are read when present.

    Returns:
        A dict with the keys ``claim``, ``fact_check_result`` and
        ``research_result``.
    """
    print("\n" + "="*100)
    print("🔍 COMPREHENSIVE LEGAL RESEARCH REPORT")
    print("="*100)
    print(f"\n📋 **CLAIM UNDER EXAMINATION:**\n{claim}")
    print("\n" + "-"*100)

    # Step 1: Fact-checking
    print("\n⚖️ **EXECUTING MULTI-STEP FACT-CHECKING PROCESS...**\n")
    fact_check_result = fact_checker.verify_claim(claim)

    _print_fact_check_steps(fact_check_result.get("steps") or [])

    # Surface a recorded failure instead of silently reporting 0% confidence.
    if fact_check_result.get("error"):
        print(f"\n⚠️ Fact-checking did not complete: {fact_check_result['error']}")

    # Display confidence score
    print(f"\n\n🎯 **FINAL CONFIDENCE SCORE: {fact_check_result.get('confidence_score', 0):.2%}**")

    # Step 2: Legal Research
    print("\n\n" + "-"*100)
    print("📚 **CONDUCTING IN-DEPTH LEGAL RESEARCH...**\n")

    research_result = agent.research(claim)

    # Display structured legal research output
    print("\n" + "="*100)
    print("📑 **STRUCTURED LEGAL ANALYSIS**")
    print("="*100)

    # An absent summary is treated as empty text so the regex parsing below
    # does not raise.
    output_text = research_result.summary or ""
    _print_structured_sections(output_text)

    # Display citations
    print("\n\n" + "="*100)
    print("📖 **LEGAL CITATIONS AND REFERENCES**")
    print("="*100)
    _print_citations(research_result, output_text)

    # Display evidence chain (first supporting source of the first five items)
    if research_result.evidence_chain:
        print("\n\n**Evidence Sources:**")
        for i, evidence in enumerate(research_result.evidence_chain[:5], 1):
            print(f"\n{i}. {evidence.claim}")
            if evidence.supporting_sources:
                source = evidence.supporting_sources[0]
                print(f"   Source: {source.url}")
                print(f"   Credibility: {source.credibility.value}")
                print(f"   Relevance Score: {source.relevance_score:.2f}")

    # Final summary
    print("\n\n" + "="*100)
    print("📊 **FINAL ASSESSMENT**")
    print("="*100)

    if fact_check_result.get("final_report"):
        # Extract executive summary from final report
        exec_summary_match = re.search(
            r"## 1\. EXECUTIVE SUMMARY(.*?)(?=##|$)",
            fact_check_result["final_report"],
            re.DOTALL | re.IGNORECASE
        )
        if exec_summary_match:
            print("\n**Executive Summary:**")
            print(exec_summary_match.group(1).strip())

    print(f"\n**Overall Confidence Level:** {fact_check_result.get('confidence_score', 0):.2%}")
    print(f"**Research Completeness:** {research_result.confidence_assessment.get('completeness', 0):.2%}")
    print(f"**Source Quality:** {research_result.confidence_assessment.get('source_quality', 0):.2%}")

    return {
        "claim": claim,
        "fact_check_result": fact_check_result,
        "research_result": research_result
    }

# @title Run Legal Research on Selected Claim

# Select a claim to analyze
# Change the key here to run the pipeline on a different entry of
# `test_claims`.
selected_claim = test_claims["claim_3"]  # Kesavananda Bharati case claim

print("🏛️ INDIAN LEGAL RESEARCH SYSTEM")
print("="*100)
print(f"\n🔍 Analyzing: {selected_claim}\n")

# Execute the research
# `results` is reused by the opinion, citation-list and save steps that
# follow in this cell.
results = execute_legal_research(selected_claim, legal_research_agent, fact_checker)

# @title Additional Analysis Functions using LangChain

def generate_legal_opinion(llm, claim, research_results):
    """Ask the model for a formal legal opinion on *claim*.

    Args:
        llm: Language model used to build the ``LLMChain``.
        claim: The matter the opinion is about.
        research_results: Mapping with a ``fact_check_result`` dict (its
            ``confidence_score`` is read, defaulting to 0) and a
            ``research_result`` object (the lengths of its ``evidence_chain``
            and ``legal_precedents`` are read).

    Returns:
        The value returned by the chain's ``run`` call.
    """
    opinion_prompt = PromptTemplate(
        input_variables=["claim", "research_summary", "confidence"],
        template="""Based on the comprehensive legal research conducted, provide a formal legal opinion:

**Matter:** {claim}

**Research Summary:** {research_summary}

**Confidence Level:** {confidence}

Please structure your legal opinion as follows:

1. **STATEMENT OF FACTS**
   - Summary of the legal question presented

2. **APPLICABLE LAW**
   - Relevant statutes and regulations
   - Binding precedents

3. **LEGAL ANALYSIS**
   - Application of law to facts
   - Discussion of precedents

4. **OPINION**
   - Clear legal position
   - Potential risks or uncertainties

5. **RECOMMENDATIONS**
   - Suggested course of action
   - Further steps if needed

Maintain professional legal language and cite all authorities."""
    )
    opinion_chain = LLMChain(llm=llm, prompt=opinion_prompt)

    fact_check = research_results['fact_check_result']
    research = research_results['research_result']

    # The same percentage string feeds both the summary and the
    # `confidence` prompt variable.
    confidence_text = f"{fact_check.get('confidence_score', 0):.2%}"

    research_summary = f"""
    Fact-checking confidence: {confidence_text}
    Evidence sources found: {len(research.evidence_chain)}
    Legal precedents identified: {len(research.legal_precedents)}
    """

    return opinion_chain.run(
        claim=claim,
        research_summary=research_summary,
        confidence=confidence_text
    )

# @title Generate Legal Opinion

print("\n\n" + "="*100)
print("📜 FORMAL LEGAL OPINION")
print("="*100)

# Builds the opinion from the claim and the `results` dict produced by the
# research run above, then prints it verbatim.
legal_opinion = generate_legal_opinion(llm, selected_claim, results)
print(legal_opinion)

# @title Create Citation Formatter

def format_citations_properly(citations):
    """Render citations as a bulleted list, one per line.

    Every citation is shown as ``• <citation>`` regardless of its kind (AIR
    reporter, SCC reporter, case name or anything else), so no per-kind
    classification is performed.

    Args:
        citations: Iterable of citations to display, in the desired order.

    Returns:
        The bulleted lines joined with newlines; an empty string when
        *citations* is empty.
    """
    return "\n".join(f"• {citation}" for citation in citations)

# @title Display All Citations in Proper Format

print("\n\n" + "="*100)
print("📚 COMPLETE CITATION LIST")
print("="*100)

all_citations = []

# Collect citations from various sources
if results['research_result'].citations:
    all_citations.extend(results['research_result'].citations)

# Extract from legal precedents (only entries that carry a citation)
for precedent in results['research_result'].legal_precedents:
    if precedent.get('citation'):
        all_citations.append(precedent['citation'])

# Remove duplicates and format.
# dict.fromkeys keeps first-seen order, so the printed list, and which
# entries survive the 15-item cut-off, is the same on every run.
unique_citations = list(dict.fromkeys(all_citations))
if unique_citations:
    print("\n**Legal Authorities Cited:**\n")
    print(format_citations_properly(unique_citations[:15]))  # Limit to 15 citations
else:
    print("\nNo formal citations found in this analysis.")

# @title Save Research Results

def save_research_results(results, filename="legal_research_report.json"):
    """Write a compact JSON digest of a research run to *filename*.

    Only summary figures are stored: counts of steps, evidence sources and
    precedents, the first ten citations, and the jurisdictional notes and
    confidence assessment as they are.

    Args:
        results: Mapping with ``claim``, a ``fact_check_result`` dict and a
            ``research_result`` object, as returned by
            ``execute_legal_research``.
        filename: Destination path; an existing file is overwritten.

    Raises:
        TypeError: If a stored value is not JSON-serializable. In that case
            the destination file is left untouched.
    """
    fact_check = results["fact_check_result"]
    research = results["research_result"]

    # Prepare data for JSON serialization
    save_data = {
        "timestamp": datetime.now().isoformat(),
        "claim": results["claim"],
        "confidence_score": fact_check.get("confidence_score", 0),
        "fact_checking_steps": len(fact_check.get("steps", [])),
        "evidence_sources": len(research.evidence_chain),
        "legal_precedents": len(research.legal_precedents),
        "citations": research.citations[:10] if research.citations else [],
        "jurisdictional_notes": research.jurisdictional_notes,
        "confidence_assessment": research.confidence_assessment
    }

    # Serialize before opening the file: opening in 'w' mode truncates it, so
    # a serialization error raised mid-write would leave a corrupt report
    # (or destroy a previous good one).
    payload = json.dumps(save_data, indent=2)

    with open(filename, 'w', encoding='utf-8') as f:
        f.write(payload)

    print(f"\n✅ Research results saved to {filename}")

# Save the results
# Uses the default filename, so the report is written to
# "legal_research_report.json" in the current working directory.
save_research_results(results)

# Closing banner.
# NOTE(review): these lines are printed unconditionally; they do not inspect
# `results` for a recorded error or an incomplete verification.
print("\n\n" + "="*100)
print("✅ LEGAL RESEARCH COMPLETE")
print("="*100)
print("\nThe enhanced legal research system has successfully:")
print("• Conducted multi-step fact-checking")
print("• Performed comprehensive legal research")
print("• Verified source credibility")
print("• Extracted legal precedents")
print("• Generated structured analysis")
print("• Provided formal legal opinion")
print("• Compiled authoritative citations")
print("\nAll results have been saved for future reference.")

🏛️ Initializing Enhanced Legal Research System...
--------------------------------------------------
✅ Legal Research Agent initialized
✅ Legal Fact Checker initialized
✅ Summary Chain initialized
--------------------------------------------------
🚀 System ready for legal research and fact-checking

🏛️ INDIAN LEGAL RESEARCH SYSTEM

🔍 Analyzing: The Supreme Court of India in Kesavananda Bharati case established that the basic structure of the Constitution cannot be amended by Parliament.


🔍 COMPREHENSIVE LEGAL RESEARCH REPORT

📋 **CLAIM UNDER EXAMINATION:**
The Supreme Court of India in Kesavananda Bharati case established that the basic structure of the Constitution cannot be amended by Parliament.

----------------------------------------------------------------------------------------------------

⚖️ **EXECUTING MULTI-STEP FACT-CHECKING PROCESS...**

Step 1: Analyzing claim structure...
Step 2: Fact-checking claim...
Step 3: Verifying source credibility...
Step 4: Searching for lega