In [2]:
import os
from typing import TypedDict, Annotated, List, Literal, Dict, Any
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
from langchain_core.tools import tool
from langgraph.graph import StateGraph, END, MessagesState
from langgraph.checkpoint.memory import MemorySaver
import random
from datetime import datetime

import pymupdf4llm

import chromadb
from chromadb.config import Settings
from chromadb.utils import embedding_functions

In [3]:
import os
from dotenv import load_dotenv
# Populate the process environment from a local .env file (python-dotenv) so the
# os.environ lookups in later cells (GROQ_API_KEY, QDRANT_URL, QDRANT_API_KEY)
# can resolve. The call's return value is what the cell displays.
load_dotenv()

True

In [3]:
from langchain.chat_models import init_chat_model

# Single chat model instance shared by the supervisor and the worker agents.
# The "groq:" prefix selects the provider; the key is read from the environment.
llm = init_chat_model(
    model="groq:llama-3.1-8b-instant",
    api_key=os.getenv("GROQ_API_KEY"),
)
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x00000289D1EF5850>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x00000289D221F650>, model_name='llama-3.1-8b-instant', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [4]:
class SupervisorState(MessagesState):
    """State for the multi-agent system"""
    # Every key an agent node returns is declared here so it is part of the
    # graph state; keys missing from the schema are presumably not carried
    # between nodes (NOTE(review): confirm against the LangGraph version in use).
    # The literal defaults are kept from the original cell; nodes read the state
    # with ``state.get(key, fallback)`` rather than relying on them.

    # Routing / bookkeeping written by the supervisor and the workers.
    next_agent: str = ""
    current_task: str = ""
    task_complete: bool = False

    # Data extractor output: one dict per document section
    # (vendor_text / standard_text / similarity_score).
    extracted_data: List[Dict[str, Any]]

    # Analyst output: ``analysis`` is the textual form the supervisor checks for
    # progress, ``discrepancies`` the parsed per-section findings.
    analysis: str = ""
    discrepancies: List[Dict[str, Any]]

    # Writer output: ``report`` is what the supervisor checks for completion;
    # the remaining keys are the structured pieces the writer also returns.
    report: str = ""
    final_report: str = ""
    corrected_sections: List[Dict[str, Any]]
    full_report_sections: List[Dict[str, Any]]

In [None]:
from langchain_core.prompts import ChatPromptTemplate
def create_supervisor_chain():
    """Build the supervisor's routing chain: a chat prompt piped into ``llm``.

    The prompt expects four variables when invoked: ``task``, ``has_data``,
    ``has_analysis`` and ``has_report``.
    """
    # Written line by line with explicit newlines so whitespace-only and
    # trailing-space content of the prompt is visible and preserved.
    system_text = (
        "You are a supervisor managing a team of agents:\n"
        "        \n"
        "1. Data_Extractor - Extract information from various sources\n"
        "2. Analyst - Analyzes data and finding discrepancies in two sources of data \n"
        "3. Writer - Creates report on how to adjust the discrepancies.\n"
        "\n"
        "Based on the current state and conversation, decide which agent should work next.\n"
        "If the task is complete, respond with 'DONE'.\n"
        "\n"
        "Current state:\n"
        "- Has data: {has_data}\n"
        "- Has analysis: {has_analysis}\n"
        "- Has report: {has_report}\n"
        "\n"
        "Respond with ONLY the agent name (data_extractor/analyst/writer) or 'DONE'.\n"
    )

    routing_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_text),
            ("human", "{task}"),
        ]
    )

    return routing_prompt | llm

In [None]:
def supervisor_agent(state: SupervisorState) -> Dict:
    """Supervisor decides next agent using LLM

    Reads the progress flags from the state, asks the supervisor chain which
    agent should run next, and combines the answer with the flags so that an
    agent is never scheduled before its prerequisites exist.

    Returns a state update with a supervisor status message, ``next_agent``
    ("Data_Extractor", "Analyst", "Writer" or "end") and ``current_task``.
    """
    messages = state["messages"]

    # The task is the most recent *human* message. Taking messages[-1] would pick
    # up an agent's status message on every hop after the first and overwrite
    # current_task, which downstream agents read as the document path / title.
    task = next(
        (msg.content for msg in reversed(messages) if isinstance(msg, HumanMessage)),
        None,
    )
    if task is None:
        task = state.get("current_task") or (
            messages[-1].content if messages else "No task"
        )

    # Check what's been completed
    has_data = bool(state.get("extracted_data", ""))
    has_analysis = bool(state.get("analysis", ""))
    has_report = bool(state.get("report", ""))

    # Get LLM decision
    chain = create_supervisor_chain()
    decision = chain.invoke({
        "task": task,
        "has_data": has_data,
        "has_analysis": has_analysis,
        "has_report": has_report
    })

    # Parse decision
    decision_text = decision.content.strip().lower()
    print(decision_text)

    # The text is lower-cased above, so it must be matched against lower-case
    # agent names. Spaces and hyphens are folded to underscores so answers such
    # as "Data Extractor" are recognised too.
    choice = decision_text.replace(" ", "_").replace("-", "_")

    # Determine next agent. Branch order enforces prerequisites: extraction
    # before analysis, analysis before writing.
    if "done" in choice or has_report:
        next_agent = "end"
        supervisor_msg = "✅ Supervisor: All tasks complete! Great work team."
    elif "data_extractor" in choice or not has_data:
        next_agent = "Data_Extractor"
        supervisor_msg = "📋 Supervisor: Let's start with data extraction. Assigning to Data_Extractor..."
    elif "analyst" in choice or (has_data and not has_analysis):
        next_agent = "Analyst"
        supervisor_msg = "📋 Supervisor: Data extraction done. Time for analysis. Assigning to Analyst..."
    elif "writer" in choice or (has_analysis and not has_report):
        next_agent = "Writer"
        supervisor_msg = "📋 Supervisor: Analysis complete. Let's create the report. Assigning to Writer..."
    else:
        next_agent = "end"
        supervisor_msg = "✅ Supervisor: Task seems complete."

    return {
        "messages": [AIMessage(content=supervisor_msg)],
        "next_agent": next_agent,
        "current_task": task
    }

In [4]:
from langchain_openai import OpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient

# Qdrant connection; URL and API key come from the environment.
qdrant_client = QdrantClient(
    api_key=os.getenv("QDRANT_API_KEY"),
    url=os.getenv("QDRANT_URL"),
)

# Embedding model used to vectorise queries sent to the store.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

# Handle on the existing "insurance-policies" collection; the agents and helper
# functions below query it through similarity search.
vector_store = QdrantVectorStore(
    collection_name="insurance-policies",
    embedding=embeddings,
    client=qdrant_client,
)

In [None]:
@tool
def document_splitting(path: str) -> List[str]:
    """Split the pdf saved in the given location into sections."""
    # Docstring kept short on purpose: @tool uses it as the tool description.
    import re

    document_text = pymupdf4llm.to_markdown(doc=path)
    # Cut at every newline that is followed by '#'. The lookahead keeps the '#'
    # in the section, whereas str.split("\n#") removed one heading marker from
    # every section after the first.
    chunks = re.split(r"\n(?=#)", document_text)
    # Drop empty / whitespace-only pieces so they are never sent for embedding.
    return [chunk for chunk in chunks if chunk.strip()]

@tool
def data_retrieving(sections: List[str]) -> List[Dict[str, Any]]:
    """For each item from the given list, retrieve the most similar sections from vector DB."""
    # Each result pairs the query section with the hits returned for it (k=1, so
    # at most one hit). The return annotation now reflects these dicts; it
    # previously claimed a list of strings.
    return [
        {
            "section": section,
            "retrieved_data": vector_store.similarity_search(section, k=1),
        }
        for section in sections
    ]

In [None]:
def document_splitting(path: str) -> List[str]:
    """Split the pdf saved in the given location into sections.

    The PDF is converted to markdown and cut at every line that starts with
    '#', so each returned chunk begins with its own heading (the first chunk
    holds whatever precedes the first such line).

    Args:
        path: Location of the PDF file.

    Returns:
        The non-empty markdown sections, in document order.
    """
    import re

    document_text = pymupdf4llm.to_markdown(doc=path)
    # Lookahead split: the '#' stays with its section. str.split("\n#") consumed
    # it, stripping one heading marker from every section after the first.
    chunks = re.split(r"\n(?=#)", document_text)
    # Whitespace-only chunks carry no content to search for.
    return [chunk for chunk in chunks if chunk.strip()]

def data_retrieving(sections: List[str]) -> List[List[Any]]:
    """For each item from the given list, retrieve the most similar sections from vector DB.

    Args:
        sections: Text sections to look up.

    Returns:
        One entry per input section, in the same order. Each entry is the list
        returned by ``vector_store.similarity_search(section, k=1)``, i.e. at
        most one matching document (not a plain string, as the previous
        annotation suggested).
    """
    return [vector_store.similarity_search(section, k=1) for section in sections]

In [None]:
# Manual check of the two helpers on a sample policy PDF; the path is relative
# to the notebook's working directory.
result = document_splitting("Legal Expenses Cover Policy Wording.pdf")
# One similarity lookup per section produced above.
final= data_retrieving(result)
# Dumps every retrieved document; the next cell prints just the first one.
print(final)

[[Document(metadata={'country': 'UK', 'source': 'FCA Consumer Duty Final Rules', '_id': '67c7d10a-60c9-41bd-94e4-85aa00281edb', '_collection_name': 'insurance-policies'}, page_content="<chunk_context>Annex 1 provides a comprehensive list of non-confidential respondents to the Consultation Paper CP21/36 regarding the new Consumer Duty proposed by the Financial Conduct Authority (FCA). This section illustrates the diverse range of stakeholders, including financial institutions, consumer organizations, and industry associations, who provided feedback on the FCA's draft rules and guidance. The inputs from these respondents play a critical role in shaping the final rules focused on improving customer outcomes and ensuring robust consumer protection in the financial services sector.</chunk_context>\n<chunk>## Annex 1 List of non‑confidential respondents to CP21/36\n\n\nabrdn plc\n\n\nAccess to Insurance Working Group (A2I Group)\n\n\nAFS Compliance\n\n\nAJ Bell\n\n\nAmigo\n\n\nAmplified Glob

In [None]:
# First retrieved document for the first section of the PDF.
print(final[0][0])

page_content='<chunk_context>Annex 1 provides a comprehensive list of non-confidential respondents to the Consultation Paper CP21/36 regarding the new Consumer Duty proposed by the Financial Conduct Authority (FCA). This section illustrates the diverse range of stakeholders, including financial institutions, consumer organizations, and industry associations, who provided feedback on the FCA's draft rules and guidance. The inputs from these respondents play a critical role in shaping the final rules focused on improving customer outcomes and ensuring robust consumer protection in the financial services sector.</chunk_context>
<chunk>## Annex 1 List of non‑confidential respondents to CP21/36


abrdn plc


Access to Insurance Working Group (A2I Group)


AFS Compliance


AJ Bell


Amigo


Amplified Global Ltd


Association for Financial Markets in Europe (AFME)


Association of British Credit Unions Limited (ABCUL)


Association of British Insurers (ABI)


Association of Finance Brokers (A

: 

In [None]:
def data_extractor_agent(state: SupervisorState) -> Dict:
    """Extracts vendor doc sections and retrieves the closest reference text for each section.

    ``current_task`` from the state is used as the location of the document to
    split. Every non-blank section is looked up in ``vector_store`` and paired
    with its best match.

    Returns a state update with a status message, ``extracted_data`` (a list of
    dicts with ``vendor_text``, ``standard_text`` and ``similarity_score``) and
    ``next_agent``.
    """
    file_location = state.get("current_task", "")
    sections = document_splitting(file_location)

    extracted_data = []
    for section in sections:
        # Nothing meaningful to search for in a blank section.
        if not section.strip():
            continue

        # similarity_search() returns bare documents, which carry no ``score``
        # attribute; the *_with_score variant yields (document, score) pairs.
        matches = vector_store.similarity_search_with_score(section, k=1)
        if matches:
            best_document, best_score = matches[0]
            standard_text = best_document.page_content
        else:
            standard_text, best_score = "", None

        extracted_data.append({
            "vendor_text": section,
            "standard_text": standard_text,
            "similarity_score": best_score
        })

    agent_message = f"🔍 Data Extractor: Extracted {len(extracted_data)} sections with related standards."

    return {
        "messages": [AIMessage(content=agent_message)],
        "extracted_data": extracted_data,
        "next_agent": "analyst"
    }


In [None]:
def _extract_json_list(text):
    """Return the list of JSON objects found in ``text``, or None if there is none."""
    import json

    # Try the whole reply first, then the outermost [...] span, which copes with
    # markdown code fences or commentary around the payload.
    candidates = [text.strip()]
    start, end = text.find("["), text.rfind("]")
    if 0 <= start < end:
        candidates.append(text[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, list) and all(isinstance(item, dict) for item in parsed):
            return parsed
    return None


def analyst_agent(state: SupervisorState) -> Dict:
    """Analyzes vendor doc sections against construction standards and finds discrepancies in structured format.

    Sends ``extracted_data`` from the state to the LLM with a prompt requesting a
    JSON list, then parses the reply leniently.

    Returns a state update with:
    - ``messages``: a status message (a warning if the reply could not be parsed)
    - ``discrepancies``: the parsed list of per-section findings, or []
    - ``analysis``: the raw LLM reply, which is the key the supervisor checks to
      see that analysis has happened
    - ``next_agent``: "supervisor"
    """
    extracted_data = state.get("extracted_data", [])

    # Force JSON structure
    analysis_prompt = f"""You are a compliance analyst. 
Compare each vendor document section with the related construction standard and return STRICT JSON.

For each section, output a JSON object with:
- "section_id": index number
- "vendor_text": vendor's text snippet
- "standard_text": matched construction standard
- "discrepancies": list of identified mismatches or missing items
- "risk_level": "low" | "medium" | "high"
- "recommendations": list of corrective actions

Input data:
{extracted_data}

Return ONLY a JSON list of objects (no extra text).
"""

    analysis_response = llm.invoke([HumanMessage(content=analysis_prompt)])

    # Message content may be structured rather than a plain string; normalise it.
    content = analysis_response.content
    raw_text = content if isinstance(content, str) else str(content)

    parsed = _extract_json_list(raw_text)
    if parsed is None:
        # Report the failure instead of passing it off as "0 sections".
        discrepancies = []
        agent_message = "📊 Analyst: Could not parse the analysis as a JSON list; no structured discrepancies recorded."
    else:
        discrepancies = parsed
        agent_message = f"📊 Analyst: Discrepancy analysis complete for {len(discrepancies)} sections."

    return {
        "messages": [AIMessage(content=agent_message)],
        "discrepancies": discrepancies,
        "analysis": raw_text,
        "next_agent": "supervisor"
    }



In [None]:
def _load_json_list(text):
    """Return the list of JSON objects found in ``text``, or None if there is none."""
    import json

    # Whole text first, then the outermost [...] span, to tolerate code fences
    # or commentary around the payload.
    candidates = [text.strip()]
    start, end = text.find("["), text.rfind("]")
    if 0 <= start < end:
        candidates.append(text[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, list) and all(isinstance(item, dict) for item in parsed):
            return parsed
    return None


def writer_agent(state: SupervisorState) -> Dict:
    """Writer produces a full compliance report with both compliant and non-compliant sections.
    Non-compliant sections are rewritten to meet standards.

    Findings are read from ``discrepancies`` in the state; if that is empty, the
    ``analysis`` text is parsed as a JSON list instead. Sections with a
    non-empty "discrepancies" entry are sent to the LLM for rewriting. Flagged
    sections for which no rewrite comes back stay in the report, unrewritten.

    Returns a state update with the status message, ``corrected_sections``,
    ``full_report_sections``, the report text under both ``final_report`` and
    ``report`` (the key the supervisor checks for completion), ``next_agent``
    and ``task_complete``.
    """
    import json

    discrepancies = state.get("discrepancies") or []
    if not discrepancies:
        # Fall back to the textual analysis when no structured findings exist.
        discrepancies = _load_json_list(state.get("analysis") or "") or []
    # Only dict entries can be inspected with .get() below.
    discrepancies = [entry for entry in discrepancies if isinstance(entry, dict)]
    task = state.get("current_task", "Vendor Document Compliance Report")

    # Separate compliant and flagged sections
    flagged_sections = [d for d in discrepancies if d.get("discrepancies")]
    compliant_sections = [d for d in discrepancies if not d.get("discrepancies")]

    corrected_sections = []
    if flagged_sections:
        # Create rewriting prompt for only flagged ones
        rewrite_prompt = f"""You are a professional technical writer. 
Rewrite ONLY the vendor sections that have discrepancies so they comply with construction standards.

For each flagged section, return STRICT JSON with:
- "section_id": id
- "original_vendor_text": vendor text
- "standard_text": reference standard
- "discrepancies": list
- "rewritten_vendor_text": corrected and compliant rewrite

Flagged sections:
{flagged_sections}

Return ONLY a JSON list of objects (no extra commentary)."""

        rewrite_response = llm.invoke([HumanMessage(content=rewrite_prompt)])
        content = rewrite_response.content
        rewrite_text = content if isinstance(content, str) else str(content)
        corrected_sections = _load_json_list(rewrite_text) or []

    # Merge compliant + corrected into one structured report
    full_sections_report = []

    # Add compliant
    for section in compliant_sections:
        full_sections_report.append({
            "section_id": section.get("section_id"),
            "original_vendor_text": section.get("vendor_text"),
            "standard_text": section.get("standard_text"),
            "discrepancies": [],
            "rewritten_vendor_text": section.get("vendor_text")  # unchanged
        })

    # Add corrected
    full_sections_report.extend(corrected_sections)

    # Keep flagged sections the LLM did not rewrite (unparseable or partial
    # reply) so known problems never silently drop out of the report. Ids are
    # compared as strings because the model may return 1 or "1".
    rewritten_ids = {str(section.get("section_id")) for section in corrected_sections}
    for section in flagged_sections:
        if str(section.get("section_id")) not in rewritten_ids:
            full_sections_report.append({
                "section_id": section.get("section_id"),
                "original_vendor_text": section.get("vendor_text"),
                "standard_text": section.get("standard_text"),
                "discrepancies": section.get("discrepancies"),
                "rewritten_vendor_text": None  # no rewrite available
            })

    # Build final compliance report
    final_report = f"""
📄 COMPLIANCE REPORT
{'='*60}
Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}
Task: {task}
{'='*60}

Executive Summary:
This report reviews all vendor document sections against construction standards. 
Compliant sections are listed unchanged. Non-compliant sections have been corrected to align with standards.

Detailed Sections:
{json.dumps(full_sections_report, indent=2)}

{'='*60}
Report compiled by Multi-Agent AI System powered by Groq
"""

    return {
        "messages": [AIMessage(content="✍️ Writer: Full compliance report (compliant + corrected) is ready!")],
        "corrected_sections": corrected_sections,   # only the rewritten ones
        "full_report_sections": full_sections_report,  # all sections
        "final_report": final_report,
        "report": final_report,  # key the supervisor inspects to detect completion
        "next_agent": "supervisor",
        "task_complete": True
    }
