# Single Agent Architecture
## ├── Tool 1: Generator (architectural analysis)
## └── Tool 2: Judge (scoring + pass/fail)

In [1]:
# Install the required libraries
!pip install -q langchain_openai==0.3.33 langchain==0.3.27 langchain-community==0.3.24 serpapi google-search-results


In [2]:
#imports
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from typing import List

In [3]:
import os
from google.colab import userdata

# Copy each Colab secret into the environment variable of the same name.
for secret_name in ("OPENAI_API_KEY", "SERPAPI_API_KEY", "OPENAI_API_BASE"):
    os.environ[secret_name] = userdata.get(secret_name)

In [4]:

#Initialize the LLM
#model_client_llm = OpenAI(temperature=0.4,
 #                model="gpt-4o-mini",
  #               openai_api_key=os.environ["OPENAI_API_KEY"],
   #              openai_api_base=os.environ["OPENAI_API_BASE"])

In [5]:
from langchain_openai import ChatOpenAI

def create_model_client(temperature: float):
    """Build a ChatOpenAI client with the requested sampling temperature.

    The model name is fixed to "gpt-4o-mini". The API key and base URL are
    read from the OPENAI_API_KEY and OPENAI_API_BASE environment variables
    each time this function is called.
    """
    client_settings = {
        "model": "gpt-4o-mini",  # or whatever is available to you
        "temperature": temperature,
        "openai_api_key": os.environ["OPENAI_API_KEY"],
        "openai_api_base": os.environ["OPENAI_API_BASE"],
    }
    return ChatOpenAI(**client_settings)


# Model Client (LangChain-native)

# Initialization complete

# Define the tools

## Tool 1: generate_analysis

### Purpose:
Generate the architectural analysis using the guardrailed single-agent prompt.

This tool does not decide quality. It only produces content.

In [6]:
from langchain_core.messages import SystemMessage, HumanMessage


def generate_analysis(model_client, user_context: str) -> str:
    """
    Generates a structured, opinionated analysis comparing
    Single-Agent vs Multi-Agent systems.

    Args:
        model_client: Chat model object exposing ``invoke(messages)`` whose
            result has a ``content`` attribute (e.g. the client returned by
            ``create_model_client``).
        user_context: Optional extra context appended to the task prompt.
            May be an empty string.

    Returns:
        The ``content`` of the model's response.
    """

    system_prompt = (
        """
        You are writing a Medium article for senior engineers, founders, and AI practitioners.

        The reader is impatient, skeptical, and allergic to generic explanations.
        They expect clear opinions, concrete tradeoffs, and hard-earned lessons.

        Write with authority, not neutrality.
        Avoid academic framing, buzzwords, and cautious hedging.
        Prefer short paragraphs, crisp transitions, and decisive language.

        Assume the reader already knows basic AI concepts.
        Your job is not to educate beginners, but to sharpen judgment.

        """
    )

    task_prompt = (
        "Produce a structured, opinionated analysis titled:\n\n"
        "Single-Agent vs. Multi-Agent Systems: A Comparison\n\n"
        "Strictly follow this section order and headings:\n"
        "1. Single-Agent vs. Multi-Agent Systems: A Comparison\n"
        "2. The Core Question: One Agent or Many\n"
        "3. Examples of Single-Agent Systems\n"
        "4. Examples of Multi-Agent Systems\n"
        "5. Decision Framework: Choosing the Right Agent Architecture\n"
        "6. Implementation Strategy\n"
        "7. Key Warning Signs for Architecture Change\n"
        "8. Conclusion\n\n"
        "Rules:\n"
        "- No emojis\n"
        "- No hype or visionary language\n"
        "- No 'it depends' without a hard constraint\n"
        "- Every section must mention at least one downside\n"
        "- Assume the reader is technical and skeptical\n\n"
        # The trailing blank line matters: adjacent literals are concatenated,
        # so without it the next section header would be glued onto the end
        # of the user-supplied context.
        f"Additional context (if any):\n{user_context}\n\n"
        "Formatting rules for Medium:\n"
        "- Use Markdown headings (##) only\n"
        "- Keep paragraphs to 2–4 lines max\n"
        "- Avoid numbered lists unless comparison is essential\n"
        "- Use short lead sentences under each heading\n"
        "- Remove repetitive summary statements\n"
        "- Write as if this is an opinionated essay, not documentation\n"
        "Examples must:\n"
        "- Reflect real production or architectural patterns\n"
        "- Avoid brand-name-dropping unless it adds insight\n"
        "- Explicitly state what breaks in practice\n"
        "Generic or textbook examples are not acceptable.\n"
        "The conclusion must:\n"
        "- Take a strong position\n"
        "- Call out a common industry mistake\n"
        "- End with a clear recommendation, not a summary\n"
    )

    messages = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=task_prompt),
    ]

    response = model_client.invoke(messages)

    return response.content


## Tool 2: judge_output

### Purpose:
Score the generated output using LLM-as-judge and enforce pass/fail.

This tool never edits content. It only evaluates.

In [7]:
import json
from langchain_core.messages import SystemMessage, HumanMessage


def _extract_json_object(raw_text: str) -> str:
    """Return the outermost ``{...}`` span of *raw_text*, or the text unchanged.

    Models often wrap JSON in a Markdown code fence or add a short preamble;
    slicing from the first ``{`` to the last ``}`` tolerates both.
    """
    start = raw_text.find("{")
    end = raw_text.rfind("}")
    if start == -1 or end <= start:
        return raw_text
    return raw_text[start:end + 1]


def _enforce_pass_rule(result: dict) -> dict:
    """Recompute ``total_score`` and ``verdict`` from the per-dimension scores.

    Applies the rubric stated in the judge prompt (hard fail on any 0, pass
    only at a total of 20 or more) in code instead of trusting the model's
    arithmetic. If the scores are missing or non-numeric, the verdict is
    forced to "FAIL" because the result cannot be verified.
    """
    pass_threshold = 20
    scores = result.get("scores")
    values = list(scores.values()) if isinstance(scores, dict) else []
    usable = bool(values) and all(
        isinstance(v, (int, float)) and not isinstance(v, bool) for v in values
    )

    if not usable:
        result["verdict"] = "FAIL"
        return result

    total = sum(values)
    passed = total >= pass_threshold and all(v > 0 for v in values)
    result["total_score"] = total
    result["verdict"] = "PASS" if passed else "FAIL"
    return result


def judge_output(model_client, generated_text: str) -> dict:
    """
    Evaluates generated output using a strict LLM-as-judge rubric.

    Args:
        model_client: Chat model object exposing ``invoke(messages)`` whose
            result has a ``content`` attribute holding the judge's reply.
        generated_text: The text to be scored.

    Returns:
        The judge's JSON reply parsed into a dict, with ``total_score`` and
        ``verdict`` recomputed from ``scores`` so the pass/fail rule is
        enforced deterministically.

    Raises:
        ValueError: If the reply contains no parseable JSON object
            (``json.JSONDecodeError`` is a ``ValueError`` subclass) or the
            parsed JSON is not an object.
    """

    system_prompt = (
        "You are an uncompromising AI architecture reviewer.\n\n"
        "Your job is to judge whether the generated output demonstrates:\n"
        "- Architectural clarity\n"
        "- Decision discipline\n"
        "- Production realism\n\n"
        "You do not reward verbosity or neutrality.\n"
        "Penalize vague language, hedging, architectural hype, "
        "or multi-agent evangelism without justification."
    )

    judge_prompt = (
        "Score the response below on a scale of 0 to 5 for each dimension:\n\n"
        "1. Conceptual Clarity\n"
        "2. Decision Sharpness\n"
        "3. Example Quality\n"
        "4. Engineering Realism\n"
        "5. Anti-Hype Discipline\n\n"
        "Rules:\n"
        "- Hard fail if any score is 0\n"
        "- Pass only if total score >= 20/25\n"
        "- Output JSON only, with this schema:\n\n"
        "{\n"
        "  \"scores\": {\n"
        "    \"conceptual_clarity\": int,\n"
        "    \"decision_sharpness\": int,\n"
        "    \"example_quality\": int,\n"
        "    \"engineering_realism\": int,\n"
        "    \"anti_hype_discipline\": int\n"
        "  },\n"
        "  \"total_score\": int,\n"
        "  \"verdict\": \"PASS\" | \"FAIL\",\n"
        "  \"lowest_scoring_dimension\": str,\n"
        "  \"justification\": str\n"
        "}\n\n"
        "Response to evaluate:\n"
        f"{generated_text}"
    )

    messages = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=judge_prompt),
    ]

    response = model_client.invoke(messages)

    # The prompt asks for bare JSON, but replies may still arrive fenced or
    # with surrounding text, so isolate the object before parsing.
    parsed = json.loads(_extract_json_object(response.content))
    if not isinstance(parsed, dict):
        raise ValueError(f"Judge reply is not a JSON object: {parsed!r}")

    return _enforce_pass_rule(parsed)


# Execution of Tools

In [8]:
# Generator model
generator_llm = create_model_client(temperature=0.2)

# Judge model (stricter)
judge_llm = create_model_client(temperature=0.0)

analysis = generate_analysis(generator_llm, user_context="")

judgement = judge_output(judge_llm, analysis)

# Fail closed: accept the analysis only on an explicit "PASS". A missing or
# unexpected verdict (e.g. "fail", "REJECT") is treated as a rejection rather
# than slipping through an equality check against "FAIL".
verdict = judgement.get("verdict") if isinstance(judgement, dict) else None
if verdict != "PASS":
    raise ValueError(f"Output rejected: {judgement}")

print("Analysis accepted")
print("*" * 75)

print(analysis)

print("*" * 75)

✅ Analysis accepted
***************************************************************************
## Single-Agent vs. Multi-Agent Systems: A Comparison

The choice between single-agent and multi-agent systems is not just academic; it’s a fundamental architectural decision that shapes your project’s success. Each approach has its merits, but they come with distinct trade-offs that can make or break your implementation.

## The Core Question: One Agent or Many

The core question boils down to complexity and scalability. Single-agent systems excel in straightforward tasks, but they struggle with scalability and adaptability. Multi-agent systems, on the other hand, can handle complex interactions and dynamic environments, yet they introduce coordination overhead and potential for conflict.

## Examples of Single-Agent Systems

Consider a recommendation engine that uses a single model to analyze user behavior and suggest products. This approach is efficient and easy to deploy, but it lacks th