# AutoGen Advanced Patterns: Multi-Modal, Teams, and MCP Integration

This notebook explores advanced AutoGen capabilities:
1. **Multi-modal conversations**: Processing images alongside text
2. **Structured outputs**: Type-safe responses using Pydantic schemas
3. **LangChain tool integration**: Leveraging LangChain's rich ecosystem
4. **Team collaboration**: Multi-agent workflows with round-robin coordination
5. **MCP (Model Context Protocol)**: Anthropic's standard for tool integration

In [None]:
# Import dependencies
from io import BytesIO
import requests
from autogen_agentchat.messages import TextMessage, MultiModalMessage
from autogen_core import Image as AGImage
from PIL import Image
from dotenv import load_dotenv
from autogen_ext.models.openai import OpenAIChatCompletionClient
from autogen_agentchat.agents import AssistantAgent
from autogen_core import CancellationToken
from IPython.display import display, Markdown
from pydantic import BaseModel, Field
from typing import Literal
import textwrap

# Load settings from a local .env file before any client below is constructed.
# NOTE(review): override=True presumably lets values from .env replace variables
# that are already set in the process environment — confirm against python-dotenv.
load_dotenv(override=True)

## Phase 1: Multi-Modal Image Analysis

In [None]:
# Load Image from URL
url = "https://edwarddonner.com/wp-content/uploads/2024/10/from-software-engineer-to-AI-DS.jpeg"

# Fail fast on network trouble: without a timeout the request can block the cell
# indefinitely, and without raise_for_status() an HTTP error page would be handed
# to PIL and surface as a confusing "cannot identify image file" error.
image_response = requests.get(url, timeout=30)
image_response.raise_for_status()

# Decode the downloaded bytes with PIL, then wrap them in AutoGen's image type so
# the picture can be placed inside a MultiModalMessage.
pil_image = Image.open(BytesIO(image_response.content))
img = AGImage(pil_image)

# Bare expression as the last line of the cell so the notebook shows the image.
img

In [None]:
# Create Multi-Modal Message
# The content list pairs the text instruction with the image loaded earlier.
multi_modal_message = MultiModalMessage(
    source="User",
    content=["Describe the content of this image in detail", img],
)

In [None]:
# Vision-Enabled Agent
model_client = OpenAIChatCompletionClient(model="gpt-4o-mini")

describer = AssistantAgent(
    name="image_analyst",
    model_client=model_client,
    system_message="You are skilled at analyzing images and providing detailed descriptions.",
)

response = await describer.on_messages([multi_modal_message], cancellation_token=CancellationToken())
display(Markdown(response.chat_message.content))

## Phase 2: Structured Outputs with Pydantic

In [None]:
# Define Output Schema
# Pydantic model that a later cell passes to AssistantAgent as output_content_type,
# so the agent's reply is an ImageDescription instance instead of free text.
# NOTE(review): the Field descriptions are presumably forwarded to the model as
# part of the JSON schema — treat them as prompt text and confirm before rewording.
class ImageDescription(BaseModel):
    # Short summary of what the picture shows overall.
    scene: str = Field(description="Brief description of the overall scene")
    # The idea or point the picture communicates.
    message: str = Field(description="The point the image is trying to convey")
    # Free-text artistic style.
    style: str = Field(description="The artistic style of the image")
    # Restricted to exactly these three literal values.
    orientation: Literal["portrait", "landscape", "square"] = Field(description="Image orientation")

In [None]:
# Agent with Structured Output
describer = AssistantAgent(
    name="structured_analyst",
    model_client=model_client,
    system_message="Analyze images and provide structured descriptions.",
    output_content_type=ImageDescription,
)

response = await describer.on_messages([multi_modal_message], cancellation_token=CancellationToken())
reply = response.chat_message.content

# Display Structured Output
print(f"Scene: {reply.scene}\n")
print(f"Message: {reply.message}\n")
print(f"Style: {reply.style}\n")
print(f"Orientation: {reply.orientation}")

## Phase 3: LangChain Tool Integration

In [None]:
# Import LangChain Tools
from autogen_ext.tools.langchain import LangChainToolAdapter
from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain_community.agent_toolkits import FileManagementToolkit
from langchain.agents import Tool

# Wrap LangChain Tools
# Expose the Serper search wrapper as a named LangChain Tool, then adapt it so an
# AutoGen agent can call it.
serper = GoogleSerperAPIWrapper()
langchain_serper = Tool(
    name="internet_search",
    description="Search the internet",
    func=serper.run,
)
autogen_serper = LangChainToolAdapter(langchain_serper)

# File Management Tools
# Start from the search tool and add one adapter per file tool; the toolkit is
# rooted at the "sandbox" directory.
langchain_file_tools = FileManagementToolkit(root_dir="sandbox").get_tools()
autogen_tools = [autogen_serper]
autogen_tools.extend(LangChainToolAdapter(file_tool) for file_tool in langchain_file_tools)

# Display Available Tools
for tool in autogen_tools:
    print(f"{tool.name}: {tool.description}")

In [None]:
# Task: Flight Search with File Writing
prompt = """Find a one-way non-stop flight from JFK to LHR in June 2025.
Search online for deals, write them to flights.md, then select the best option."""

agent = AssistantAgent(
    name="flight_researcher", 
    model_client=model_client, 
    tools=autogen_tools, 
    reflect_on_tool_use=True
)

message = TextMessage(content=prompt, source="user")
result = await agent.on_messages([message], cancellation_token=CancellationToken())

# Show Internal Workflow
for msg in result.inner_messages:
    print(f"[Tool Call] {msg.content}")

display(Markdown(result.chat_message.content))

## Phase 4: Multi-Agent Team Collaboration

In [None]:
# Team Setup
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.conditions import TextMentionTermination
from autogen_agentchat.teams import RoundRobinGroupChat

# Define Team Members
# The researcher can search the web; the evaluator has no tools and only reviews.
primary_agent = AssistantAgent(
    name="researcher",
    system_message="Research flight deals and incorporate feedback.",
    tools=[autogen_serper],
    model_client=model_client,
)

evaluation_agent = AssistantAgent(
    name="evaluator",
    system_message="Provide constructive feedback. Respond with 'APPROVE' when satisfied.",
    model_client=model_client,
)

# Termination Condition
# Same keyword the evaluator's system message tells it to reply with.
text_termination = TextMentionTermination("APPROVE")

# Create Team
# max_turns caps the run in case the termination keyword never appears.
team = RoundRobinGroupChat(
    participants=[primary_agent, evaluation_agent],
    max_turns=20,
    termination_condition=text_termination,
)

In [None]:
# Execute Team Workflow
# Hand the task to the team built in the previous cell and wait for the run to
# finish (top-level await works because this is a notebook cell).
prompt = "Find a one-way non-stop flight from JFK to LHR in June 2025."
result = await team.run(task=prompt)

# Display Conversation
# Print every message of the run, prefixed by the name of whoever sent it.
# Note: the loop variable rebinds the notebook-level name `message`.
for message in result.messages:
    print(f"{message.source}:\n{message.content}\n\n")

## Phase 5: MCP (Model Context Protocol) Integration

In [None]:
# MCP Server Tools
from autogen_ext.tools.mcp import StdioServerParams, mcp_server_tools

# Configure MCP Fetch Server
fetch_mcp_server = StdioServerParams(
    command="uvx", 
    args=["mcp-server-fetch"], 
    read_timeout_seconds=30
)
fetcher = await mcp_server_tools(fetch_mcp_server)

# Agent with MCP Tools
agent = AssistantAgent(
    name="web_fetcher", 
    model_client=model_client, 
    tools=fetcher, 
    reflect_on_tool_use=True
)

# Execute Web Scraping Task
result = await agent.run(task="Review edwarddonner.com and summarize what you learn. Reply in Markdown.")
display(Markdown(result.messages[-1].content))