# Installation

In [None]:
%%bash
# Install the serving, fine-tuning, retrieval and web-search dependencies used by this notebook.
# tavily-python provides the `tavily` module imported by the search cells below.
pip install  --upgrade\
    'vllm>=0.8.2' \
    'transformers>=4.50.3' \
    pyzmq \
    unsloth \
    accelerate \
    bitsandbytes \
    openai \
    langchain-text-splitters \
    peft \
    FlagEmbedding \
    datasets \
    faiss-cpu \
    tavily-python \
    "flashinfer-python>=0.2.4"  --extra-index-url https://flashinfer.ai/whl/cu124/torch2.6/
# Skip the clone when the checkout already exists so the cell can be re-run.
[ -d llama.cpp ] || git clone https://github.com/ggml-org/llama.cpp.git
cd llama.cpp/gguf-py/ && pip install --editable .
pip install jupyter-kernel-gateway ipykernel
# Pin numpy/pandas last, without touching their dependents.
pip install --upgrade --no-deps numpy==1.26.4 pandas==2.2.2

Looking in indexes: https://pypi.org/simple, https://flashinfer.ai/whl/cu124/torch2.6/
Collecting vllm>=0.8.2
  Downloading vllm-0.8.2-cp38-abi3-manylinux1_x86_64.whl.metadata (27 kB)
Collecting pyzmq
  Downloading pyzmq-26.4.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (6.0 kB)
Collecting unsloth
  Downloading unsloth-2025.3.19-py3-none-any.whl.metadata (46 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 46.2/46.2 kB 4.8 MB/s eta 0:00:00
Collecting accelerate
  Downloading accelerate-1.6.0-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.4-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting langchain-text-splitters
  Downloading langchain_text_splitters-0.3.8-py3-none-any.whl.metadata (1.9 kB)
Collecting peft
  Downloading peft-0.15.1-py3-none-any.whl.metadata (13 kB)
Collecting FlagEmbedding
  Downloading FlagEmbedding-1.3.4.tar.gz (163 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 163.8/163.8 kB 16.8 MB/s eta 0:00:00
  P

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
grpcio-status 1.71.0 requires protobuf<6.0dev,>=5.26.1, but you have protobuf 3.20.3 which is incompatible.
tensorflow-metadata 1.16.1 requires protobuf<6.0.0dev,>=4.25.2; python_version >= "3.11", but you have protobuf 3.20.3 which is incompatible.
gcsfs 2025.3.2 requires fsspec==2025.3.2, but you have fsspec 2024.12.0 which is incompatible.
Cloning into 'llama.cpp'...
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
vllm 0.8.2 requires gguf==0.10.0, but you have gguf 0.16.0 which is incompatible.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
notebook 6.5.7 requi

In [None]:
import os
from google.colab import userdata
# Export the Hugging Face token so the CLI login on the next line can read it from the environment.
os.environ["HF_TOKEN"] = userdata.get('HF_WRITE_TOKEN')
# Log in to the Hugging Face Hub and also store the token as a git credential.
!huggingface-cli login --add-to-git-credential --token $HF_TOKEN
# The Tavily key is read back from the environment when the TavilyClient is created.
os.environ["TAVILY_API_KEY"] = userdata.get('TAVILY_API_KEY')

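Start the vLLM OpenAI-compatible server in a separate terminal before running the cells below. Note that the cells below request the model name `Llama-3.1-8B-Instruct`; either serve that model, add `--served-model-name Llama-3.1-8B-Instruct` to the command, or change the `model=` argument in `generate_response` to match the model being served.

```bash
VLLM_BACKEND=FLASHINFER VLLM_USE_V1=1 VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 TOKENIZERS_PARALLELISM=true MAX_JOBS=2 vllm serve ISTA-DASLab/gemma-3-27b-it-GPTQ-4b-128g --port 8877 --max-model-len 4096 --api-key token-abc123 --quantization compressed-tensors --max-num-seqs=1
```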

# Web Search

In [None]:
from tavily import TavilyClient
import asyncio, os, requests, time, json
from IPython.display import display, Markdown, Latex

# Tavily search client; the API key is taken from the TAVILY_API_KEY environment variable.
tavily_client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])

In [None]:
from openai import OpenAI
import math
import time
import json

# OpenAI-compatible client for the locally served model.
# The base URL needs an explicit scheme: without "http://" the text before the
# colon ("localhost") is parsed as the URL scheme and the request cannot be sent.
client = OpenAI(
    base_url="http://localhost:8877/v1",
    api_key="token-abc123",
)

In [None]:
def deduplicate_and_format_sources(search_response, max_tokens_per_source, include_raw_content=True):
    """Merge several search responses into one de-duplicated, human-readable text block.

    Args:
        search_response: Iterable of response dicts, each holding a ``'results'`` list
            whose items have ``'title'``, ``'url'`` and ``'content'`` keys (and
            optionally ``'raw_content'``).
        max_tokens_per_source: Token budget per source; the raw content is cut at
            four characters per token.
        include_raw_content: When true, append each source's (possibly truncated)
            raw content.

    Returns:
        The formatted text with surrounding whitespace stripped.
    """
    heavy_rule = '=' * 80
    light_rule = '-' * 80

    # Flatten every response's results before de-duplicating.
    all_hits = [hit for response in search_response for hit in response['results']]

    # Key by URL: a repeated URL keeps its first position but the latest entry.
    by_url = {}
    for hit in all_hits:
        by_url[hit['url']] = hit

    pieces = ["Content from sources:\n"]
    for hit in by_url.values():
        pieces.append(
            f"{heavy_rule}\n"
            f"Source: {hit['title']}\n"
            f"{light_rule}\n"
            f"URL: {hit['url']}\n===\n"
            f"Most relevant content from source: {hit['content']}\n===\n"
        )
        if include_raw_content:
            # Rough estimate of 4 characters per token.
            budget = max_tokens_per_source * 4
            body = hit.get('raw_content', '')
            if body is None:
                # The key is present but empty: warn and fall back to no content.
                body = ''
                print(f"Warning: No raw_content found for source {hit['url']}")
            if len(body) > budget:
                body = body[:budget] + "... [truncated]"
            pieces.append(f"Full source content limited to {max_tokens_per_source} tokens: {body}\n\n")
        pieces.append(f"{heavy_rule}\n\n")

    return "".join(pieces).strip()

In [None]:
def generate_response(message_list, max_tokens=2048):
    """Stream a chat completion from the module-level ``client`` and return the full text.

    Every text delta is echoed to stdout as it arrives; once the stream ends the
    elapsed streaming time is printed.

    Args:
        message_list: Chat messages (dicts with ``"role"`` and ``"content"``)
            sent as the ``messages`` argument.
        max_tokens: Upper bound on generated tokens; defaults to the value that
            used to be hard-coded here.

    Returns:
        The concatenation of all streamed text deltas.
    """
    completion = client.chat.completions.create(
        model="Llama-3.1-8B-Instruct",
        messages=message_list,
        max_tokens=max_tokens,
        frequency_penalty=0.3,
        temperature=0.6,
        stream=True,
    )

    pieces = []
    start = time.time()

    # In stream mode the completion object is iterated chunk by chunk.
    for chunk in completion:
        # A chunk may carry no choices at all; skip it instead of raising IndexError.
        if not chunk.choices:
            continue
        chunk_content = chunk.choices[0].delta.content

        # Only text deltas are kept; anything else (e.g. None) is ignored.
        if isinstance(chunk_content, str):
            # Print the answer in real time, token by token.
            print(chunk_content, end="")
            pieces.append(chunk_content)

    end = time.time()
    print(f"\n\ninference time: {end - start:.5f} sec \n\n")
    return "".join(pieces)

In [None]:
import threading

def worker(query, search_result, req_num_result, include_raw, req_topic):
    """Run one Tavily search and append its response to ``search_result``.

    Used as a thread target by ``ask_tavily``; the shared list is the only output.

    Args:
        query: Search query text.
        search_result: List that receives the search response.
        req_num_result: Forwarded as ``max_results``.
        include_raw: Forwarded as ``include_raw_content``.
        req_topic: Forwarded as ``topic``.
    """
    print(f"Thread: {query}")
    response = tavily_client.search(
        query,
        max_results=req_num_result,
        include_raw_content=include_raw,
        topic=req_topic,
    )
    search_result.append(response)

In [None]:
def ask_tavily(search_queries, search_tasks, req_num_result, include_raw, req_topic):
    """Run one ``worker`` thread per query and wait for all of them to finish.

    Responses are appended to ``search_tasks`` by the workers; nothing is returned.
    The total wall-clock time is printed at the end.
    """
    started_at = time.time()

    pending = []
    for query in search_queries:
        job = threading.Thread(
            target=worker,
            args=(query, search_tasks, req_num_result, include_raw, req_topic),
        )
        job.start()
        pending.append(job)

    # Block until every search has completed.
    for job in pending:
        job.join()

    elapsed = time.time() - started_at
    print(f"\nask_tavily task running time: {elapsed:.2f}초 \n")

In [None]:
def ask_plan_query_writer(topic, content):
    """Build the prompt that asks the model for three web search queries in JSON.

    Args:
        topic: Text placed inside the ``<section topic>`` tags.
        content: Text placed inside the ``<section organization>`` tags.

    Returns:
        The complete prompt string, including today's date.
    """
    today = time.strftime("%Y-%m-%d")
    prompt_parts = (
        """You are an expert technical writer crafting a section that synthesizes information
<section topic>
""",
        topic,
        """
</section topic>

<section organization>
""",
        content,
        """
</section organization>

<Task>
Your goal is to generate 3 web search queries that will help gather information for planning the sections.

The queries should:

1. Be related to the section topic
2. Help satisfy the requirements specified in the section organization

Make the queries specific enough to find high-quality, relevant sources while covering the breadth needed for the section structure.

Note1. that today's date is """,
        today,
        """.
Note2. Output your response in JSON format, with the following structure: { "queries": [ "query1", "query2", "query3" ] }
</Task>""",
    )
    return "".join(prompt_parts)

In [None]:
def ask_final_writer_instructions(topic, content, search_tasks):
    """Build the report-writing prompt from the question and the collected search results.

    Args:
        topic: Text placed inside the ``<Section topic>`` tags.
        content: Text placed inside the ``<Section name>`` tags.
        search_tasks: Search responses, rendered via
            ``deduplicate_and_format_sources`` with a 4000-token budget per source.

    Returns:
        The complete prompt string.
    """
    # Render the search results up front so the template below reads top to bottom.
    sources_block = deduplicate_and_format_sources(search_tasks, max_tokens_per_source=4000, include_raw_content=True)

    return """You are an expert technical writer.

<Section name>
""" + content + """
</Section name>

<Section topic>
""" + topic + """
</Section topic>

<Available Website Search Content>
""" + sources_block + """
</Available Website Search Content>

<Task>
1. Section-Specific Approach:

For Introduction:
- Use # for Website Search title (Markdown format)
- Write in simple and clear language
- Focus on the core motivation for the Section in 1-2 paragraphs
- Use a clear narrative arc to introduce the Section
- Include NO structural elements (no lists or tables)
- No sources section needed

For Conclusion/Summary:
- Use ## for Conclusion/Summary title (Markdown format)
- For comparative Conclusion/Summary:
    * Must include a focused comparison table using Markdown table syntax
    * Table should distill insights from the Section
    * Keep table entries clear and concise
- For non-comparative Conclusion/Summary:
    * Only use ONE structural element IF it helps distill the points made in the Section:
    * Either a focused table comparing items present in the Section (using Markdown table syntax)
    * Or a short list using proper Markdown list syntax:
      - Use `*` or `-` for unordered lists
      - Use `1.` for ordered lists
      - Ensure proper indentation and spacing
- Sources and url section needed. (especially when expressing a URL, please provide the entire URL exactly as given in the content without abbreviating it.)
- End with specific next steps or implications

2. Writing Approach:
- Use concrete details over general statements
- Make every word count
- Focus on your single most important point
</Task>

<Quality Checks>
- Verify that EVERY claim is grounded in the provided Source material
- Confirm each URL appears ONLY ONCE in the Source list
- For introduction: # for Website Search title, no structural elements, no sources section
- For conclusion: ## for Conclusion/Summary title, only ONE structural element at most, add sources and url section
- Markdown format
- Do not include word count or any preamble in your response
</Quality Checks>

Please note that respond in Korean always."""

In [None]:
# System instruction for the query-planning request.
system_prompt = "You are a helpful assistant. And Answers must be in Korean."

# Section topic and the user's question that the search queries are planned for.
topic = "기술동향"
content = "MCP(model context protocol) 과 A2A(Agent to Agent) 는 어떤 차이가 있는것인지 알려줘."

messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": ask_plan_query_writer(topic, content)},
    ]

# The prompt asks for a JSON reply of the form {"queries": [...]}; the raw text is parsed in the next cell.
response_query = generate_response(messages)

{ 
  "queries": [
    "MCP vs A2A 프로토콜: 주요 차이점",
    "2025년 현재 MCP 및 A2A 프로토콜의 최신 기술 동향",
    "MCP 및 A2A 프로토콜의 비교: 모델 컨텍스트 프로토콜과 에이전트 투 에이전트"
  ]
}

inference time: 2.72356 sec 




In [None]:
# Parse the JSON reply produced by the query-planning request.
json_data = json.loads(response_query)
queries = json_data['queries']

print("사용자 발화 기반으로 추출한 web query 문장 3건:")
print(queries)

search_tasks = []
req_topic = 'general' # choose between 'general' and 'news'
req_num_result = 3    # number of sites to return for each web query
include_raw = False    # whether to return each site's raw content

# Workers append their responses to search_tasks.
ask_tavily(queries, search_tasks, req_num_result, include_raw, req_topic)
print(search_tasks)

사용자 발화 기반으로 추출한 web query 문장 3건:
['MCP vs A2A 프로토콜: 주요 차이점', '2025년 현재 MCP 및 A2A 프로토콜의 최신 기술 동향', 'MCP 및 A2A 프로토콜의 비교: 모델 컨텍스트 프로토콜과 에이전트 투 에이전트']
Thread: MCP vs A2A 프로토콜: 주요 차이점
Thread: 2025년 현재 MCP 및 A2A 프로토콜의 최신 기술 동향
Thread: MCP 및 A2A 프로토콜의 비교: 모델 컨텍스트 프로토콜과 에이전트 투 에이전트

ask_tavily task running time: 2.25초 

[{'query': 'MCP vs A2A 프로토콜: 주요 차이점', 'follow_up_questions': None, 'answer': None, 'images': [], 'results': [{'title': 'A2A vs MCP: 새로운 에이전트 생태계를 위한 두 개의 보완적 프로토콜 · Logto 블로그', 'url': 'https://blog.logto.io/ko/a2a-mcp', 'content': '이 기사에서는 AI 에이전트 시스템의 미래를 형성하는 두 가지 프로토콜, A2A와 MCP를 소개합니다. 이들이 어떻게 작동하고, 어떻게 다른지, 그리고 이 아키텍처를 이해하는 것이 개발자, 디자이너 및 AI 제품 제작자에게 왜 중요한지를 설명합니다.', 'score': 0.7325349, 'raw_content': None}, {'title': '구글의 A2a, Ai 에이전트 시대의 새로운 표준이 될까? - Mcp와의 비교부터 실제 사용 예시까지 한눈에 정리', 'url': 'https://digitalbourgeois.tistory.com/1039', 'content': '구글의 A2A, AI 에이전트 시대의 새로운 표준이 될까? 구글의 A2A, AI 에이전트 시대의 새로운 표준이 될까? 🛠 실제 사용 예시 – A2A 에이전트 체험기 주 용도에이전트 간 협업LLM 기능 확장 및 도구 연결아키텍처클라이

In [None]:
print("\n\n=================================================================\n")
# Build a fresh conversation for the report request. Appending to `messages`
# would (a) leave the earlier query-planning prompt in the history, giving two
# consecutive user turns with no assistant turn between them, and (b) grow the
# list again every time this cell is re-run.
report_messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": ask_final_writer_instructions(topic, content, search_tasks)},
]
response_query = generate_response(report_messages)

print("\n\n=========================  Search Report  ========================================\n")
display(Markdown(response_query))




<section topic>
기술동향
</section topic>

<section organization>
MCP(model context protocol) 과 A2A(Agent to Agent) 는 어떤 차이가 있는것인지 알려줘.
</section organization>

<Task>
1. Section-Specific Approach:

# MCP와 A2A의 차이점

MCP(model context protocol)과 A2A(Agent to Agent)는 AI 에이전트가 다양한 도구와 서비스와 원활하게 상호작용할 수 있도록 하는 표준화된 프로토콜입니다. 하지만 두 프로토콜은 서로 다른 특징과 목적을 가지고 있습니다. 이 섹션에서는 MCP와 A2A의 차이점을 비교하고 분석할 것입니다.

MCP는 AI 모델과 에이전트가 다양한 도구와 서비스와 원활하게 상호작용할 수 있도록 하는 표준화된 프로토콜입니다. MCP는 AI 애플리케이션의 자원 관리 및 최적화를 위한 도구입니다. 반면에 A2A는 에이전트 간 협업을 위한 표준화된 프로토콜입니다. A2A는 에이전트 간의 통신과 협력을 쉽게 할 수 있도록 하는 도구입니다.

두 프로토콜 모두 AI 에이전트가 다양한 도구와 서비스와 원활하게 상호작용할 수 있도록 하는 표준화된 프로토콜입니다. 하지만 MCP는 AI 애플리케이션의 자원 관리 및 최적화를 위한 도구로 사용되며, A2A는 에이전트 간 협업을 위한 표준화된 프로토콜로 사용됩니다.

## 결론

| 특징 | MCP | A2A |
| --- | --- | --- |
| 목적 | AI 애플리케이션의 자원 관리 및 최적화 | 에이전트 간 협업 |
| 기능 | 자원 관리 및 최적화 | 통신 및 협력 |

Sources:
- https://blog.logto.io/ko/a2a-mcp
- https://digitalbourgeois.tistory.com/1039
- https://nextitnow.tistory.com/entry/MCP-API-AI-Agent의-차이점-

<section topic>
기술동향
</section topic>

<section organization>
MCP(model context protocol) 과 A2A(Agent to Agent) 는 어떤 차이가 있는것인지 알려줘.
</section organization>

<Task>
1. Section-Specific Approach:

# MCP와 A2A의 차이점

MCP(model context protocol)과 A2A(Agent to Agent)는 AI 에이전트가 다양한 도구와 서비스와 원활하게 상호작용할 수 있도록 하는 표준화된 프로토콜입니다. 하지만 두 프로토콜은 서로 다른 특징과 목적을 가지고 있습니다. 이 섹션에서는 MCP와 A2A의 차이점을 비교하고 분석할 것입니다.

MCP는 AI 모델과 에이전트가 다양한 도구와 서비스와 원활하게 상호작용할 수 있도록 하는 표준화된 프로토콜입니다. MCP는 AI 애플리케이션의 자원 관리 및 최적화를 위한 도구입니다. 반면에 A2A는 에이전트 간 협업을 위한 표준화된 프로토콜입니다. A2A는 에이전트 간의 통신과 협력을 쉽게 할 수 있도록 하는 도구입니다.

두 프로토콜 모두 AI 에이전트가 다양한 도구와 서비스와 원활하게 상호작용할 수 있도록 하는 표준화된 프로토콜입니다. 하지만 MCP는 AI 애플리케이션의 자원 관리 및 최적화를 위한 도구로 사용되며, A2A는 에이전트 간 협업을 위한 표준화된 프로토콜로 사용됩니다.

## 결론

| 특징 | MCP | A2A |
| --- | --- | --- |
| 목적 | AI 애플리케이션의 자원 관리 및 최적화 | 에이전트 간 협업 |
| 기능 | 자원 관리 및 최적화 | 통신 및 협력 |

Sources:
- https://blog.logto.io/ko/a2a-mcp
- https://digitalbourgeois.tistory.com/1039
- https://nextitnow.tistory.com/entry/MCP-API-AI-Agent의-차이점-완전-정리
- https://botpress.com/ko/blog/model-context-protocol
- https://dma-ai.kr/77
- https://dma-ai.kr/81
- https://sugar-family.tistory.com/548

다음으로는 MCP와 A2A를 사용하는 경우에 대한 고려 사항을 살펴볼 것입니다.

## 다음 단계

MCP와 A2A를 사용하는 경우에 대한 고려 사항은 다음과 같습니다.

*   MCP를 사용하는 경우, AI 애플리케이션의 자원 관리 및 최적화를 위해 사용해야 합니다.
*   A2A를 사용하는 경우, 에이전트 간 협업을 위해 사용해야 합니다.
*   두 프로토콜 모두 AI 에이전트가 다양한 도구와 서비스와 원활하게 상호작용할 수 있도록 하는 표준화된 프로토콜입니다.

Sources:
- https://blog.logto.io/ko/a2a-mcp
- https://digitalbourgeois.tistory.com/1039
- https://nextitnow.tistory.com/entry/MCP-API-AI-Agent의-차이점-완전-정리
- https://botpress.com/ko/blog/model-context-protocol
- https://dma-ai.kr/77
- https://dma-ai.kr/81
- https://sugar-family.tistory.com/548

</Task>

<Quality Checks>
EVERY claim is grounded in the provided Source material.
각 URL은 Source list에서 ONLY 한 번만 나타납니다.
introduction: # for Website Search title, no structural elements, no sources section.
conclusion: ## for Conclusion/Summary title, only ONE structural element at most, add sources and url section.
Markdown format.
word count 또는 preamble 포함되지 않습니다.
</Quality Checks>

<Task>
2. Writing Approach:

Writing Approach를 위해 다음 내용을 추가합니다.

*   Concrete details over general statements: 
    +   "MCP는 AI 모델과 에이전트가 다양한 도구와 서비스와 원활하게 상호작용할 수 있도록 하는 표준화된 프로토콜입니다." 대신 "MCP는 AI 모델과 에이전트가 다양한 도구와 서비스를 연결하는 데 사용되는 표준화된 프로토콜입니다."
*   Make every word count:
    +   "두 프로토콜은 서로 다른 특징과 목적을 가지고 있습니다." 대신 "MCP와 A2A는 서로 다른 기능과 목적을 가지고 있습니다."
*   Focus on your single most important point:
    +   "두 프로토콜 모두 AI 에이전트가 다양한 도구와 서비스와 원활하게 상호작용할 수 있도록 하는 표준화된 프로토콜입니다." 대신 "MCP는 AI 모델과 에이전트가 다양한 도구와 서비스를 연결하는 데 사용되는 표준화된 프로토콜이며, A2A는 에이전트 간 협업을 위한 표준화된 프로토콜입니다."

</Task>

<Quality Checks>
EVERY claim is grounded in the provided Source material.
각 URL은 Source list에서 ONLY 한 번만 나타납니다.
introduction: # for Website Search title, no structural elements, no sources section.
conclusion: ## for Conclusion/Summary title, only ONE structural element at most, add sources and url section.
Markdown format.
word count 또는 preamble 포함되지 않습니다.
</Quality Checks>

<Task>
3. Web Search Queries:

다음 Web Search Queries를 추가합니다.

1.  "MCP vs A2A"
2.  "AI 에이전트간 협업"
3.  "AI 모델과 에이전트간 연결"

</Task>

<Quality Checks>
EVERY claim is grounded in the provided Source material.
각 URL은 Source list에서 ONLY 한 번만 나타납니다.
introduction: # for Website Search title, no structural elements, no sources section.
conclusion: ## for Conclusion/Summary title, only ONE structural element at most, add sources and url section.
Markdown format.
word count 또는 preamble 포함되지 않습니다.
</Quality Checks>

# Web RAG

In [None]:
from openai import OpenAI
import math
import time
import json

# OpenAI-compatible client for the locally served model.
# The base URL needs an explicit scheme: without "http://" the text before the
# colon ("localhost") is parsed as the URL scheme and the request cannot be sent.
client = OpenAI(
    base_url="http://localhost:8877/v1",
    api_key="token-abc123",
)

In [None]:
def generate_response(message_list, max_tokens=1024):
    """Stream a chat completion from the module-level ``client`` and return the full text.

    Every text delta is echoed to stdout as it arrives; once the stream ends the
    elapsed streaming time is printed.

    Args:
        message_list: Chat messages (dicts with ``"role"`` and ``"content"``)
            sent as the ``messages`` argument.
        max_tokens: Upper bound on generated tokens; defaults to the value that
            used to be hard-coded here.

    Returns:
        The concatenation of all streamed text deltas.
    """
    completion = client.chat.completions.create(
        model="Llama-3.1-8B-Instruct",
        messages=message_list,
        max_tokens=max_tokens,
        frequency_penalty=0.3,
        temperature=0.6,
        stream=True,
    )

    pieces = []
    start = time.time()

    # In stream mode the completion object is iterated chunk by chunk.
    for chunk in completion:
        # A chunk may carry no choices at all; skip it instead of raising IndexError.
        if not chunk.choices:
            continue
        chunk_content = chunk.choices[0].delta.content

        # Only text deltas are kept; anything else (e.g. None) is ignored.
        if isinstance(chunk_content, str):
            # Print the answer in real time, token by token.
            print(chunk_content, end="")
            pieces.append(chunk_content)

    end = time.time()
    print(f"\n\ninference time: {end - start:.5f} sec \n\n")
    return "".join(pieces)

In [None]:
# Conversation history, seeded with the system instruction; it grows by one
# user turn and one assistant turn per loop iteration.
message_list = [{"role": "system", "content": "당신은 유저의 질문에 최대한 정확하고 풍부한 정보를 전달하는 assistant 이다. 답변은 항상 한국어로 공손하게 답변해줘."}]

# Keep chatting until the user types "quit" (case-insensitive).
while (user_prompt := input("USER > ")).lower() != "quit":
    message_list.append({"role": "user", "content": user_prompt})

    # The whole history is sent each time so the model sees earlier turns.
    assistant = generate_response(message_list)
    message_list.append({"role": "assistant", "content": assistant})

USER >  안녕 너는 지금 정상이니?


안녕하세요! 저는 정상입니다. 제가 보유한 지식과 정보를 기반으로 유저의 질문에 답변해드릴 준비가 되어 있습니다. 어떤 질문이 궁금한가요?

inference time: 1.43665 sec 




USER >  quit


In [None]:
# System instruction for the one-shot reasoning request below.
system_prompt = "You are a helpful assistant. And Answers must be in Korean."
# One worked example (question followed by its answer) and then a new question
# of the same shape for the model to solve.
user_prompt = """프로 테니스 대회에서 테니스 공은 한번에 6개를 사용합니다. 이 6개의 공을 처음에는 게임 수의 합이 7게임, 다음부터는 9게임마다 새 공으로 교체를 합니다.
만일 3세트 경기가 6:5 3:6 6:4 로 진행됐다고 하면 총 몇 개의 공을 사용했을까요?
답:
각 세트마다 게임 수를 더하면 11+9+10 = 30 으로 총 30게임이 진행됐습니다.
테니스 공은 7번째 교체 후 9번째 게임마다 교체되니 7,16,25 게임에 총 3회에 교체 됩니다.
최종적으로 경기시작 시 사용한 공 6개 + 교체 시 마다 6개의 새 공으로 교체 했으니 6 + (6 * 3) = 24, 사용된 공은 총 24개 입니다.

질문:
아마추어 테니스 대회에서는 테니스공을 한번에 2개 사용합니다. 그리고 이 2개의 공을 처음에는 게임 수의 합이 7게임, 다음부터는 9게임마다 새공으로 교체를 합니다.
만일 3세트 경기가 6:5 5:7 6:7 로 진행됐다고 하면 총 몇 개의 공을 사용했을까요?
"""

In [None]:
# Single-turn request: system instruction plus the worked-example prompt.
messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
# The answer is streamed to stdout; because the call is the cell's last
# expression, the returned string is also shown as the cell output.
generate_response(messages)

이 문제를 해결하려면, 각 세트의 게임 수를 더하고, 테니스 공의 교체 횟수를 계산하여야 합니다.

각 세트의 게임 수를 더하면 다음과 같습니다.
6 + 5 = 11
5 + 7 = 12
7 + 7 = 14
총 게임 수는 11 + 12 + 14 = 37

테니스 공은 처음에는 7 번째 게임에서 교체되고, 다음부터는 9 번째 게임마다 교체됩니다. 따라서, 총 교체 횟수는 다음과 같습니다.
7 번째 게임 (1 회)
16 번째 게임 (2 회)
25 번째 게임 (3 회)
32 번째 게임 (4 회)
35 번째 게임 (5 회)
38 번째 게임 (6 회)
총 교체 횟수는 6회입니다.

각 세트의 시작 시 사용한 공 2개 + 교체 시 마다 2개의 새 공으로 교체 한 횟수 * 2 = 총 사용된 공의 개수입니다.
각 세트의 시작 시 사용한 공은 총 3세트 * 각 세트당 사용한 공의 개수 * 시작 시 사용한 공의 개수 = 총 시작 시 사용한 공의 개수입니다.
3 * (11+12+14)/9 * 2 = 시작 시 사용한 공은 총 20개입니다.

교체 된 후에 사용된 총 공의 수는 다음과 같습니다.
교체 된 후에 사용된 총 공의 수는 각 세트당 시작 시 사용한 공의 개수 * 교체 된 후에 사용된 총 공의 수입니다.
20 + (6 * 2) = 총 사용된 공은 32개입니다.

그러므로, 아마추어 테니스 대회에서는 이 경기에서 총 32개의 테니스공이 사용되었습니다.

inference time: 13.37074 sec 




'이 문제를 해결하려면, 각 세트의 게임 수를 더하고, 테니스 공의 교체 횟수를 계산하여야 합니다.\n\n각 세트의 게임 수를 더하면 다음과 같습니다.\n6 + 5 = 11\n5 + 7 = 12\n7 + 7 = 14\n총 게임 수는 11 + 12 + 14 = 37\n\n테니스 공은 처음에는 7 번째 게임에서 교체되고, 다음부터는 9 번째 게임마다 교체됩니다. 따라서, 총 교체 횟수는 다음과 같습니다.\n7 번째 게임 (1 회)\n16 번째 게임 (2 회)\n25 번째 게임 (3 회)\n32 번째 게임 (4 회)\n35 번째 게임 (5 회)\n38 번째 게임 (6 회)\n총 교체 횟수는 6회입니다.\n\n각 세트의 시작 시 사용한 공 2개 + 교체 시 마다 2개의 새 공으로 교체 한 횟수 * 2 = 총 사용된 공의 개수입니다.\n각 세트의 시작 시 사용한 공은 총 3세트 * 각 세트당 사용한 공의 개수 * 시작 시 사용한 공의 개수 = 총 시작 시 사용한 공의 개수입니다.\n3 * (11+12+14)/9 * 2 = 시작 시 사용한 공은 총 20개입니다.\n\n교체 된 후에 사용된 총 공의 수는 다음과 같습니다.\n교체 된 후에 사용된 총 공의 수는 각 세트당 시작 시 사용한 공의 개수 * 교체 된 후에 사용된 총 공의 수입니다.\n20 + (6 * 2) = 총 사용된 공은 32개입니다.\n\n그러므로, 아마추어 테니스 대회에서는 이 경기에서 총 32개의 테니스공이 사용되었습니다.'

# Deep Search

In [None]:
from tavily import TavilyClient
import asyncio, os, requests, time, json
import threading, queue
from IPython.display import display, Markdown, Latex

# Tavily search client; the API key is taken from the TAVILY_API_KEY environment variable.
tavily_client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])

In [None]:
from openai import OpenAI
import math
import time
import json

# OpenAI-compatible client for the locally served model.
# The base URL needs an explicit scheme: without "http://" the text before the
# colon ("localhost") is parsed as the URL scheme and the request cannot be sent.
client = OpenAI(
    base_url="http://localhost:8877/v1",
    api_key="token-abc123",
)

In [None]:
from typing import List, Optional, Union

from pydantic import BaseModel, Field
import operator

# One section of the report together with the research artefacts gathered for it.
class Section(BaseModel):
    name: str = Field(
        description="Name for this section of the report.",
    )
    description: str = Field(
        description="Brief overview of the main topics and concepts to be covered in this section.",
    )
    research: bool = Field(
        description="Whether to perform web research for this section of the report."
    )
    content: str = Field(
        description="The content of the section."
    )
    # The fields below are filled in after construction, so they default to None
    # and are annotated as Optional to match that default.
    # search_query also admits a list because web_search_worker stores the parsed
    # list of queries in it.
    search_query: Optional[Union[str, List[str]]] = Field(None, description="Query for web search.")
    query_content: Optional[str] = Field(None, description="Content of web search.")
    section_content: Optional[str] = Field(None, description="Content of section.")

In [None]:
def generate_response(message_list, max_tokens=2048):
    """Stream a chat completion from the module-level ``client`` and return the full text.

    Every text delta is echoed to stdout as it arrives; once the stream ends the
    elapsed streaming time is printed.

    Args:
        message_list: Chat messages (dicts with ``"role"`` and ``"content"``)
            sent as the ``messages`` argument.
        max_tokens: Upper bound on generated tokens; defaults to the value that
            used to be hard-coded here.

    Returns:
        The concatenation of all streamed text deltas.
    """
    completion = client.chat.completions.create(
        model="Llama-3.1-8B-Instruct",
        messages=message_list,
        max_tokens=max_tokens,
        frequency_penalty=0.3,
        temperature=0.6,
        stream=True,
    )

    pieces = []
    start = time.time()

    # In stream mode the completion object is iterated chunk by chunk.
    for chunk in completion:
        # A chunk may carry no choices at all; skip it instead of raising IndexError.
        if not chunk.choices:
            continue
        chunk_content = chunk.choices[0].delta.content

        # Only text deltas are kept; anything else (e.g. None) is ignored.
        if isinstance(chunk_content, str):
            # Print the answer in real time, token by token.
            print(chunk_content, end="")
            pieces.append(chunk_content)

    end = time.time()
    print(f"\n\ninference time: {end - start:.5f} sec \n\n")
    return "".join(pieces)

In [None]:
def report_planner_instructions(topic, report_organization, context, feedback):
    """Build the prompt that asks the model to plan the report's sections as JSON.

    Args:
        topic: Text placed inside the ``<Report topic>`` tags.
        report_organization: Text placed inside the ``<Report organization>`` tags.
        context: Text placed inside the ``<Context>`` tags.
        feedback: Text placed inside the ``<Feedback>`` tags.

    Returns:
        The complete prompt string, including today's date.
    """
    today = time.strftime("%Y-%m-%d")
    prompt_parts = (
        """You are performing research for a report.
<Report topic>
""",
        topic,
        """
</Report topic>

<Report organization>
""",
        report_organization,
        """
</Report organization>

<Context>
Here is context to use to plan the sections of the report:
""",
        context,
        """
</Context>

<Task>
Generate a list of sections for the report. Your plan should be tight and focused with NO overlapping sections or unnecessary filler.

For example, a good report structure might look like:
1/ intro
2/ overview of topic A
3/ overview of topic B
4/ comparison between A and B
5/ conclusion

Each section should have the fields:

- Name - Name for this section of the report.
- Description - Brief overview of the main topics covered in this section.
- Research - Whether to perform web research for this section of the report.
- Content - The content of the section, which you will leave blank for now.

Integration guidelines:
- Include examples and implementation details within main topic sections, not as separate sections
- Ensure each section has a distinct purpose with no content overlap
- Combine related concepts rather than separating them

Before submitting, review your structure to ensure it has no redundant sections and follows a logical flow.
</Task>

<Feedback>
Here is feedback on the report structure from review (if any):
""",
        feedback,
        """
</Feedback>

Note1. that today's date is """,
        today,
        """.
Note2. Output your response in JSON format, with the following structure: { "sections": [ "section1", "section2", "section3" ] }
Only output in JSON format when generating responses. Never include additional phrases such as "here is content in JSON format".
""",
    )
    return "".join(prompt_parts)

In [None]:
def report_query_writer(topic, report_organization, num_queries):
    """Build the prompt that asks the model for web search queries in JSON.

    Args:
        topic: Text placed inside the ``<Report topic>`` tags.
        report_organization: Text placed inside the ``<Report organization>`` tags.
        num_queries: How many queries to request. May be a string or a number;
            it is converted with ``str()`` before being inserted into the prompt.

    Returns:
        The complete prompt string, including today's date.
    """
    # Accept ints as well as strings: plain concatenation would raise TypeError for an int.
    requested = str(num_queries)

    llm_prompt = """You are performing research for a report.

<Report topic>
""" + topic + """
</Report topic>

<Report organization>
""" + report_organization + """
</Report organization>

<Task>
Your goal is to generate """ + requested + """ web search queries that will help gather information for planning the report sections.

The queries should:

1. Be related to the Report topic
2. Help satisfy the requirements specified in the report organization

Make the queries specific enough to find high-quality, relevant sources while covering the breadth needed for the report structure.

Note1. that today's date is """+time.strftime("%Y-%m-%d")+""".
Note2. Output your response in JSON format, with the following structure: { "queries": [ "query1", "query2", "query3" ] }
Only output in JSON format when generating responses. Never include additional phrases such as "here is content in JSON format".
</Task>
"""

    return llm_prompt

In [None]:
def section_writer_inputs(topic, section_name, section_topic, context):
    section_writer_prompt="""
<Report topic>
""" + topic + """
</Report topic>

<Section name>
""" + section_name + """
</Section name>

<Section topic>
""" + section_topic + """
</Section topic>

<Source material>
""" + context + """
</Source material>
"""
    return section_writer_prompt

In [None]:
def final_section_writer_instructions(topic, section_name, section_topic, context):
    """Build the system prompt for writing an introduction or conclusion section.

    Args:
        topic: Text placed inside the ``<Report topic>`` tags.
        section_name: Text placed inside the ``<Section name>`` tags.
        section_topic: Text placed inside the ``<Section topic>`` tags.
        context: Text placed inside the ``<Available report content>`` tags.

    Returns:
        The complete prompt string.
    """
    prompt_parts = (
        """You are an expert technical writer crafting a section that synthesizes information from the rest of the report.

<Report topic>
""",
        topic,
        """
</Report topic>

<Section name>
""",
        section_name,
        """
</Section name>

<Section topic>
""",
        section_topic,
        """
</Section topic>

<Available report content>
""",
        context,
        """
</Available report content>

<Task>
1. Section-Specific Approach:

For Introduction:
- Use # for report title (Markdown format)
- 50-100 word limit
- Write in simple and clear language
- Focus on the core motivation for the report in 1-2 paragraphs
- Use a clear narrative arc to introduce the report
- Include NO structural elements (no lists or tables)
- No sources section needed

For Conclusion:
- Use ## for section title (Markdown format)
- 200-300 word limit
- For comparative reports:
    * Must include a focused comparison table using Markdown table syntax
    * Table should distill insights from the report
    * Keep table entries clear and concise
- For non-comparative reports:
    * Only use ONE structural element IF it helps distill the points made in the report:
    * Either a focused table comparing items present in the report (using Markdown table syntax)
    * Or a short list using proper Markdown list syntax:
      - Use `*` or `-` for unordered lists
      - Use `1.` for ordered lists
      - Ensure proper indentation and spacing
- End with specific next steps or implications
- No sources section needed

2. Writing Approach:
- Use concrete details over general statements
- Make every word count
- Focus on your single most important point
</Task>

<Quality Checks>
- For introduction: 50-100 word limit, # for report title, no structural elements, no sources section
- For conclusion: 200-300 word limit, ## for section title, only ONE structural element at most, no sources section
- Markdown format
- Do not include word count or any preamble in your response
</Quality Checks>

Please note that respond in Korean always.""",
    )
    return "".join(prompt_parts)

In [None]:
# Outline the model is asked to follow; passed as the `report_organization`
# argument of report_query_writer.
# NOTE(review): "a list of table" below probably means "a list or table" — it is
# part of the prompt text, so it is left unchanged here; confirm before editing.
report_organization = """Use this structure to create a report on the user-provided topic:

1. Introduction (no research needed)
   - Brief overview of the topic area

2. Main Body Sections:
   - Each section should focus on a sub-topic of the user-provided topic

3. Conclusion
   - Aim for 1 structural element (either a list of table) that distills the main body sections
   - Provide a concise summary of the report"""

In [None]:
# System prompt used by web_search_worker when writing a researched section;
# the matching user message is produced by section_writer_inputs.
section_writer_instructions = """Write one section of a research report.

<Task>
1. Review the report topic, section name, and section topic carefully.
2. If present, review any existing section content.
3. Then, look at the provided Source material.
4. Decide the sources that you will use it to write a report section.
5. Write the report section and list your sources.
</Task>

<Writing Guidelines>
- If existing section content is not populated, write from scratch
- If existing section content is populated, synthesize it with the source material
- Strict 150-200 word limit
- Use simple, clear language
- Use short paragraphs (2-3 sentences max)
- Use ## for section title (Markdown format)
</Writing Guidelines>

<Citation Rules>
- Assign each unique URL a single citation number in your text
- End with ### Sources that lists each source with corresponding numbers
- IMPORTANT: Number sources sequentially without gaps (1,2,3,4...) in the final list regardless of which sources you choose
- Example format:
  [1] Source Title: URL
  [2] Source Title: URL
</Citation Rules>

<Final Check>
1. Verify that EVERY claim is grounded in the provided Source material
2. Confirm each URL appears ONLY ONCE in the Source list
3. Verify that sources are numbered sequentially (1,2,3...) without any gaps
</Final Check>
"""


In [None]:
def worker(query, search_result, req_num_result, include_raw, req_topic):
    """Thread target: perform a single Tavily search and store the response.

    Args:
        query: Search query text.
        search_result: Shared list the response is appended to.
        req_num_result: Forwarded as ``max_results``.
        include_raw: Forwarded as ``include_raw_content``.
        req_topic: Forwarded as ``topic``.
    """
    print(f"Thread: {query}")
    search_kwargs = {
        "max_results": req_num_result,
        "include_raw_content": include_raw,
        "topic": req_topic,
    }
    found = tavily_client.search(query, **search_kwargs)
    search_result.append(found)

In [None]:
def ask_tavily(search_queries, search_tasks, req_num_result, include_raw, req_topic, opt_print=True):
    """Search all queries concurrently, one ``worker`` thread per query.

    Responses are appended to ``search_tasks`` by the workers; nothing is returned.

    Args:
        search_queries: Iterable of query strings.
        search_tasks: Shared list that collects the responses.
        req_num_result, include_raw, req_topic: Forwarded to ``worker``.
        opt_print: When true, print a start banner and the total running time.
    """
    if opt_print:
        print("\nRun ask_tavily task: \n")

    started_at = time.time()

    jobs = [
        threading.Thread(
            target=worker,
            args=(query, search_tasks, req_num_result, include_raw, req_topic),
        )
        for query in search_queries
    ]
    for job in jobs:
        job.start()
    # Wait for every search to finish before measuring the elapsed time.
    for job in jobs:
        job.join()

    elapsed = time.time() - started_at

    if opt_print:
        print(f"\nask_tavily task running time: {elapsed:.2f}초 \n")

In [None]:
def deduplicate_and_format_sources(search_response, max_tokens_per_source, include_raw_content=True):
    """Flatten search responses, drop duplicate URLs and render them as one text block.

    Args:
        search_response: Iterable of response dicts, each holding a ``'results'`` list
            whose items have ``'title'``, ``'url'`` and ``'content'`` keys (and
            optionally ``'raw_content'``).
        max_tokens_per_source: Token budget per source; the raw content is cut at
            two characters per token.
        include_raw_content: When true, append each source's (possibly truncated)
            raw content.

    Returns:
        The formatted text with surrounding whitespace stripped.
    """
    # Gather the results of every response into one flat list.
    collected = []
    for response in search_response:
        collected += response['results']

    # One entry per URL: a repeated URL keeps its first position, the last entry wins.
    latest_by_url = {}
    for entry in collected:
        latest_by_url[entry['url']] = entry

    divider = '=' * 80
    report = "Content from sources:\n"
    for entry in latest_by_url.values():
        report += (
            f"{divider}\n"
            f"Source: {entry['title']}\n"
            f"{'-'*80}\n"
            f"URL: {entry['url']}\n===\n"
            f"Most relevant content from source: {entry['content']}\n===\n"
        )
        if include_raw_content:
            # Character budget: this version allows 2 characters per token.
            max_chars = max_tokens_per_source * 2
            raw_text = entry.get('raw_content', '')
            if raw_text is None:
                # The key exists but holds no content: warn and use an empty string.
                raw_text = ''
                print(f"Warning: No raw_content found for source {entry['url']}")
            if len(raw_text) > max_chars:
                raw_text = raw_text[:max_chars] + "... [truncated]"
            report += f"Full source content limited to {max_tokens_per_source} tokens: {raw_text}\n\n"
        report += f"{divider}\n\n"

    return report.strip()

In [None]:
def web_search_worker(section: Section, opt_print=False):
    """Research one report section and draft its content, mutating it in place.

    For a section with `research` set, this asks the LLM for search queries,
    runs them through `ask_tavily`, formats the results, and asks the LLM to
    write the section from them. It stores the outcome on `section` as
    `search_query`, `query_content` and `section_content`. Sections without
    `research` are only announced and otherwise left untouched.

    Args:
        section: The section to process; updated in place.
        opt_print: Forwarded to `ask_tavily` as its print option.

    Reads the notebook-level `topic` and `section_writer_instructions`.
    Returns None.
    """
    print(f"Thread: {section}")

    # Nothing to look up for sections that are not flagged for research.
    if not section.research:
        return

    # Step 1: have the LLM propose search queries for this section.
    query_messages = [
        {"role": "system", "content": report_query_writer(section.name, section.description, "3")},
        {"role": "user", "content": "Generate search queries on the provided topic."},
    ]
    queries = json.loads(generate_response(query_messages))['queries']
    section.search_query = queries

    # Step 2: run the web searches.
    search_tasks = []       # passed to ask_tavily as the result accumulator (presumably filled by its workers)
    req_topic = 'general'   # search topic: choose 'general' or 'news'
    req_num_result = 2      # number of sites to return per web query
    include_raw = True      # whether to return each site's raw content
    ask_tavily(queries, search_tasks, req_num_result, include_raw, req_topic, opt_print)

    # Step 3: collapse the search results into a single source digest.
    source_str = deduplicate_and_format_sources(
        search_tasks,
        max_tokens_per_source=2000,
        include_raw_content=True,
    )
    section.query_content = source_str

    # Step 4: have the LLM write the section from the gathered sources.
    writer_messages = [
        {"role": "system", "content": section_writer_instructions},
        {"role": "user", "content": section_writer_inputs(topic, section.name, section.description, source_str)},
    ]
    section.section_content = generate_response(writer_messages)


In [None]:
def final_section_writer_worker(section: Section, opt_print=True):
    """Generate a section's content from the final-section writer prompt.

    Builds the system prompt with `final_section_writer_instructions`, sends
    it to the LLM together with a fixed user prompt, and stores the reply in
    `section.section_content`.

    Args:
        section: The section to write; updated in place.
        opt_print: Currently unused; kept so existing callers keep working.

    Reads the notebook-level `topic` and `source_str`. Returns None.
    """
    user_prompt = "Generate a report section based on the provided sources."
    # Capture the rendered prompt: the system message must carry the text
    # returned by the prompt builder, not the builder function itself.
    # NOTE(review): `source_str` is the notebook-level value, not something
    # derived from `section` -- confirm it holds the intended context here.
    system_prompt = final_section_writer_instructions(topic, section.name, section.description, source_str)

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    section.section_content = generate_response(messages)

In [None]:
# Report configuration. These names are notebook-global and are read by
# later cells and by the worker functions.
# The topic is a Korean question asking how MCP (Model Context Protocol)
# and A2A (Agent to Agent) differ.
topic = "MCP(model context protocol) 과 A2A(Agent to Agent) 는 어떤 차이가 있는것인지 알려줘."
# Number of search queries to request; passed to the prompt builder as a string.
num_queries = "3"
# NOTE(review): model_id is not used anywhere in the visible cells -- confirm where it is consumed.
model_id = 102
# System prompt asking the LLM for planning queries (report_organization is defined in an earlier cell).
report_planner_query_prompt = report_query_writer(topic, report_organization, num_queries)

In [None]:
# Ask the LLM for the web search queries used to plan the report sections.
user_prompt = "Generate search queries that will help with planning the sections of the report."
messages = [
    {"role": "system", "content": report_planner_query_prompt},
    {"role": "user", "content": user_prompt}
]

response_query = generate_response(messages)

# The reply is parsed as JSON and must contain a 'queries' key;
# json.loads raises if the model returns anything that is not valid JSON.
json_data = json.loads(response_query)
queries = json_data['queries']

# The printed label reads: "3 web query sentences extracted from the user's utterance:"
print("사용자 발화 기반으로 추출한 web query 문장 3건:")
print(queries)

{
  "queries": [
    "MCP vs A2A protocol differences in communication systems",
    "Comparison of MCP and A2A protocols in agent-to-agent communication",
    "Key features and applications of MCP and A2A protocols in model context"
  ]
}

inference time: 1.92460 sec 


사용자 발화 기반으로 추출한 web query 문장 3건:
['MCP vs A2A protocol differences in communication systems', 'Comparison of MCP and A2A protocols in agent-to-agent communication', 'Key features and applications of MCP and A2A protocols in model context']


In [None]:
# Run the planning queries through the web search and format the results.
# search_tasks is handed to ask_tavily as the result accumulator
# (presumably filled by its worker threads -- confirm against ask_tavily).
search_tasks = []
req_topic = 'general' # search topic: choose 'general' or 'news'
req_num_result = 2    # number of sites to return per web query
include_raw = False    # whether to return each site's raw content

ask_tavily(queries, search_tasks, req_num_result, include_raw, req_topic)
# Raw content is excluded here, so max_tokens_per_source has no effect on this call.
source_str = deduplicate_and_format_sources(search_tasks, max_tokens_per_source=2000, include_raw_content=False)



Run ask_tavily task: 

Thread: MCP vs A2A protocol differences in communication systems
Thread: Comparison of MCP and A2A protocols in agent-to-agent communication
Thread: Key features and applications of MCP and A2A protocols in model context

ask_tavily task running time: 2.76초 



In [None]:
# Build the planner system prompt from the topic, the report structure and
# the formatted search results. No reviewer feedback is supplied on this pass.
feedback = ""
planner_writer_prompt = report_planner_instructions(topic, report_organization, source_str, feedback)
# Printed so the fully rendered prompt can be inspected.
print(planner_writer_prompt)

You are performing research for a report. 
<Report topic>
MCP(model context protocol) 과 A2A(Agent to Agent) 는 어떤 차이가 있는것인지 알려줘.
</Report topic>

<Report organization>
Use this structure to create a report on the user-provided topic:

1. Introduction (no research needed)
   - Brief overview of the topic area

2. Main Body Sections:
   - Each section should focus on a sub-topic of the user-provided topic
   
3. Conclusion
   - Aim for 1 structural element (either a list of table) that distills the main body sections 
   - Provide a concise summary of the report
</Report organization>
    
<Context>
Here is context to use to plan the sections of the report: 
Content from sources:
Source: A2A vs MCP: key difference - by fulliron
--------------------------------------------------------------------------------
URL: https://fulliron.substack.com/p/a2a-vs-mcp-key-difference
===
Most relevant content from source: Full Iron Substack A2A vs MCP: key difference Full Iron Substack A2A vs MCP: key d

In [None]:
# Ask the LLM for the report plan. The user prompt pins the expected JSON
# shape: a 'sections' list whose items have name, description, research and
# content fields.
plan_user_prompt = """Generate the sections of the report. Your response must include a 'sections' field containing a list of sections.
                      Each section must have: name, description, research and content fields.
                      You must not add anything other than these fields under any circumstances."""

messages = [
    {"role": "system", "content": planner_writer_prompt},
    {"role": "user", "content": plan_user_prompt}
]

response_planner = generate_response(messages)

# json.loads raises if the model reply is not valid JSON.
json_planner_data = json.loads(response_planner)

{
  "sections": [
    {
      "name": "Introduction",
      "description": "Brief overview of the topic area",
      "research": false,
      "content": ""
    },
    {
      "name": "Overview of A2A (Agent-to-Agent Protocol)",
      "description": "Focus on agent collaboration and functionality",
      "research": true,
      "content": ""
    },
    {
      "name": "Overview of MCP (Model Context Protocol)",
      "description": "Standardization of how models interact with external resources",
      "research": true,
      "content": ""
    },
    {
      "name": "Comparison between A2A and MCP",
      "description": "Key differences and complementary protocols",
      "research": true,
      "content": ""
    },
    {
      "name": "Conclusion",
      "description": "",
      "research": false,
      "content": ""
    }
  ]
}

inference time: 7.00290 sec 




In [None]:
# Pull the list of planned sections out of the parsed reply and pretty-print it.
plan_from_llm = json_planner_data['sections']
print(json.dumps(plan_from_llm, indent=4))

[
    {
        "name": "Introduction",
        "description": "Brief overview of the topic area",
        "research": false,
        "content": ""
    },
    {
        "name": "Overview of A2A (Agent-to-Agent Protocol)",
        "description": "Focus on agent collaboration and functionality",
        "research": true,
        "content": ""
    },
    {
        "name": "Overview of MCP (Model Context Protocol)",
        "description": "Standardization of how models interact with external resources",
        "research": true,
        "content": ""
    },
    {
        "name": "Comparison between A2A and MCP",
        "description": "Key differences and complementary protocols",
        "research": true,
        "content": ""
    },
    {
        "name": "Conclusion",
        "description": "",
        "research": false,
        "content": ""
    }
]


In [None]:
# Convert each planned section dict into a Section object.
# Each dict must provide 'name', 'description', 'content' and 'research'.
report_sections = []

for part in plan_from_llm:
    section = Section(
        name=part['name'],
        description=part['description'],
        content=part['content'],
        research=part['research']
    )
    report_sections.append(section)

In [None]:
# Show every Section's string form, one per line with a blank line between.
for section in report_sections:
    print(f'{section} \n')

name='Introduction' description='Brief overview of the topic area' research=False content='' search_query=None query_content=None section_content=None 

name='Overview of A2A (Agent-to-Agent Protocol)' description='Focus on agent collaboration and functionality' research=True content='' search_query=None query_content=None section_content=None 

name='Overview of MCP (Model Context Protocol)' description='Standardization of how models interact with external resources' research=True content='' search_query=None query_content=None section_content=None 

name='Comparison between A2A and MCP' description='Key differences and complementary protocols' research=True content='' search_query=None query_content=None section_content=None 

name='Conclusion' description='' research=False content='' search_query=None query_content=None section_content=None 



In [None]:
# Research all sections concurrently: one thread per section, each running
# web_search_worker with opt_print=True, then time the whole batch.
start_time = time.time()
threads = []

for section in report_sections:
    t = threading.Thread(target=web_search_worker, args=(section, True,))
    threads.append(t)
    t.start()

# Wait for every worker. An exception raised inside a worker thread is not
# re-raised here; that section is simply left without the fields the worker
# had not yet set.
for thread in threads:
    thread.join()

end_time = time.time()
execution_time = end_time - start_time

# The printed label reads: "Execution time: N.NN seconds"
print(f"실행 시간: {execution_time:.2f}초")

Thread: name='Introduction' description='Brief overview of the topic area' research=False content='' search_query=None query_content=None section_content=None
Thread: name='Overview of A2A (Agent-to-Agent Protocol)' description='Focus on agent collaboration and functionality' research=True content='' search_query=None query_content=None section_content=None
Thread: name='Overview of MCP (Model Context Protocol)' description='Standardization of how models interact with external resources' research=True content='' search_query=None query_content=None section_content=None
Thread: name='Comparison between A2A and MCP' description='Key differences and complementary protocols' research=True content='' search_query=None query_content=None section_content=None
Thread: name='Conclusion' description='' research=False content='' search_query=None query_content=None section_content=None
{
{
 {
   " "queries "queriesqueries":":": [
 [
 [
       "    " "AAM2A2CPA protocol model context agent vs MCP 

Exception in thread Thread-11:
Traceback (most recent call last):
  File "/home/freenak/miniconda3/envs/plug_env/lib/python3.9/threading.py", line 980, in _bootstrap_inner
    self.run()
  File "/home/freenak/miniconda3/envs/plug_env/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 766, in run_closure
    _threading_Thread_run(self)
  File "/home/freenak/miniconda3/envs/plug_env/lib/python3.9/threading.py", line 917, in run
    self._target(*self._args, **self._kwargs)
  File "/tmp/ipykernel_49922/3898379593.py", line 32, in web_search_worker
  File "/tmp/ipykernel_49922/1369138122.py", line 2, in generate_response
  File "/home/freenak/miniconda3/envs/plug_env/lib/python3.9/site-packages/openai/_utils/_utils.py", line 279, in wrapper
    return func(*args, **kwargs)
  File "/home/freenak/miniconda3/envs/plug_env/lib/python3.9/site-packages/openai/resources/chat/completions/completions.py", line 914, in create
    return self._post(
  File "/home/freenak/miniconda3/envs/plug_en


ask_tavily task running time: 3.15초 

 Overview of## A2 Overview ofA MCP ( (AgentModel-to Context-Agent Protocol Protocol)

)
TheThe Model Agent Context-to Protocol-Agent ( (MACP2)A is) a protocol standardized is protocol an designed open to standard enhance developed the by interaction Google between to Large enable Language communication Models and ( collaborationLL betweenMs autonomous) agents and, applications regardless by of their providing structured underlying context frameworks management or. vendors MCP. takes This some protocol inspiration aims from to the simplify Language enterprise Server agent Protocol integration, and which address standard theizes current how lack to of add agent support interoper forability programming.

 languages### across Key a Features whole

 ecosystem* of   development Dynamic tools,.

 multimModalCP communication standard betweenizes different how agents to without integrate sharing additional memory context, and resources tools, into and the 

In [None]:
# Dump each section's name, description, search queries and generated
# content as plain text for inspection.
for section in report_sections:
    # name and description are concatenated with +, so both must be strings.
    print("section.name: " + section.name)
    print("section.description: " + section.description)
    print("section.search_query: ")
    print(section.search_query)
    print("section.section_content: ")
    print(section.section_content)
    print("====================================")

section.name: Introduction
section.description: Brief overview of the topic area
section.search_query: 
None
section.section_content: 
None
section.name: Overview of A2A (Agent-to-Agent Protocol)
section.description: Focus on agent collaboration and functionality
section.search_query: 
['A2A protocol agent collaboration techniques for enhanced functionality', 'Agent-to-Agent protocol architecture and design principles for effective collaboration', 'Comparative analysis of A2A protocol features and capabilities in multi-agent systems']
section.section_content: 
## Overview of A2A (Agent-to-Agent Protocol)
The Agent-to-Agent (A2A) protocol is an open standard developed by Google to enable communication and collaboration between autonomous agents, regardless of their underlying frameworks or vendors. This protocol aims to simplify enterprise agent integration and address the current lack of agent interoperability.

### Key Features

*   Dynamic, multimodal communication between different 

In [None]:
# Render each section's generated Markdown.
# A section has no content when it was not flagged for research or when its
# worker thread failed. Rendering Markdown(None) only shows a bare object
# repr, so an explicit placeholder naming the section is rendered instead.
for section in report_sections:
    if section.section_content:
        display(Markdown(section.section_content))
    else:
        display(Markdown(f"*No content generated for section: {section.name}*"))

<IPython.core.display.Markdown object>

## Overview of A2A (Agent-to-Agent Protocol)
The Agent-to-Agent (A2A) protocol is an open standard developed by Google to enable communication and collaboration between autonomous agents, regardless of their underlying frameworks or vendors. This protocol aims to simplify enterprise agent integration and address the current lack of agent interoperability.

### Key Features

*   Dynamic, multimodal communication between different agents without sharing memory, resources, and tools
*   Open standard driven by community
*   Supports key enterprise requirements, including capability discovery, user experience negotiation, task and state management, and secure collaboration
*   Empowers developers to build agents capable of connecting with any other agent built using the protocol
*   Offers users the flexibility to combine agents from various providers

### Comparison with MCP (Model Context Protocol)

While MCP provides helpful tools and context to agents, A2A focuses on agent-agent collaboration and communication. MCP connects agents to tools, APIs, and resources with structured inputs/outputs, whereas A2A enables dynamic communication between independent AI agents.

### Adoption and Community Support

The A2A protocol has gained significant traction, with over 50 technology partners contributing to its development. The open-source project is run by Google LLC and is open to contributions from the entire community.

### Real-World Applications

The A2A protocol has the potential to revolutionize multi-agent AI systems by providing a standardized way for AI agents to communicate. This can lead to increased autonomy, productivity gains, and reduced long-term costs.

### Conclusion

In conclusion, the A2A protocol is a significant step towards enabling seamless collaboration between autonomous agents. Its open standard nature, community-driven development, and wide adoption make it an exciting development in the field of AI.

### Sources:
[1] Agent2Agent Protocol: https://google.github.io/A2A/
[2] Announcing the Agent2Agent Protocol (A2A) - Google Developers Blog: https://developers.googleblog.com/en/a2a-a-new-era-of-agent-interoperability/
[3] GitHub - google/A2A: An open protocol enabling communication and interoperability between opaque agentic applications.: https://github.com/google/A2A
[4] Meet Google A2A: The Protocol That will Revolutionize Multi-Agent AI Systems | by Manoj Desai | Apr, 2025 | Medium: https://medium.com/@the_manoj_desai/meet-google-a2a-the-protocol-that-will-revolutionize-multi-agent-ai-systems-80d55a4583ed

## Overview of MCP (Model Context Protocol)

The Model Context Protocol (MCP) is a standardized protocol designed to enhance the interaction between Large Language Models (LLMs) and applications by providing structured context management. MCP takes some inspiration from the Language Server Protocol, which standardizes how to add support for programming languages across a whole ecosystem of development tools.

MCP standardizes how to integrate additional context and tools into the ecosystem of AI applications. It enables powerful capabilities through arbitrary data access and code execution paths. Hosts must obtain explicit user consent before exposing user data to servers, and hosts must not transmit resource data elsewhere without user consent.

The protocol uses JSON-RPC 2.0 messages to establish communication between hosts, clients, and servers. Hosts are LLM applications that initiate connections, clients are connectors within the host application, and servers are services that provide context and capabilities.

[1] https://modelcontextprotocol.io/docs/concepts/architecture
[2] https://techcommunity.microsoft.com/blog/educatordeveloperblog/unleashing-the-power-of-model-context-protocol-mcp-a-game-changer-in-ai-integrat/4397564
[3] https://modelcontextprotocol.info/docs/concepts/resources/
[4] https://modelcontextprotocol.info/specification/
[5] https://spec.modelcontextprotocol.io/specification/2024-11-05/

<IPython.core.display.Markdown object>

<IPython.core.display.Markdown object>