# Selector Group Chat

SelectorGroupChat is a group chat similar to RoundRobinGroupChat, but with a model-based next speaker selection mechanism. When the team receives a task through run() or run_stream(), the following steps are executed:
1. The team analyzes the current conversation context, including the conversation history and participants’ name and description attributes, to determine the next speaker using a model. You can override the model by providing a custom selection function.
2. The team prompts the selected speaker agent to provide a response, which is then broadcast to all other participants.
3. The termination condition is checked to determine whether the conversation should end; if not, the process repeats from step 1.
4. When the conversation ends, the team returns the TaskResult containing the conversation history from this task.

Once the team finishes the task, the conversation context is kept within the team and all participants, so the next task can continue from the previous conversation context. You can reset the conversation context by calling reset().

## Load Azure Configurations

In [1]:
from dotenv import load_dotenv
import os

# Read a local .env file (if one can be found) into the process environment so
# that the os.getenv() lookups below can see the Azure settings stored there.
# Without this call the imported load_dotenv is never used.
load_dotenv()

# Azure OpenAI connection settings. os.getenv() returns None for any variable
# that is not set.
azure_openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
azure_openai_key = os.getenv("AZURE_OPENAI_API_KEY")
azure_openai_deployment = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME")
azure_openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION")

## Create Azure OpenAI Client

In [2]:
from autogen_ext.models.openai import AzureOpenAIChatCompletionClient
from azure.identity import DefaultAzureCredential, get_bearer_token_provider

# Authentication: the client below uses key-based authentication (api_key).
# To authenticate with a bearer token instead, uncomment the token provider here
# and the azure_ad_token_provider argument in the constructor call.
#token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default")

# Model client shared by every agent and by the team's speaker selector.
azure_model_client = AzureOpenAIChatCompletionClient(
    azure_endpoint=azure_openai_endpoint,
    api_version=azure_openai_api_version,
    azure_deployment=azure_openai_deployment,
    # NOTE(review): the deployment name is reused as the model name; confirm
    # the deployment is named after the underlying model.
    model=azure_openai_deployment,
    api_key=azure_openai_key,  # For key-based authentication.
    # azure_ad_token_provider=token_provider,  # Optional if you choose key-based authentication.
)

## Creating the Agents

In [3]:
import arxiv
import json
import os

from langchain_community.document_loaders import PyPDFLoader
from autogen_agentchat.agents import AssistantAgent, UserProxyAgent
from autogen_agentchat.conditions import SourceMatchTermination, TextMentionTermination
from autogen_agentchat.teams import SelectorGroupChat
from autogen_agentchat.ui import Console
from autogen_core import CancellationToken

# Root directory for all generated files. The tool functions in this notebook
# join it with a normalised topic name to get a per-topic subdirectory.
PAPER_DIR = "../ArxivResearcher/sgc_papers"

# Create the user proxy agent.
user_proxy_agent = UserProxyAgent("user_proxy", input_func=input)  # Use input() to get user input from console.

# Use arxiv to find the papers 
def search_arxiv(topic: str, max_results: int = 5) -> str:
    """Search arXiv for the most recently submitted papers on a topic and save their metadata.

    The metadata is written to 'papers_info.json' inside a per-topic
    subdirectory of PAPER_DIR. If that file already exists and holds valid
    JSON, it is treated as a cache and arXiv is not queried again.

    Args:
        topic: Search query. Lower-cased and with spaces replaced by
            underscores, it is also the name of the topic subdirectory.
        max_results: Maximum number of papers to retrieve.

    Returns:
        Path to the 'papers_info.json' file for this topic.
    """
    # Create directory for this topic
    path = os.path.join(PAPER_DIR, topic.lower().replace(" ", "_"))
    os.makedirs(path, exist_ok=True)
    file_path = os.path.join(path, "papers_info.json")

    # Cache check: a readable, valid JSON file means an earlier search already
    # ran for this topic, so return it without touching arXiv.
    try:
        with open(file_path, "r", encoding="utf-8") as json_file:
            json.load(json_file)
    except (FileNotFoundError, json.JSONDecodeError):
        pass  # Missing or corrupt file: fall through and (re)build it.
    else:
        return file_path

    # Search for the latest articles matching the queried topic
    client = arxiv.Client()
    search = arxiv.Search(
        query=topic,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.SubmittedDate,
        sort_order=arxiv.SortOrder.Descending,
    )

    papers_info = {}
    for paper in client.results(search):
        # Use the last part of the entry_id as a unique identifier
        paper_id = paper.entry_id.split('/')[-1]
        papers_info[paper_id] = {
            'title': paper.title,
            'authors': [author.name for author in paper.authors],
            'summary': paper.summary,
            'pdf_url': paper.pdf_url,
            'published': str(paper.published.date()),
        }

    # Save papers_info to json file
    with open(file_path, "w", encoding="utf-8") as json_file:
        json.dump(papers_info, json_file, indent=2)
    print(f"Papers metadata saved to {file_path}")

    return file_path

# Create the arxiv search agent.
# Its only tool is search_arxiv, and it uses the shared Azure OpenAI model client.
# NOTE: the backslash line continuations inside the string literals below drop
# the newline but keep the next line's leading spaces, so the description and
# system message contain runs of embedded whitespace.
arxiv_search_agent = AssistantAgent(
    name="arxiv_search",
    description="An agent that can retrieve research papers related to user input on arXiv \
        and save the metadata of the papers in a JSON file.",
    model_client=azure_model_client,
    tools=[search_arxiv],
    system_message="You are an expert academic search assistant. \
        Your task is to find the most relevant and recent papers on arXiv \
        for given user input, henceforth called topic, and stores them in JSON file 'papers_info.json'."
)
# Create the content extractor agent that downloads the PDFs and extracts the content.
def content_extractor(file_path: str) -> str:
    """Download the PDF of every paper listed in a metadata file and extract its leading text.

    Args:
        file_path: Path to a JSON file whose keys are arXiv paper ids (the
            'papers_info.json' file written by search_arxiv).

    Returns:
        Path to 'papers_contents.json', written in the same directory as
        file_path. It maps each paper id to at most 5000 characters of
        extracted text, or to "No content found." when the PDF produced no
        pages. Papers that arXiv cannot find by id are skipped.
    """
    max_chars = 5000  # Limit the text kept per paper for brevity

    with open(file_path, "r", encoding="utf-8") as json_file:
        papers_info = json.load(json_file)

    papers_dir = os.path.dirname(file_path)
    # One client for the whole run instead of a new one per paper.
    client = arxiv.Client()

    papers_contents = {}
    # Only the ids are needed here; the stored metadata values are not used.
    for paper_id in papers_info:
        pdf_path = os.path.join(papers_dir, f"{paper_id}.pdf")

        # Download the PDF if it doesn't exist
        if not os.path.exists(pdf_path):
            paper = next(client.results(arxiv.Search(id_list=[paper_id])), None)
            if paper is None:
                # Nothing to download for this id; skip it rather than abort
                # the whole extraction.
                print(f"Paper {paper_id} not found on arXiv; skipping.")
                continue
            paper.download_pdf(filename=pdf_path)

        # Load the PDF and extract text
        documents = PyPDFLoader(pdf_path).load()

        if documents:
            # The loader returns one document per page, so join the pages
            # before truncating; documents[0] alone is only the first page.
            text = "\n".join(page.page_content for page in documents)
            papers_contents[paper_id] = text[:max_chars]
        else:
            papers_contents[paper_id] = "No content found."

    # Save the extracted contents to a JSON file
    contents_file_path = os.path.join(papers_dir, "papers_contents.json")
    with open(contents_file_path, "w", encoding="utf-8") as contents_file:
        json.dump(papers_contents, contents_file, indent=2)

    return contents_file_path

# Create the Content Extractor agent.
# The Python variable is info_extractor_agent, but the agent's registered name
# is "content_extractor", the same identifier as the tool function it is given.
# NOTE: the backslash continuations inside the string literals keep the next
# line's leading spaces as part of the string.
info_extractor_agent = AssistantAgent(
    name="content_extractor",
    description="An agent that extracts raw text from all the downloaded arxiv research papers \
    obtained from the arxiv search agent.",
    model_client=azure_model_client,
    tools=[content_extractor],
    system_message="You are a research papers content extractor. \
        Read paper_id from papers_info.json. Download PDFs and extract the raw text content from each paper. \
        Ignore the papers whose content could not be extracted.\
        The extracted contents should be saved in a JSON file named 'papers_contents.json'.\
        Return the path to the JSON file."
)

# Function to generate full text from the extracted papers contents.
def generate_full_text(content_file_path: str):
    """Read the papers' raw texts from the JSON file and combine them into one string."""
    with open(content_file_path, "r") as handle:
        contents_by_id = json.load(handle)

    # One text per paper, joined with a newline between consecutive papers.
    paper_count = len(contents_by_id)
    combined = "\n".join(text for text in contents_by_id.values())

    print(f"Read {paper_count} papers contents from {content_file_path}.")
    print(f"Total characters in papers contents: {len(combined)}")

    return combined

# Create the Research Summarizer agent.
# Its only tool is generate_full_text, which returns the combined paper texts
# read from a JSON file; the system message tells the agent to call it first.
# NOTE: the backslash continuations inside the system message keep the next
# line's leading spaces as part of the string.
research_summarizer_agent = AssistantAgent(
    name="research_summarizer",
    description="An agent that writes a single coherent summary on a topic from the top 5 research papers on arXiv.",
    model_client=azure_model_client,
    tools=[generate_full_text], 
    system_message="You are an expert academic researcher. \
        Your task is to write a single coherent summary relevant to the given topic in under 5000 words. \
        To do this, first generate full text from the contents of all papers by reading \
        the JSON file 'papers_contents.json' located in the topic's subdirectory inside the project directory. \
        The file contains the paper_ids and raw text content from each paper. \
        Focus on common themes and significant advancements. \
        Do not just summarize the papers individually, Always address the topic at hand."   
)

# Create the summary writer agent that writes the generated summary to a text file.
def write_summary_to_file(summary: str, topic: str) -> str:
    """Write the generated summary to a text file named 'summary.txt' in the topic's subdirectory.

    Args:
        summary: The summary text to save.
        topic: The research topic. Lower-cased and with spaces replaced by
            underscores, it names the subdirectory of PAPER_DIR to write into.

    Returns:
        Path to the written 'summary.txt' file.
    """
    path = os.path.join(PAPER_DIR, topic.lower().replace(" ", "_"))
    # The topic string may not match an existing subdirectory exactly, so make
    # sure the target directory exists instead of failing with
    # FileNotFoundError.
    os.makedirs(path, exist_ok=True)
    summary_file_path = os.path.join(path, "summary.txt")

    # Explicit UTF-8: the summary may contain characters that the platform's
    # default encoding cannot represent.
    with open(summary_file_path, "w", encoding="utf-8") as summary_file:
        summary_file.write(summary)

    print(f"Summary written to {summary_file_path}")
    return summary_file_path

# Create the summary writer agent. Its only tool is write_summary_to_file.
# NOTE: the backslash continuation inside the system message keeps the next
# line's leading spaces as part of the string.
summary_writer_agent = AssistantAgent(
    name="summary_writer",
    description="An agent that writes the generated summary to a text file.",
    model_client=azure_model_client,
    tools=[write_summary_to_file],
    system_message="You are a summary writer. \
        Your task is to write the given summary to a text file named 'summary.txt' in the topic's subdirectory."
)

In [4]:
# Create the planning agent.
# The team members are listed by the `name` each agent was created with
# (e.g. "arxiv_search"), not by the Python variable holding it, so the plan
# refers to participants by the same names the team uses.
# The prompt is a plain string: it has no placeholders, so no f-prefix is needed.
planning_agent = AssistantAgent(
    "PlanningAgent",
    description="An agent for planning tasks, this agent should be the first to engage when given a new task.",
    model_client=azure_model_client,
    system_message="""
    You are a planning agent.
    Your job is to break down complex tasks into smaller, manageable subtasks.
    Your team members are:
        user_proxy: The user who provides the initial input from the console.
        arxiv_search: An agent that can retrieve research papers related to user input on arXiv and save the metadata of the papers in a JSON file.
        content_extractor: An agent that extracts raw text from all the downloaded arxiv research papers
                            obtained from the arxiv search agent.
        research_summarizer: An agent that writes a single coherent summary on a topic from the top 5 research papers on arXiv.
        summary_writer: An agent that writes the generated summary to a text file.

    You will follow this sequence:
        arxiv_search will retrieve top 5 research papers based on the initial user prompt and save their metadata in papers_info.json.
        content_extractor will extract the raw text from the downloaded papers and save it in papers_contents.json.
        research_summarizer will generate a coherent summary based on the contents of the papers.
        summary_writer will write the generated summary to a text file.
        
    You only plan and delegate tasks - you do not execute them yourself.

    When assigning tasks, use this format:
    1. <agent> : <task>
    """,
)

## Creating the team
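Let’s create the team with two termination conditions:
- TextMentionTermination to end the conversation when "QUIT" is mentioned in a message (for example, when the User Proxy sends it).
- SourceMatchTermination to end the conversation as soon as the summary_writer agent has responded, so that the run cannot loop indefinitely.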

In [5]:
# End the run when "QUIT" is mentioned in a message.
text_mention_termination = TextMentionTermination("QUIT")
# End the run once the summary_writer agent has produced a message.
# The sources are passed as a list: a bare string would turn the source check
# into a substring test against "summary_writer".
source_match_termination = SourceMatchTermination(["summary_writer"])
# Either condition is enough to stop the team.
termination = text_mention_termination | source_match_termination

team = SelectorGroupChat(
    [
        planning_agent,
        user_proxy_agent,
        arxiv_search_agent,
        info_extractor_agent,
        research_summarizer_agent,
        summary_writer_agent,
    ],
    model_client=azure_model_client,  # Model used to pick the next speaker.
    termination_condition=termination,
)

## Specify the Task and Run the Team

In [6]:
# Stream the messages to the console.
# No task is passed to run_stream() here; in the recorded output the
# PlanningAgent speaks first and asks the user for a topic, which the user
# proxy then supplies from console input.
# Top-level `await` works because this runs in a notebook cell.
await Console(team.run_stream())  
#await team.reset()  # Reset the team for the next run.

---------- TextMessage (PlanningAgent) ----------
Please provide the initial input or topic you would like to research.
---------- TextMessage (user_proxy) ----------
LoRA
---------- ToolCallRequestEvent (arxiv_search) ----------
[FunctionCall(id='call_H6mRskFT5vSv2XrNlY1MfSqv', arguments='{"topic":"LoRA","max_results":5}', name='search_arxiv')]
Papers metadata saved to ../ArxivResearcher/sgc_papers/lora/papers_info.json
---------- ToolCallExecutionEvent (arxiv_search) ----------
[FunctionExecutionResult(content='../ArxivResearcher/sgc_papers/lora/papers_info.json', name='search_arxiv', call_id='call_H6mRskFT5vSv2XrNlY1MfSqv', is_error=False)]
---------- ToolCallSummaryMessage (arxiv_search) ----------
../ArxivResearcher/sgc_papers/lora/papers_info.json
---------- ToolCallRequestEvent (content_extractor) ----------
[FunctionCall(id='call_1rNVqG67foHdc30VwBcd8DkN', arguments='{"file_path":"../ArxivResearcher/sgc_papers/lora/papers_info.json"}', name='content_extractor')]


Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 25 0 (offset 0)
Ignoring wrong pointing object 30 0 (offset 0)
Ignoring wrong pointing object 32 0 (offset 0)
Ignoring wrong pointing object 34 0 (offset 0)
Ignoring wrong pointing object 39 0 (offset 0)
Ignoring wrong pointing object 44 0 (offset 0)
Ignoring wrong pointing object 46 0 (offset 0)
Ignoring wrong pointing object 58 0 (offset 0)
Ignoring wrong pointing object 60 0 (offset 0)
Ignoring wrong pointing object 62 0 (offset 0)
Ignoring wrong pointing object 103 0 (offset 0)


---------- ToolCallExecutionEvent (content_extractor) ----------
[FunctionExecutionResult(content='../ArxivResearcher/sgc_papers/lora/papers_contents.json', name='content_extractor', call_id='call_1rNVqG67foHdc30VwBcd8DkN', is_error=False)]
---------- ToolCallSummaryMessage (content_extractor) ----------
../ArxivResearcher/sgc_papers/lora/papers_contents.json
---------- TextMessage (arxiv_search) ----------
The search for recent papers on "LoRA" has been completed. You can find the details stored in the file `papers_info.json`. If you need further information or want to extract specific content from the papers, please let me know!
---------- TextMessage (content_extractor) ----------
The extraction of paper contents has been completed successfully. The extracted content is saved in the JSON file named `papers_contents.json`. You can find the file at the following path:

`../ArxivResearcher/sgc_papers/lora/papers_contents.json` 

If you need any further assistance, feel free to ask!
---

TaskResult(messages=[TextMessage(source='PlanningAgent', models_usage=RequestUsage(prompt_tokens=288, completion_tokens=14), metadata={}, created_at=datetime.datetime(2025, 6, 26, 12, 53, 22, 925120, tzinfo=datetime.timezone.utc), content='Please provide the initial input or topic you would like to research.', type='TextMessage'), UserInputRequestedEvent(source='user_proxy', models_usage=None, metadata={}, created_at=datetime.datetime(2025, 6, 26, 12, 53, 23, 496769, tzinfo=datetime.timezone.utc), request_id='4c834075-73e5-495e-be25-865300fb859d', content='', type='UserInputRequestedEvent'), TextMessage(source='user_proxy', models_usage=None, metadata={}, created_at=datetime.datetime(2025, 6, 26, 12, 53, 27, 67812, tzinfo=datetime.timezone.utc), content='LoRA', type='TextMessage'), ToolCallRequestEvent(source='arxiv_search', models_usage=RequestUsage(prompt_tokens=132, completion_tokens=22), metadata={}, created_at=datetime.datetime(2025, 6, 26, 12, 53, 28, 981379, tzinfo=datetime.time