<a href="https://colab.research.google.com/github/rreimche/genai-exam/blob/main/SLRev.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Prerequisites:
- Google Colab
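- Access to a Google Colab Secret named "HF_API_TOKEN" containing a Huggingface API token with read access.
- Access to a Google Colab Secret named "GROQ_KEY" containing a Groq API key (used by the tool-calling agents).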

In [None]:
#@title Settings
from google.colab import userdata

download_dir = "papers"  # directory the papers to review are downloaded to
huggingface_apikey = userdata.get('HF_API_TOKEN')  # Hugging Face API key, read from the Colab secret "HF_API_TOKEN"
groq_key = userdata.get('GROQ_KEY')  # Groq API key, read from the Colab secret "GROQ_KEY"


In [None]:
#@title Declare bibtex references and parse them into a variable for later usage


# Raw BibTeX for the papers to review. It is parsed into `bibtex_items` in the
# "Parse bibtex references" cell; each entry's `eprint` field is the arXiv id
# later handed to the downloader.
bibtext_references = """


@misc{qian2024chatdevcommunicativeagentssoftware,
      title={ChatDev: Communicative Agents for Software Development},
      author={Chen Qian and Wei Liu and Hongzhang Liu and Nuo Chen and Yufan Dang and Jiahao Li and Cheng Yang and Weize Chen and Yusheng Su and Xin Cong and Juyuan Xu and Dahai Li and Zhiyuan Liu and Maosong Sun},
      year={2024},
      eprint={2307.07924},
      archivePrefix={arXiv},
      primaryClass={cs.SE},
      url={https://arxiv.org/abs/2307.07924},
},

@misc{nguyen2024agilecoderdynamiccollaborativeagents,
      title={AgileCoder: Dynamic Collaborative Agents for Software Development based on Agile Methodology},
      author={Minh Huynh Nguyen and Thang Phan Chau and Phong X. Nguyen and Nghi D. Q. Bui},
      year={2024},
      eprint={2406.11912},
      archivePrefix={arXiv},
      primaryClass={cs.SE},
      url={https://arxiv.org/abs/2406.11912},
},


"""



Entry Type: misc
  Key: qian2024chatdevcommunicativeagentssoftware
  url: https://arxiv.org/abs/2307.07924
  primaryclass: cs.SE
  archiveprefix: arXiv
  eprint: 2307.07924
  year: 2024
  author: Chen Qian and Wei Liu and Hongzhang Liu and Nuo Chen and Yufan Dang and Jiahao Li and Cheng Yang and Weize Chen and Yusheng Su and Xin Cong and Juyuan Xu and Dahai Li and Zhiyuan Liu and Maosong Sun
  title: ChatDev: Communicative Agents for Software Development
--------------------
Entry Type: misc
  Key: nguyen2024agilecoderdynamiccollaborativeagents
  url: https://arxiv.org/abs/2406.11912
  primaryclass: cs.SE
  archiveprefix: arXiv
  eprint: 2406.11912
  year: 2024
  author: Minh Huynh Nguyen and Thang Phan Chau and Phong X. Nguyen and Nghi D. Q. Bui
  title: AgileCoder: Dynamic Collaborative Agents for Software Development based on Agile Methodology
--------------------


In [None]:
#@title Install dependencies
!pip install -q autogen arxiv scholarly crossrefapi beautifulsoup4 requests cloudscraper pymupdf nltk autogen-agentchat autogen-ext[openai] groq pymupdf;
!pip install -q --pre bibtexparser;

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/55.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.6/55.6 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.4/48.4 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m642.5/642.5 kB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.1/143.1 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.7/99.7 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.0/20.0 MB[0m [31m50.4 MB/s[0m eta [36m0:00:00[

In [None]:
#@title Preparation for agents: model client connections
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.messages import TextMessage
from autogen_core.tools import FunctionTool
from autogen_core.models import UserMessage
#from autogen_agentchat.ui import Console
from autogen_core import CancellationToken
from autogen_ext.models.openai import OpenAIChatCompletionClient

# INSTANTIATE MODEL CLIENTS

# Client for plain text generation; the conceptualizer, summarizer, reviewer
# and feedback-processor agents are built on it. It targets the Hugging Face
# inference router and is declared with function_calling=False.
text_model_client = OpenAIChatCompletionClient(
    #model="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
    model="mistralai/Mistral-7B-Instruct-v0.3",
    base_url="https://router.huggingface.co/hf-inference/v1",
    api_key=huggingface_apikey,
    model_info={
        "vision": False,
        "function_calling": False,
        "json_output": False,
        "family": "unknown",
    },
)

# A second inference endpoint for the tool-using agents (downloader, parser):
# per the original author, Hugging Face cannot serve reflection on tool use in
# a form the autogen agent understands. This client points at Groq's
# OpenAI-compatible endpoint and is declared with function_calling=True.
tool_model_client = OpenAIChatCompletionClient(
    model="llama3-70b-8192",
    base_url="https://api.groq.com/openai/v1",
    api_key=groq_key,
    model_info={
        "vision": False,
        "function_calling": True,
        "json_output": False,
        "family": "unknown",
    },
)


# CHECK CONNECTION

async def check_model_connect() -> None:
    """Smoke-test both model clients.

    Sends the same one-line question first to the text client and then to the
    tool client, and prints the content of each reply.
    """
    probe = [UserMessage(content="What is the capital of France?", source="user")]

    text_reply = await text_model_client.create(messages=probe)
    tool_reply = await tool_model_client.create(messages=probe)

    for reply in (text_reply, tool_reply):
        print(reply.content)

#await check_model_connect()


The capital of France is Paris. It's a city known worldwide for its beautiful architecture, museums, fashion, and cuisine. However, Paris is not a country, it's a city, and France is the country.
The capital of France is Paris.


In [80]:
#@title Downloader agent
import asyncio
import datetime
import json
import os
import sys
from dataclasses import dataclass

import arxiv
from autogen_core import MessageContext, RoutedAgent, SingleThreadedAgentRuntime
from autogen_core import message_handler
from autogen_core.models import ChatCompletionClient

# HELPER AND TOOL FUNCTIONS

def maybe_create_dir(donwload_dir: str) -> None:
    """
        Create the given directory (including missing parents) if it is not
        present yet. An already existing directory is left untouched.

        :param donwload_dir: string, path of the directory to create.
        :raises Exception: if the directory cannot be created; the message
            names the directory that was requested.
    """

    try:
        os.makedirs(donwload_dir, exist_ok=True)
    except OSError as e:
        # Report the path that was actually passed in, not the notebook-level
        # `download_dir` setting, which may be a different directory.
        raise Exception(f"Error creating directory '{donwload_dir}': {e}") from e

def download_paper(eprint_id: str) -> str:
    """
        Download one arXiv paper as a PDF into the notebook's download directory.

        The paper is looked up by its arXiv id. The file is named after the
        paper title (spaces and path separators replaced by underscores) and
        stored under the module-level `download_dir`, which is created on demand.

        :param eprint_id: string, the arxiv eprint_id.
        :return: string with filepath of the downloaded paper.
        :raises ValueError: if eprint_id is empty.
        :raises Exception: if arXiv has no entry for the id, or the lookup or
            the download fails.
    """

    if not eprint_id:
        raise ValueError("eprint_id cannot be empty")

    try:
        client = arxiv.Client()
        search = arxiv.Search(id_list=[eprint_id], max_results=1)
        # A default of None keeps an empty result set from surfacing as a
        # StopIteration with an empty message.
        arxiv_result = next(client.results(search), None)
    except Exception as e:
        raise Exception(f"ERROR: arXiv download failed for: {eprint_id}: {e}") from e

    # Raised outside the try block so it is not re-wrapped as a download failure.
    if arxiv_result is None:
        raise Exception(f"ERROR: No arXiv result found for ID {eprint_id}.")

    # A path separator inside the title would point into a sub-directory that
    # does not exist, so it is replaced just like spaces are.
    safe_title = arxiv_result.title.replace(' ', '_')
    for separator in {'/', os.sep}:
        safe_title = safe_title.replace(separator, '_')
    filepath = os.path.join(download_dir, f"{safe_title}.pdf")

    try:
        maybe_create_dir(download_dir)
        arxiv_result.download_pdf(filename=filepath)
    except Exception as e:
        raise Exception(f"ERROR: arXiv download failed for: {eprint_id}: {e}") from e

    return filepath


# DOWNLOADER AGENT WITH TOOL

def create_downloader(given_name: str) -> AssistantAgent:
    """Build an agent that downloads arXiv papers through a tool.

    The agent is named `given_name`, runs on `tool_model_client`, exposes
    `download_paper` as its only tool and has `reflect_on_tool_use` enabled.
    """
    instructions = """
            Use downloader tool to download specified paper(s) from arxiv.
            Here is an example of how you must respond in success case:
              'mypapers/veryinterstingpaper.pdf'

            Very important: You MUST return the full filepath of the downloaded
            paper EXACTLY as it was given by the downloader tool.

            If download was not successful, respond like this:
              'ERROR: Download for paper with eprint id XXX failed for the following reason: REASON'.
          """

    paper_tool = FunctionTool(
        download_paper,
        description="Given a string denoting an eprint_id of a page on arxiv, downloads a paper",
    )

    agent = AssistantAgent(
        name=given_name,
        description="An agent that uses downloader tool to download a paper from Arxiv identified by a eprint_id",
        model_client=tool_model_client,
        tools=[paper_tool],
        reflect_on_tool_use=True,
        system_message=instructions,
    )
    return agent

def create_downloader_nameless() -> AssistantAgent:
    """Build a downloader agent under the default name "downloader".

    `AssistantAgent` requires a name, so constructing it without one fails.
    This variant delegates to `create_downloader` with a fixed default name,
    which also keeps the tool and prompt configuration of both factories in
    one place.
    """
    return create_downloader("downloader")

# THE SAME BUT USING CORE LIBRARY

@dataclass
class DownloadTask:
    """Message type handled by `DownloaderAgent.handle_task`."""
    # Task text for the downloader agent.
    task: str

@dataclass
class DownloadTaskResult:
    """Declared return type of `DownloaderAgent.handle_task`."""
    # Outcome of the download task, as text.
    result: str

async def async_download_paper(eprint_id: str) -> str:
    """
        Awaitable variant of `download_paper`.

        `download_paper` is a synchronous function, so it is run in a worker
        thread; calling it directly inside the coroutine would block the event
        loop until it returns.

        :param eprint_id: string, the arxiv eprint_id.
        :return: string with filepath of the downloaded paper.
    """
    return await asyncio.to_thread(download_paper, eprint_id)

# Tool wrapper around the awaitable download function. In the visible code it
# is only referenced from the commented-out sketch in DownloaderAgent.
async_downloader_tool = FunctionTool(
    async_download_paper,
    description="Given a string denoting an eprint_id of a page on arxiv, downloads a paper"
)

# TODO Maybe finish: the task handler below is a stub and produces no result yet.
class DownloaderAgent(RoutedAgent):
    """Core-library variant of the downloader agent (unfinished).

    Stores a chat-completion client; the handler meant to answer a
    `DownloadTask` currently does no work.
    """

    def __init__(
        self,
        model_client: ChatCompletionClient,
    ) -> None:
        """Register the agent under the description "Downloader Agent" and keep the client."""
        super().__init__(description="Downloader Agent")
        self._model_client = model_client

    @message_handler
    async def handle_task(self, message: DownloadTask, ctx: MessageContext) -> DownloadTaskResult:
        """Handle a `DownloadTask`.

        NOTE(review): the body only creates a cancellation token and then falls
        off the end, so it returns None rather than the annotated
        `DownloadTaskResult`. The commented-out lines sketch the intended model
        call with `async_downloader_tool`.
        """
        # Intended: run the chat completion with async_downloader_tool defined above.
        cancellation_token = CancellationToken()
        #create_result = await self._model_client.create(
        #    messages=[UserMessage(message.task], tools=[async_downloader_tool], cancellation_token=cancellation_token
        #)
        #create_result.content
        #return DownloadTaskResult(result=model_result.content)


# CHECK IF DOWNLOAD WORKS

async def downloader_run() -> None:
    """Manual check: ask a fresh downloader agent for one paper and print its reply."""
    agent = create_downloader("Test")
    request = TextMessage(content="Download the paper with eprint_id 2402.01411", source="user")

    response = await agent.on_messages([request], cancellation_token=CancellationToken())

    reply = response.chat_message
    print(reply.content)



# Use asyncio.run(downloader_run()) when running in a script.
#await downloader_run()

async def custom_downloader_run() -> None:
    """Create a single-threaded runtime and register `DownloaderAgent` on it as "downloader"."""

    def make_agent():
        # Factory handed to the runtime; builds the agent on the text model client.
        return DownloaderAgent(model_client=text_model_client)

    runtime = SingleThreadedAgentRuntime()
    await DownloaderAgent.register(runtime, "downloader", make_agent)

In [90]:
#@title Parser agent
import pymupdf
#import nltk
#from nltk.tokenize import word_tokenize

#nltk.download('punkt_tab')

def parse_pdf(filepath: str) -> str:
    """Extract the plain text of every page of a PDF.

    :param filepath: string, path of the PDF file to read.
    :return: string with the text of all pages, concatenated in page order.
    :raises Exception: if opening or reading the file fails; the message names
        the file and the underlying error.
    """
    try:
        with pymupdf.open(filepath) as document:
            return "".join(page.get_text() for page in document)
    except Exception as err:
        raise Exception(f"ERROR: Error parsing {filepath}: {err}")




def create_parser(name: str) -> AssistantAgent:
    """Build an agent that turns a PDF file path into the paper's text.

    The agent runs on `tool_model_client` and exposes `parse_pdf` as its
    only tool.

    :param name: string, the name given to the created agent.
    :return: the configured AssistantAgent.
    """
    parser_tool = FunctionTool(
        parse_pdf,
        description="A tool that takes a filepath of a pdf file, parses the file and returns the text"
    )

    return AssistantAgent(
        # Honour the caller's name; it was previously hard-coded to "parser",
        # so every parser agent ended up with the same name.
        name=name,
        model_client=tool_model_client,
        system_message="""
            For a given filepath of a scientific paper,
            you use the provided tool, which parses the paper and returns text.
            Then you return the text in your response.
          """,
        tools=[parser_tool]
    )

# CHECK IF PARSING WORKS

async def parser_run() -> str:
    """Manual check: let a parser agent parse one previously downloaded paper.

    Sends a hard-coded file path under `papers/` to a freshly created parser
    agent and returns the content of the agent's reply.
    """
    response = await create_parser("test_parser").on_messages(
        [TextMessage(content="Parse this paper: papers/CodePori:_Large-Scale_System_for_Autonomous_Software_Development_Using_Multi-Agent_Technology.pdf", source="user")],
        cancellation_token=CancellationToken(),
    )


    #print(response.inner_messages)
    #print(response.chat_message)
    text = response.chat_message.content
    #print(text)
    return text



# Use asyncio.run(parser_run()) when running in a script.
#test_paper_text = await parser_run()  # we'll need the variable to test other agents
#print(test_paper_text)

In [94]:
#@title Conceptualizer


def create_conceptualizer(name: str) -> AssistantAgent:
    """Build the agent that writes a structured conceptualization of a paper.

    The agent is named `name` and runs on `text_model_client`; it has no tools.
    """
    professor_prompt = """
          You are a professor of computer science specialized in multi agent systems
          and generative AI. Given a text of a scientific paper, write a conceptualization
          of it -- provide structure of the paper and key contributions delivered.
          Write structured, using headers and bullet points. Provide short descriptions
          of key contributions. Fit your answer in 500 words.
        """

    agent = AssistantAgent(
        name=name,
        model_client=text_model_client,
        system_message=professor_prompt,
    )
    return agent


# CHECK IF CONCEPTUALIZATION WORKS

async def concept_run(paper_text: str) -> str:
    """Manual check: conceptualize one paper and return the result.

    Prints the model usage reported for the reply.

    :param paper_text: string, full text of the paper.
    :return: string with the conceptualization written by the agent.
    """
    # Agent names must be valid Python identifiers, so the test name carries
    # an underscore instead of a space.
    response = await create_conceptualizer("test_conceptualizer").on_messages(
        [TextMessage(content=paper_text, source="user")],
        cancellation_token=CancellationToken(),
    )

    print(response.chat_message.models_usage)
    return response.chat_message.content



# Use asyncio.run(concept_run(test_paper_text)) when running in a script.
#conceptualization = await concept_run(test_paper_text)  # we'll need the variable later for testing of other agents
#print(conceptualization)


In [None]:
#@title Summarizer agent

# System prompt variant 1: tells the model it receives the paper text AND its
# conceptualization.
sysprompt1 = """
        You are a professor of computer science
        and also qualified and experienced reviewer and critic of scientific papers.
        You will be given a paper text and it's conceptualisation.
        Summarize the paper using the points provided in conceptualisation.
        You must fit your summarization in 500 words.
        It is very important to depict the key contributions of the paper.
"""


# System prompt variant 2: tells the model it receives only the conceptualization.
sysprompt2 = """
        You are a professor of computer science
        and also qualified and experienced reviewer and critic of scientific papers.
        You will be given a conceptualisation of a paper.
        Write a summarization based on conceptualization.
        You must fit your summarization in 500 words.
        It is very important to depict the key contributions of the paper.
"""

# NOTE(review): the agent is wired to variant 1, while summarizer_run currently
# sends only the conceptualization, which is what variant 2 describes --
# confirm which pairing is intended.
summarizer_agent = AssistantAgent(
    name="summarizer",
    model_client=text_model_client,
    system_message=sysprompt1
)

# CHECK IF SUMMARIZING WORKS

async def summarizer_run(paper_text: str, conceptualization: str) -> str:
    """Manual check: summarize a paper from its conceptualization.

    Only the conceptualization is sent to `summarizer_agent`. Prints the model
    usage reported for the reply.

    :param paper_text: string, full text of the paper. Not part of the prompt
        that is sent; kept so existing calls keep working.
    :param conceptualization: string, the conceptualization of the paper.
    :return: string with the summary written by the agent.
    """
    prompt = f"""
      ------start conceptualization-------
      {conceptualization}
      ------end conceptualization-------
      """

    response = await summarizer_agent.on_messages(
        [TextMessage(content=prompt, source="user")],
        cancellation_token=CancellationToken(),
    )

    print(response.chat_message.models_usage)
    return response.chat_message.content


# Use asyncio.run(summarizer_run(test_paper_text, conceptualization)) when running in a script.
#summarization = await summarizer_run(test_paper_text, conceptualization)
#print(summarization)

RequestUsage(prompt_tokens=711, completion_tokens=454)
Title: Autonomous Software Development Revisited: Leveraging Multi-Agent Technology with CodePori

This paper introduces CodePori, a groundbreaking system that utilizes large-scale language models (LLMs) and multi-agent technology to automate complex software development processes. The paper addresses the growing demand for streamlining software development, particularly in large-scale projects, by focusing on the potential of LLMs in the software industry.

The objective of the study is threefold: to investigate the capabilities of LLM-based agents in software engineering, enhance productivity, and reduce time-to-market for intricate software solutions. The authors aim to shed light on how these agents can revolutionize the development of large-scale software.

The execution of this objective involves CodePori, a novel system designed to automate code generation. It comprises a multi-agent system where each agent specializes in sp

In [None]:
#@title Reviewer agent
from autogen_core.model_context import BufferedChatCompletionContext

# Variant 1: the papers arrive as full texts in several consecutive messages,
# and the model is told to wait until it has received all of them.
sysprompt_rev_1 = """
        You are a professor of computer science preparing a review
        of recent publications on the topic of multi-agent llm-based systems
        for end-to-end software development (such systems receive requirements
        as input and deliver a ready system as output with optional system-user
        interactions inbetween). You will receive the texts
        of several papers in several consequtive messages with the first message
        telling you the total number.

        Write a concise and factually detailed literature review.
        Stick to scientific standards regarding style and rigour.
        You must fit your review in 500 words. Do not produce the review before you
        get all of the papers and until then you must only ask for more papers.

        It is very important to depict the key contributions of the papers.
"""

# Variant 2: the model receives conceptualizations of the papers instead of
# their full texts. This is the variant wired into the agent below.
sysprompt_rev_2 = """
        You are a professor of computer science preparing a review
        of recent publications on the topic of multi-agent llm-based systems
        for end-to-end software development (such systems receive requirements
        as input and deliver a ready system as output with optional system-user
        interactions inbetween). You will receive the conceptualisations of
        several papers (that is, consice enumeration of important contributions of the papers)
        and write a concise and factually detailed literature review.
        Stick to scientific standards regarding style and rigour.
        You must fit your review in 500 words.

        It is very important to depict the key contributions of the papers.
"""

# Variant 3: minimal role-only prompt.
sysprompt_rev_3 = """
        Act as a scientist, specilized in computer science and writing
        a literature review.
"""


# The reviewer uses a buffered model context with buffer_size=14.
# NOTE(review): presumably this limits the context to the most recent 14
# messages -- confirm against the BufferedChatCompletionContext documentation.
reviewer_agent = AssistantAgent(
    name="reviewer",
    model_client=text_model_client,
    model_context=BufferedChatCompletionContext(buffer_size=14),
    system_message=sysprompt_rev_2,
)


# CHECK IF REVIEWING WORKS

async def reviewer_run(papers: list[str]) -> str:
    """Manual check: ask the reviewer agent for a literature review.

    All inputs are packed into one user message, each wrapped in start/end
    markers, and sent to `reviewer_agent`.

    :param papers: list of strings, one text per paper to review.
    :return: string with the review written by the agent.
    """
    # f-string, so the model sees the actual paper count instead of the
    # literal placeholder text "{len(papers)}".
    prompt = f"Here are {len(papers)} papers that you need to review: "
    prompt += "".join(
        f"------start paper text-------\n{paper}\n------end paper text-------\n"
        for paper in papers
    )

    response = await reviewer_agent.on_messages(
        [TextMessage(content=prompt, source="user")],
        cancellation_token=CancellationToken(),
    )

    return response.chat_message.content



# Use asyncio.run(reviewer_run([...])) when running in a script.
#review = await reviewer_run([summarization, summarization, summarization])
#print(review)


Literature Review: Multi-Agent LLM-Based Systems for End-to-End Software Development

This review aims to summarize and analyze recent publications on the application of multi-agent systems integrated with large-scale language models (LLMs) in end-to-end software development. The reviewed papers focus on the potential of LLMs in the software industry and their impact on productivity and software development processes.

The first paper, "Autonomous Software Development Revisited: Leveraging Multi-Agent Technology with CodePori," introduces CodePori, a novel system that utilizes multi-agent technology and LLMs to automate complex software development tasks. CodePori consists of a multi-agent system specialized in tasks such as system design, code development, code review, code verification, and test engineering. The paper presents an evaluation framework to measure CodePori's performance, demonstrating its ability to produce running code for large-scale projects quickly and at reduced co

In [None]:
#@title Feedback processor and Operator-Handoff
from autogen_agentchat.agents import UserProxyAgent
from autogen_agentchat.conditions import TextMentionTermination
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_agentchat.ui import Console

# System prompt for the agent that rewrites the review according to editor feedback.
sysprompt_fp = """
    You are a professor of computer science preparing a review
    of recent publications on the topic of multi-agent llm-based systems
    for end-to-end software development (such systems receive requirements
    as input and deliver a ready system as output with optional system-user
    interactions inbetween). You will receive several pieces of information as input:

    - conceptualisations of several papers (that is, consice enumeration of important contributions of the papers)
    - prior version of the review you wrote
    - feedback of an editor of a scientific journal on your prior review version

    You must rewrite the review taking the feedback of the editor
    into account. The review must be concise and factually detailed.
    Stick to scientific standards regarding style and rigour.
    You must fit your review in 500 words.

    It is very important to depict the key contributions of the papers.
    """

feeback_processor_agent = AssistantAgent(
    name="feedback_processor",
    model_client=text_model_client,
    system_message=sysprompt_fp,
)

# Human participant: its input is read through the built-in input().
user_proxy = UserProxyAgent("user_proxy", input_func=input)

# Termination condition keyed on the text "APPROVE".
termination = TextMentionTermination("APPROVE")

# Round-robin team of the feedback processor and the human proxy.
team = RoundRobinGroupChat([feeback_processor_agent, user_proxy], termination_condition=termination)
# NOTE(review): within the visible part of the notebook, `review` is only
# assigned by the commented-out check at the end of the reviewer cell, so on a
# fresh kernel this line would raise NameError -- confirm where `review` is
# meant to come from.
stream = team.run_stream(task=f"Incorporate user feedback into this review: {review}")


# CHECK IF FEEDBACK TEAM WORKS

# result = await Console(stream)
# print(f"\n\nFINAL RESULT:\n\n{result.messages[-3].content}")




---------- user ----------
Incorporate user feedback into this review: Literature Review: Multi-Agent LLM-Based Systems for End-to-End Software Development

This review aims to summarize and analyze recent publications on the application of multi-agent systems integrated with large-scale language models (LLMs) in end-to-end software development. The reviewed papers focus on the potential of LLMs in the software industry and their impact on productivity and software development processes.

The first paper, "Autonomous Software Development Revisited: Leveraging Multi-Agent Technology with CodePori," introduces CodePori, a novel system that utilizes multi-agent technology and LLMs to automate complex software development tasks. CodePori consists of a multi-agent system specialized in tasks such as system design, code development, code review, code verification, and test engineering. The paper presents an evaluation framework to measure CodePori's performance, demonstrating its ability to 

In [None]:
#@title Parse bibtex references
from bibtexparser.bparser import BibTexParser
from bibtexparser.bwriter import BibTexWriter
from bibtexparser.bibdatabase import BibDatabase
from bibtexparser.customization import convert_to_unicode
from typing import List

# parse bibtext_references

def parse_bibtex_string(bibtex_string):
    """Parse a BibTeX string into a list of entries.

    :param bibtex_string: the BibTeX source text.
    :return: the parsed entries; an empty list for empty input; None when
        parsing raised an error (the error is printed).
    """
    if bibtex_string:
        try:
            bib_parser = BibTexParser()
            bib_parser.customization = convert_to_unicode
            bib_parser.ignore_nonstandard_strings = True  # Avoid errors with non-standard fields
            database = bib_parser.parse(bibtex_string)
            return database.entries
        except Exception as err:
            print(f"Error parsing BibTeX string: {err}")
            return None  # parsing failed

    return []  # nothing to parse

def check_parsed_bibtex(items: List[dict])  -> None:
    """Print every parsed BibTeX entry for visual inspection.

    For each entry the type and key are printed first, then all remaining
    fields, followed by a separator line.

    :param items: list of entry dicts, each with 'ENTRYTYPE' and 'ID' keys.
    """
    # Iterate the argument, not the notebook-level `bibtex_items` variable.
    for entry in items:
        print(f"Entry Type: {entry['ENTRYTYPE']}")
        print(f"  Key: {entry['ID']}")
        for key, value in entry.items():
            if key not in ['ENTRYTYPE', 'ID']:
                print(f"  {key}: {value}")
        print("-" * 20)


# Parsed entries of the references declared earlier in the notebook: a list of
# entry dicts, or None when parsing failed.
bibtex_items = parse_bibtex_string(bibtext_references)

#check_parsed_bibtex(bibtex_items)

In [95]:
from functools import reduce
#@title Download all papers
from dataclasses import dataclass
from typing import List

from autogen_core import AgentId, MessageContext, RoutedAgent, SingleThreadedAgentRuntime, message_handler
from autogen_core.models import SystemMessage, UserMessage

import asyncio

@dataclass
class UserTask:
    """Message type handled by the orchestrator's task handler."""
    # Global task for the multi agent system
    task: str



@dataclass
class ParseTask:
    """Message carrying a task for the parser agent."""
    # Task for parser agent
    task: str

@dataclass
class ConceptualizeTask:
    """Message carrying a task for the conceptualizer agent."""
    # Task for conceptualizer agent
    task: str

@dataclass
class SummarizeTask:
    """Message carrying a task for the summarizer agent."""
    # Task for summarizer agent
    task: str

@dataclass
class ReviewTask:
    """Message carrying a task for the reviewer agent."""
    # Task for reviewer agent
    task: str

@dataclass
class FinalResult:
    """Declared return type of the orchestrator's `UserTask` handler."""
    # Final result in response to UserTask
    result: str

# Agent type names.
DOWNLOADER_AGENT_TYPE = "downloader"
# Misspelled original name, kept as an alias so existing references keep working.
DOWNLOADER_AGENT_TYPTE = DOWNLOADER_AGENT_TYPE
PARSER_AGENT_TYPE = "parser"
CONCEPTS_AGENT_TYPE = "conceptualizer"
SUMMARIZER_AGENT_TYPE = "summarizer"
REVIEWER_AGENT_TYPE = "reviewer"

def parse_all_pdfs(folder_path: str) -> List[str]:
    """
    Parses every file in the specified folder.

    Sub-directories are skipped. A file that cannot be parsed is reported on
    stdout and left out of the result.

    :param folder_path: string, path of the folder holding the papers.
    :return: list with the extracted text of each successfully parsed file;
        an empty list when the folder does not exist.
    """

    if not os.path.isdir(folder_path):
        print(f"Error: Folder '{folder_path}' not found.")
        # Keep the declared return type so callers can iterate the result
        # without a None check.
        return []

    results = []
    for filename in os.listdir(folder_path):
        filepath = os.path.join(folder_path, filename)

        # Only regular files are parsed, directories are ignored.
        if not os.path.isfile(filepath):
            continue

        try:
            results.append(parse_pdf(filepath))
            print(f"Processed file '{filepath}'.")
        except Exception as e:
            # Best effort: one broken file must not stop the rest of the folder.
            print(f"Error processing file '{filepath}': {e}")

    return results

def get_filepaths(folder:str) -> List[str]:
    """
    Returns a list of filepaths in the specified folder.

    Every directory entry is included, joined with the folder path.

    :param folder: string, path of the folder to list.
    :return: list of paths, one per entry of the folder; an empty list when
        the folder does not exist.
    """
    if not os.path.isdir(folder):
        print(f"Error: Folder '{folder}' not found.")
        # Keep the declared return type so callers can iterate the result
        # without a None check.
        return []

    return [os.path.join(folder, filename) for filename in os.listdir(folder)]

class OrchestratorAgent(RoutedAgent):
    """Orchestrates the paper pipeline: download, parse, then conceptualize.

    For every stage the orchestrator creates one worker agent per work item
    (via `create_downloader`, `create_parser` and `create_conceptualizer`),
    sends each worker a single text message and awaits all workers
    concurrently.
    """

    def __init__(
        self,
        # NOTE(review): the annotation below is the client *instance* defined
        # elsewhere in the notebook, not a type — left as-is to avoid a new import.
        model_client: text_model_client,
        bibtex_items: List[dict]
    ) -> None:
        """Store the model client and the bibtex entries to process.

        Args:
            model_client: Chat model client; stored on the instance (not used by
                the stages currently implemented in `handle_task`).
            bibtex_items: Parsed bibtex entries; each must carry an 'eprint' id.

        Raises:
            AssertionError: If an entry has no 'eprint' value.
        """
        super().__init__(description="Aggregator Agent")
        self._model_client = model_client
        for item in bibtex_items:
            # .get() so that an absent key trips the assertion and its message
            # instead of surfacing as a bare KeyError.
            assert item.get('eprint') is not None, f"eprint_id is missing for a bibtex item: {item.get('title')}"
        self._bibtex_items = bibtex_items

    async def _fan_out(self, make_agent, name_prefix: str, prompts: List[str]) -> List[str]:
        """Run one worker per prompt concurrently; return the reply texts in prompt order.

        Args:
            make_agent: Factory called with the worker name ("<name_prefix>_<index>").
            name_prefix: Prefix used to build each worker's name.
            prompts: One user prompt per worker.
        """
        workers = [make_agent(f"{name_prefix}_{i}") for i in range(len(prompts))]
        responses = await asyncio.gather(*[
            worker.on_messages(
                [TextMessage(content=prompt, source="user")],
                cancellation_token=CancellationToken(),
            )
            for worker, prompt in zip(workers, prompts)
        ])
        return [response.chat_message.content for response in responses]

    @message_handler
    async def handle_task(self, message: UserTask, ctx: MessageContext) -> FinalResult:
        """Run the download, parse and conceptualize stages for all bibtex items.

        The content of `message` is ignored; the parameter exists only so the
        handler has the usual message-handler signature.

        Returns:
            A placeholder `FinalResult` ("Stop."); the conceptualizations are
            only printed for now.
        """
        n_papers = len(self._bibtex_items)
        print(f"Orchestrator: Number of papers to process: {n_papers}")

        # I. Download all papers concurrently, one downloader per bibtex item.
        print("Orchestrator: Delegating tasks to downloaders")
        download_reports = await self._fan_out(
            create_downloader,
            "downloader",
            [f"Download the paper with eprint_id {item['eprint']}" for item in self._bibtex_items],
        )

        # Separator starts on its own line so it does not glue onto the
        # preceding report text.
        download_results_str = "\n-------------------\n".join(download_reports)

        print("Orchestrator: Intermediary download results")
        print(download_results_str)

        # The downloaders' replies are free text, so the files to parse are
        # taken from the directory listing instead.
        # NOTE(review): assumes the downloaders write into "papers" (the
        # download reports printed by this notebook mention that directory) —
        # confirm against the downloader tool. Files left over from earlier
        # runs are picked up as well.
        filepaths = get_filepaths("papers") or []

        # II. Parse every file concurrently, one parser per file.
        print("Orchestrator: Delegating tasks to parsers")
        parse_texts = await self._fan_out(
            create_parser,
            "parser",
            [f"Parse the paper at {filepath}" for filepath in filepaths],
        )

        # III. Conceptualize every parsed paper concurrently.
        print("Orchestrator: Delegating tasks to conceptualizers")
        conceptualizations = await self._fan_out(
            create_conceptualizer,
            "conceptualizer",
            [f"Conceptualize the paper: {paper_text}" for paper_text in parse_texts],
        )

        print("Orchestrator: conceptualizations:")
        for txt in conceptualizations:
            # Preview only: the first 100 characters of each conceptualization.
            print(txt[:100])

        # TODO: summarization, review and a final model-based aggregation are
        # not implemented yet; a placeholder result is returned instead.
        return FinalResult(result="Stop.")


runtime = SingleThreadedAgentRuntime()

await OrchestratorAgent.register(
    runtime,
    "orchestrator",
    lambda: OrchestratorAgent(
        model_client=text_model_client,
        bibtex_items=bibtex_items
    ),
)

runtime.start()
result = await runtime.send_message(UserTask(task="Doesn't matter"), AgentId("orchestrator", "default"))
await runtime.stop_when_idle()
print(f"{'-'*80}\nFinal result:\n{result.result}")


Orchestrator: Number of papers to process: 2
Orchestrator: Delegating tasks to downloaders
Orchestrator: Intermediary download results
It looks like the tool has successfully downloaded the paper with eprint_id 2307.07924, which is titled "ChatDev: Communicative Agents for Software Development". The paper has been saved as a PDF file named "ChatDev:_Communicative_Agents_for_Software_Development.pdf" in the "papers" directory.-------------------
The paper with eprint_id 2406.11912 has been successfully downloaded. The paper title is "AgileCoder: Dynamic Collaborative Agents for Software Development based on Agile Methodology" and it has been saved as a PDF file named "AgileCoder:_Dynamic_Collaborative_Agents_for_Software_Development_based_on_Agile_Methodology.pdf" in the "papers" directory.
Orchestrator: Delegating tasks to parsers
Orchestrator: Delegating tasks to parsers
Orchestrator: conceptualizations:
Title: ChatDev: Communicative Agents for Software Development

Introduction
- The

In [None]:
#@title New idea: single agent that downloads, parses and then conceptualizes a paper.
# Problem: this needs two models instead of one. A custom agent is possible, but expensive to create.

# Alternative: the orchestrator parses everything on its own, as a single entity.