In [1]:
pip install smolagents

Note: you may need to restart the kernel to use updated packages.


In [2]:
from smolagents import tool

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
pip install ipywidgets

Collecting ipywidgets
  Downloading ipywidgets-8.1.5-py3-none-any.whl.metadata (2.3 kB)
Collecting widgetsnbextension~=4.0.12 (from ipywidgets)
  Downloading widgetsnbextension-4.0.13-py3-none-any.whl.metadata (1.6 kB)
Collecting jupyterlab-widgets~=3.0.12 (from ipywidgets)
  Downloading jupyterlab_widgets-3.0.13-py3-none-any.whl.metadata (4.1 kB)
Downloading ipywidgets-8.1.5-py3-none-any.whl (139 kB)
Downloading jupyterlab_widgets-3.0.13-py3-none-any.whl (214 kB)
Downloading widgetsnbextension-4.0.13-py3-none-any.whl (2.3 MB)
   ---------------------------------------- 0.0/2.3 MB ? eta -:--:--
   ---------------------------------------- 2.3/2.3 MB 11.1 MB/s eta 0:00:00
Installing collected packages: widgetsnbextension, jupyterlab-widgets, ipywidgets
Successfully installed ipywidgets-8.1.5 jupyterlab-widgets-3.0.13 widgetsnbextension-4.0.13
Note: you may need to restart the kernel to use updated packages.


In [6]:
import ipywidgets as widgets

In [7]:
# import packages that are used in our tools
import requests
from bs4 import BeautifulSoup
import json

In [9]:
@tool
def get_hugging_face_top_daily_paper() -> str:
    """
    Get the title of the top daily paper listed on the Hugging Face papers page.

    Returns:
        The title of the first entry in the page's "dailyPapers" data, an empty
        string if no such entry is found, or None if the page cannot be fetched.
    """
    try:
        url = "https://huggingface.co/papers"
        # Without a timeout the request can block forever on an unresponsive server.
        # A timeout raises requests.exceptions.Timeout, which is a RequestException
        # and is therefore handled by the except clause below.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # raise an exception in case of failure (4xx or 5xx status code)

        soup = BeautifulSoup(response.content, "html.parser")

        # The paper list is embedded as JSON in the "data-props" attribute of these div elements
        containers = soup.find_all("div", class_="SVELTE_HYDRATER contents")
        for container in containers:
            data_props = container.get("data-props", "")
            if not data_props:
                continue
            try:
                # Parse the JSON-like string
                json_data = json.loads(data_props.replace("&quot;", '"'))
            except json.JSONDecodeError:
                continue
            if not isinstance(json_data, dict):
                continue
            daily_papers = json_data.get("dailyPapers")
            if not daily_papers:
                continue
            try:
                # Stop at the first container that actually carries a paper title
                return daily_papers[0]["title"]
            except (IndexError, KeyError, TypeError):
                # Unexpected shape for this container; keep looking in the others
                continue
        return ""
    except requests.exceptions.RequestException as e:
        print(f"Error occured while fetching the HTML: {e}")
        return None

In [10]:
from huggingface_hub import HfApi

@tool
def get_paper_id_by_title(title: str) -> str:
    """
    This is a tool that returns the arxiv paper id by its title.
    It returns the id of the first paper matching the title, or None if no paper is found.

    Args:
        title: The title of the paper for which we want to get the ID
    """
    try:
        api = HfApi()
        papers = api.list_papers(query=title)
        # The result may be a lazy iterable, which is truthy even when it yields
        # nothing, so take the first item with a default instead of testing `if papers`.
        paper = next(iter(papers), None)
        if paper is None:
            return None
        return paper.id
    except Exception as e:
        print(f"Error occured while fetching the paper ID: {e}")
        return None

#### Download the paper by its ID using the Python `arxiv` package, and save it locally so it can be read later

In [11]:
# install arxiv package
!pip install arxiv



In [18]:
import arxiv

@tool
def download_paper_by_id(paper_id: str) -> None:
    """
    This tool gets the id of a paper and downloads it from arxiv.
    It saves the paper locally in the current directory as "paper.pdf"

    Args:
        paper_id: The arxiv id of the paper to download
    """
    try:
        results = arxiv.Client().results(arxiv.Search(id_list=[paper_id]))
        # An unknown id yields no results; a bare next() would raise StopIteration,
        # whose message is empty, so report the missing paper explicitly instead.
        paper = next(results, None)
        if paper is None:
            print(f"No arxiv paper found for id: {paper_id}")
            return None
        paper.download_pdf(filename="paper.pdf")
        return None
    except Exception as e:
        print(f"Error occured while downloading the paper: {e}")
        return None

#### Use the pypdf package to read the PDF file. Read only the first three pages of the paper to save on token usage

In [19]:
pip install pypdf

Note: you may need to restart the kernel to use updated packages.


In [20]:
from pypdf import PdfReader

@tool
def read_pdf_file(file_path: str) -> str:
    """
    This tool reads the content of a PDF file and returns it as a string.
    Only the first three pages are read, to save on tokens.

    Args:
        file_path: The path to the PDF file
    """
    try:
        # Open the file the caller asked for rather than a hard-coded name
        reader = PdfReader(file_path)
        print(f"Number of pages in the PDF file: {len(reader.pages)}")
        # Read the first 3 pages to save on tokens
        return "".join(page.extract_text() for page in reader.pages[:3])
    except Exception as e:
        print(f"Error occured while reading the PDF file: {e}")
        return None

In [21]:
from dotenv import load_dotenv
import os

In [22]:
# Presumably loads HF_TOKEN (read later via os.getenv) from a local .env file — confirm the file exists
load_dotenv()

True

### Running the Agent

In [23]:
from smolagents import CodeAgent, HfApiModel

# Model that drives the agent; the access token is read from the environment.
model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
model = HfApiModel(model_id=model_id, token=os.getenv("HF_TOKEN"))

# The agent receives the four custom tools defined in the cells above,
# and add_base_tools=True is passed through to CodeAgent unchanged.
agent = CodeAgent(
    tools=[
        get_hugging_face_top_daily_paper,
        get_paper_id_by_title,
        download_paper_by_id,
        read_pdf_file,
    ],
    model=model,
    add_base_tools=True,
)

# Kept as the last expression of the cell so the returned value is shown as the cell output.
agent.run("Summarize today's top paper from Hugging Face daily papers by reading it.")

Number of pages in the PDF file: 16


'Abstract: Multi-agent reinforcement learning (MARL) demonstrates significant progress in\nsolving cooperative and competitive multi-agent problems in various environ-\nments. One of the principal challenges in MARL is the need for explicit pre-\ndiction of the agents’ behavior to achieve cooperation. To resolve this issue,\nwe propose the Shared Recurrent Memory Transformer (SRMT) which extends\nmemory transformers to multi-agent settings by pooling and globally broadcast-\ning individual working memories, enabling agents to exchange information im-\nplicitly and coordinate their actions. We evaluate SRMT on the Partially Observ-\nable Multi-Agent Pathfinding problem in a toy Bottleneck navigation task that\nrequires agents to pass through a narrow corridor and on a POGEMA bench-\nmark set of tasks. In the Bottleneck task, SRMT consistently outperforms a va-\nriety of reinforcement learning baselines, especially under sparse rewards, and\ngeneralizes effectively to longer corridors th