In [12]:
import requests
import pandas as pd
import io
import re
from io import BytesIO
import pymupdf

# Possible tools

- query_articles_list: uses the arXiv API to return a markdown table of papers, each with its summary and the URL of its PDF
- get_article: downloads the PDF at an article URL and returns its text content

In [24]:
# Endpoint and accepted parameter values of the arXiv query API.
_ARXIV_API_URL = "http://export.arxiv.org/api/query"
_ARXIV_SORT_KEYS = ("relevance", "lastUpdatedDate", "submittedDate")
_ARXIV_PREFIXES = ("ti", "au", "abs", "co", "jr", "cat", "rn", "all")
_ARXIV_PAGE_SIZE = 20
_ARXIV_TIMEOUT_S = 30

# Control characters that make the XML parser reject the feed.
_XML_INVALID_CHARS = re.compile(r"[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]")
# An ampersand that does NOT start a predefined or numeric XML entity.
# Escaping only these avoids turning an existing "&amp;" into "&amp;amp;".
_XML_BARE_AMPERSAND = re.compile(
    r"&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#x[0-9A-Fa-f]+);)"
)


def _format_search_result(query, sortby, body):
    """Wrap `body` in the header/footer frame used for every search result."""
    return f" \n---{query}-{sortby}----\n{body}\n------------------------\n    "


def query_articles_list(
    query: str, 
    sortby: str, 
    prefix: str="ti",
    start: int=0
    ) -> str:
 
    """
    Search articles on arXiv according to the query value. It returns a markdown table with up to 20 articles and the following values:
    - pdf: the url to the article pdf
    - updated: the last time the article was updated
    - published: the date when the article was published
    - title: the article title
    - summary: a summary of the article content

    If the request fails or the search yields no entries, the table is
    replaced by the text "No Results" (inside the same header/footer frame).
    
    Args:
        query: the query used for the search
        sortby: how to sort the results, relevance, lastUpdatedDate or submittedDate
        (any other value falls back to relevance)
        prefix: how to interpret the query 
        (ti - Title, au - Author, abs - Abstract, co - Comment, jr - Journal Reference, cat - Subject Category, rn - Report Number, all - all prefixes;
        any other value falls back to ti)
        start: the index of the ranking where the table starts, add +20 to get the next table chunk

    Returns:
        A string containing a `---{query}-{sortby}----` header, the markdown
        table (or "No Results"), and a dashed footer.
    """

    if sortby not in _ARXIV_SORT_KEYS:
        sortby = "relevance"

    if prefix not in _ARXIV_PREFIXES:
        prefix = "ti"

    # Let requests build the query string so characters such as '&', '#'
    # or '+' inside `query` are percent-encoded instead of corrupting the URL.
    params = {
        "search_query": f'{prefix}:"{query}"',
        "sortBy": sortby,
        "sortOrder": "descending",
        "start": start,
        "max_results": _ARXIV_PAGE_SIZE,
    }

    try:
        res = requests.get(_ARXIV_API_URL, params=params, timeout=_ARXIV_TIMEOUT_S)
    except requests.RequestException:
        # Transport failure (DNS, connection, timeout): report it the same
        # way as an HTTP error rather than raising into the caller.
        return _format_search_result(query, sortby, "No Results")

    if not res.ok:
        return _format_search_result(query, sortby, "No Results")

    feed = res.text
    first_entry = feed.find("<entry>")
    if first_entry == -1:
        # A valid feed with zero hits has no <entry>; there is nothing to parse.
        return _format_search_result(query, sortby, "No Results")

    # Drop the feed-level metadata before the first entry and re-open a bare
    # <feed> root; the original closing </feed> tag is still at the end.
    entries_xml = "<feed>\n" + feed[first_entry:]
    entries_xml = _XML_INVALID_CHARS.sub("", entries_xml)
    entries_xml = _XML_BARE_AMPERSAND.sub("&amp;", entries_xml)

    table = pd.read_xml(io.StringIO(entries_xml))
    table = table[["id", "updated", "published", "title", "summary"]]
    table = table.rename(columns={"id": "pdf"})
    # The entry id is the abstract page; the PDF lives under /pdf/ instead.
    table = table.assign(
        pdf=table["pdf"].str.replace("/abs/", "/pdf/", regex=False)
    )

    return _format_search_result(query, sortby, table.to_markdown(index=False))

In [25]:
# `start` must be passed by keyword: the third positional parameter is `prefix`.
print(query_articles_list("reinforcement learning", "relevance", start=0))

 
---reinforcement learning-relevance----
| pdf                               | updated              | published            | title                                                         | summary                                                                         |
|:----------------------------------|:---------------------|:---------------------|:--------------------------------------------------------------|:--------------------------------------------------------------------------------|
| http://arxiv.org/pdf/2005.14419v2 | 2020-06-13T05:19:26Z | 2020-05-29T06:53:29Z | Reinforcement Learning                                        | Reinforcement learning (RL) is a general framework for adaptive control,        |
|                                   |                      |                      |                                                               | which has proven to be efficient in many domains, e.g., board games, video      |
|                                   | 

In [26]:
def get_article(
        url: str
    ) -> str:
    """
    Opens an article using its pdf url and reads its content.

    The PDF is downloaded into memory and its text is extracted page by page
    with pymupdf; pages are joined with a form-feed character.

    Args:
        url: the url of the article pdf

    Returns:
        A string containing a `-------{url}------------` header, the article
        text, and an `------END----------------` footer. If the download
        fails (transport error, timeout or non-success HTTP status) the
        article text is replaced by "Not Found".
    """

    try:
        # Bound the wait so an unresponsive server cannot hang the caller.
        res = requests.get(url, timeout=60)
    except requests.RequestException:
        res = None

    if res is None or not res.ok:
        article = 'Not Found'
    else:
        page_separator = "\f"  # form feed, same character as chr(12)
        with pymupdf.open(stream=BytesIO(res.content)) as doc:
            article = page_separator.join(page.get_text() for page in doc)

    return f"\n-------{url}------------\n{article}\n------END----------------\n    "

# Example call: download one arXiv PDF (live network request) and print the framed text.
print(get_article("http://arxiv.org/pdf/1611.05763v3"))


-------http://arxiv.org/pdf/1611.05763v3------------
LEARNING TO REINFORCEMENT LEARN
JX Wang1, Z Kurth-Nelson1, D Tirumala1, H Soyer1, JZ Leibo1,
R Munos1, C Blundell1, D Kumaran1,3, M Botvinick1,2
1DeepMind, London, UK
2Gatsby Computational Neuroscience Unit, UCL, London, UK
3Institute of Cognitive Neuroscience, UCL, London, UK
{wangjane, zebk, dhruvat, soyer, jzl, munos, cblundell,
dkumaran, botvinick} @google.com
ABSTRACT
In recent years deep reinforcement learning (RL) systems have attained superhuman
performance in a number of challenging task domains. However, a major limitation
of such applications is their demand for massive amounts of training data. A critical
present objective is thus to develop deep RL methods that can adapt rapidly to new
tasks. In the present work we introduce a novel approach to this challenge, which
we refer to as deep meta-reinforcement learning. Previous work has shown that
recurrent networks can support meta-learning in a fully supervised context. We