
In this lesson, you will familiarize yourself with the chatbot example you will work on during this course. The example includes the tool definitions and execution, as well as the chatbot code. Make sure to interact with the chatbot at the end of this notebook.

In [2]:
!pip install arxiv dotenv

Collecting arxiv
  Downloading arxiv-2.2.0-py3-none-any.whl.metadata (6.3 kB)
Collecting dotenv
  Downloading dotenv-0.9.9-py2.py3-none-any.whl.metadata (279 bytes)
Collecting feedparser~=6.0.10 (from arxiv)
  Downloading feedparser-6.0.12-py3-none-any.whl.metadata (2.7 kB)
Collecting sgmllib3k (from feedparser~=6.0.10->arxiv)
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25ldone
Downloading arxiv-2.2.0-py3-none-any.whl (11 kB)
Downloading feedparser-6.0.12-py3-none-any.whl (81 kB)
Downloading dotenv-0.9.9-py2.py3-none-any.whl (1.9 kB)
Building wheels for collected packages: sgmllib3k
[33m  DEPRECATION: Building 'sgmllib3k' using the legacy setup.py bdist_wheel mechanism, which will be removed in a future version. pip 25.3 will enforce this behaviour change. A possible replacement is to use the standardized build interface by setting the `--use-pep517` option, (possibly combined with `--no-build-isolation`), or adding a `pyproject.toml` file to t

In [2]:
import anthropic
import arxiv
import os 
from dotenv import load_dotenv
import json 
import langextract

In [3]:
load_dotenv()  # This loads the .env file into the process environment

# Read the key once so the client and the sanity check below use the same value.
api_key = os.getenv('ANTHROPIC_API_KEY')
if not api_key:
    # Fail here with an actionable message instead of a TypeError on the slice below.
    raise RuntimeError(
        "ANTHROPIC_API_KEY is not set; add it to your .env file or environment."
    )

client = anthropic.Anthropic(api_key=api_key)

# Sanity check: show only the first five characters, never the full secret.
print(api_key[:5])

sk-an


# Tool Function 

In [22]:
# Root folder for stored search results; each topic gets its own sub-folder holding a papers.json.
PAPER_DIR = "papers"

- This tool searches arXiv for papers on a topic and stores each paper's info in a JSON file (title, authors, summary, PDF URL, publication date).
- The JSON files are organized by topic in the `papers` directory.
- The tool does NOT download the papers.

In [10]:
def search_papers(topic:str, max_results:int=5):
    """Search arXiv for papers on a topic and store their information as JSON.

    The title, authors, summary, PDF URL and publication date of every result
    are merged into ``<PAPER_DIR>/<topic folder>/papers.json``, keyed by the
    paper's short arXiv ID. Entries already present in that file are kept;
    entries with the same ID are overwritten.

    Args:
        topic (str): The topic to search for.
        max_results (int, optional): Maximum number of results to return. Defaults to 5.

    Returns:
        list: Short arXiv IDs of the papers returned by this search, in result order.
    """
    # Use arxiv to search papers based on the topic.
    arxiv_client = arxiv.Client()
    search = arxiv.Search(query=topic,
                          max_results=max_results,
                          sort_by=arxiv.SortCriterion.Relevance)
    papers = arxiv_client.results(search)

    # Storage folder for papers, named after the topic. Path separators are
    # flattened so a topic such as "AI/ML" still maps to a single folder directly
    # under PAPER_DIR instead of creating nested directories.
    topic_dir = topic.lower().replace(' ', '_')
    for separator in (os.sep, os.altsep):
        if separator:
            topic_dir = topic_dir.replace(separator, '_')
    path = os.path.join(PAPER_DIR, topic_dir)
    os.makedirs(path, exist_ok=True)
    file_path = os.path.join(path, 'papers.json')

    # Load existing papers, if any; a missing or corrupt file starts from scratch.
    try:
        with open(file_path, 'r') as json_file:
            papers_info = json.load(json_file)
    except (FileNotFoundError, json.JSONDecodeError):
        papers_info = {}
    if not isinstance(papers_info, dict):
        # Valid JSON of the wrong shape (e.g. a list) cannot be keyed by paper ID.
        papers_info = {}

    # Process each queried paper.
    paper_ids = []
    for paper in papers:
        short_id = paper.get_short_id()
        paper_ids.append(short_id)
        papers_info[short_id] = {
            'title': paper.title,
            'authors': [author.name for author in paper.authors],
            'summary': paper.summary,
            'pdf_url': paper.pdf_url,
            'published': str(paper.published.date()),
        }

    # Save the updated papers info.
    with open(file_path, 'w') as json_file:
        json.dump(papers_info, json_file, indent=4)
    print(f"Saved {len(paper_ids)} papers to {file_path}")
    return paper_ids


search_papers("RNA")

Saved 5 papers to papers/rna/papers.json


['2411.08900v1', '1512.06979v1', '1405.3390v2', '2303.14065v1', '2502.00647v1']

In [24]:
def extract_info(paper_id:str)->str:
    """Look up the saved information for a paper across all topic folders.

    Scans every ``<PAPER_DIR>/<topic>/papers.json`` file and returns the first
    entry stored under ``paper_id``.

    Args:
        paper_id (str): Key of the paper in a papers.json file.

    Returns:
        str: The stored entry serialized as a JSON string (2-space indent), or
        a plain-text message when no entry with that ID is found.
    """
    not_found = f"There's no saved information related to paper ID {paper_id}."

    # Nothing has been saved yet if the storage root is missing; os.listdir would raise.
    if not os.path.isdir(PAPER_DIR):
        return not_found

    for item in os.listdir(PAPER_DIR):
        # Only folders that actually contain a papers.json are of interest.
        file_path = os.path.join(PAPER_DIR, item, 'papers.json')
        if not os.path.isfile(file_path):
            continue
        try:
            with open(file_path, "r") as json_file:
                papers_info = json.load(json_file)
        except (FileNotFoundError, json.JSONDecodeError) as e:
            # Best effort: report the unreadable file and keep scanning the other topics.
            print(f"Error reading {file_path}: {e}")
            continue
        if paper_id in papers_info:
            return json.dumps(papers_info[paper_id], indent=2)
    return not_found

print(extract_info('2411.08900v1'))


{
  "title": "RNA-GPT: Multimodal Generative System for RNA Sequence Understanding",
  "authors": [
    "Yijia Xiao",
    "Edward Sun",
    "Yiqiao Jin",
    "Wei Wang"
  ],
  "summary": "RNAs are essential molecules that carry genetic information vital for life,\nwith profound implications for drug development and biotechnology. Despite this\nimportance, RNA research is often hindered by the vast literature available on\nthe topic. To streamline this process, we introduce RNA-GPT, a multi-modal RNA\nchat model designed to simplify RNA discovery by leveraging extensive RNA\nliterature. RNA-GPT integrates RNA sequence encoders with linear projection\nlayers and state-of-the-art large language models (LLMs) for precise\nrepresentation alignment, enabling it to process user-uploaded RNA sequences\nand deliver concise, accurate responses. Built on a scalable training pipeline,\nRNA-GPT utilizes RNA-QA, an automated system that gathers RNA annotations from\nRNACentral using a divide-and-con