Harrison/arxiv tool #3186

Merged · 3 commits · Apr 19, 2023
156 changes: 156 additions & 0 deletions docs/modules/agents/tools/examples/arxiv.ipynb
@@ -0,0 +1,156 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "245a954a",
"metadata": {},
"source": [
"# Arxiv API\n",
"\n",
"This notebook goes over how to use the `arxiv` component. \n",
"\n",
"First, you need to install `arxiv` python package."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d5a7209e",
"metadata": {
"tags": [],
"vscode": {
"languageId": "shellscript"
}
},
"outputs": [],
"source": [
"!pip install arxiv"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8d32b39a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.utilities import ArxivAPIWrapper"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2a50dd27",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"arxiv = ArxivAPIWrapper()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "34bb5968",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"'Published: 2016-05-26\\nTitle: Heat-bath random walks with Markov bases\\nAuthors: Caprice Stanley, Tobias Windisch\\nSummary: Graphs on lattice points are studied whose edges come from a finite set of\\nallowed moves of arbitrary length. We show that the diameter of these graphs on\\nfibers of a fixed integer matrix can be bounded from above by a constant. We\\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\\nalso state explicit conditions on the set of moves so that the heat-bath random\\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\\ndimension.'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs = arxiv.run(\"1605.08386\")\n",
"docs"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b0867fda-e119-4b19-9ec6-e354fa821db3",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"'Published: 2017-10-10\\nTitle: On Mixing Behavior of a Family of Random Walks Determined by a Linear Recurrence\\nAuthors: Caprice Stanley, Seth Sullivant\\nSummary: We study random walks on the integers mod $G_n$ that are determined by an\\ninteger sequence $\\\\{ G_n \\\\}_{n \\\\geq 1}$ generated by a linear recurrence\\nrelation. Fourier analysis provides explicit formulas to compute the\\neigenvalues of the transition matrices and we use this to bound the mixing time\\nof the random walks.\\n\\nPublished: 2016-05-26\\nTitle: Heat-bath random walks with Markov bases\\nAuthors: Caprice Stanley, Tobias Windisch\\nSummary: Graphs on lattice points are studied whose edges come from a finite set of\\nallowed moves of arbitrary length. We show that the diameter of these graphs on\\nfibers of a fixed integer matrix can be bounded from above by a constant. We\\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\\nalso state explicit conditions on the set of moves so that the heat-bath random\\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\\ndimension.\\n\\nPublished: 2003-03-18\\nTitle: Calculation of fluxes of charged particles and neutrinos from atmospheric showers\\nAuthors: V. Plyaskin\\nSummary: The results on the fluxes of charged particles and neutrinos from a\\n3-dimensional (3D) simulation of atmospheric showers are presented. An\\nagreement of calculated fluxes with data on charged particles from the AMS and\\nCAPRICE detectors is demonstrated. Predictions on neutrino fluxes at different\\nexperimental sites are compared with results from other calculations.'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs = arxiv.run(\"Caprice Stanley\")\n",
"docs"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "3580aeeb-086f-45ba-bcdc-b46f5134b3dd",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"'No good Arxiv Result was found'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs = arxiv.run(\"1605.08386WWW\")\n",
"docs"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4f4e9602",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
2 changes: 2 additions & 0 deletions langchain/__init__.py
@@ -47,6 +47,7 @@
PromptTemplate,
)
from langchain.sql_database import SQLDatabase
from langchain.utilities import ArxivAPIWrapper
from langchain.utilities.google_search import GoogleSearchAPIWrapper
from langchain.utilities.google_serper import GoogleSerperAPIWrapper
from langchain.utilities.searx_search import SearxSearchWrapper
@@ -74,6 +75,7 @@
"LLMBashChain",
"LLMCheckerChain",
"LLMMathChain",
"ArxivAPIWrapper",
"SelfAskWithSearchChain",
"SerpAPIWrapper",
"SerpAPIChain",
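Since `ArxivAPIWrapper` is now re-exported from the package root, both import paths below resolve to the same class; a quick sketch based on the `__init__.py` change above:

from langchain import ArxivAPIWrapper
from langchain.utilities import ArxivAPIWrapper as ArxivAPIWrapperAlias

# Both names refer to the identical class object re-exported at the top level.
assert ArxivAPIWrapper is ArxivAPIWrapperAlias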
6 changes: 6 additions & 0 deletions langchain/agents/load_tools.py
@@ -11,6 +11,7 @@
from langchain.chains.pal.base import PALChain
from langchain.llms.base import BaseLLM
from langchain.requests import TextRequestsWrapper
from langchain.tools.arxiv.tool import ArxivQueryRun
from langchain.tools.base import BaseTool
from langchain.tools.bing_search.tool import BingSearchRun
from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
@@ -26,6 +27,7 @@
from langchain.tools.searx_search.tool import SearxSearchResults, SearxSearchRun
from langchain.tools.wikipedia.tool import WikipediaQueryRun
from langchain.tools.wolfram_alpha.tool import WolframAlphaQueryRun
from langchain.utilities import ArxivAPIWrapper
from langchain.utilities.apify import ApifyWrapper
from langchain.utilities.bash import BashProcess
from langchain.utilities.bing_search import BingSearchAPIWrapper
@@ -175,6 +177,10 @@ def _get_wikipedia(**kwargs: Any) -> BaseTool:
return WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(**kwargs))


def _get_arxiv(**kwargs: Any) -> BaseTool:
return ArxivQueryRun(api_wrapper=ArxivAPIWrapper(**kwargs))


def _get_google_serper(**kwargs: Any) -> BaseTool:
return Tool(
name="Serper Search",
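The `_get_arxiv` loader above follows the same pattern as the existing Wikipedia loader. A minimal usage sketch, assuming the loader is registered under the tool name "arxiv" in the tool registry (the registry entry itself is not shown in this diff excerpt):

from langchain.agents import load_tools

# Load the Arxiv tool by its registry name; this calls _get_arxiv under the hood.
tools = load_tools(["arxiv"])
arxiv_tool = tools[0]
print(arxiv_tool.run("1605.08386"))  # formatted Published/Title/Authors/Summary text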
1 change: 1 addition & 0 deletions langchain/tools/arxiv/__init__.py
@@ -0,0 +1 @@
"""Arxiv API toolkit."""
24 changes: 24 additions & 0 deletions langchain/tools/arxiv/tool.py
@@ -0,0 +1,24 @@
"""Tool for the Wikipedia API."""

from langchain.tools.base import BaseTool
from langchain.utilities.arxiv import ArxivAPIWrapper


class ArxivQueryRun(BaseTool):
"""Tool that adds the capability to search using the Arxiv API."""

name = "Arxiv"
description = (
"A wrapper around Arxiv. "
"Useful for getting summary of articles from arxiv.org. "
"Input should be a search query."
)
api_wrapper: ArxivAPIWrapper

def _run(self, query: str) -> str:
"""Use the Arxiv tool."""
return self.api_wrapper.run(query)

async def _arun(self, query: str) -> str:
"""Use the Arxiv tool asynchronously."""
raise NotImplementedError("ArxivAPIWrapper does not support async")
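A minimal sketch of constructing the tool directly, mirroring `_get_arxiv` above:

from langchain.tools.arxiv.tool import ArxivQueryRun
from langchain.utilities.arxiv import ArxivAPIWrapper

# The tool delegates to ArxivAPIWrapper.run inside its _run method.
tool = ArxivQueryRun(api_wrapper=ArxivAPIWrapper())
print(tool.run("Heat-bath random walks with Markov bases"))

The async variant raises NotImplementedError, so the tool is synchronous-only for now.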
2 changes: 2 additions & 0 deletions langchain/utilities/__init__.py
@@ -1,6 +1,7 @@
"""General utilities."""
from langchain.requests import TextRequestsWrapper
from langchain.utilities.apify import ApifyWrapper
from langchain.utilities.arxiv import ArxivAPIWrapper
from langchain.utilities.bash import BashProcess
from langchain.utilities.bing_search import BingSearchAPIWrapper
from langchain.utilities.google_search import GoogleSearchAPIWrapper
@@ -14,6 +15,7 @@

__all__ = [
"ApifyWrapper",
"ArxivAPIWrapper",
"BashProcess",
"TextRequestsWrapper",
"GoogleSearchAPIWrapper",
64 changes: 64 additions & 0 deletions langchain/utilities/arxiv.py
@@ -0,0 +1,64 @@
"""Util that calls Arxiv."""
from typing import Any, Dict

from pydantic import BaseModel, Extra, root_validator


class ArxivAPIWrapper(BaseModel):
"""Wrapper around ArxivAPI.

To use, you should have the ``arxiv`` python package installed.
https://lukasschwab.me/arxiv.py/index.html
This wrapper will use the Arxiv API to conduct searches and
fetch document summaries. By default, it will return the document summaries
of the top-k results of an input search.
"""

arxiv_search: Any #: :meta private:
arxiv_exceptions: Any #: :meta private:
top_k_results: int = 3
ARXIV_MAX_QUERY_LENGTH = 300

class Config:
"""Configuration for this pydantic object."""

extra = Extra.forbid

@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that the python package exists in environment."""
try:
import arxiv

values["arxiv_search"] = arxiv.Search
values["arxiv_exceptions"] = (
arxiv.ArxivError,
arxiv.UnexpectedEmptyPageError,
arxiv.HTTPError,
)
except ImportError:
raise ValueError(
"Could not import arxiv python package. "
"Please install it with `pip install arxiv`."
)
return values

def run(self, query: str) -> str:
"""
Run Arxiv search and get the document meta information.
See https://lukasschwab.me/arxiv.py/index.html#Search
See https://lukasschwab.me/arxiv.py/index.html#Result
It uses only the most informative fields of document meta information.
"""
try:
docs = [
f"Published: {result.updated.date()}\nTitle: {result.title}\n"
f"Authors: {', '.join(a.name for a in result.authors)}\n"
f"Summary: {result.summary}"
for result in self.arxiv_search( # type: ignore
query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
).results()
]
return "\n\n".join(docs) if docs else "No good Arxiv Result was found"
except self.arxiv_exceptions as ex:
return f"Arxiv exception: {ex}"