Harrison/arxiv tool #3186

Merged · 3 commits · Apr 19, 2023
156 changes: 156 additions & 0 deletions docs/modules/agents/tools/examples/arxiv.ipynb
@@ -0,0 +1,156 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "245a954a",
"metadata": {},
"source": [
"# Arxiv API\n",
"\n",
"This notebook goes over how to use the `arxiv` component. \n",
"\n",
"First, you need to install `arxiv` python package."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d5a7209e",
"metadata": {
"tags": [],
"vscode": {
"languageId": "shellscript"
}
},
"outputs": [],
"source": [
"!pip install arxiv"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8d32b39a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.utilities import ArxivAPIWrapper"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2a50dd27",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"arxiv = ArxivAPIWrapper()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "34bb5968",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"'Published: 2016-05-26\\nTitle: Heat-bath random walks with Markov bases\\nAuthors: Caprice Stanley, Tobias Windisch\\nSummary: Graphs on lattice points are studied whose edges come from a finite set of\\nallowed moves of arbitrary length. We show that the diameter of these graphs on\\nfibers of a fixed integer matrix can be bounded from above by a constant. We\\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\\nalso state explicit conditions on the set of moves so that the heat-bath random\\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\\ndimension.'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs = arxiv.run(\"1605.08386\")\n",
"docs"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b0867fda-e119-4b19-9ec6-e354fa821db3",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"'Published: 2017-10-10\\nTitle: On Mixing Behavior of a Family of Random Walks Determined by a Linear Recurrence\\nAuthors: Caprice Stanley, Seth Sullivant\\nSummary: We study random walks on the integers mod $G_n$ that are determined by an\\ninteger sequence $\\\\{ G_n \\\\}_{n \\\\geq 1}$ generated by a linear recurrence\\nrelation. Fourier analysis provides explicit formulas to compute the\\neigenvalues of the transition matrices and we use this to bound the mixing time\\nof the random walks.\\n\\nPublished: 2016-05-26\\nTitle: Heat-bath random walks with Markov bases\\nAuthors: Caprice Stanley, Tobias Windisch\\nSummary: Graphs on lattice points are studied whose edges come from a finite set of\\nallowed moves of arbitrary length. We show that the diameter of these graphs on\\nfibers of a fixed integer matrix can be bounded from above by a constant. We\\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\\nalso state explicit conditions on the set of moves so that the heat-bath random\\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\\ndimension.\\n\\nPublished: 2003-03-18\\nTitle: Calculation of fluxes of charged particles and neutrinos from atmospheric showers\\nAuthors: V. Plyaskin\\nSummary: The results on the fluxes of charged particles and neutrinos from a\\n3-dimensional (3D) simulation of atmospheric showers are presented. An\\nagreement of calculated fluxes with data on charged particles from the AMS and\\nCAPRICE detectors is demonstrated. Predictions on neutrino fluxes at different\\nexperimental sites are compared with results from other calculations.'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs = arxiv.run(\"Caprice Stanley\")\n",
"docs"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "3580aeeb-086f-45ba-bcdc-b46f5134b3dd",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"'No good Arxiv Result was found'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs = arxiv.run(\"1605.08386WWW\")\n",
"docs"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4f4e9602",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
2 changes: 2 additions & 0 deletions langchain/__init__.py
@@ -47,6 +47,7 @@
PromptTemplate,
)
from langchain.sql_database import SQLDatabase
from langchain.utilities import ArxivAPIWrapper
from langchain.utilities.google_search import GoogleSearchAPIWrapper
from langchain.utilities.google_serper import GoogleSerperAPIWrapper
from langchain.utilities.searx_search import SearxSearchWrapper
@@ -74,6 +75,7 @@
"LLMBashChain",
"LLMCheckerChain",
"LLMMathChain",
"ArxivAPIWrapper",
"SelfAskWithSearchChain",
"SerpAPIWrapper",
"SerpAPIChain",
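Since `ArxivAPIWrapper` is now re-exported from the package root, both import paths below resolve to the same class; a quick sketch based on the `__init__.py` change above:

from langchain import ArxivAPIWrapper
from langchain.utilities import ArxivAPIWrapper as ArxivAPIWrapperAlias

# Both names refer to the identical class object re-exported at the top level.
assert ArxivAPIWrapper is ArxivAPIWrapperAlias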
6 changes: 6 additions & 0 deletions langchain/agents/load_tools.py
@@ -11,6 +11,7 @@
from langchain.chains.pal.base import PALChain
from langchain.llms.base import BaseLLM
from langchain.requests import TextRequestsWrapper
from langchain.tools.arxiv.tool import ArxivQueryRun
from langchain.tools.base import BaseTool
from langchain.tools.bing_search.tool import BingSearchRun
from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
@@ -26,6 +27,7 @@
from langchain.tools.searx_search.tool import SearxSearchResults, SearxSearchRun
from langchain.tools.wikipedia.tool import WikipediaQueryRun
from langchain.tools.wolfram_alpha.tool import WolframAlphaQueryRun
from langchain.utilities import ArxivAPIWrapper
from langchain.utilities.apify import ApifyWrapper
from langchain.utilities.bash import BashProcess
from langchain.utilities.bing_search import BingSearchAPIWrapper
@@ -175,6 +177,10 @@ def _get_wikipedia(**kwargs: Any) -> BaseTool:
return WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(**kwargs))


def _get_arxiv(**kwargs: Any) -> BaseTool:
return ArxivQueryRun(api_wrapper=ArxivAPIWrapper(**kwargs))


def _get_google_serper(**kwargs: Any) -> BaseTool:
return Tool(
name="Serper Search",
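The `_get_arxiv` loader above follows the same pattern as the existing Wikipedia loader. A minimal usage sketch, assuming the loader is registered under the tool name "arxiv" in the tool registry (the registry entry itself is not shown in this diff excerpt):

from langchain.agents import load_tools

# Load the Arxiv tool by its registry name; this calls _get_arxiv under the hood.
tools = load_tools(["arxiv"])
arxiv_tool = tools[0]
print(arxiv_tool.run("1605.08386"))  # formatted Published/Title/Authors/Summary text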
1 change: 1 addition & 0 deletions langchain/tools/arxiv/__init__.py
@@ -0,0 +1 @@
"""Arxiv API toolkit."""
24 changes: 24 additions & 0 deletions langchain/tools/arxiv/tool.py
@@ -0,0 +1,24 @@
"""Tool for the Wikipedia API."""

from langchain.tools.base import BaseTool
from langchain.utilities.arxiv import ArxivAPIWrapper


class ArxivQueryRun(BaseTool):
"""Tool that adds the capability to search using the Arxiv API."""

name = "Arxiv"
description = (
"A wrapper around Arxiv. "
"Useful for getting summary of articles from arxiv.org. "
"Input should be a search query."
)
api_wrapper: ArxivAPIWrapper

def _run(self, query: str) -> str:
"""Use the Arxiv tool."""
return self.api_wrapper.run(query)

async def _arun(self, query: str) -> str:
"""Use the Arxiv tool asynchronously."""
raise NotImplementedError("ArxivAPIWrapper does not support async")
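A minimal sketch of constructing the tool directly, mirroring `_get_arxiv` above:

from langchain.tools.arxiv.tool import ArxivQueryRun
from langchain.utilities.arxiv import ArxivAPIWrapper

# The tool delegates to ArxivAPIWrapper.run inside its _run method.
tool = ArxivQueryRun(api_wrapper=ArxivAPIWrapper())
print(tool.run("Heat-bath random walks with Markov bases"))

The async variant raises NotImplementedError, so the tool is synchronous-only for now.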
2 changes: 2 additions & 0 deletions langchain/utilities/__init__.py
@@ -1,6 +1,7 @@
"""General utilities."""
from langchain.requests import TextRequestsWrapper
from langchain.utilities.apify import ApifyWrapper
from langchain.utilities.arxiv import ArxivAPIWrapper
from langchain.utilities.bash import BashProcess
from langchain.utilities.bing_search import BingSearchAPIWrapper
from langchain.utilities.google_search import GoogleSearchAPIWrapper
@@ -14,6 +15,7 @@

__all__ = [
"ApifyWrapper",
"ArxivAPIWrapper",
"BashProcess",
"TextRequestsWrapper",
"GoogleSearchAPIWrapper",
64 changes: 64 additions & 0 deletions langchain/utilities/arxiv.py
@@ -0,0 +1,64 @@
"""Util that calls Arxiv."""
from typing import Any, Dict

from pydantic import BaseModel, Extra, root_validator


class ArxivAPIWrapper(BaseModel):
"""Wrapper around ArxivAPI.

To use, you should have the ``arxiv`` python package installed.
https://lukasschwab.me/arxiv.py/index.html
This wrapper will use the Arxiv API to conduct searches and
fetch document summaries. By default, it will return the document summaries
of the top-k results of an input search.
"""

arxiv_search: Any #: :meta private:
arxiv_exceptions: Any #: :meta private:
top_k_results: int = 3
ARXIV_MAX_QUERY_LENGTH = 300

class Config:
"""Configuration for this pydantic object."""

extra = Extra.forbid

@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that the python package exists in environment."""
try:
import arxiv

values["arxiv_search"] = arxiv.Search
values["arxiv_exceptions"] = (
arxiv.ArxivError,
arxiv.UnexpectedEmptyPageError,
arxiv.HTTPError,
)
except ImportError:
raise ValueError(
"Could not import arxiv python package. "
"Please install it with `pip install arxiv`."
)
return values

def run(self, query: str) -> str:
"""
Run Arxiv search and get the document meta information.
See https://lukasschwab.me/arxiv.py/index.html#Search
See https://lukasschwab.me/arxiv.py/index.html#Result
It uses only the most informative fields of document meta information.
"""
try:
docs = [
f"Published: {result.updated.date()}\nTitle: {result.title}\n"
f"Authors: {', '.join(a.name for a in result.authors)}\n"
f"Summary: {result.summary}"
for result in self.arxiv_search( # type: ignore
query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
).results()
]
return "\n\n".join(docs) if docs else "No good Arxiv Result was found"
except self.arxiv_exceptions as ex:
return f"Arxiv exception: {ex}"