Skip to content

Commit

Permalink
searx: implement async + helper tool providing json results (#2129)
Browse files Browse the repository at this point in the history
- implemented `arun` and `aresults`. Reuses aiosession if available.
- helper tools `SearxSearchRun` and `SearxSearchResults`
- update doc

Co-authored-by: blob42 <spike@w530>
  • Loading branch information
blob42 and blob42 authored Mar 29, 2023
1 parent ccee1ae commit 031e32f
Show file tree
Hide file tree
Showing 5 changed files with 193 additions and 14 deletions.
16 changes: 14 additions & 2 deletions docs/ecosystem/searx.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,24 @@ s.run("what is a large language model?")

### Tool

You can also easily load this wrapper as a Tool (to use with an Agent).
You can also load this wrapper as a Tool (to use with an Agent).

You can do this with:

```python
from langchain.agents import load_tools
tools = load_tools(["searx-search"], searx_host="http://localhost:8888")
tools = load_tools(["searx-search"],
searx_host="http://localhost:8888",
engines=["github"])
```

Note that you can _optionally_ pass custom engines to use.

If you want to obtain results with metadata as *json* you can use:
```python
tools = load_tools(["searx-search-results-json"],
searx_host="http://localhost:8888",
num_results=5)
```

For more information on tools, see [this page](../modules/agents/tools/getting_started.md)
18 changes: 12 additions & 6 deletions langchain/agents/load_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from langchain.tools.base import BaseTool
from langchain.tools.bing_search.tool import BingSearchRun
from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
from langchain.tools.searx_search.tool import SearxSearchResults, SearxSearchRun
from langchain.tools.human.tool import HumanInputRun
from langchain.tools.python.tool import PythonREPLTool
from langchain.tools.requests.tool import RequestsGetTool
Expand Down Expand Up @@ -167,11 +168,12 @@ def _get_serpapi(**kwargs: Any) -> BaseTool:


def _get_searx_search(**kwargs: Any) -> BaseTool:
return Tool(
name="SearX Search",
description="A meta search engine. Useful for when you need to answer questions about current events. Input should be a search query.",
func=SearxSearchWrapper(**kwargs).run,
)
return SearxSearchRun(wrapper=SearxSearchWrapper(**kwargs))


def _get_searx_search_results_json(**kwargs: Any) -> BaseTool:
    """Build the JSON-results Searx tool from ``load_tools`` keyword arguments.

    ``num_results`` is stripped from the arguments handed to
    ``SearxSearchWrapper``; the tool itself still receives every keyword
    argument unchanged.
    """
    wrapper_kwargs = dict(kwargs)
    wrapper_kwargs.pop("num_results", None)
    searx = SearxSearchWrapper(**wrapper_kwargs)
    return SearxSearchResults(wrapper=searx, **kwargs)


def _get_bing_search(**kwargs: Any) -> BaseTool:
Expand All @@ -195,10 +197,14 @@ def _get_human_tool(**kwargs: Any) -> BaseTool:
_get_google_search_results_json,
["google_api_key", "google_cse_id", "num_results"],
),
"searx-search-results-json": (
_get_searx_search_results_json,
["searx_host", "engines", "num_results", "aiosession"],
),
"bing-search": (_get_bing_search, ["bing_subscription_key", "bing_search_url"]),
"google-serper": (_get_google_serper, ["serper_api_key"]),
"serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]),
"searx-search": (_get_searx_search, ["searx_host"]),
"searx-search": (_get_searx_search, ["searx_host", "engines", "aiosession"]),
"wikipedia": (_get_wikipedia, ["top_k_results"]),
"human": (_get_human_tool, ["prompt_func", "input_func"]),
}
Expand Down
Empty file.
51 changes: 51 additions & 0 deletions langchain/tools/searx_search/tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""Tool for the SearxNG search API."""
from pydantic import Extra

from langchain.tools.base import BaseTool
from langchain.utilities.searx_search import SearxSearchWrapper


class SearxSearchRun(BaseTool):
    """Tool that adds the capability to query a Searx instance.

    Every call is delegated to the configured ``SearxSearchWrapper`` and the
    wrapper's text result is returned as-is.
    """

    name = "Searx Search"
    # Adjacent string literals are concatenated verbatim, so each sentence
    # carries its own trailing space to keep the prompt text readable.
    description = (
        "A meta search engine. "
        "Useful for when you need to answer questions about current events. "
        "Input should be a search query."
    )
    wrapper: SearxSearchWrapper

    def _run(self, query: str) -> str:
        """Run ``query`` through the wrapper and return its text result."""
        return self.wrapper.run(query)

    async def _arun(self, query: str) -> str:
        """Asynchronously run ``query`` through the wrapper."""
        return await self.wrapper.arun(query)


class SearxSearchResults(BaseTool):
    """Tool that queries a Searx instance and returns the result list as text.

    Every call is delegated to the configured ``SearxSearchWrapper``; at most
    ``num_results`` results are requested from it.
    """

    name = "Searx Search"
    # Adjacent string literals are concatenated verbatim, so each sentence
    # carries its own trailing space to keep the prompt text readable.
    description = (
        "A meta search engine. "
        "Useful for when you need to answer questions about current events. "
        "Input should be a search query. "
        "Output is a JSON array of the query results"
    )
    wrapper: SearxSearchWrapper
    # Maximum number of results requested from the wrapper per query.
    num_results: int = 4

    class Config:
        """Pydantic config."""

        # Unknown keyword arguments are accepted and kept on the instance.
        extra = Extra.allow

    def _run(self, query: str) -> str:
        """Query the wrapper and return the stringified result list."""
        # NOTE(review): str() gives the Python repr of the list, which is not
        # strict JSON despite the description - confirm consumers accept it.
        return str(self.wrapper.results(query, self.num_results))

    async def _arun(self, query: str) -> str:
        """Asynchronously query the wrapper and return the stringified list."""
        return str(await self.wrapper.aresults(query, self.num_results))
122 changes: 116 additions & 6 deletions langchain/utilities/searx_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
-----------
In order to use this tool you need to provide the searx host. This can be done
In order to use this utility you need to provide the searx host. This can be done
by passing the named parameter :attr:`searx_host <SearxSearchWrapper.searx_host>`
or exporting the environment variable SEARX_HOST.
Note: this is the only required parameter.
Expand Down Expand Up @@ -129,6 +129,7 @@
import json
from typing import Any, Dict, List, Optional

import aiohttp
import requests
from pydantic import BaseModel, Extra, Field, PrivateAttr, root_validator, validator

Expand Down Expand Up @@ -204,6 +205,13 @@ class SearxSearchWrapper(BaseModel):
engines: Optional[List[str]] = []
query_suffix: Optional[str] = ""
k: int = 10
aiosession: Optional[Any] = None

class Config:
    """Pydantic settings for this model."""

    # Allow field values of types pydantic has no validator for.
    arbitrary_types_allowed = True
    # Reject keyword arguments that do not match a declared field.
    extra = Extra.forbid

@validator("unsecure")
def disable_ssl_warnings(cls, v: bool) -> bool:
Expand Down Expand Up @@ -244,11 +252,6 @@ def validate_params(cls, values: Dict) -> Dict:

return values

class Config:
"""Configuration for this pydantic object."""

extra = Extra.forbid

def _searx_api_query(self, params: dict) -> SearxResults:
"""Actual request to searx API."""
raw_result = requests.get(
Expand All @@ -264,6 +267,33 @@ def _searx_api_query(self, params: dict) -> SearxResults:
self._result = res
return res

async def _asearx_api_query(self, params: dict) -> SearxResults:
    """Asynchronously send a query to the searx API.

    Reuses ``self.aiosession`` when one is set; otherwise a temporary
    ``aiohttp.ClientSession`` is opened for this single request.

    Args:
        params: query-string parameters sent to ``self.searx_host``.

    Returns:
        SearxResults: built from the response body. The same object is also
        stored in ``self._result``.

    Raises:
        ValueError: if the response status is not OK; the response body is
            included in the exception arguments.
    """
    request_kwargs: Dict[str, Any] = {"headers": self.headers, "params": params}
    if self.unsecure:
        # aiohttp spells "skip certificate verification" as ssl=False (there
        # is no `verify` keyword). When verification is wanted the argument
        # is left out so the session's own default applies.
        request_kwargs["ssl"] = False

    async def _fetch(session: Any) -> SearxResults:
        # Single request/response path shared by both session sources.
        async with session.get(self.searx_host, **request_kwargs) as response:
            if not response.ok:
                # response.text is a coroutine method in aiohttp: await it so
                # the error carries the body rather than a bound method.
                raise ValueError(
                    "Searx API returned an error: ", await response.text()
                )
            return SearxResults(await response.text())

    if self.aiosession:
        # Caller-owned session: use it but never close it here.
        result = await _fetch(self.aiosession)
    else:
        async with aiohttp.ClientSession() as session:
            result = await _fetch(session)

    self._result = result
    return result

def run(
self,
query: str,
Expand All @@ -281,6 +311,13 @@ def run(
engines: List of engines to use for the query.
**kwargs: extra parameters to pass to the searx API.
Returns:
str: The result of the query.
Raises:
ValueError: If an error occurred with the query.
Example:
This will make a query to the qwant engine:
Expand Down Expand Up @@ -321,6 +358,41 @@ def run(

return toret

async def arun(
    self,
    query: str,
    engines: Optional[List[str]] = None,
    query_suffix: Optional[str] = "",
    **kwargs: Any,
) -> str:
    """Asynchronous version of `run`.

    Args:
        query: The query to search for.
        engines: Engines to use for this query; only applied when it is a
            non-empty list.
        query_suffix: Extra text appended to the query, after the wrapper's
            own ``query_suffix``.
        **kwargs: Extra parameters merged last into the request parameters.

    Returns:
        str: The first answer if the response has any; otherwise the
        "content" of the first ``self.k`` results separated by blank lines;
        otherwise a "no result" message.
    """
    request_params = {**self.params, "q": query, **kwargs}

    # Instance-level suffix goes first, then the per-call one.
    suffixes = []
    if self.query_suffix:
        suffixes.append(self.query_suffix)
    if isinstance(query_suffix, str) and query_suffix:
        suffixes.append(query_suffix)
    for suffix in suffixes:
        request_params["q"] += " " + suffix

    if isinstance(engines, list) and engines:
        request_params["engines"] = ",".join(engines)

    response = await self._asearx_api_query(request_params)

    if response.answers:
        return response.answers[0]
    if response.results:
        # only return the content of the results list
        snippets = (hit.get("content", "") for hit in response.results[: self.k])
        return "\n\n".join(snippets)
    return "No good search result found"

def results(
self,
query: str,
Expand Down Expand Up @@ -383,3 +455,41 @@ def results(
}
for result in results
]

async def aresults(
    self,
    query: str,
    num_results: int,
    engines: Optional[List[str]] = None,
    query_suffix: Optional[str] = "",
    **kwargs: Any,
) -> List[Dict]:
    """Asynchronously query searx and return results with metadata.

    Asynchronous counterpart of `results`.

    Args:
        query: The query to search for.
        num_results: Maximum number of results to keep from the response.
        engines: Engines to use for this query; only applied when it is a
            non-empty list.
        query_suffix: Extra text appended to the query, after the wrapper's
            own ``query_suffix``.
        **kwargs: Extra parameters merged last into the request parameters.

    Returns:
        List[Dict]: One dict per result with the keys "snippet", "title",
        "link", "engines" and "category"; or a single-item list holding a
        "Result" message when nothing was found.
    """
    request_params = {**self.params, "q": query, **kwargs}

    # Instance-level suffix goes first, then the per-call one.
    suffixes = []
    if self.query_suffix:
        suffixes.append(self.query_suffix)
    if isinstance(query_suffix, str) and query_suffix:
        suffixes.append(query_suffix)
    for suffix in suffixes:
        request_params["q"] += " " + suffix

    if isinstance(engines, list) and engines:
        request_params["engines"] = ",".join(engines)

    response = await self._asearx_api_query(request_params)
    hits = response.results[:num_results]
    if not hits:
        return [{"Result": "No good Search Result was found"}]

    # Output key -> key in the raw searx result. These are mandatory in the
    # raw result; only "content" falls back to an empty string.
    required_fields = {
        "title": "title",
        "link": "url",
        "engines": "engines",
        "category": "category",
    }
    return [
        {
            "snippet": hit.get("content", ""),
            **{out_key: hit[src_key] for out_key, src_key in required_fields.items()},
        }
        for hit in hits
    ]

0 comments on commit 031e32f

Please sign in to comment.