-
Notifications
You must be signed in to change notification settings - Fork 13.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add
DocstoreFn
- lookup doc via arbitrary function (#3760)
This **partially** addresses #1524, but it's also useful for some of our use cases. `DocstoreFn` allows looking up a document through a function that accepts the `search` string, without the need to implement a custom `Docstore`. This could be useful when: * you don't want to implement a `Docstore` just to provide a custom `search` * it's expensive to construct an `InMemoryDocstore`/dict * you retrieve documents from remote sources * you just want to reuse existing objects
- Loading branch information
1 parent
c55ba43
commit 160bfae
Showing
2 changed files
with
42 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
from typing import Callable, Union | ||
|
||
from langchain.docstore.base import Docstore | ||
from langchain.schema import Document | ||
|
||
|
||
class DocstoreFn(Docstore):
    """Docstore backed by an arbitrary lookup callable.

    Handy in situations such as:

    * building an InMemoryDocstore/dict up front would be expensive
    * the documents are fetched from remote sources
    * existing objects should simply be reused
    """

    def __init__(
        self,
        lookup_fn: Callable[[str], Union[Document, str]],
    ):
        """Store the callable that resolves a search string.

        Args:
            lookup_fn: Called with the search string; expected to return
                either a ``Document`` or a plain ``str``.
        """
        self._lookup_fn = lookup_fn

    def search(self, search: str) -> Document:
        """Resolve ``search`` through the lookup callable.

        Args:
            search: The string handed to the lookup callable.

        Returns:
            The ``Document`` produced by the callable, or a new ``Document``
            wrapping the returned string.

        Raises:
            ValueError: If the callable returns neither a ``str`` nor a
                ``Document``.
        """
        r = self._lookup_fn(search)

        if isinstance(r, str):
            # A bare string carries no metadata of its own, so the search
            # string is assumed to be the source ID.
            return Document(page_content=r, metadata={"source": search})

        if not isinstance(r, Document):
            raise ValueError(f"Unexpected type of document {type(r)}")

        return r
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
from langchain.docstore.arbitrary_fn import DocstoreFn | ||
from langchain.schema import Document | ||
|
||
|
||
def test_document_found() -> None:
    """Searching a known key yields a Document with the expected content."""
    # A plain dict stands in for the lookup here to keep things simple; any
    # callable would work, including one that performs a remote fetch.
    backing = {"foo": Document(page_content="bar")}
    store = DocstoreFn(lambda key: backing[key])

    found = store.search("foo")

    assert isinstance(found, Document)
    assert found.page_content == "bar"