Use default load() implementation in doc loaders

langchain-ai committed on Mar 1, 2024
commit 707b603
1 parent 1deb8ca
Show file tree

Hide file tree

Showing 49 changed files with 22 additions and 253 deletions.
diff --git a/libs/community/langchain_community/document_loaders/acreom.py b/libs/community/langchain_community/document_loaders/acreom.py
@@ -1,6 +1,6 @@
 import re
 from pathlib import Path
-from typing import Iterator, List
+from typing import Iterator
 
 from langchain_core.documents import Document
 
@@ -74,6 +74,3 @@ def lazy_load(self) -> Iterator[Document]:
             }
 
             yield Document(page_content=text, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
diff --git a/libs/community/langchain_community/document_loaders/airbyte.py b/libs/community/langchain_community/document_loaders/airbyte.py
@@ -1,4 +1,4 @@
-from typing import Any, Callable, Iterator, List, Mapping, Optional
+from typing import Any, Callable, Iterator, Mapping, Optional
 
 from langchain_core.documents import Document
 from langchain_core.utils.utils import guard_import
@@ -53,9 +53,6 @@ def _handle_record(
         self._stream_name = stream_name
         self._state = state
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         return self._integration._load_data(
             stream_name=self._stream_name, state=self._state

diff --git a/libs/community/langchain_community/document_loaders/airtable.py b/libs/community/langchain_community/document_loaders/airtable.py
@@ -1,4 +1,4 @@
-from typing import Iterator, List
+from typing import Iterator
 
 from langchain_core.documents import Document
 
@@ -34,7 +34,3 @@ def lazy_load(self) -> Iterator[Document]:
                     "table_id": self.table_id,
                 },
             )
-
-    def load(self) -> List[Document]:
-        """Load Documents from table."""
-        return list(self.lazy_load())
diff --git a/libs/community/langchain_community/document_loaders/arcgis_loader.py b/libs/community/langchain_community/document_loaders/arcgis_loader.py
@@ -148,7 +148,3 @@ def lazy_load(self) -> Iterator[Document]:
                     )
 
             yield Document(page_content=page_content, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        """Load all records from FeatureLayer."""
-        return list(self.lazy_load())
diff --git a/libs/community/langchain_community/document_loaders/astradb.py b/libs/community/langchain_community/document_loaders/astradb.py
@@ -76,9 +76,6 @@ def __init__(
         self.nb_prefetched = nb_prefetched
         self.extraction_function = extraction_function
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         for doc in self.collection.paginated_find(
             filter=self.filter,

diff --git a/libs/community/langchain_community/document_loaders/athena.py b/libs/community/langchain_community/document_loaders/athena.py
@@ -157,7 +157,3 @@ def lazy_load(self) -> Iterator[Document]:
             }
             doc = Document(page_content=page_content, metadata=metadata)
             yield doc
-
-    def load(self) -> List[Document]:
-        """Load data into document objects."""
-        return list(self.lazy_load())
diff --git a/libs/community/langchain_community/document_loaders/azure_ai_data.py b/libs/community/langchain_community/document_loaders/azure_ai_data.py
@@ -1,4 +1,4 @@
-from typing import Iterator, List, Optional
+from typing import Iterator, Optional
 
 from langchain_community.docstore.document import Document
 from langchain_community.document_loaders.base import BaseLoader
@@ -16,10 +16,6 @@ def __init__(self, url: str, glob: Optional[str] = None):
         self.glob_pattern = glob
         """Optional glob pattern to select files. Defaults to None."""
 
-    def load(self) -> List[Document]:
-        """Load documents."""
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         """A lazy loader for Documents."""
         try:

diff --git a/libs/community/langchain_community/document_loaders/baiducloud_bos_directory.py b/libs/community/langchain_community/document_loaders/baiducloud_bos_directory.py
@@ -1,4 +1,4 @@
-from typing import Any, Iterator, List
+from typing import Any, Iterator
 
 from langchain_core.documents import Document
 
@@ -18,9 +18,6 @@ def __init__(self, conf: Any, bucket: str, prefix: str = ""):
         self.bucket = bucket
         self.prefix = prefix
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         """Load documents."""
         try:

diff --git a/libs/community/langchain_community/document_loaders/baiducloud_bos_file.py b/libs/community/langchain_community/document_loaders/baiducloud_bos_file.py
@@ -1,7 +1,7 @@
 import logging
 import os
 import tempfile
-from typing import Any, Iterator, List
+from typing import Any, Iterator
 
 from langchain_core.documents import Document
 
@@ -24,9 +24,6 @@ def __init__(self, conf: Any, bucket: str, key: str):
         self.bucket = bucket
         self.key = key
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         """Load documents."""
         try:

diff --git a/libs/community/langchain_community/document_loaders/bibtex.py b/libs/community/langchain_community/document_loaders/bibtex.py
@@ -96,16 +96,3 @@ def lazy_load(self) -> Iterator[Document]:
             doc = self._load_entry(entry)
             if doc:
                 yield doc
-
-    def load(self) -> List[Document]:
-        """Load bibtex file documents from the given bibtex file path.
-
-        See https://bibtexparser.readthedocs.io/en/master/
-
-        Args:
-            file_path: the path to the bibtex file
-
-        Returns:
-            a list of documents with the document.page_content in text format
-        """
-        return list(self.lazy_load())
diff --git a/libs/community/langchain_community/document_loaders/browserless.py b/libs/community/langchain_community/document_loaders/browserless.py
@@ -61,7 +61,3 @@ def lazy_load(self) -> Iterator[Document]:
                         "source": url,
                     },
                 )
-
-    def load(self) -> List[Document]:
-        """Load Documents from URLs."""
-        return list(self.lazy_load())
diff --git a/libs/community/langchain_community/document_loaders/cassandra.py b/libs/community/langchain_community/document_loaders/cassandra.py
@@ -5,7 +5,6 @@
     Any,
     Callable,
     Iterator,
-    List,
     Optional,
     Sequence,
     Union,
@@ -106,9 +105,6 @@ def __init__(
         if query_execution_profile is not _NOT_SET:
             self.query_kwargs["execution_profile"] = query_execution_profile
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         for row in self.session.execute(self.query, **self.query_kwargs):
             metadata = self.metadata.copy()

diff --git a/libs/community/langchain_community/document_loaders/chromium.py b/libs/community/langchain_community/document_loaders/chromium.py
@@ -78,14 +78,3 @@ def lazy_load(self) -> Iterator[Document]:
             html_content = asyncio.run(self.ascrape_playwright(url))
             metadata = {"source": url}
             yield Document(page_content=html_content, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        """
-        Load and return all Documents from the provided URLs.
-
-        Returns:
-            List[Document]: A list of Document objects
-            containing the scraped content from each URL.
-
-        """
-        return list(self.lazy_load())
diff --git a/libs/community/langchain_community/document_loaders/couchbase.py b/libs/community/langchain_community/document_loaders/couchbase.py
@@ -68,10 +68,6 @@ def __init__(
         self.page_content_fields = page_content_fields
         self.metadata_fields = metadata_fields
 
-    def load(self) -> List[Document]:
-        """Load Couchbase data into Document objects."""
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         """Load Couchbase data into Document objects lazily."""
         from datetime import timedelta

diff --git a/libs/community/langchain_community/document_loaders/dataframe.py b/libs/community/langchain_community/document_loaders/dataframe.py
@@ -1,4 +1,4 @@
-from typing import Any, Iterator, List
+from typing import Any, Iterator
 
 from langchain_core.documents import Document
 
@@ -26,10 +26,6 @@ def lazy_load(self) -> Iterator[Document]:
             metadata.pop(self.page_content_column)
             yield Document(page_content=text, metadata=metadata)
 
-    def load(self) -> List[Document]:
-        """Load full dataframe."""
-        return list(self.lazy_load())
-
 
 class DataFrameLoader(BaseDataFrameLoader):
     """Load `Pandas` DataFrame."""

diff --git a/libs/community/langchain_community/document_loaders/doc_intelligence.py b/libs/community/langchain_community/document_loaders/doc_intelligence.py
@@ -1,4 +1,4 @@
-from typing import Iterator, List, Optional
+from typing import Iterator, Optional
 
 from langchain_core.documents import Document
 
@@ -77,10 +77,6 @@ def __init__(
             mode=mode,
         )
 
-    def load(self) -> List[Document]:
-        """Load given path as pages."""
-        return list(self.lazy_load())
-
     def lazy_load(
         self,
     ) -> Iterator[Document]:

diff --git a/libs/community/langchain_community/document_loaders/etherscan.py b/libs/community/langchain_community/document_loaders/etherscan.py
@@ -71,10 +71,6 @@ def lazy_load(self) -> Iterator[Document]:
         for doc in result:
             yield doc
 
-    def load(self) -> List[Document]:
-        """Load transactions from spcifc account by Etherscan."""
-        return list(self.lazy_load())
-
     def getNormTx(self) -> List[Document]:
         url = (
             f"https://api.etherscan.io/api?module=account&action=txlist&address={self.account_address}"

diff --git a/libs/community/langchain_community/document_loaders/fauna.py b/libs/community/langchain_community/document_loaders/fauna.py
@@ -1,4 +1,4 @@
-from typing import Iterator, List, Optional, Sequence
+from typing import Iterator, Optional, Sequence
 
 from langchain_core.documents import Document
 
@@ -28,9 +28,6 @@ def __init__(
         self.secret = secret
         self.metadata_fields = metadata_fields
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         try:
             from fauna import Page, fql

diff --git a/libs/community/langchain_community/document_loaders/generic.py b/libs/community/langchain_community/document_loaders/generic.py
@@ -115,10 +115,6 @@ def lazy_load(
         for blob in self.blob_loader.yield_blobs():
             yield from self.blob_parser.lazy_parse(blob)
 
-    def load(self) -> List[Document]:
-        """Load all documents."""
-        return list(self.lazy_load())
-
     def load_and_split(
         self, text_splitter: Optional[TextSplitter] = None
     ) -> List[Document]:

diff --git a/libs/community/langchain_community/document_loaders/geodataframe.py b/libs/community/langchain_community/document_loaders/geodataframe.py
@@ -1,4 +1,4 @@
-from typing import Any, Iterator, List
+from typing import Any, Iterator
 
 from langchain_core.documents import Document
 
@@ -67,7 +67,3 @@ def lazy_load(self) -> Iterator[Document]:
 
             # using WKT instead of str() to help GIS system interoperability
             yield Document(page_content=geom.wkt, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        """Load full dataframe."""
-        return list(self.lazy_load())
diff --git a/libs/community/langchain_community/document_loaders/github.py b/libs/community/langchain_community/document_loaders/github.py
@@ -127,32 +127,6 @@ def lazy_load(self) -> Iterator[Document]:
             else:
                 url = None
 
-    def load(self) -> List[Document]:
-        """
-        Get issues of a GitHub repository.
-
-        Returns:
-            A list of Documents with attributes:
-                - page_content
-                - metadata
-                    - url
-                    - title
-                    - creator
-                    - created_at
-                    - last_update_time
-                    - closed_time
-                    - number of comments
-                    - state
-                    - labels
-                    - assignee
-                    - assignees
-                    - milestone
-                    - locked
-                    - number
-                    - is_pull_request
-        """
-        return list(self.lazy_load())
-
     def parse_issue(self, issue: dict) -> Document:
         """Create Document objects from a list of GitHub issues."""
         metadata = {

diff --git a/libs/community/langchain_community/document_loaders/hugging_face_dataset.py b/libs/community/langchain_community/document_loaders/hugging_face_dataset.py
@@ -1,5 +1,5 @@
 import json
-from typing import Iterator, List, Mapping, Optional, Sequence, Union
+from typing import Iterator, Mapping, Optional, Sequence, Union
 
 from langchain_core.documents import Document
 
@@ -84,10 +84,6 @@ def lazy_load(
             for row in dataset[key]
         )
 
-    def load(self) -> List[Document]:
-        """Load documents."""
-        return list(self.lazy_load())
-
     def parse_obj(self, page_content: Union[str, object]) -> str:
         if isinstance(page_content, object):
             return json.dumps(page_content)

diff --git a/libs/community/langchain_community/document_loaders/hugging_face_model.py b/libs/community/langchain_community/document_loaders/hugging_face_model.py
@@ -106,7 +106,3 @@ def lazy_load(self) -> Iterator[Document]:
                 page_content=readme_content,
                 metadata=model,
             )
-
-    def load(self) -> List[Document]:
-        """Load model information, including README content."""
-        return list(self.lazy_load())
diff --git a/libs/community/langchain_community/document_loaders/joplin.py b/libs/community/langchain_community/document_loaders/joplin.py
@@ -91,6 +91,3 @@ def _convert_date(self, date: int) -> str:
 
     def lazy_load(self) -> Iterator[Document]:
         yield from self._get_notes()
-
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
diff --git a/libs/community/langchain_community/document_loaders/larksuite.py b/libs/community/langchain_community/document_loaders/larksuite.py
@@ -1,6 +1,6 @@
 import json
 import urllib.request
-from typing import Any, Iterator, List
+from typing import Any, Iterator
 
 from langchain_core.documents import Document
 
@@ -46,7 +46,3 @@ def lazy_load(self) -> Iterator[Document]:
             "title": metadata_json["data"]["document"]["title"],
         }
         yield Document(page_content=text, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        """Load LarkSuite (FeiShu) document."""
-        return list(self.lazy_load())
diff --git a/libs/community/langchain_community/document_loaders/max_compute.py b/libs/community/langchain_community/document_loaders/max_compute.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import Any, Iterator, List, Optional, Sequence
+from typing import Any, Iterator, Optional, Sequence
 
 from langchain_core.documents import Document
 
@@ -78,6 +78,3 @@ def lazy_load(self) -> Iterator[Document]:
             else:
                 metadata = {k: v for k, v in row.items() if k not in page_content_data}
             yield Document(page_content=page_content, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
diff --git a/libs/community/langchain_community/document_loaders/mediawikidump.py b/libs/community/langchain_community/document_loaders/mediawikidump.py
@@ -1,6 +1,6 @@
 import logging
 from pathlib import Path
-from typing import Iterator, List, Optional, Sequence, Union
+from typing import Iterator, Optional, Sequence, Union
 
 from langchain_core.documents import Document
 
@@ -87,11 +87,6 @@ def _load_single_page_from_dump(self, page) -> Document:  # type: ignore[no-unty
             metadata = {"source": page.title}
             return Document(page_content=text, metadata=metadata)
 
-    def load(self) -> List[Document]:
-        """Load from a file path."""
-
-        return [doc for doc in self.lazy_load()]
-
     def lazy_load(
         self,
     ) -> Iterator[Document]: