Skip to content

Commit

Permalink
Fix the failing unit tests. (langchain-ai#13)
Browse files (browse the repository at this point in the history)
Update unit test for lazy_load.
Format code using ruff.
  • Loading branch information
Raj725 committed Jan 25, 2024
1 parent 358cf78 commit ad5c324
Showing 1 changed file with 30 additions and 26 deletions.
56 changes: 30 additions & 26 deletions libs/community/tests/unit_tests/document_loaders/test_pebblo.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,34 +2,42 @@
from pathlib import Path

from langchain_core.documents import Document
from langchain_community.document_loaders import CSVLoader

EXAMPLE_DOCS_DIRECTORY = str(Path(__file__).parent.parent / "examples/")
from langchain_community.document_loaders import CSVLoader, PyPDFLoader

EXAMPLE_DOCS_DIRECTORY = str(Path(__file__).parent.parent.parent / "examples/")


def test_pebblo_import() -> None:
"""Test that the Pebblo safe loader can be imported."""
from langchain_community.document_loaders import PebbloSafeLoader # noqa: F401


def test_empty_filebased_loader() -> None:
"""Test basic file based csv loader."""
# Setup
from langchain_community.document_loaders import PebbloSafeLoader

file_path = os.path.join(EXAMPLE_DOCS_DIRECTORY, "test_empty.csv")
expected_docs: list = []

# Exercise
loader = PebbloSafeLoader(
CSVLoader(file_path=file_path),
"dummy_app_name", "dummy_owner","dummy_description"
)
"dummy_app_name",
"dummy_owner",
"dummy_description",
)
result = loader.load()

# Assert
assert result == expected_docs


def test_csv_loader_load_valid_data() -> None:
# Setup
from langchain_community.document_loaders import PebbloSafeLoader

file_path = os.path.join(EXAMPLE_DOCS_DIRECTORY, "test_nominal.csv")
expected_docs = [
Document(
Expand All @@ -45,37 +53,33 @@ def test_csv_loader_load_valid_data() -> None:
# Exercise
loader = PebbloSafeLoader(
CSVLoader(file_path=file_path),
"dummy_app_name", "dummy_owner","dummy_description"
)
"dummy_app_name",
"dummy_owner",
"dummy_description",
)
result = loader.load()

# Assert
assert result == expected_docs

def test_csv_lazy_load():
# Setup

def test_pdf_lazy_load():
# Setup
from langchain_community.document_loaders import PebbloSafeLoader
file_path = os.path.join(EXAMPLE_DOCS_DIRECTORY, "test_nominal.csv")
expected_docs = [
Document(
page_content="column1: value1\ncolumn2: value2\ncolumn3: value3",
metadata={"source": file_path, "row": 0},
),
Document(
page_content="column1: value4\ncolumn2: value5\ncolumn3: value6",
metadata={"source": file_path, "row": 1},
),
]

file_path = os.path.join(
EXAMPLE_DOCS_DIRECTORY, "multi-page-forms-sample-2-page.pdf"
)

# Exercise
loader = PebbloSafeLoader(
CSVLoader(file_path=file_path),
"dummy_app_name", "dummy_owner","dummy_description"
)
PyPDFLoader(file_path=file_path),
"dummy_app_name",
"dummy_owner",
"dummy_description",
)

result = []
for doc in loader.lazy_load():
result.extend(doc)
result = list(loader.lazy_load())

# Assert
assert result == expected_docs
assert len(result) == 2

0 comments on commit ad5c324

Please sign in to comment.