Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Module for the DefaultConfluenceExtractor class."""

import logging
from langchain_community.document_loaders import ConfluenceLoader

from extractor_api_lib.impl.types.extractor_types import ExtractorTypes
Expand All @@ -10,6 +11,8 @@
ConfluenceLangchainDocument2InformationPiece,
)

logger = logging.getLogger(__name__)


class ConfluenceExtractor(InformationExtractor):
"""Implementation of the InformationExtractor interface for confluence."""
Expand Down Expand Up @@ -54,6 +57,13 @@ async def aextract_content(
# Build the keyword arguments for the loader: values made up only of digits are
# converted to int, every other value is passed through unchanged.
confluence_loader_parameters = {
    x.key: int(x.value) if x.value.isdigit() else x.value for x in extraction_parameters.kwargs
}
# Discard max_pages when it is missing, falsy (e.g. 0 or an empty string) or still a
# string after the numeric conversion above, so it is not forwarded in that state.
max_pages = confluence_loader_parameters.get("max_pages")
if not max_pages or isinstance(max_pages, str):
    logger.warning(
        "max_pages parameter is not set or invalid discarding it. ConfluenceLoader will use default value."
    )
    # Use a default here: the key is absent when the parameter was never supplied,
    # and a bare pop() would raise KeyError in exactly that case.
    confluence_loader_parameters.pop("max_pages", None)
# Drop the document_name parameter as it is not used by the ConfluenceLoader
confluence_loader_parameters.pop("document_name", None)
Expand Down