From 16f5892ccffbeaa7150050925c9955fe7236e703 Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Tue, 2 Sep 2025 07:22:00 +0200 Subject: [PATCH 1/3] fix: remove unused max_pages parameter from confluence loader parameters --- .../impl/extractors/confluence_extractor.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/confluence_extractor.py b/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/confluence_extractor.py index 8694aa16..701daf48 100644 --- a/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/confluence_extractor.py +++ b/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/confluence_extractor.py @@ -54,6 +54,11 @@ async def aextract_content( confluence_loader_parameters = { x.key: int(x.value) if x.value.isdigit() else x.value for x in extraction_parameters.kwargs } + if ( + not confluence_loader_parameters.get("max_pages") + or isinstance(confluence_loader_parameters.get("max_pages"), str) + ): + confluence_loader_parameters.pop("max_pages") # Drop the document_name parameter as it is not used by the ConfluenceLoader if "document_name" in confluence_loader_parameters: confluence_loader_parameters.pop("document_name", None) From ac86c339c8fc3dbf4bc5b8b7ca1edae7ad895c63 Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Tue, 2 Sep 2025 07:26:23 +0200 Subject: [PATCH 2/3] refactor: black --- .../impl/extractors/confluence_extractor.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/confluence_extractor.py b/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/confluence_extractor.py index 701daf48..4a29f772 100644 --- a/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/confluence_extractor.py +++ b/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/confluence_extractor.py @@ -54,9 +54,8 @@ async def aextract_content( confluence_loader_parameters = { x.key: int(x.value) if x.value.isdigit() else x.value for x in extraction_parameters.kwargs } - if ( - not confluence_loader_parameters.get("max_pages") - or isinstance(confluence_loader_parameters.get("max_pages"), str) + if not confluence_loader_parameters.get("max_pages") or isinstance( + confluence_loader_parameters.get("max_pages"), str ): confluence_loader_parameters.pop("max_pages") # Drop the document_name parameter as it is not used by the ConfluenceLoader From 6646c359681ed3cf9a7bd5ab0839e3204119ea1e Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Tue, 2 Sep 2025 11:12:42 +0200 Subject: [PATCH 3/3] fix: add logging for invalid max_pages parameter in ConfluenceExtractor --- .../impl/extractors/confluence_extractor.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/confluence_extractor.py b/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/confluence_extractor.py index 4a29f772..0c025062 100644 --- a/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/confluence_extractor.py +++ b/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/confluence_extractor.py @@ -1,5 +1,6 @@ """Module for the DefaultConfluenceExtractor class.""" +import logging from langchain_community.document_loaders import ConfluenceLoader from extractor_api_lib.impl.types.extractor_types import ExtractorTypes @@ -10,6 +11,8 @@ ConfluenceLangchainDocument2InformationPiece, ) +logger = logging.getLogger(__name__) + class ConfluenceExtractor(InformationExtractor): """Implementation of the InformationExtractor interface for confluence.""" @@ -57,6 +60,9 @@ async def aextract_content( if not confluence_loader_parameters.get("max_pages") or isinstance( confluence_loader_parameters.get("max_pages"), str ): + logging.warning( + "max_pages parameter is not set or invalid discarding it. ConfluenceLoader will use default value." + ) confluence_loader_parameters.pop("max_pages") # Drop the document_name parameter as it is not used by the ConfluenceLoader if "document_name" in confluence_loader_parameters: