diff --git a/context_chat_backend/chain/ingest/doc_loader.py b/context_chat_backend/chain/ingest/doc_loader.py index 38ed438..a692f62 100644 --- a/context_chat_backend/chain/ingest/doc_loader.py +++ b/context_chat_backend/chain/ingest/doc_loader.py @@ -16,6 +16,7 @@ from odfdo import Document from pandas import read_csv, read_excel from pypdf import PdfReader +from pypdf.errors import FileNotDecryptedError as PdfFileNotDecryptedError from striprtf import striprtf logger = logging.getLogger('ccb.doc_loader') @@ -133,6 +134,9 @@ def decode_source(source: UploadFile) -> str | None: result = source.file.read().decode('utf-8', 'ignore') source.file.close() return result + except PdfFileNotDecryptedError: + logger.warning(f'PDF file ({source.filename}) is encrypted and cannot be read') + return None except Exception: logger.exception(f'Error decoding source file ({source.filename})', stack_info=True) return None diff --git a/requirements.txt b/requirements.txt index d11ecb6..d8cfaa2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ apscheduler chromadb +cryptography ctransformers epub2txt fastapi