From dad69b7960ad5005ae7d57c93e28ecda26d9760a Mon Sep 17 00:00:00 2001 From: Anupam Kumar Date: Wed, 15 Oct 2025 15:17:43 +0530 Subject: [PATCH] fix: ensure the decoded text from the document is utf-8 compat (#223) resolves #222 --------- Signed-off-by: Anupam Kumar --- .github/workflows/integration-test.yml | 2 +- context_chat_backend/chain/ingest/doc_loader.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index 2ca995b..a998906 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -61,7 +61,7 @@ jobs: # do not stop on another job's failure fail-fast: false matrix: - php-versions: [ '8.1' ] + php-versions: [ '8.2' ] databases: [ 'pgsql' ] server-versions: [ 'stable30', 'stable31' ] diff --git a/context_chat_backend/chain/ingest/doc_loader.py b/context_chat_backend/chain/ingest/doc_loader.py index a692f62..efb81b6 100644 --- a/context_chat_backend/chain/ingest/doc_loader.py +++ b/context_chat_backend/chain/ingest/doc_loader.py @@ -129,7 +129,7 @@ def decode_source(source: UploadFile) -> str | None: if _loader_map.get(mimetype): result = _loader_map[mimetype](source.file) source.file.close() - return result + return result.encode('utf-8', 'ignore').decode('utf-8', 'ignore') result = source.file.read().decode('utf-8', 'ignore') source.file.close()