From 0dc16248c48d5d016bf5d313657dad84f6220b99 Mon Sep 17 00:00:00 2001 From: alcholiclg Date: Tue, 15 Jul 2025 17:59:06 +0800 Subject: [PATCH 1/2] abandon download.py --- ms_agent/utils/download.py | 37 ------------------------------------- 1 file changed, 37 deletions(-) delete mode 100644 ms_agent/utils/download.py diff --git a/ms_agent/utils/download.py b/ms_agent/utils/download.py deleted file mode 100644 index 602f1fff5..000000000 --- a/ms_agent/utils/download.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -Model download script for huggingface models. -This script downloads required models from Hugging Face for the application. -""" - -import logging -import os - -from huggingface_hub import snapshot_download - -# Configure logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') -logger = logging.getLogger(__name__) - -# Configuration -os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com' -REPO_IDS = ['ds4sd/DocumentFigureClassifier', 'ds4sd/docling-models'] - - -def download_models(): - """Download all required models from Hugging Face.""" - logger.info(f'Starting download of {len(REPO_IDS)} models') - - for repo_id in REPO_IDS: - logger.info(f'Downloading model: {repo_id}') - try: - model_path = snapshot_download(repo_id=repo_id) - logger.info(f'Successfully downloaded {repo_id} to {model_path}') - except Exception as e: - logger.error(f'Failed to download {repo_id}: {str(e)}') - raise - - -if __name__ == '__main__': - download_models() From 793dbeace3e87f6be258941fcd6f7d5b1262467d Mon Sep 17 00:00:00 2001 From: alcholiclg Date: Fri, 18 Jul 2025 20:15:36 +0800 Subject: [PATCH 2/2] fix bug: return type mismatch --- ms_agent/tools/docling/doc_loader.py | 4 ++-- ms_agent/utils/utils.py | 8 +++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/ms_agent/tools/docling/doc_loader.py b/ms_agent/tools/docling/doc_loader.py index 46a46efe3..6d8bad654 100644 --- a/ms_agent/tools/docling/doc_loader.py +++ b/ms_agent/tools/docling/doc_loader.py @@ -41,7 +41,7 @@ def html_handle_figure(self, element: Tag, doc: DoclingDocument) -> None: if img_url: if img_url.startswith('data:'): - img_pil, ext = load_image_from_uri_to_pil(img_url) + img_pil = load_image_from_uri_to_pil(img_url) else: if not img_url.startswith('http'): img_url = validate_url(img_url=img_url, backend=self) @@ -98,7 +98,7 @@ def html_handle_image(self, element: Tag, doc: DoclingDocument) -> None: if img_url: if img_url.startswith('data:'): - img_pil, ext = load_image_from_uri_to_pil(img_url) + img_pil = load_image_from_uri_to_pil(img_url) else: if not img_url.startswith('http'): img_url = validate_url(img_url=img_url, backend=self) diff --git a/ms_agent/utils/utils.py b/ms_agent/utils/utils.py index 3ba7116e7..6cd0598dc 100644 --- a/ms_agent/utils/utils.py +++ b/ms_agent/utils/utils.py @@ -334,7 +334,7 @@ def load_image_from_url_to_pil(url: str) -> 'Image.Image': return None -def load_image_from_uri_to_pil(uri: str) -> tuple: +def load_image_from_uri_to_pil(uri: str) -> 'Image.Image': """ Load image from URI as a PIL Image object and extract its format extension. URI format: data:[][;base64], @@ -352,10 +352,8 @@ def load_image_from_uri_to_pil(uri: str) -> tuple: raw = base64.b64decode(encoded) else: raw = encoded.encode('utf-8') - m = re.match(r'data:(image/[^;]+)', header) - ext = m.group(1).split('/')[-1] if m else 'bin' img = Image.open(BytesIO(raw)) - return img, ext + return img except ValueError as e: print(f'Error parsing URI format: {e}') return None @@ -363,7 +361,7 @@ def load_image_from_uri_to_pil(uri: str) -> tuple: print(f'Error decoding base64 data: {e}') return None except IOError as e: - print(f'Error opening image: {e}') + print(f'Error opening image with PIL: {e}') return None except Exception as e: print(f'Unexpected error loading image from URI: {e}')