From 0a9b106bec01e6729d2badec53fc286edfec37e2 Mon Sep 17 00:00:00 2001 From: "ada.drx" Date: Tue, 30 Apr 2024 16:14:40 +0800 Subject: [PATCH 01/13] build llama-index-node-parser-relational-dashscope --- .../.gitignore | 153 ++++++++++++++++++ .../BUILD | 1 + .../Makefile | 17 ++ .../README.md | 1 + .../relational/dashscope/__init__.py | 4 + .../node_parser/relational/dashscope/base.py | 118 ++++++++++++++ .../pyproject.toml | 57 +++++++ .../tests/__init__.py | 0 .../tests/documents.json | 19 +++ .../test_node_parser_relational_dashscope.py | 26 +++ 10 files changed, 396 insertions(+) create mode 100644 llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/.gitignore create mode 100644 llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/BUILD create mode 100644 llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/Makefile create mode 100644 llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/README.md create mode 100644 llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/__init__.py create mode 100644 llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py create mode 100644 llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/pyproject.toml create mode 100644 llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/__init__.py create mode 100644 llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/documents.json create mode 100644 llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/test_node_parser_relational_dashscope.py diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/.gitignore b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/.gitignore new file mode 100644 index 0000000000000..990c18de22908 --- /dev/null +++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/.gitignore @@ -0,0 +1,153 @@ +llama_index/_static +.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +bin/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +etc/ +include/ +lib/ +lib64/ +parts/ +sdist/ +share/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +.ruff_cache + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints +notebooks/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pyvenv.cfg + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Jetbrains +.idea +modules/ +*.swp + +# VsCode +.vscode + +# pipenv +Pipfile +Pipfile.lock + +# pyright +pyrightconfig.json diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/BUILD b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/BUILD new file mode 100644 index 0000000000000..db46e8d6c978c --- /dev/null +++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/Makefile b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/Makefile new file mode 100644 index 0000000000000..b9eab05aa3706 --- /dev/null +++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/Makefile @@ -0,0 +1,17 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets. + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +format: ## Run code autoformatters (black). + pre-commit install + git ls-files | xargs pre-commit run black --files + +lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy + pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files + +test: ## Run tests via pytest. + pytest tests + +watch-docs: ## Build and watch documentation. 
+ sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/README.md b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/README.md new file mode 100644 index 0000000000000..0eb24597f9436 --- /dev/null +++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/README.md @@ -0,0 +1 @@ +# LlamaIndex Node_Parser-File Integration: Dashscope diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/__init__.py b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/__init__.py new file mode 100644 index 0000000000000..dff0bac5358df --- /dev/null +++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/__init__.py @@ -0,0 +1,4 @@ +from llama_index.node_parser.relational.dashscope.base import DashScopeJsonNodeParser + + +__all__ = ["DashScopeJsonNodeParser"] diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py new file mode 100644 index 0000000000000..0ae3d6d8e8df0 --- /dev/null +++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py @@ -0,0 +1,118 @@ +from typing import Any, Callable, List, Optional, Dict +import logging +import requests +import os +import json + +from llama_index.core.bridge.pydantic import Field +from llama_index.core.node_parser.relational.base_element import BaseElementNodeParser, Element +from llama_index.core.schema import BaseNode, TextNode + + +class DashScopeJsonNodeParser(BaseElementNodeParser): + """DashScope Json format element node parser. + + Splits a json format document from DashScope Parse into Text Nodes and Index Nodes + corresponding to embedded objects (e.g. tables). + """ + service_url: str = Field( + default="https://int-dashscope.aliyuncs.com/api/v1/indeces/component/configed_transformations/spliter", + description="URL of the service endpoint." + ) + try_count_limit: int = Field( + default=10, + description="Maximum number of retry attempts." + ) + chunk_size: int = Field( + default=500, + description="Size of each chunk to process." + ) + overlap_size: int = Field( + default=100, + description="Overlap size between consecutive chunks." + ) + separator: str = Field( + default=" |,|,|。|?|!|\n|\?|\!", + description="Separator characters for splitting texts." + ) + pip: bool = Field( + default=False, + description="Flag to enable or disable PIP." + ) + input_type: str = Field( + default="idp", + description="parse format type." 
+ ) + + @classmethod + def class_name(cls) -> str: + return "DashScopeJsonNodeParser" + + def get_nodes_from_node(self, node: TextNode) -> List[BaseNode]: + """Get nodes from node.""" + ftype = node.metadata.get('parse_fmt_type', self.input_type) + assert ftype in ['DASHCOPE_DOCMIND', 'idp'], f"Unexpected parse_fmt_type: {node.metadata.get('parse_fmt_type', '')}" + + ftype_map = { + "DASHCOPE_DOCMIND": "idp", + } + my_input = dict() + my_input["text"] = node.get_content() + my_input["file_type"] = ftype_map.get(ftype, ftype) + my_input["chunk_size"] = self.chunk_size + my_input["overlap_size"] = self.overlap_size + my_input["language"] = "cn" + my_input["separator"] = self.separator + my_input["pip"] = self.pip + + try_count = 0 + response_text = self.post_service(my_input) + while response_text is None and try_count < self.try_count_limit: + try_count += 1 + response_text = self.post_service(my_input) + if response_text is None: + logging.error("DashScopeJsonNodeParser Failed to get response from service") + return [] + + return self.parse_result(response_text, node) + + def post_service(self, my_input): + DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY", None) + if DASHSCOPE_API_KEY is None: + logging.error("DASHSCOPE_API_KEY is not set") + raise ValueError("DASHSCOPE_API_KEY is not set") + headers = { + "Content-Type": "application/json", + "Accept-Encoding": "utf-8", + 'Authorization': 'Bearer ' + DASHSCOPE_API_KEY, + } + try: + response = requests.post(self.service_url, data=json.dumps(my_input), headers=headers) + response_text = response.json() + if 'chunkService' in response_text: + return response_text['chunkService']['chunkResult'] + else: + logging.error(f"{response_text}, try again.") + return None + except Exception as e: + logging.error(f"{e}, try again.") + return None + + def parse_result(self, content_json, document): + nodes = [] + for data in content_json: + text = '\n'.join([data['title'], data.get('hier_title', ''), data['content']]) + nodes.append(TextNode(metadata=document.metadata, text=text, excluded_embed_metadata_keys=document.excluded_embed_metadata_keys, \ + excluded_llm_metadata_keys=document.excluded_llm_metadata_keys)) + return nodes + + def extract_elements( + self, + text: str, + mode: Optional[str] = "json", + node_id: Optional[str] = None, + node_metadata: Optional[Dict[str, Any]] = None, + table_filters: Optional[List[Callable]] = None, + **kwargs: Any, + ) -> List[Element]: + return [] diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/pyproject.toml b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/pyproject.toml new file mode 100644 index 0000000000000..dbc5a109a5468 --- /dev/null +++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/pyproject.toml @@ -0,0 +1,57 @@ +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" + +[tool.codespell] +check-filenames = true +check-hidden = true +# Feel free to un-skip examples, and experimental, you will just need to +# work through many typos (--write-changes and --interactive will help) +skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" + +# [tool.llamahub] +# contains_example = false +# import_path = "" + +# [tool.llamahub.class_authors] +# CLASS = "github-username" + +[tool.mypy] +disallow_untyped_defs = true +# Remove venv skip when integrated with pre-commit +exclude = ["_static", "build", "examples", "notebooks", 
"venv"] +ignore_missing_imports = true +python_version = "3.8" + +[tool.poetry] +name = "llama-index-node-parser-relational-dashscope" +version = "0.1.0" +description = "llama-index node_parser relational dashscope integration" +authors = ["Ruixue Ding "] +license = "MIT" +readme = "README.md" +packages = [{include = "llama_index/"}] + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +llama-index-core = "^0.10.0" +requests = "*" + +[tool.poetry.group.dev.dependencies] +black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"} +codespell = {extras = ["toml"], version = ">=v2.2.6"} +ipython = "8.10.0" +jupyter = "^1.0.0" +mypy = "0.991" +pre-commit = "3.2.0" +pylint = "2.15.10" +pytest = "7.2.1" +pytest-mock = "3.11.1" +ruff = "0.0.292" +tree-sitter-languages = "^1.8.0" +types-Deprecated = ">=0.1.0" +types-PyYAML = "^6.0.12.12" +types-protobuf = "^4.24.0.4" +types-redis = "4.5.5.0" +types-requests = "2.28.11.8" # TODO: unpin when mypy>0.991 +types-setuptools = "67.1.0.0" diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/__init__.py b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/documents.json b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/documents.json new file mode 100644 index 0000000000000..d4aae20f96609 --- /dev/null +++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/documents.json @@ -0,0 +1,19 @@ +[ + { + "id_": "09d7dc73-38d3-4c4a-8eee-7f3c87db76f3", + "embedding": null, + "metadata": { + "parse_fmt_type": "DASHCOPE_DOCMIND" + }, + "excluded_embed_metadata_keys": [], + "excluded_llm_metadata_keys": [], + "relationships": {}, + "text": "[{\"firstLinesChars\": 0, \"pos\": [{\"x\": 205, \"y\": 169}, {\"x\": 690, \"y\": 169}, {\"x\": 690, \"y\": 194}, {\"x\": 205, \"y\": 194}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 170}, {\"x\": 223, \"y\": 170}, {\"x\": 223, \"y\": 194}, {\"x\": 205, \"y\": 194}], \"styleId\": 0, \"text\": \"1.\"}, {\"pos\": [{\"x\": 223, \"y\": 170}, {\"x\": 701, \"y\": 170}, {\"x\": 701, \"y\": 194}, {\"x\": 223, \"y\": 194}], \"styleId\": 1, \"text\": \"农担公司主管部门是谁?是否属于越级报批?\"}], \"markdownContent\": null, \"index\": 0, \"subType\": \"para\", \"lineHeight\": 24, \"text\": \"1.农担公司主管部门是谁?是否属于越级报批?\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"d177b69ab3b7056bda42fac3f788ec4c\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 206, \"y\": 203}, {\"x\": 1150, \"y\": 203}, {\"x\": 1150, \"y\": 338}, {\"x\": 205, \"y\": 338}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 206}, {\"x\": 300, \"y\": 206}, {\"x\": 300, \"y\": 230}, {\"x\": 205, \"y\": 230}], \"styleId\": 2, \"text\": \"根据财农\"}, {\"pos\": [{\"x\": 300, \"y\": 206}, {\"x\": 388, \"y\": 206}, {\"x\": 388, \"y\": 230}, {\"x\": 300, \"y\": 230}], \"styleId\": 3, \"text\": \"[2017]40\"}, {\"pos\": [{\"x\": 397, \"y\": 206}, {\"x\": 1151, \"y\": 206}, {\"x\": 1151, \"y\": 230}, {\"x\": 397, \"y\": 230}], \"styleId\": 2, \"text\": \"号文,“农担公司由财政部门会同农业部门共同负责,财政部门履行出资\"}, {\"pos\": [{\"x\": 205, \"y\": 241}, {\"x\": 1102, \"y\": 241}, {\"x\": 1102, \"y\": 265}, {\"x\": 205, \"y\": 265}], \"styleId\": 2, \"text\": \"人职责,对担保机构的担保费用补助和业务奖补资金进行核定,制定财政支持政策”。\"}, {\"pos\": [{\"x\": 205, 
\"y\": 277}, {\"x\": 1151, \"y\": 277}, {\"x\": 1151, \"y\": 301}, {\"x\": 205, \"y\": 301}], \"styleId\": 2, \"text\": \"市财政局履行出资人职责,将注册资本金增加至金圆集团,由金圆集团将注册资本金增加至\"}, {\"pos\": [{\"x\": 205, \"y\": 312}, {\"x\": 325, \"y\": 312}, {\"x\": 325, \"y\": 336}, {\"x\": 205, \"y\": 336}], \"styleId\": 2, \"text\": \"农担公司。\"}], \"markdownContent\": null, \"index\": 1, \"subType\": \"none\", \"lineHeight\": 11, \"text\": \"根据财农[2017]40号文,“农担公司由财政部门会同农业部门共同负责,财政部门履行出资人职责,对担保机构的担保费用补助和业务奖补资金进行核定,制定财政支持政策”。市财政局履行出资人职责,将注册资本金增加至金圆集团,由金圆集团将注册资本金增加至农担公司。\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"583087a175c40b3006536e851aef0a5d\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 204, \"y\": 347}, {\"x\": 1148, \"y\": 347}, {\"x\": 1148, \"y\": 407}, {\"x\": 204, \"y\": 407}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 348}, {\"x\": 223, \"y\": 348}, {\"x\": 223, \"y\": 372}, {\"x\": 205, \"y\": 372}], \"styleId\": 0, \"text\": \"2.\"}, {\"pos\": [{\"x\": 223, \"y\": 348}, {\"x\": 1151, \"y\": 348}, {\"x\": 1151, \"y\": 372}, {\"x\": 223, \"y\": 372}], \"styleId\": 1, \"text\": \"农担公司运作模式是什么样的?有相应制度规定吗?为什么要和商业银行建立授信,有什\"}, {\"pos\": [{\"x\": 205, \"y\": 384}, {\"x\": 300, \"y\": 384}, {\"x\": 300, \"y\": 408}, {\"x\": 205, \"y\": 408}], \"styleId\": 1, \"text\": \"么好处?\"}], \"markdownContent\": null, \"index\": 2, \"subType\": \"para\", \"lineHeight\": 12, \"text\": \"2.农担公司运作模式是什么样的?有相应制度规定吗?为什么要和商业银行建立授信,有什么好处?\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"caf36688120570149da16edbb673bb1a\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 203, \"y\": 417}, {\"x\": 1151, \"y\": 418}, {\"x\": 1152, \"y\": 656}, {\"x\": 203, \"y\": 656}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 419}, {\"x\": 1052, \"y\": 419}, {\"x\": 1052, \"y\": 443}, {\"x\": 205, \"y\": 443}], \"styleId\": 2, \"text\": \"上级制度主要有《关于财政支持建立农业信贷担保体系的指导意见》的通知(财农\"}, {\"pos\": [{\"x\": 1052, \"y\": 419}, {\"x\": 1151, \"y\": 419}, {\"x\": 1151, \"y\": 443}, {\"x\": 1052, \"y\": 443}], \"styleId\": 3, \"text\": \"[2015]121\"}, {\"pos\": [{\"x\": 205, \"y\": 455}, {\"x\": 749, \"y\": 455}, {\"x\": 749, \"y\": 479}, {\"x\": 205, \"y\": 479}], \"styleId\": 2, \"text\": \"号)、(关于做好全国农业信贷担保工作的通知)财农\"}, {\"pos\": [{\"x\": 749, \"y\": 455}, {\"x\": 836, \"y\": 455}, {\"x\": 836, \"y\": 479}, {\"x\": 749, \"y\": 479}], \"styleId\": 3, \"text\": \"[2017]40\"}, {\"pos\": [{\"x\": 843, \"y\": 455}, {\"x\": 1151, \"y\": 455}, {\"x\": 1151, \"y\": 479}, {\"x\": 843, \"y\": 479}], \"styleId\": 2, \"text\": \"号。农业信贷担保体系建设是\"}, {\"pos\": [{\"x\": 205, \"y\": 490}, {\"x\": 1151, \"y\": 490}, {\"x\": 1151, \"y\": 514}, {\"x\": 205, \"y\": 514}], \"styleId\": 2, \"text\": \"中央的要求,也是解决农业“融资贵”“融资难”问题的重要手段。目前农业经营主体由于\"}, {\"pos\": [{\"x\": 205, \"y\": 526}, {\"x\": 1163, \"y\": 526}, {\"x\": 1163, \"y\": 550}, {\"x\": 205, \"y\": 550}], \"styleId\": 2, \"text\": \"普遍缺乏符合银行贷款条件的抵押,通过提供农业信贷政策性担保,为农业经营主体增信,\"}, {\"pos\": [{\"x\": 205, \"y\": 561}, {\"x\": 1151, \"y\": 561}, {\"x\": 1151, \"y\": 585}, {\"x\": 205, \"y\": 585}], \"styleId\": 2, \"text\": \"实现免抵押担保贷款,解决农业经营主体的贷款需求。按照我市规定,融资性担保机构的注\"}, {\"pos\": [{\"x\": 205, \"y\": 597}, {\"x\": 468, \"y\": 597}, {\"x\": 468, \"y\": 621}, {\"x\": 205, \"y\": 621}], \"styleId\": 2, \"text\": \"册资本金原则上应不低于\"}, {\"pos\": [{\"x\": 468, \"y\": 597}, {\"x\": 470, \"y\": 597}, {\"x\": 470, \"y\": 621}, {\"x\": 468, \"y\": 621}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 474, \"y\": 597}, {\"x\": 486, \"y\": 597}, {\"x\": 486, \"y\": 621}, {\"x\": 
474, \"y\": 621}], \"styleId\": 3, \"text\": \"1\"}, {\"pos\": [{\"x\": 492, \"y\": 597}, {\"x\": 843, \"y\": 597}, {\"x\": 843, \"y\": 621}, {\"x\": 492, \"y\": 621}], \"styleId\": 2, \"text\": \"亿元,经与市金融监管机构协商,\"}, {\"pos\": [{\"x\": 834, \"y\": 597}, {\"x\": 883, \"y\": 597}, {\"x\": 883, \"y\": 621}, {\"x\": 834, \"y\": 621}], \"styleId\": 3, \"text\": \"2017\"}, {\"pos\": [{\"x\": 889, \"y\": 597}, {\"x\": 1096, \"y\": 597}, {\"x\": 1096, \"y\": 621}, {\"x\": 889, \"y\": 621}], \"styleId\": 2, \"text\": \"年底,同意先行按照\"}, {\"pos\": [{\"x\": 1096, \"y\": 597}, {\"x\": 1098, \"y\": 597}, {\"x\": 1098, \"y\": 621}, {\"x\": 1096, \"y\": 621}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 1102, \"y\": 597}, {\"x\": 1150, \"y\": 597}, {\"x\": 1150, \"y\": 621}, {\"x\": 1102, \"y\": 621}], \"styleId\": 3, \"text\": \"4000\"}, {\"pos\": [{\"x\": 205, \"y\": 632}, {\"x\": 899, \"y\": 632}, {\"x\": 899, \"y\": 656}, {\"x\": 205, \"y\": 656}], \"styleId\": 2, \"text\": \"万元进行注册,但资本金不足的问题,使得银行机构积极性不高。\"}], \"markdownContent\": null, \"index\": 3, \"subType\": \"para\", \"lineHeight\": 11, \"text\": \"上级制度主要有《关于财政支持建立农业信贷担保体系的指导意见》的通知(财农[2015]121号)、(关于做好全国农业信贷担保工作的通知)财农[2017]40号。农业信贷担保体系建设是中央的要求,也是解决农业“融资贵”“融资难”问题的重要手段。目前农业经营主体由于普遍缺乏符合银行贷款条件的抵押,通过提供农业信贷政策性担保,为农业经营主体增信,实现免抵押担保贷款,解决农业经营主体的贷款需求。按照我市规定,融资性担保机构的注册资本金原则上应不低于 1亿元,经与市金融监管机构协商,2017年底,同意先行按照 4000万元进行注册,但资本金不足的问题,使得银行机构积极性不高。\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"db07d431b8130eca6e9d2da36dbff3b3\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 204, \"y\": 666}, {\"x\": 689, \"y\": 666}, {\"x\": 690, \"y\": 691}, {\"x\": 204, \"y\": 690}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 668}, {\"x\": 223, \"y\": 668}, {\"x\": 223, \"y\": 692}, {\"x\": 205, \"y\": 692}], \"styleId\": 0, \"text\": \"3.\"}, {\"pos\": [{\"x\": 223, \"y\": 668}, {\"x\": 701, \"y\": 668}, {\"x\": 701, \"y\": 692}, {\"x\": 223, \"y\": 692}], \"styleId\": 1, \"text\": \"增资事项属我局业务?是否涉及到其他部门?\"}], \"markdownContent\": null, \"index\": 4, \"subType\": \"para\", \"lineHeight\": 24, \"text\": \"3.增资事项属我局业务?是否涉及到其他部门?\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"951d0aa6aab75045560700982c2fbe4d\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 205, \"y\": 705}, {\"x\": 246, \"y\": 705}, {\"x\": 246, \"y\": 727}, {\"x\": 205, \"y\": 727}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 703}, {\"x\": 229, \"y\": 703}, {\"x\": 229, \"y\": 727}, {\"x\": 205, \"y\": 727}], \"styleId\": 2, \"text\": \"见\"}, {\"pos\": [{\"x\": 229, \"y\": 703}, {\"x\": 231, \"y\": 703}, {\"x\": 231, \"y\": 727}, {\"x\": 229, \"y\": 727}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 235, \"y\": 703}, {\"x\": 247, \"y\": 703}, {\"x\": 247, \"y\": 727}, {\"x\": 235, \"y\": 727}], \"styleId\": 3, \"text\": \"1\"}], \"markdownContent\": null, \"index\": 5, \"subType\": \"para\", \"lineHeight\": 24, \"text\": \"见 1\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"ff81644ea45f281d48f22210539d56f1\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 205, \"y\": 738}, {\"x\": 546, \"y\": 738}, {\"x\": 546, \"y\": 762}, {\"x\": 204, \"y\": 762}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 739}, {\"x\": 223, \"y\": 739}, {\"x\": 223, \"y\": 763}, {\"x\": 205, \"y\": 763}], \"styleId\": 0, \"text\": \"4.\"}, {\"pos\": [{\"x\": 223, \"y\": 739}, {\"x\": 558, \"y\": 739}, {\"x\": 558, \"y\": 763}, {\"x\": 223, \"y\": 763}], \"styleId\": 1, \"text\": \"担保补贴资金没有主管部门吗?\"}], \"markdownContent\": null, 
\"index\": 6, \"subType\": \"para\", \"lineHeight\": 24, \"text\": \"4.担保补贴资金没有主管部门吗?\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"48ed85e2366552ee3248f4065436a73c\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 207, \"y\": 774}, {\"x\": 245, \"y\": 774}, {\"x\": 245, \"y\": 797}, {\"x\": 207, \"y\": 797}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 774}, {\"x\": 229, \"y\": 774}, {\"x\": 229, \"y\": 798}, {\"x\": 205, \"y\": 798}], \"styleId\": 2, \"text\": \"见\"}, {\"pos\": [{\"x\": 229, \"y\": 774}, {\"x\": 231, \"y\": 774}, {\"x\": 231, \"y\": 798}, {\"x\": 229, \"y\": 798}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 235, \"y\": 774}, {\"x\": 247, \"y\": 774}, {\"x\": 247, \"y\": 798}, {\"x\": 235, \"y\": 798}], \"styleId\": 3, \"text\": \"1\"}], \"markdownContent\": null, \"index\": 7, \"subType\": \"para\", \"lineHeight\": 24, \"text\": \"见 1\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"33e6bd26c7155f964144ece82d59137b\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 202, \"y\": 809}, {\"x\": 952, \"y\": 809}, {\"x\": 952, \"y\": 832}, {\"x\": 202, \"y\": 833}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 810}, {\"x\": 223, \"y\": 810}, {\"x\": 223, \"y\": 834}, {\"x\": 205, \"y\": 834}], \"styleId\": 0, \"text\": \"5.\"}, {\"pos\": [{\"x\": 223, \"y\": 810}, {\"x\": 965, \"y\": 810}, {\"x\": 965, \"y\": 834}, {\"x\": 223, \"y\": 834}], \"styleId\": 1, \"text\": \"担保补贴资金有由谁来审核?提出意见时由谁来审?由我局直接拨付?\"}], \"markdownContent\": null, \"index\": 8, \"subType\": \"para\", \"lineHeight\": 24, \"text\": \"5.担保补贴资金有由谁来审核?提出意见时由谁来审?由我局直接拨付?\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"036c7e1bedf395e97173314545f0d975\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 204, \"y\": 847}, {\"x\": 247, \"y\": 846}, {\"x\": 248, \"y\": 869}, {\"x\": 205, \"y\": 869}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 846}, {\"x\": 229, \"y\": 846}, {\"x\": 229, \"y\": 870}, {\"x\": 205, \"y\": 870}], \"styleId\": 2, \"text\": \"见\"}, {\"pos\": [{\"x\": 229, \"y\": 846}, {\"x\": 231, \"y\": 846}, {\"x\": 231, \"y\": 870}, {\"x\": 229, \"y\": 870}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 235, \"y\": 846}, {\"x\": 247, \"y\": 846}, {\"x\": 247, \"y\": 870}, {\"x\": 235, \"y\": 870}], \"styleId\": 3, \"text\": \"1\"}], \"markdownContent\": null, \"index\": 9, \"subType\": \"para\", \"lineHeight\": 24, \"text\": \"见 1\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"e8f7f8d29591785b8f17c30e68ae8358\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 204, \"y\": 879}, {\"x\": 837, \"y\": 879}, {\"x\": 837, \"y\": 904}, {\"x\": 204, \"y\": 904}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 881}, {\"x\": 271, \"y\": 881}, {\"x\": 271, \"y\": 905}, {\"x\": 205, \"y\": 905}], \"styleId\": 0, \"text\": \"6.2019\"}, {\"pos\": [{\"x\": 277, \"y\": 881}, {\"x\": 756, \"y\": 881}, {\"x\": 756, \"y\": 905}, {\"x\": 277, \"y\": 905}], \"styleId\": 1, \"text\": \"年目前在保余额多少,增资后能否达到规定的\"}, {\"pos\": [{\"x\": 756, \"y\": 881}, {\"x\": 774, \"y\": 881}, {\"x\": 774, \"y\": 905}, {\"x\": 756, \"y\": 905}], \"styleId\": 0, \"text\": \" 3\"}, {\"pos\": [{\"x\": 780, \"y\": 881}, {\"x\": 852, \"y\": 881}, {\"x\": 852, \"y\": 905}, {\"x\": 780, \"y\": 905}], \"styleId\": 1, \"text\": \"亿元。\"}], \"markdownContent\": null, \"index\": 10, \"subType\": \"para\", \"lineHeight\": 24, \"text\": \"6.2019年目前在保余额多少,增资后能否达到规定的 3亿元。\", \"alignment\": \"left\", \"type\": \"text\", 
\"pageNum\": [0], \"uniqueId\": \"296c08f06de425ac67f0be3c43f05c96\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 203, \"y\": 921}, {\"x\": 1151, \"y\": 920}, {\"x\": 1151, \"y\": 1040}, {\"x\": 203, \"y\": 1040}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 917}, {\"x\": 372, \"y\": 917}, {\"x\": 372, \"y\": 941}, {\"x\": 205, \"y\": 941}], \"styleId\": 2, \"text\": \"目前在保余额约\"}, {\"pos\": [{\"x\": 372, \"y\": 917}, {\"x\": 374, \"y\": 917}, {\"x\": 374, \"y\": 941}, {\"x\": 372, \"y\": 941}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 378, \"y\": 917}, {\"x\": 426, \"y\": 917}, {\"x\": 426, \"y\": 941}, {\"x\": 378, \"y\": 941}], \"styleId\": 3, \"text\": \"7200\"}, {\"pos\": [{\"x\": 433, \"y\": 917}, {\"x\": 526, \"y\": 917}, {\"x\": 526, \"y\": 941}, {\"x\": 433, \"y\": 941}], \"styleId\": 2, \"text\": \"万元,比\"}, {\"pos\": [{\"x\": 526, \"y\": 917}, {\"x\": 528, \"y\": 917}, {\"x\": 528, \"y\": 941}, {\"x\": 526, \"y\": 941}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 532, \"y\": 917}, {\"x\": 580, \"y\": 917}, {\"x\": 580, \"y\": 941}, {\"x\": 532, \"y\": 941}], \"styleId\": 3, \"text\": \"2018\"}, {\"pos\": [{\"x\": 586, \"y\": 917}, {\"x\": 610, \"y\": 917}, {\"x\": 610, \"y\": 941}, {\"x\": 586, \"y\": 941}], \"styleId\": 2, \"text\": \"年\"}, {\"pos\": [{\"x\": 610, \"y\": 917}, {\"x\": 612, \"y\": 917}, {\"x\": 612, \"y\": 941}, {\"x\": 610, \"y\": 941}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 616, \"y\": 917}, {\"x\": 664, \"y\": 917}, {\"x\": 664, \"y\": 941}, {\"x\": 616, \"y\": 941}], \"styleId\": 3, \"text\": \"9835\"}, {\"pos\": [{\"x\": 670, \"y\": 917}, {\"x\": 742, \"y\": 917}, {\"x\": 742, \"y\": 941}, {\"x\": 670, \"y\": 941}], \"styleId\": 2, \"text\": \"万元、\"}, {\"pos\": [{\"x\": 740, \"y\": 917}, {\"x\": 788, \"y\": 917}, {\"x\": 788, \"y\": 941}, {\"x\": 740, \"y\": 941}], \"styleId\": 3, \"text\": \"2017\"}, {\"pos\": [{\"x\": 794, \"y\": 917}, {\"x\": 818, \"y\": 917}, {\"x\": 818, \"y\": 941}, {\"x\": 794, \"y\": 941}], \"styleId\": 2, \"text\": \"年\"}, {\"pos\": [{\"x\": 818, \"y\": 917}, {\"x\": 820, \"y\": 917}, {\"x\": 820, \"y\": 941}, {\"x\": 818, \"y\": 941}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 824, \"y\": 917}, {\"x\": 884, \"y\": 917}, {\"x\": 884, \"y\": 941}, {\"x\": 824, \"y\": 941}], \"styleId\": 3, \"text\": \"14530\"}, {\"pos\": [{\"x\": 890, \"y\": 917}, {\"x\": 1151, \"y\": 917}, {\"x\": 1151, \"y\": 941}, {\"x\": 890, \"y\": 941}], \"styleId\": 2, \"text\": \"万元均出现下滑,主要原\"}, {\"pos\": [{\"x\": 205, \"y\": 952}, {\"x\": 348, \"y\": 952}, {\"x\": 348, \"y\": 976}, {\"x\": 205, \"y\": 976}], \"styleId\": 2, \"text\": \"因:一是财农\"}, {\"pos\": [{\"x\": 348, \"y\": 952}, {\"x\": 435, \"y\": 952}, {\"x\": 435, \"y\": 976}, {\"x\": 348, \"y\": 976}], \"styleId\": 3, \"text\": \"[2017]40\"}, {\"pos\": [{\"x\": 444, \"y\": 952}, {\"x\": 923, \"y\": 952}, {\"x\": 923, \"y\": 976}, {\"x\": 444, \"y\": 976}], \"styleId\": 2, \"text\": \"号文印发后,要求各地农担公司应专注服务于\"}, {\"pos\": [{\"x\": 923, \"y\": 952}, {\"x\": 925, \"y\": 952}, {\"x\": 925, \"y\": 976}, {\"x\": 923, \"y\": 976}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 931, \"y\": 952}, {\"x\": 999, \"y\": 952}, {\"x\": 999, \"y\": 976}, {\"x\": 931, \"y\": 976}], \"styleId\": 3, \"text\": \"10-200\"}, {\"pos\": [{\"x\": 1007, \"y\": 952}, {\"x\": 1151, \"y\": 952}, {\"x\": 1151, \"y\": 976}, {\"x\": 1007, \"y\": 976}], \"styleId\": 2, \"text\": \"万元的适度规\"}, {\"pos\": [{\"x\": 205, \"y\": 988}, {\"x\": 480, \"y\": 988}, {\"x\": 480, \"y\": 1012}, 
{\"x\": 205, \"y\": 1012}], \"styleId\": 2, \"text\": \"模经营主体,比例不得低于\"}, {\"pos\": [{\"x\": 480, \"y\": 988}, {\"x\": 482, \"y\": 988}, {\"x\": 482, \"y\": 1012}, {\"x\": 480, \"y\": 1012}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 486, \"y\": 988}, {\"x\": 528, \"y\": 988}, {\"x\": 528, \"y\": 1012}, {\"x\": 486, \"y\": 1012}], \"styleId\": 3, \"text\": \"70%\"}, {\"pos\": [{\"x\": 528, \"y\": 988}, {\"x\": 1151, \"y\": 988}, {\"x\": 1151, \"y\": 1012}, {\"x\": 528, \"y\": 1012}], \"styleId\": 2, \"text\": \",减少了原贷款金额较大的担保对象;二是注册资本金不高,\"}, {\"pos\": [{\"x\": 205, \"y\": 1023}, {\"x\": 947, \"y\": 1023}, {\"x\": 947, \"y\": 1047}, {\"x\": 205, \"y\": 1047}], \"styleId\": 2, \"text\": \"银行机构合作推进较为缓慢;三是农担公司的业务拓展能力还需提升。\"}], \"markdownContent\": null, \"index\": 11, \"subType\": \"para\", \"lineHeight\": 11, \"text\": \"目前在保余额约 7200万元,比 2018年 9835万元、2017年 14530万元均出现下滑,主要原因:一是财农[2017]40号文印发后,要求各地农担公司应专注服务于 10-200万元的适度规模经营主体,比例不得低于 70%,减少了原贷款金额较大的担保对象;二是注册资本金不高,银行机构合作推进较为缓慢;三是农担公司的业务拓展能力还需提升。\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"d8e4d08381e1c52b6d31b250d39eba35\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 202, \"y\": 1057}, {\"x\": 1151, \"y\": 1058}, {\"x\": 1152, \"y\": 1118}, {\"x\": 202, \"y\": 1117}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 1059}, {\"x\": 1151, \"y\": 1059}, {\"x\": 1151, \"y\": 1083}, {\"x\": 205, \"y\": 1083}], \"styleId\": 2, \"text\": \"最近拟修订农担公司实施细则,拟将农担公司的在保余额与业务奖补资金进行挂钩,促进农\"}, {\"pos\": [{\"x\": 205, \"y\": 1094}, {\"x\": 444, \"y\": 1094}, {\"x\": 444, \"y\": 1118}, {\"x\": 205, \"y\": 1118}], \"styleId\": 2, \"text\": \"担公司加快业务拓展。\"}], \"markdownContent\": null, \"index\": 12, \"subType\": \"para\", \"lineHeight\": 11, \"text\": \"最近拟修订农担公司实施细则,拟将农担公司的在保余额与业务奖补资金进行挂钩,促进农担公司加快业务拓展。\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"ed77a21ce5c514110df54e1b492074eb\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 203, \"y\": 1129}, {\"x\": 475, \"y\": 1129}, {\"x\": 475, \"y\": 1153}, {\"x\": 203, \"y\": 1153}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 1130}, {\"x\": 223, \"y\": 1130}, {\"x\": 223, \"y\": 1154}, {\"x\": 205, \"y\": 1154}], \"styleId\": 0, \"text\": \"7.\"}, {\"pos\": [{\"x\": 223, \"y\": 1130}, {\"x\": 486, \"y\": 1130}, {\"x\": 486, \"y\": 1154}, {\"x\": 223, \"y\": 1154}], \"styleId\": 1, \"text\": \"为什么要分成两份来报?\"}], \"markdownContent\": null, \"index\": 13, \"subType\": \"para\", \"lineHeight\": 24, \"text\": \"7.为什么要分成两份来报?\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"a425a1570111663a5939db031ec727ca\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 202, \"y\": 1199}, {\"x\": 414, \"y\": 1199}, {\"x\": 414, \"y\": 1225}, {\"x\": 202, \"y\": 1225}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 1201}, {\"x\": 223, \"y\": 1201}, {\"x\": 223, \"y\": 1225}, {\"x\": 205, \"y\": 1225}], \"styleId\": 0, \"text\": \"8.\"}, {\"pos\": [{\"x\": 223, \"y\": 1201}, {\"x\": 414, \"y\": 1201}, {\"x\": 414, \"y\": 1225}, {\"x\": 223, \"y\": 1225}], \"styleId\": 1, \"text\": \"年初预留资金情况\"}], \"markdownContent\": null, \"index\": 14, \"subType\": \"none\", \"lineHeight\": 24, \"text\": \"8.年初预留资金情况\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"7e5d06c6fad4faa171674017c2b99ced\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 203, \"y\": 1271}, {\"x\": 403, \"y\": 1270}, {\"x\": 403, \"y\": 1295}, {\"x\": 203, \"y\": 1296}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 1272}, {\"x\": 223, \"y\": 1272}, 
{\"x\": 223, \"y\": 1296}, {\"x\": 205, \"y\": 1296}], \"styleId\": 0, \"text\": \"9.\"}, {\"pos\": [{\"x\": 223, \"y\": 1272}, {\"x\": 414, \"y\": 1272}, {\"x\": 414, \"y\": 1296}, {\"x\": 223, \"y\": 1296}], \"styleId\": 1, \"text\": \"目前代偿率多高?\"}], \"markdownContent\": null, \"index\": 15, \"subType\": \"none\", \"lineHeight\": 24, \"text\": \"9.目前代偿率多高?\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"02d60f5198fa19334e555fa528463064\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 203, \"y\": 1306}, {\"x\": 808, \"y\": 1306}, {\"x\": 808, \"y\": 1332}, {\"x\": 203, \"y\": 1332}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 1308}, {\"x\": 253, \"y\": 1308}, {\"x\": 253, \"y\": 1332}, {\"x\": 205, \"y\": 1332}], \"styleId\": 3, \"text\": \"2018\"}, {\"pos\": [{\"x\": 259, \"y\": 1308}, {\"x\": 546, \"y\": 1308}, {\"x\": 546, \"y\": 1332}, {\"x\": 259, \"y\": 1332}], \"styleId\": 2, \"text\": \"年发生一笔需代偿项目,约\"}, {\"pos\": [{\"x\": 546, \"y\": 1308}, {\"x\": 548, \"y\": 1308}, {\"x\": 548, \"y\": 1332}, {\"x\": 546, \"y\": 1332}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 552, \"y\": 1308}, {\"x\": 588, \"y\": 1308}, {\"x\": 588, \"y\": 1332}, {\"x\": 552, \"y\": 1332}], \"styleId\": 3, \"text\": \"660\"}, {\"pos\": [{\"x\": 595, \"y\": 1308}, {\"x\": 762, \"y\": 1308}, {\"x\": 762, \"y\": 1332}, {\"x\": 595, \"y\": 1332}], \"styleId\": 2, \"text\": \"万元,代偿率约\"}, {\"pos\": [{\"x\": 762, \"y\": 1308}, {\"x\": 764, \"y\": 1308}, {\"x\": 764, \"y\": 1332}, {\"x\": 762, \"y\": 1332}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 768, \"y\": 1308}, {\"x\": 797, \"y\": 1308}, {\"x\": 797, \"y\": 1332}, {\"x\": 768, \"y\": 1332}], \"styleId\": 3, \"text\": \"5%\"}, {\"pos\": [{\"x\": 797, \"y\": 1308}, {\"x\": 821, \"y\": 1308}, {\"x\": 821, \"y\": 1332}, {\"x\": 797, \"y\": 1332}], \"styleId\": 2, \"text\": \"。\"}], \"markdownContent\": null, \"index\": 16, \"subType\": \"para\", \"lineHeight\": 24, \"text\": \"2018年发生一笔需代偿项目,约 660万元,代偿率约 5%。\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"8d8ef9423fb82bb7e97718a1f1322d8d\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 205, \"y\": 1342}, {\"x\": 585, \"y\": 1342}, {\"x\": 585, \"y\": 1366}, {\"x\": 205, \"y\": 1367}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 1343}, {\"x\": 276, \"y\": 1343}, {\"x\": 276, \"y\": 1367}, {\"x\": 205, \"y\": 1367}], \"styleId\": 0, \"text\": \"10.20%\"}, {\"pos\": [{\"x\": 276, \"y\": 1343}, {\"x\": 587, \"y\": 1343}, {\"x\": 587, \"y\": 1367}, {\"x\": 276, \"y\": 1367}], \"styleId\": 1, \"text\": \"项目补贴到项目还是担保公司\"}], \"markdownContent\": null, \"index\": 17, \"subType\": \"none\", \"lineHeight\": 24, \"text\": \"10.20%项目补贴到项目还是担保公司\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"dc1262b9070215c8344991c652f21920\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 205, \"y\": 1376}, {\"x\": 1149, \"y\": 1376}, {\"x\": 1149, \"y\": 1438}, {\"x\": 205, \"y\": 1437}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 1379}, {\"x\": 372, \"y\": 1379}, {\"x\": 372, \"y\": 1403}, {\"x\": 205, \"y\": 1403}], \"styleId\": 2, \"text\": \"按照现行政策,\"}, {\"pos\": [{\"x\": 371, \"y\": 1379}, {\"x\": 412, \"y\": 1379}, {\"x\": 412, \"y\": 1403}, {\"x\": 371, \"y\": 1403}], \"styleId\": 3, \"text\": \"20%\"}, {\"pos\": [{\"x\": 412, \"y\": 1379}, {\"x\": 1151, \"y\": 1379}, {\"x\": 1151, \"y\": 1403}, {\"x\": 412, \"y\": 1403}], \"styleId\": 2, \"text\": \"的贴息是补助给银行机构,让银行机构提供优惠的贷款利率,目前,贷\"}, {\"pos\": [{\"x\": 205, \"y\": 
1414}, {\"x\": 516, \"y\": 1414}, {\"x\": 516, \"y\": 1438}, {\"x\": 205, \"y\": 1438}], \"styleId\": 2, \"text\": \"款利率大部分在基准利率上浮\"}, {\"pos\": [{\"x\": 516, \"y\": 1414}, {\"x\": 518, \"y\": 1414}, {\"x\": 518, \"y\": 1438}, {\"x\": 516, \"y\": 1438}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 522, \"y\": 1414}, {\"x\": 563, \"y\": 1414}, {\"x\": 563, \"y\": 1438}, {\"x\": 522, \"y\": 1438}], \"styleId\": 3, \"text\": \"40%\"}, {\"pos\": [{\"x\": 563, \"y\": 1414}, {\"x\": 970, \"y\": 1414}, {\"x\": 970, \"y\": 1438}, {\"x\": 563, \"y\": 1438}], \"styleId\": 2, \"text\": \"左右,个别项目贷款利率同基准利率。\"}], \"markdownContent\": null, \"index\": 18, \"subType\": \"para\", \"lineHeight\": 11, \"text\": \"按照现行政策,20%的贴息是补助给银行机构,让银行机构提供优惠的贷款利率,目前,贷款利率大部分在基准利率上浮 40%左右,个别项目贷款利率同基准利率。\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"ddc4c7cbbd71d073c325b5b0dc97a0e4\"}]", + "start_char_idx": null, + "end_char_idx": null, + "text_template": "{metadata_str}\n\n{content}", + "metadata_template": "{key}: {value}", + "metadata_seperator": "\n", + "class_name": "Document" + } + ] \ No newline at end of file diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/test_node_parser_relational_dashscope.py b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/test_node_parser_relational_dashscope.py new file mode 100644 index 0000000000000..d32acd2257ac0 --- /dev/null +++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/test_node_parser_relational_dashscope.py @@ -0,0 +1,26 @@ +import json +import os + +from llama_index.node_parser.relational.dashscope import DashScopeJsonNodeParser +from llama_index.core.ingestion import IngestionPipeline +from llama_index.core.schema import Document + +os.environ['DASHSCOPE_API_KEY'] = 'sk-75878ade82164673a0962a825471e825' + +doc_json = json.load(open('tests/documents.json')) +documents = [] +for doc in doc_json: + documents.append(Document.from_dict(doc)) + +node_parser = DashScopeJsonNodeParser(chunk_size=100, overlap_size=0, separator=' |,|,|。|?|!|\n|\?|\!') + +pipeline = IngestionPipeline( + transformations=[ + node_parser, + ] +) + +nodes = pipeline.run(documents=documents, show_progress=True) + +for node in nodes: + print(node) \ No newline at end of file From 6312d9042ed7c3fdb7bff5c3635507925e7f3025 Mon Sep 17 00:00:00 2001 From: "ada.drx" Date: Tue, 30 Apr 2024 18:14:52 +0800 Subject: [PATCH 02/13] add base url as env,rm test doc --- .../node_parser/relational/dashscope/base.py | 9 +++--- .../tests/documents.json | 19 ------------- .../test_node_parser_relational_dashscope.py | 28 +++---------------- 3 files changed, 8 insertions(+), 48 deletions(-) delete mode 100644 llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/documents.json diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py index 0ae3d6d8e8df0..bcc8bf09a0e87 100644 --- a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py +++ 
b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py @@ -15,10 +15,6 @@ class DashScopeJsonNodeParser(BaseElementNodeParser): Splits a json format document from DashScope Parse into Text Nodes and Index Nodes corresponding to embedded objects (e.g. tables). """ - service_url: str = Field( - default="https://int-dashscope.aliyuncs.com/api/v1/indeces/component/configed_transformations/spliter", - description="URL of the service endpoint." - ) try_count_limit: int = Field( default=10, description="Maximum number of retry attempts." @@ -86,8 +82,9 @@ def post_service(self, my_input): "Accept-Encoding": "utf-8", 'Authorization': 'Bearer ' + DASHSCOPE_API_KEY, } + service_url = os.getenv('DASHSCOPE_BASE_URL', "https://dashscope.aliyuncs.com") + "/api/v1/indices/component/configed_transformations/spliter" try: - response = requests.post(self.service_url, data=json.dumps(my_input), headers=headers) + response = requests.post(service_url, data=json.dumps(my_input), headers=headers) response_text = response.json() if 'chunkService' in response_text: return response_text['chunkService']['chunkResult'] diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/documents.json b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/documents.json deleted file mode 100644 index d4aae20f96609..0000000000000 --- a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/documents.json +++ /dev/null @@ -1,19 +0,0 @@ -[ - { - "id_": "09d7dc73-38d3-4c4a-8eee-7f3c87db76f3", - "embedding": null, - "metadata": { - "parse_fmt_type": "DASHCOPE_DOCMIND" - }, - "excluded_embed_metadata_keys": [], - "excluded_llm_metadata_keys": [], - "relationships": {}, - "text": "[{\"firstLinesChars\": 0, \"pos\": [{\"x\": 205, \"y\": 169}, {\"x\": 690, \"y\": 169}, {\"x\": 690, \"y\": 194}, {\"x\": 205, \"y\": 194}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 170}, {\"x\": 223, \"y\": 170}, {\"x\": 223, \"y\": 194}, {\"x\": 205, \"y\": 194}], \"styleId\": 0, \"text\": \"1.\"}, {\"pos\": [{\"x\": 223, \"y\": 170}, {\"x\": 701, \"y\": 170}, {\"x\": 701, \"y\": 194}, {\"x\": 223, \"y\": 194}], \"styleId\": 1, \"text\": \"农担公司主管部门是谁?是否属于越级报批?\"}], \"markdownContent\": null, \"index\": 0, \"subType\": \"para\", \"lineHeight\": 24, \"text\": \"1.农担公司主管部门是谁?是否属于越级报批?\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"d177b69ab3b7056bda42fac3f788ec4c\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 206, \"y\": 203}, {\"x\": 1150, \"y\": 203}, {\"x\": 1150, \"y\": 338}, {\"x\": 205, \"y\": 338}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 206}, {\"x\": 300, \"y\": 206}, {\"x\": 300, \"y\": 230}, {\"x\": 205, \"y\": 230}], \"styleId\": 2, \"text\": \"根据财农\"}, {\"pos\": [{\"x\": 300, \"y\": 206}, {\"x\": 388, \"y\": 206}, {\"x\": 388, \"y\": 230}, {\"x\": 300, \"y\": 230}], \"styleId\": 3, \"text\": \"[2017]40\"}, {\"pos\": [{\"x\": 397, \"y\": 206}, {\"x\": 1151, \"y\": 206}, {\"x\": 1151, \"y\": 230}, {\"x\": 397, \"y\": 230}], \"styleId\": 2, \"text\": \"号文,“农担公司由财政部门会同农业部门共同负责,财政部门履行出资\"}, {\"pos\": [{\"x\": 205, \"y\": 241}, {\"x\": 1102, \"y\": 241}, {\"x\": 1102, \"y\": 265}, {\"x\": 205, \"y\": 265}], \"styleId\": 2, \"text\":
\"人职责,对担保机构的担保费用补助和业务奖补资金进行核定,制定财政支持政策”。\"}, {\"pos\": [{\"x\": 205, \"y\": 277}, {\"x\": 1151, \"y\": 277}, {\"x\": 1151, \"y\": 301}, {\"x\": 205, \"y\": 301}], \"styleId\": 2, \"text\": \"市财政局履行出资人职责,将注册资本金增加至金圆集团,由金圆集团将注册资本金增加至\"}, {\"pos\": [{\"x\": 205, \"y\": 312}, {\"x\": 325, \"y\": 312}, {\"x\": 325, \"y\": 336}, {\"x\": 205, \"y\": 336}], \"styleId\": 2, \"text\": \"农担公司。\"}], \"markdownContent\": null, \"index\": 1, \"subType\": \"none\", \"lineHeight\": 11, \"text\": \"根据财农[2017]40号文,“农担公司由财政部门会同农业部门共同负责,财政部门履行出资人职责,对担保机构的担保费用补助和业务奖补资金进行核定,制定财政支持政策”。市财政局履行出资人职责,将注册资本金增加至金圆集团,由金圆集团将注册资本金增加至农担公司。\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"583087a175c40b3006536e851aef0a5d\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 204, \"y\": 347}, {\"x\": 1148, \"y\": 347}, {\"x\": 1148, \"y\": 407}, {\"x\": 204, \"y\": 407}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 348}, {\"x\": 223, \"y\": 348}, {\"x\": 223, \"y\": 372}, {\"x\": 205, \"y\": 372}], \"styleId\": 0, \"text\": \"2.\"}, {\"pos\": [{\"x\": 223, \"y\": 348}, {\"x\": 1151, \"y\": 348}, {\"x\": 1151, \"y\": 372}, {\"x\": 223, \"y\": 372}], \"styleId\": 1, \"text\": \"农担公司运作模式是什么样的?有相应制度规定吗?为什么要和商业银行建立授信,有什\"}, {\"pos\": [{\"x\": 205, \"y\": 384}, {\"x\": 300, \"y\": 384}, {\"x\": 300, \"y\": 408}, {\"x\": 205, \"y\": 408}], \"styleId\": 1, \"text\": \"么好处?\"}], \"markdownContent\": null, \"index\": 2, \"subType\": \"para\", \"lineHeight\": 12, \"text\": \"2.农担公司运作模式是什么样的?有相应制度规定吗?为什么要和商业银行建立授信,有什么好处?\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"caf36688120570149da16edbb673bb1a\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 203, \"y\": 417}, {\"x\": 1151, \"y\": 418}, {\"x\": 1152, \"y\": 656}, {\"x\": 203, \"y\": 656}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 419}, {\"x\": 1052, \"y\": 419}, {\"x\": 1052, \"y\": 443}, {\"x\": 205, \"y\": 443}], \"styleId\": 2, \"text\": \"上级制度主要有《关于财政支持建立农业信贷担保体系的指导意见》的通知(财农\"}, {\"pos\": [{\"x\": 1052, \"y\": 419}, {\"x\": 1151, \"y\": 419}, {\"x\": 1151, \"y\": 443}, {\"x\": 1052, \"y\": 443}], \"styleId\": 3, \"text\": \"[2015]121\"}, {\"pos\": [{\"x\": 205, \"y\": 455}, {\"x\": 749, \"y\": 455}, {\"x\": 749, \"y\": 479}, {\"x\": 205, \"y\": 479}], \"styleId\": 2, \"text\": \"号)、(关于做好全国农业信贷担保工作的通知)财农\"}, {\"pos\": [{\"x\": 749, \"y\": 455}, {\"x\": 836, \"y\": 455}, {\"x\": 836, \"y\": 479}, {\"x\": 749, \"y\": 479}], \"styleId\": 3, \"text\": \"[2017]40\"}, {\"pos\": [{\"x\": 843, \"y\": 455}, {\"x\": 1151, \"y\": 455}, {\"x\": 1151, \"y\": 479}, {\"x\": 843, \"y\": 479}], \"styleId\": 2, \"text\": \"号。农业信贷担保体系建设是\"}, {\"pos\": [{\"x\": 205, \"y\": 490}, {\"x\": 1151, \"y\": 490}, {\"x\": 1151, \"y\": 514}, {\"x\": 205, \"y\": 514}], \"styleId\": 2, \"text\": \"中央的要求,也是解决农业“融资贵”“融资难”问题的重要手段。目前农业经营主体由于\"}, {\"pos\": [{\"x\": 205, \"y\": 526}, {\"x\": 1163, \"y\": 526}, {\"x\": 1163, \"y\": 550}, {\"x\": 205, \"y\": 550}], \"styleId\": 2, \"text\": \"普遍缺乏符合银行贷款条件的抵押,通过提供农业信贷政策性担保,为农业经营主体增信,\"}, {\"pos\": [{\"x\": 205, \"y\": 561}, {\"x\": 1151, \"y\": 561}, {\"x\": 1151, \"y\": 585}, {\"x\": 205, \"y\": 585}], \"styleId\": 2, \"text\": \"实现免抵押担保贷款,解决农业经营主体的贷款需求。按照我市规定,融资性担保机构的注\"}, {\"pos\": [{\"x\": 205, \"y\": 597}, {\"x\": 468, \"y\": 597}, {\"x\": 468, \"y\": 621}, {\"x\": 205, \"y\": 621}], \"styleId\": 2, \"text\": \"册资本金原则上应不低于\"}, {\"pos\": [{\"x\": 468, \"y\": 597}, {\"x\": 470, \"y\": 597}, {\"x\": 470, \"y\": 621}, {\"x\": 468, \"y\": 621}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 474, 
\"y\": 597}, {\"x\": 486, \"y\": 597}, {\"x\": 486, \"y\": 621}, {\"x\": 474, \"y\": 621}], \"styleId\": 3, \"text\": \"1\"}, {\"pos\": [{\"x\": 492, \"y\": 597}, {\"x\": 843, \"y\": 597}, {\"x\": 843, \"y\": 621}, {\"x\": 492, \"y\": 621}], \"styleId\": 2, \"text\": \"亿元,经与市金融监管机构协商,\"}, {\"pos\": [{\"x\": 834, \"y\": 597}, {\"x\": 883, \"y\": 597}, {\"x\": 883, \"y\": 621}, {\"x\": 834, \"y\": 621}], \"styleId\": 3, \"text\": \"2017\"}, {\"pos\": [{\"x\": 889, \"y\": 597}, {\"x\": 1096, \"y\": 597}, {\"x\": 1096, \"y\": 621}, {\"x\": 889, \"y\": 621}], \"styleId\": 2, \"text\": \"年底,同意先行按照\"}, {\"pos\": [{\"x\": 1096, \"y\": 597}, {\"x\": 1098, \"y\": 597}, {\"x\": 1098, \"y\": 621}, {\"x\": 1096, \"y\": 621}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 1102, \"y\": 597}, {\"x\": 1150, \"y\": 597}, {\"x\": 1150, \"y\": 621}, {\"x\": 1102, \"y\": 621}], \"styleId\": 3, \"text\": \"4000\"}, {\"pos\": [{\"x\": 205, \"y\": 632}, {\"x\": 899, \"y\": 632}, {\"x\": 899, \"y\": 656}, {\"x\": 205, \"y\": 656}], \"styleId\": 2, \"text\": \"万元进行注册,但资本金不足的问题,使得银行机构积极性不高。\"}], \"markdownContent\": null, \"index\": 3, \"subType\": \"para\", \"lineHeight\": 11, \"text\": \"上级制度主要有《关于财政支持建立农业信贷担保体系的指导意见》的通知(财农[2015]121号)、(关于做好全国农业信贷担保工作的通知)财农[2017]40号。农业信贷担保体系建设是中央的要求,也是解决农业“融资贵”“融资难”问题的重要手段。目前农业经营主体由于普遍缺乏符合银行贷款条件的抵押,通过提供农业信贷政策性担保,为农业经营主体增信,实现免抵押担保贷款,解决农业经营主体的贷款需求。按照我市规定,融资性担保机构的注册资本金原则上应不低于 1亿元,经与市金融监管机构协商,2017年底,同意先行按照 4000万元进行注册,但资本金不足的问题,使得银行机构积极性不高。\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"db07d431b8130eca6e9d2da36dbff3b3\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 204, \"y\": 666}, {\"x\": 689, \"y\": 666}, {\"x\": 690, \"y\": 691}, {\"x\": 204, \"y\": 690}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 668}, {\"x\": 223, \"y\": 668}, {\"x\": 223, \"y\": 692}, {\"x\": 205, \"y\": 692}], \"styleId\": 0, \"text\": \"3.\"}, {\"pos\": [{\"x\": 223, \"y\": 668}, {\"x\": 701, \"y\": 668}, {\"x\": 701, \"y\": 692}, {\"x\": 223, \"y\": 692}], \"styleId\": 1, \"text\": \"增资事项属我局业务?是否涉及到其他部门?\"}], \"markdownContent\": null, \"index\": 4, \"subType\": \"para\", \"lineHeight\": 24, \"text\": \"3.增资事项属我局业务?是否涉及到其他部门?\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"951d0aa6aab75045560700982c2fbe4d\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 205, \"y\": 705}, {\"x\": 246, \"y\": 705}, {\"x\": 246, \"y\": 727}, {\"x\": 205, \"y\": 727}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 703}, {\"x\": 229, \"y\": 703}, {\"x\": 229, \"y\": 727}, {\"x\": 205, \"y\": 727}], \"styleId\": 2, \"text\": \"见\"}, {\"pos\": [{\"x\": 229, \"y\": 703}, {\"x\": 231, \"y\": 703}, {\"x\": 231, \"y\": 727}, {\"x\": 229, \"y\": 727}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 235, \"y\": 703}, {\"x\": 247, \"y\": 703}, {\"x\": 247, \"y\": 727}, {\"x\": 235, \"y\": 727}], \"styleId\": 3, \"text\": \"1\"}], \"markdownContent\": null, \"index\": 5, \"subType\": \"para\", \"lineHeight\": 24, \"text\": \"见 1\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"ff81644ea45f281d48f22210539d56f1\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 205, \"y\": 738}, {\"x\": 546, \"y\": 738}, {\"x\": 546, \"y\": 762}, {\"x\": 204, \"y\": 762}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 739}, {\"x\": 223, \"y\": 739}, {\"x\": 223, \"y\": 763}, {\"x\": 205, \"y\": 763}], \"styleId\": 0, \"text\": \"4.\"}, {\"pos\": [{\"x\": 223, \"y\": 739}, {\"x\": 558, \"y\": 739}, {\"x\": 558, \"y\": 763}, {\"x\": 223, \"y\": 763}], 
\"styleId\": 1, \"text\": \"担保补贴资金没有主管部门吗?\"}], \"markdownContent\": null, \"index\": 6, \"subType\": \"para\", \"lineHeight\": 24, \"text\": \"4.担保补贴资金没有主管部门吗?\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"48ed85e2366552ee3248f4065436a73c\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 207, \"y\": 774}, {\"x\": 245, \"y\": 774}, {\"x\": 245, \"y\": 797}, {\"x\": 207, \"y\": 797}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 774}, {\"x\": 229, \"y\": 774}, {\"x\": 229, \"y\": 798}, {\"x\": 205, \"y\": 798}], \"styleId\": 2, \"text\": \"见\"}, {\"pos\": [{\"x\": 229, \"y\": 774}, {\"x\": 231, \"y\": 774}, {\"x\": 231, \"y\": 798}, {\"x\": 229, \"y\": 798}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 235, \"y\": 774}, {\"x\": 247, \"y\": 774}, {\"x\": 247, \"y\": 798}, {\"x\": 235, \"y\": 798}], \"styleId\": 3, \"text\": \"1\"}], \"markdownContent\": null, \"index\": 7, \"subType\": \"para\", \"lineHeight\": 24, \"text\": \"见 1\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"33e6bd26c7155f964144ece82d59137b\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 202, \"y\": 809}, {\"x\": 952, \"y\": 809}, {\"x\": 952, \"y\": 832}, {\"x\": 202, \"y\": 833}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 810}, {\"x\": 223, \"y\": 810}, {\"x\": 223, \"y\": 834}, {\"x\": 205, \"y\": 834}], \"styleId\": 0, \"text\": \"5.\"}, {\"pos\": [{\"x\": 223, \"y\": 810}, {\"x\": 965, \"y\": 810}, {\"x\": 965, \"y\": 834}, {\"x\": 223, \"y\": 834}], \"styleId\": 1, \"text\": \"担保补贴资金有由谁来审核?提出意见时由谁来审?由我局直接拨付?\"}], \"markdownContent\": null, \"index\": 8, \"subType\": \"para\", \"lineHeight\": 24, \"text\": \"5.担保补贴资金有由谁来审核?提出意见时由谁来审?由我局直接拨付?\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"036c7e1bedf395e97173314545f0d975\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 204, \"y\": 847}, {\"x\": 247, \"y\": 846}, {\"x\": 248, \"y\": 869}, {\"x\": 205, \"y\": 869}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 846}, {\"x\": 229, \"y\": 846}, {\"x\": 229, \"y\": 870}, {\"x\": 205, \"y\": 870}], \"styleId\": 2, \"text\": \"见\"}, {\"pos\": [{\"x\": 229, \"y\": 846}, {\"x\": 231, \"y\": 846}, {\"x\": 231, \"y\": 870}, {\"x\": 229, \"y\": 870}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 235, \"y\": 846}, {\"x\": 247, \"y\": 846}, {\"x\": 247, \"y\": 870}, {\"x\": 235, \"y\": 870}], \"styleId\": 3, \"text\": \"1\"}], \"markdownContent\": null, \"index\": 9, \"subType\": \"para\", \"lineHeight\": 24, \"text\": \"见 1\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"e8f7f8d29591785b8f17c30e68ae8358\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 204, \"y\": 879}, {\"x\": 837, \"y\": 879}, {\"x\": 837, \"y\": 904}, {\"x\": 204, \"y\": 904}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 881}, {\"x\": 271, \"y\": 881}, {\"x\": 271, \"y\": 905}, {\"x\": 205, \"y\": 905}], \"styleId\": 0, \"text\": \"6.2019\"}, {\"pos\": [{\"x\": 277, \"y\": 881}, {\"x\": 756, \"y\": 881}, {\"x\": 756, \"y\": 905}, {\"x\": 277, \"y\": 905}], \"styleId\": 1, \"text\": \"年目前在保余额多少,增资后能否达到规定的\"}, {\"pos\": [{\"x\": 756, \"y\": 881}, {\"x\": 774, \"y\": 881}, {\"x\": 774, \"y\": 905}, {\"x\": 756, \"y\": 905}], \"styleId\": 0, \"text\": \" 3\"}, {\"pos\": [{\"x\": 780, \"y\": 881}, {\"x\": 852, \"y\": 881}, {\"x\": 852, \"y\": 905}, {\"x\": 780, \"y\": 905}], \"styleId\": 1, \"text\": \"亿元。\"}], \"markdownContent\": null, \"index\": 10, \"subType\": \"para\", \"lineHeight\": 24, \"text\": 
\"6.2019年目前在保余额多少,增资后能否达到规定的 3亿元。\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"296c08f06de425ac67f0be3c43f05c96\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 203, \"y\": 921}, {\"x\": 1151, \"y\": 920}, {\"x\": 1151, \"y\": 1040}, {\"x\": 203, \"y\": 1040}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 917}, {\"x\": 372, \"y\": 917}, {\"x\": 372, \"y\": 941}, {\"x\": 205, \"y\": 941}], \"styleId\": 2, \"text\": \"目前在保余额约\"}, {\"pos\": [{\"x\": 372, \"y\": 917}, {\"x\": 374, \"y\": 917}, {\"x\": 374, \"y\": 941}, {\"x\": 372, \"y\": 941}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 378, \"y\": 917}, {\"x\": 426, \"y\": 917}, {\"x\": 426, \"y\": 941}, {\"x\": 378, \"y\": 941}], \"styleId\": 3, \"text\": \"7200\"}, {\"pos\": [{\"x\": 433, \"y\": 917}, {\"x\": 526, \"y\": 917}, {\"x\": 526, \"y\": 941}, {\"x\": 433, \"y\": 941}], \"styleId\": 2, \"text\": \"万元,比\"}, {\"pos\": [{\"x\": 526, \"y\": 917}, {\"x\": 528, \"y\": 917}, {\"x\": 528, \"y\": 941}, {\"x\": 526, \"y\": 941}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 532, \"y\": 917}, {\"x\": 580, \"y\": 917}, {\"x\": 580, \"y\": 941}, {\"x\": 532, \"y\": 941}], \"styleId\": 3, \"text\": \"2018\"}, {\"pos\": [{\"x\": 586, \"y\": 917}, {\"x\": 610, \"y\": 917}, {\"x\": 610, \"y\": 941}, {\"x\": 586, \"y\": 941}], \"styleId\": 2, \"text\": \"年\"}, {\"pos\": [{\"x\": 610, \"y\": 917}, {\"x\": 612, \"y\": 917}, {\"x\": 612, \"y\": 941}, {\"x\": 610, \"y\": 941}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 616, \"y\": 917}, {\"x\": 664, \"y\": 917}, {\"x\": 664, \"y\": 941}, {\"x\": 616, \"y\": 941}], \"styleId\": 3, \"text\": \"9835\"}, {\"pos\": [{\"x\": 670, \"y\": 917}, {\"x\": 742, \"y\": 917}, {\"x\": 742, \"y\": 941}, {\"x\": 670, \"y\": 941}], \"styleId\": 2, \"text\": \"万元、\"}, {\"pos\": [{\"x\": 740, \"y\": 917}, {\"x\": 788, \"y\": 917}, {\"x\": 788, \"y\": 941}, {\"x\": 740, \"y\": 941}], \"styleId\": 3, \"text\": \"2017\"}, {\"pos\": [{\"x\": 794, \"y\": 917}, {\"x\": 818, \"y\": 917}, {\"x\": 818, \"y\": 941}, {\"x\": 794, \"y\": 941}], \"styleId\": 2, \"text\": \"年\"}, {\"pos\": [{\"x\": 818, \"y\": 917}, {\"x\": 820, \"y\": 917}, {\"x\": 820, \"y\": 941}, {\"x\": 818, \"y\": 941}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 824, \"y\": 917}, {\"x\": 884, \"y\": 917}, {\"x\": 884, \"y\": 941}, {\"x\": 824, \"y\": 941}], \"styleId\": 3, \"text\": \"14530\"}, {\"pos\": [{\"x\": 890, \"y\": 917}, {\"x\": 1151, \"y\": 917}, {\"x\": 1151, \"y\": 941}, {\"x\": 890, \"y\": 941}], \"styleId\": 2, \"text\": \"万元均出现下滑,主要原\"}, {\"pos\": [{\"x\": 205, \"y\": 952}, {\"x\": 348, \"y\": 952}, {\"x\": 348, \"y\": 976}, {\"x\": 205, \"y\": 976}], \"styleId\": 2, \"text\": \"因:一是财农\"}, {\"pos\": [{\"x\": 348, \"y\": 952}, {\"x\": 435, \"y\": 952}, {\"x\": 435, \"y\": 976}, {\"x\": 348, \"y\": 976}], \"styleId\": 3, \"text\": \"[2017]40\"}, {\"pos\": [{\"x\": 444, \"y\": 952}, {\"x\": 923, \"y\": 952}, {\"x\": 923, \"y\": 976}, {\"x\": 444, \"y\": 976}], \"styleId\": 2, \"text\": \"号文印发后,要求各地农担公司应专注服务于\"}, {\"pos\": [{\"x\": 923, \"y\": 952}, {\"x\": 925, \"y\": 952}, {\"x\": 925, \"y\": 976}, {\"x\": 923, \"y\": 976}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 931, \"y\": 952}, {\"x\": 999, \"y\": 952}, {\"x\": 999, \"y\": 976}, {\"x\": 931, \"y\": 976}], \"styleId\": 3, \"text\": \"10-200\"}, {\"pos\": [{\"x\": 1007, \"y\": 952}, {\"x\": 1151, \"y\": 952}, {\"x\": 1151, \"y\": 976}, {\"x\": 1007, \"y\": 976}], \"styleId\": 2, \"text\": \"万元的适度规\"}, {\"pos\": 
[{\"x\": 205, \"y\": 988}, {\"x\": 480, \"y\": 988}, {\"x\": 480, \"y\": 1012}, {\"x\": 205, \"y\": 1012}], \"styleId\": 2, \"text\": \"模经营主体,比例不得低于\"}, {\"pos\": [{\"x\": 480, \"y\": 988}, {\"x\": 482, \"y\": 988}, {\"x\": 482, \"y\": 1012}, {\"x\": 480, \"y\": 1012}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 486, \"y\": 988}, {\"x\": 528, \"y\": 988}, {\"x\": 528, \"y\": 1012}, {\"x\": 486, \"y\": 1012}], \"styleId\": 3, \"text\": \"70%\"}, {\"pos\": [{\"x\": 528, \"y\": 988}, {\"x\": 1151, \"y\": 988}, {\"x\": 1151, \"y\": 1012}, {\"x\": 528, \"y\": 1012}], \"styleId\": 2, \"text\": \",减少了原贷款金额较大的担保对象;二是注册资本金不高,\"}, {\"pos\": [{\"x\": 205, \"y\": 1023}, {\"x\": 947, \"y\": 1023}, {\"x\": 947, \"y\": 1047}, {\"x\": 205, \"y\": 1047}], \"styleId\": 2, \"text\": \"银行机构合作推进较为缓慢;三是农担公司的业务拓展能力还需提升。\"}], \"markdownContent\": null, \"index\": 11, \"subType\": \"para\", \"lineHeight\": 11, \"text\": \"目前在保余额约 7200万元,比 2018年 9835万元、2017年 14530万元均出现下滑,主要原因:一是财农[2017]40号文印发后,要求各地农担公司应专注服务于 10-200万元的适度规模经营主体,比例不得低于 70%,减少了原贷款金额较大的担保对象;二是注册资本金不高,银行机构合作推进较为缓慢;三是农担公司的业务拓展能力还需提升。\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"d8e4d08381e1c52b6d31b250d39eba35\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 202, \"y\": 1057}, {\"x\": 1151, \"y\": 1058}, {\"x\": 1152, \"y\": 1118}, {\"x\": 202, \"y\": 1117}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 1059}, {\"x\": 1151, \"y\": 1059}, {\"x\": 1151, \"y\": 1083}, {\"x\": 205, \"y\": 1083}], \"styleId\": 2, \"text\": \"最近拟修订农担公司实施细则,拟将农担公司的在保余额与业务奖补资金进行挂钩,促进农\"}, {\"pos\": [{\"x\": 205, \"y\": 1094}, {\"x\": 444, \"y\": 1094}, {\"x\": 444, \"y\": 1118}, {\"x\": 205, \"y\": 1118}], \"styleId\": 2, \"text\": \"担公司加快业务拓展。\"}], \"markdownContent\": null, \"index\": 12, \"subType\": \"para\", \"lineHeight\": 11, \"text\": \"最近拟修订农担公司实施细则,拟将农担公司的在保余额与业务奖补资金进行挂钩,促进农担公司加快业务拓展。\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"ed77a21ce5c514110df54e1b492074eb\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 203, \"y\": 1129}, {\"x\": 475, \"y\": 1129}, {\"x\": 475, \"y\": 1153}, {\"x\": 203, \"y\": 1153}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 1130}, {\"x\": 223, \"y\": 1130}, {\"x\": 223, \"y\": 1154}, {\"x\": 205, \"y\": 1154}], \"styleId\": 0, \"text\": \"7.\"}, {\"pos\": [{\"x\": 223, \"y\": 1130}, {\"x\": 486, \"y\": 1130}, {\"x\": 486, \"y\": 1154}, {\"x\": 223, \"y\": 1154}], \"styleId\": 1, \"text\": \"为什么要分成两份来报?\"}], \"markdownContent\": null, \"index\": 13, \"subType\": \"para\", \"lineHeight\": 24, \"text\": \"7.为什么要分成两份来报?\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"a425a1570111663a5939db031ec727ca\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 202, \"y\": 1199}, {\"x\": 414, \"y\": 1199}, {\"x\": 414, \"y\": 1225}, {\"x\": 202, \"y\": 1225}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 1201}, {\"x\": 223, \"y\": 1201}, {\"x\": 223, \"y\": 1225}, {\"x\": 205, \"y\": 1225}], \"styleId\": 0, \"text\": \"8.\"}, {\"pos\": [{\"x\": 223, \"y\": 1201}, {\"x\": 414, \"y\": 1201}, {\"x\": 414, \"y\": 1225}, {\"x\": 223, \"y\": 1225}], \"styleId\": 1, \"text\": \"年初预留资金情况\"}], \"markdownContent\": null, \"index\": 14, \"subType\": \"none\", \"lineHeight\": 24, \"text\": \"8.年初预留资金情况\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"7e5d06c6fad4faa171674017c2b99ced\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 203, \"y\": 1271}, {\"x\": 403, \"y\": 1270}, {\"x\": 403, \"y\": 1295}, {\"x\": 203, \"y\": 1296}], 
\"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 1272}, {\"x\": 223, \"y\": 1272}, {\"x\": 223, \"y\": 1296}, {\"x\": 205, \"y\": 1296}], \"styleId\": 0, \"text\": \"9.\"}, {\"pos\": [{\"x\": 223, \"y\": 1272}, {\"x\": 414, \"y\": 1272}, {\"x\": 414, \"y\": 1296}, {\"x\": 223, \"y\": 1296}], \"styleId\": 1, \"text\": \"目前代偿率多高?\"}], \"markdownContent\": null, \"index\": 15, \"subType\": \"none\", \"lineHeight\": 24, \"text\": \"9.目前代偿率多高?\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"02d60f5198fa19334e555fa528463064\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 203, \"y\": 1306}, {\"x\": 808, \"y\": 1306}, {\"x\": 808, \"y\": 1332}, {\"x\": 203, \"y\": 1332}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 1308}, {\"x\": 253, \"y\": 1308}, {\"x\": 253, \"y\": 1332}, {\"x\": 205, \"y\": 1332}], \"styleId\": 3, \"text\": \"2018\"}, {\"pos\": [{\"x\": 259, \"y\": 1308}, {\"x\": 546, \"y\": 1308}, {\"x\": 546, \"y\": 1332}, {\"x\": 259, \"y\": 1332}], \"styleId\": 2, \"text\": \"年发生一笔需代偿项目,约\"}, {\"pos\": [{\"x\": 546, \"y\": 1308}, {\"x\": 548, \"y\": 1308}, {\"x\": 548, \"y\": 1332}, {\"x\": 546, \"y\": 1332}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 552, \"y\": 1308}, {\"x\": 588, \"y\": 1308}, {\"x\": 588, \"y\": 1332}, {\"x\": 552, \"y\": 1332}], \"styleId\": 3, \"text\": \"660\"}, {\"pos\": [{\"x\": 595, \"y\": 1308}, {\"x\": 762, \"y\": 1308}, {\"x\": 762, \"y\": 1332}, {\"x\": 595, \"y\": 1332}], \"styleId\": 2, \"text\": \"万元,代偿率约\"}, {\"pos\": [{\"x\": 762, \"y\": 1308}, {\"x\": 764, \"y\": 1308}, {\"x\": 764, \"y\": 1332}, {\"x\": 762, \"y\": 1332}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 768, \"y\": 1308}, {\"x\": 797, \"y\": 1308}, {\"x\": 797, \"y\": 1332}, {\"x\": 768, \"y\": 1332}], \"styleId\": 3, \"text\": \"5%\"}, {\"pos\": [{\"x\": 797, \"y\": 1308}, {\"x\": 821, \"y\": 1308}, {\"x\": 821, \"y\": 1332}, {\"x\": 797, \"y\": 1332}], \"styleId\": 2, \"text\": \"。\"}], \"markdownContent\": null, \"index\": 16, \"subType\": \"para\", \"lineHeight\": 24, \"text\": \"2018年发生一笔需代偿项目,约 660万元,代偿率约 5%。\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"8d8ef9423fb82bb7e97718a1f1322d8d\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 205, \"y\": 1342}, {\"x\": 585, \"y\": 1342}, {\"x\": 585, \"y\": 1366}, {\"x\": 205, \"y\": 1367}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 1343}, {\"x\": 276, \"y\": 1343}, {\"x\": 276, \"y\": 1367}, {\"x\": 205, \"y\": 1367}], \"styleId\": 0, \"text\": \"10.20%\"}, {\"pos\": [{\"x\": 276, \"y\": 1343}, {\"x\": 587, \"y\": 1343}, {\"x\": 587, \"y\": 1367}, {\"x\": 276, \"y\": 1367}], \"styleId\": 1, \"text\": \"项目补贴到项目还是担保公司\"}], \"markdownContent\": null, \"index\": 17, \"subType\": \"none\", \"lineHeight\": 24, \"text\": \"10.20%项目补贴到项目还是担保公司\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"dc1262b9070215c8344991c652f21920\"}, {\"firstLinesChars\": 0, \"pos\": [{\"x\": 205, \"y\": 1376}, {\"x\": 1149, \"y\": 1376}, {\"x\": 1149, \"y\": 1438}, {\"x\": 205, \"y\": 1437}], \"blocks\": [{\"pos\": [{\"x\": 205, \"y\": 1379}, {\"x\": 372, \"y\": 1379}, {\"x\": 372, \"y\": 1403}, {\"x\": 205, \"y\": 1403}], \"styleId\": 2, \"text\": \"按照现行政策,\"}, {\"pos\": [{\"x\": 371, \"y\": 1379}, {\"x\": 412, \"y\": 1379}, {\"x\": 412, \"y\": 1403}, {\"x\": 371, \"y\": 1403}], \"styleId\": 3, \"text\": \"20%\"}, {\"pos\": [{\"x\": 412, \"y\": 1379}, {\"x\": 1151, \"y\": 1379}, {\"x\": 1151, \"y\": 1403}, {\"x\": 412, \"y\": 1403}], \"styleId\": 2, 
\"text\": \"的贴息是补助给银行机构,让银行机构提供优惠的贷款利率,目前,贷\"}, {\"pos\": [{\"x\": 205, \"y\": 1414}, {\"x\": 516, \"y\": 1414}, {\"x\": 516, \"y\": 1438}, {\"x\": 205, \"y\": 1438}], \"styleId\": 2, \"text\": \"款利率大部分在基准利率上浮\"}, {\"pos\": [{\"x\": 516, \"y\": 1414}, {\"x\": 518, \"y\": 1414}, {\"x\": 518, \"y\": 1438}, {\"x\": 516, \"y\": 1438}], \"styleId\": 0, \"text\": \" \"}, {\"pos\": [{\"x\": 522, \"y\": 1414}, {\"x\": 563, \"y\": 1414}, {\"x\": 563, \"y\": 1438}, {\"x\": 522, \"y\": 1438}], \"styleId\": 3, \"text\": \"40%\"}, {\"pos\": [{\"x\": 563, \"y\": 1414}, {\"x\": 970, \"y\": 1414}, {\"x\": 970, \"y\": 1438}, {\"x\": 563, \"y\": 1438}], \"styleId\": 2, \"text\": \"左右,个别项目贷款利率同基准利率。\"}], \"markdownContent\": null, \"index\": 18, \"subType\": \"para\", \"lineHeight\": 11, \"text\": \"按照现行政策,20%的贴息是补助给银行机构,让银行机构提供优惠的贷款利率,目前,贷款利率大部分在基准利率上浮 40%左右,个别项目贷款利率同基准利率。\", \"alignment\": \"left\", \"type\": \"text\", \"pageNum\": [0], \"uniqueId\": \"ddc4c7cbbd71d073c325b5b0dc97a0e4\"}]", - "start_char_idx": null, - "end_char_idx": null, - "text_template": "{metadata_str}\n\n{content}", - "metadata_template": "{key}: {value}", - "metadata_seperator": "\n", - "class_name": "Document" - } - ] \ No newline at end of file diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/test_node_parser_relational_dashscope.py b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/test_node_parser_relational_dashscope.py index d32acd2257ac0..7b1c9408de4b8 100644 --- a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/test_node_parser_relational_dashscope.py +++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/test_node_parser_relational_dashscope.py @@ -1,26 +1,6 @@ -import json -import os - from llama_index.node_parser.relational.dashscope import DashScopeJsonNodeParser -from llama_index.core.ingestion import IngestionPipeline -from llama_index.core.schema import Document - -os.environ['DASHSCOPE_API_KEY'] = 'sk-75878ade82164673a0962a825471e825' - -doc_json = json.load(open('tests/documents.json')) -documents = [] -for doc in doc_json: - documents.append(Document.from_dict(doc)) - -node_parser = DashScopeJsonNodeParser(chunk_size=100, overlap_size=0, separator=' |,|,|。|?|!|\n|\?|\!') - -pipeline = IngestionPipeline( - transformations=[ - node_parser, - ] -) - -nodes = pipeline.run(documents=documents, show_progress=True) +from llama_index.core.node_parser.relational.base_element import BaseElementNodeParser -for node in nodes: - print(node) \ No newline at end of file +def test_class(): + names_of_base_classes = [b.__name__ for b in DashScopeJsonNodeParser.__mro__] + assert BaseElementNodeParser.__name__ in names_of_base_classes \ No newline at end of file From c5ba0c530cafd3c5d19795cf3c5d378e4c034694 Mon Sep 17 00:00:00 2001 From: "ada.drx" Date: Tue, 30 Apr 2024 18:31:21 +0800 Subject: [PATCH 03/13] pre commit fix --- .../node_parser/relational/dashscope/base.py | 93 +++++++++++-------- .../pyproject.toml | 19 ++-- .../test_node_parser_relational_dashscope.py | 3 +- 3 files changed, 61 insertions(+), 54 deletions(-) diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py 
b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py index bcc8bf09a0e87..8df6c1edc142b 100644 --- a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py +++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py @@ -5,7 +5,10 @@ import json from llama_index.core.bridge.pydantic import Field -from llama_index.core.node_parser.relational.base_element import BaseElementNodeParser, Element +from llama_index.core.node_parser.relational.base_element import ( + BaseElementNodeParser, + Element, +) from llama_index.core.schema import BaseNode, TextNode @@ -15,30 +18,20 @@ class DashScopeJsonNodeParser(BaseElementNodeParser): Splits a json format document from DashScope Parse into Text Nodes and Index Nodes corresponding to embedded objects (e.g. tables). """ + try_count_limit: int = Field( - default=10, - description="Maximum number of retry attempts." - ) - chunk_size: int = Field( - default=500, - description="Size of each chunk to process." + default=10, description="Maximum number of retry attempts." ) + chunk_size: int = Field(default=500, description="Size of each chunk to process.") overlap_size: int = Field( - default=100, - description="Overlap size between consecutive chunks." + default=100, description="Overlap size between consecutive chunks." ) separator: str = Field( - default=" |,|,|。|?|!|\n|\?|\!", - description="Separator characters for splitting texts." - ) - pip: bool = Field( - default=False, - description="Flag to enable or disable PIP." - ) - input_type: str = Field( - default="idp", - description="parse format type." 
+ default=" |,|,|。|?|!|\n|\\?|\\!", + description="Separator characters for splitting texts.", ) + pip: bool = Field(default=False, description="Flag to enable or disable PIP.") + input_type: str = Field(default="idp", description="parse format type.") @classmethod def class_name(cls) -> str: @@ -46,21 +39,26 @@ def class_name(cls) -> str: def get_nodes_from_node(self, node: TextNode) -> List[BaseNode]: """Get nodes from node.""" - ftype = node.metadata.get('parse_fmt_type', self.input_type) - assert ftype in ['DASHCOPE_DOCMIND', 'idp'], f"Unexpected parse_fmt_type: {node.metadata.get('parse_fmt_type', '')}" + ftype = node.metadata.get("parse_fmt_type", self.input_type) + assert ftype in [ + "DASHCOPE_DOCMIND", + "idp", + ], f"Unexpected parse_fmt_type: {node.metadata.get('parse_fmt_type', '')}" ftype_map = { "DASHCOPE_DOCMIND": "idp", } - my_input = dict() - my_input["text"] = node.get_content() - my_input["file_type"] = ftype_map.get(ftype, ftype) - my_input["chunk_size"] = self.chunk_size - my_input["overlap_size"] = self.overlap_size - my_input["language"] = "cn" - my_input["separator"] = self.separator - my_input["pip"] = self.pip - + + my_input = { + "text": node.get_content(), + "file_type": ftype_map.get(ftype, ftype), + "chunk_size": self.chunk_size, + "overlap_size": self.overlap_size, + "language": "cn", + "separator": self.separator, + "pip": self.pip, + } + try_count = 0 response_text = self.post_service(my_input) while response_text is None and try_count < self.try_count_limit: @@ -69,7 +67,7 @@ def get_nodes_from_node(self, node: TextNode) -> List[BaseNode]: if response_text is None: logging.error("DashScopeJsonNodeParser Failed to get response from service") return [] - + return self.parse_result(response_text, node) def post_service(self, my_input): @@ -80,16 +78,23 @@ def post_service(self, my_input): headers = { "Content-Type": "application/json", "Accept-Encoding": "utf-8", - 'Authorization': 'Bearer ' + DASHSCOPE_API_KEY, + "Authorization": "Bearer " + DASHSCOPE_API_KEY, } - service_url = os.getenv('DASHSCOPE_BASE_URL', "https://dashscope.aliyuncs.com") + "/api/v1/indices/component/configed_transformations/spliter" - response = requests.post(service_url, data=json.dumps(my_input), headers=headers) + service_url = ( + os.getenv("DASHSCOPE_BASE_URL", "https://dashscope.aliyuncs.com") + + "/api/v1/indices/component/configed_transformations/spliter" + ) + response = requests.post( + service_url, data=json.dumps(my_input), headers=headers + ) response_text = response.json() try: - response = requests.post(service_url, data=json.dumps(my_input), headers=headers) + response = requests.post( + service_url, data=json.dumps(my_input), headers=headers + ) response_text = response.json() - if 'chunkService' in response_text: - return response_text['chunkService']['chunkResult'] + if "chunkService" in response_text: + return response_text["chunkService"]["chunkResult"] else: logging.error(f"{response_text}, try again.") return None @@ -100,9 +105,17 @@ def post_service(self, my_input): def parse_result(self, content_json, document): nodes = [] for data in content_json: - text = '\n'.join([data['title'], data.get('hier_title', ''), data['content']]) - nodes.append(TextNode(metadata=document.metadata, text=text, excluded_embed_metadata_keys=document.excluded_embed_metadata_keys, \ - excluded_llm_metadata_keys=document.excluded_llm_metadata_keys)) + text = "\n".join( + [data["title"], data.get("hier_title", ""), data["content"]] + ) + nodes.append( + TextNode( + 
metadata=document.metadata, + text=text, + excluded_embed_metadata_keys=document.excluded_embed_metadata_keys, + excluded_llm_metadata_keys=document.excluded_llm_metadata_keys, + ) + ) return nodes def extract_elements( diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/pyproject.toml b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/pyproject.toml index dbc5a109a5468..71499bdcb119b 100644 --- a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/pyproject.toml +++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/pyproject.toml @@ -1,6 +1,6 @@ [build-system] -requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] [tool.codespell] check-filenames = true @@ -9,13 +9,6 @@ check-hidden = true # work through many typos (--write-changes and --interactive will help) skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" -# [tool.llamahub] -# contains_example = false -# import_path = "" - -# [tool.llamahub.class_authors] -# CLASS = "github-username" - [tool.mypy] disallow_untyped_defs = true # Remove venv skip when integrated with pre-commit @@ -24,13 +17,13 @@ ignore_missing_imports = true python_version = "3.8" [tool.poetry] -name = "llama-index-node-parser-relational-dashscope" -version = "0.1.0" -description = "llama-index node_parser relational dashscope integration" authors = ["Ruixue Ding "] +description = "llama-index node_parser relational dashscope integration" license = "MIT" -readme = "README.md" +name = "llama-index-node-parser-relational-dashscope" packages = [{include = "llama_index/"}] +readme = "README.md" +version = "0.1.0" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" @@ -53,5 +46,5 @@ types-Deprecated = ">=0.1.0" types-PyYAML = "^6.0.12.12" types-protobuf = "^4.24.0.4" types-redis = "4.5.5.0" -types-requests = "2.28.11.8" # TODO: unpin when mypy>0.991 +types-requests = "2.28.11.8" # TODO: unpin when mypy>0.991 types-setuptools = "67.1.0.0" diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/test_node_parser_relational_dashscope.py b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/test_node_parser_relational_dashscope.py index 7b1c9408de4b8..c19021114499f 100644 --- a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/test_node_parser_relational_dashscope.py +++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/test_node_parser_relational_dashscope.py @@ -1,6 +1,7 @@ from llama_index.node_parser.relational.dashscope import DashScopeJsonNodeParser from llama_index.core.node_parser.relational.base_element import BaseElementNodeParser + def test_class(): names_of_base_classes = [b.__name__ for b in DashScopeJsonNodeParser.__mro__] - assert BaseElementNodeParser.__name__ in names_of_base_classes \ No newline at end of file + assert BaseElementNodeParser.__name__ in names_of_base_classes From 77f54211b380e9d6bb23e1a4dd3c64db4efa1d5a Mon Sep 17 00:00:00 2001 From: "ada.drx" Date: Tue, 30 Apr 2024 18:46:16 +0800 Subject: [PATCH 04/13] pre commit --- .../README.md | 43 ++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/README.md 
b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/README.md
index 0eb24597f9436..5cb5daf4a19cd 100644
--- a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/README.md
+++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/README.md
@@ -1 +1,42 @@
-# LlamaIndex Node_Parser-File Integration: Dashscope
+# LlamaIndex Node_Parser-Relational Integration: Dashscope
+
+Transform your documents into nodes with ease using the Dashscope integration for LlamaIndex. This tool allows for precise control over chunk size, overlap size, and more, tailored for the Dashscope reader output format.
+
+## Quick Start
+
+Get up and running with just a few lines of code:
+
+```python
+import json
+import os
+from llama_index.node_parser.relational.dashscope import (
+    DashScopeJsonNodeParser,
+)
+from llama_index.core.ingestion import IngestionPipeline
+from llama_index.core.schema import Document
+
+# Set your Dashscope API key in the environment
+os.environ["DASHSCOPE_API_KEY"] = "your_api_key_here"
+
+documents = [
+    # Prepare your documents obtained from the Dashscope reader
+]
+
+# Initialize the DashScope JsonNodeParser
+node_parser = DashScopeJsonNodeParser(
+    chunk_size=100, overlap_size=0, separator=" |,|,|。|?|!|\n|\\?|\\!"
+)
+
+# Set up the ingestion pipeline with the node parser
+pipeline = IngestionPipeline(transformations=[node_parser])
+
+# Process the documents and print the resulting nodes
+nodes = pipeline.run(documents=documents, show_progress=True)
+for node in nodes:
+    print(node)
+```
+
+## Configuration
+
+- API Key: You need a Dashscope API key to begin. Set it in your environment as shown in the Quick Start section.
+- Document Preparation: Your documents must be in the Dashscope reader output format.
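As an illustrative sketch only: the `documents` placeholder in the Quick Start above can be filled from a saved Dashscope reader output file using the same `Document.from_dict` round-trip this package's own tests rely on. The file name `documents.json` below is a stand-in for wherever the reader output was dumped.

```python
import json

from llama_index.core.schema import Document

# Load a previously saved Dashscope reader output (placeholder file name).
with open("documents.json", encoding="utf-8") as f:
    doc_json = json.load(f)

# Each entry is a serialized Document; rebuild the objects before parsing.
documents = [Document.from_dict(doc) for doc in doc_json]
```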
From 1d4db04518a7bd9dad5630f3c5b7490d2ab8ac58 Mon Sep 17 00:00:00 2001 From: "ada.drx" Date: Sat, 4 May 2024 18:41:37 +0800 Subject: [PATCH 05/13] add dashscope reader and dashscope managed index --- .../.gitignore | 153 +++++++ .../BUILD | 1 + .../Makefile | 17 + .../README.md | 40 ++ .../indices/managed/dashscope/__init__.py | 5 + .../indices/managed/dashscope/api_utils.py | 73 +++ .../indices/managed/dashscope/base.py | 242 ++++++++++ .../indices/managed/dashscope/constants.py | 8 + .../indices/managed/dashscope/retriever.py | 195 ++++++++ .../managed/dashscope/transformations.py | 191 ++++++++ .../indices/managed/dashscope/utils.py | 27 ++ .../pyproject.toml | 52 +++ .../tests/__init__.py | 0 .../tests/test_indices_managed_dashscope.py | 7 + .../node_parser/relational/dashscope/base.py | 10 +- .../llama-index-readers-dashscope/.gitignore | 153 +++++++ .../llama-index-readers-dashscope/BUILD | 1 + .../llama-index-readers-dashscope/Makefile | 17 + .../llama-index-readers-dashscope/README.md | 44 ++ .../llama_index/readers/dashscope/__init__.py | 3 + .../llama_index/readers/dashscope/base.py | 418 ++++++++++++++++++ .../readers/dashscope/domain/base_domains.py | 7 + .../readers/dashscope/domain/lease_domains.py | 295 ++++++++++++ .../llama_index/readers/dashscope/utils.py | 151 +++++++ .../pyproject.toml | 53 +++ .../tests/__init__.py | 0 .../tests/test_readers_dashscope.py | 7 + 27 files changed, 2166 insertions(+), 4 deletions(-) create mode 100644 llama-index-integrations/indices/llama-index-indices-managed-dashscope/.gitignore create mode 100644 llama-index-integrations/indices/llama-index-indices-managed-dashscope/BUILD create mode 100644 llama-index-integrations/indices/llama-index-indices-managed-dashscope/Makefile create mode 100644 llama-index-integrations/indices/llama-index-indices-managed-dashscope/README.md create mode 100644 llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/__init__.py create mode 100644 llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/api_utils.py create mode 100644 llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/base.py create mode 100644 llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/constants.py create mode 100644 llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/retriever.py create mode 100644 llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/transformations.py create mode 100644 llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/utils.py create mode 100644 llama-index-integrations/indices/llama-index-indices-managed-dashscope/pyproject.toml create mode 100644 llama-index-integrations/indices/llama-index-indices-managed-dashscope/tests/__init__.py create mode 100644 llama-index-integrations/indices/llama-index-indices-managed-dashscope/tests/test_indices_managed_dashscope.py create mode 100644 llama-index-integrations/readers/llama-index-readers-dashscope/.gitignore create mode 100644 llama-index-integrations/readers/llama-index-readers-dashscope/BUILD create mode 100644 llama-index-integrations/readers/llama-index-readers-dashscope/Makefile create mode 100644 llama-index-integrations/readers/llama-index-readers-dashscope/README.md create mode 
100644 llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/__init__.py create mode 100644 llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/base.py create mode 100644 llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/domain/base_domains.py create mode 100644 llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/domain/lease_domains.py create mode 100644 llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/utils.py create mode 100644 llama-index-integrations/readers/llama-index-readers-dashscope/pyproject.toml create mode 100644 llama-index-integrations/readers/llama-index-readers-dashscope/tests/__init__.py create mode 100644 llama-index-integrations/readers/llama-index-readers-dashscope/tests/test_readers_dashscope.py diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/.gitignore b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/.gitignore new file mode 100644 index 0000000000000..990c18de22908 --- /dev/null +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/.gitignore @@ -0,0 +1,153 @@ +llama_index/_static +.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +bin/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +etc/ +include/ +lib/ +lib64/ +parts/ +sdist/ +share/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +.ruff_cache + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints +notebooks/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pyvenv.cfg + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Jetbrains +.idea +modules/ +*.swp + +# VsCode +.vscode + +# pipenv +Pipfile +Pipfile.lock + +# pyright +pyrightconfig.json diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/BUILD b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/BUILD new file mode 100644 index 0000000000000..db46e8d6c978c --- /dev/null +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/Makefile b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/Makefile new file mode 100644 index 0000000000000..b9eab05aa3706 --- /dev/null +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/Makefile @@ -0,0 +1,17 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets. + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +format: ## Run code autoformatters (black). + pre-commit install + git ls-files | xargs pre-commit run black --files + +lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy + pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files + +test: ## Run tests via pytest. + pytest tests + +watch-docs: ## Build and watch documentation. 
+ sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/README.md b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/README.md new file mode 100644 index 0000000000000..4da52939fb294 --- /dev/null +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/README.md @@ -0,0 +1,40 @@ +# LlamaIndex Indices Integration: Managed-Dashscope + +## Usage + +```python +import os +from llama_index.core.schema import QueryBundle +from llama_index.readers.dashscope.base import DashScopeParse +from llama_index.readers.dashscope.utils import ResultType + +os.environ["DASHSCOPE_API_KEY"] = "your_api_key_here" +os.environ["DASHSCOPE_WORKSPACE_ID"] = "your_workspace_here" + +# init retriever from scratch +from llama_index.indices.managed.dashscope.retriever import ( + DashScopeCloudRetriever, +) + + +file_list = [ + # your files (accept doc, docx, pdf) +] + +parse = DashScopeParse(result_type=ResultType.DASHCOPE_DOCMIND) +documents = parse.load_data(file_path=file_list) + +# create a new index +index = DashScopeCloudIndex.from_documents( + documents, + "my_first_index", + verbose=True, +) + +# # connect to an existing index +# index = DashScopeCloudIndex("my_first_index") + +retriever = index.as_retriever() +nodes = retriever.retrieve("test query") +print(nodes) +``` diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/__init__.py b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/__init__.py new file mode 100644 index 0000000000000..7ade8ae4c86b7 --- /dev/null +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/__init__.py @@ -0,0 +1,5 @@ +from llama_index.indices.managed.dashscope.base import DashScopeCloudIndex +from llama_index.indices.managed.dashscope.retriever import DashScopeCloudRetriever + + +__all__ = ["DashScopeCloudIndex", "DashScopeCloudRetriever"] diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/api_utils.py b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/api_utils.py new file mode 100644 index 0000000000000..699e74a9df65e --- /dev/null +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/api_utils.py @@ -0,0 +1,73 @@ +import json +from typing import List, Optional + +from llama_index.indices.managed.dashscope.transformations import ( + ConfiguredTransformation, +) +from llama_index.core.schema import BaseNode, TransformComponent + + +def default_transformations() -> List[TransformComponent]: + """Default transformations.""" + from llama_index.node_parser.relational.dashscope import DashScopeJsonNodeParser + from llama_index.embeddings.dashscope import ( + DashScopeEmbedding, + DashScopeTextEmbeddingModels, + DashScopeTextEmbeddingType, + ) + + node_parser = DashScopeJsonNodeParser() + document_embedder = DashScopeEmbedding( + model_name=DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2, + text_type=DashScopeTextEmbeddingType.TEXT_TYPE_DOCUMENT, + ) + return [ + node_parser, + document_embedder, + ] + + +def get_pipeline_create( + name: str, + transformations: Optional[List[TransformComponent]] = None, + documents: Optional[List[BaseNode]] = None, +) -> str: 
+ configured_transformations: List[ConfiguredTransformation] = [] + for transformation in transformations: + try: + configured_transformations.append( + ConfiguredTransformation.from_component(transformation) + ) + except ValueError: + raise ValueError(f"Unsupported transformation: {type(transformation)}") + + configured_transformation_items: List[Dict] = [] + for item in configured_transformations: + configured_transformation_items.append( + { + "component": json.loads(item.component.json()), + "configurable_transformation_type": item.configurable_transformation_type.name, + } + ) + data_sources = [ + { + "source_type": "DATA_CENTER_FILE", + "component": { + "doc_ids": [doc.node_id for doc in documents], + }, + } + ] + return { + "name": name, + "pipeline_type": "MANAGED_SHARED", + "configured_transformations": configured_transformation_items, + "data_sources": data_sources, + "data_sinks": [ + { + "sink_type": "ES", + } + ], + # for debug + "data_type": "structured", + "config_model": "recommend", + } diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/base.py b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/base.py new file mode 100644 index 0000000000000..e00b9f6adfaa0 --- /dev/null +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/base.py @@ -0,0 +1,242 @@ +"""Managed index. + +A managed Index - where the index is accessible via some API that +interfaces a managed service. + +""" +import os +import time +from typing import Any, List, Optional, Sequence, Type +from enum import Enum +import time +import requests +import json + +from llama_index.core.base.base_query_engine import BaseQueryEngine +from llama_index.core.base.base_retriever import BaseRetriever +from llama_index.core.callbacks.base import CallbackManager +from llama_index.core.indices.managed.base import BaseManagedIndex +from llama_index.core.schema import BaseNode, Document, TransformComponent +from llama_index.core.settings import Settings + +from llama_index.indices.managed.dashscope.api_utils import ( + get_pipeline_create, + default_transformations, +) + +from llama_index.indices.managed.dashscope.constants import ( + DASHSCOPE_DEFAULT_BASE_URL, + UPSERT_PIPELINE_ENDPOINT, + START_PIPELINE_ENDPOINT, + CHECK_INGESTION_ENDPOINT, +) + + +class Status(Enum): + ERROR = "ERROR" + SUCCESS = "Success" + PENDING = "PENDING" + RUNNING = "RUNNING" + CANCELED = "CANCELED" + FAILED = "FAILED" + FINISHED = "FINISHED" + + +class DashScopeCloudIndex(BaseManagedIndex): + """DashScope Cloud Platform Index.""" + + def __init__( + self, + name: str, + nodes: Optional[List[BaseNode]] = None, + transformations: Optional[List[TransformComponent]] = None, + timeout: int = 60, + workspace_id: Optional[str] = None, + api_key: Optional[str] = None, + base_url: Optional[str] = DASHSCOPE_DEFAULT_BASE_URL, + show_progress: bool = False, + callback_manager: Optional[CallbackManager] = None, + **kwargs: Any, + ) -> None: + """Initialize the Platform Index.""" + self.name = name + self.transformations = transformations or [] + + if nodes is not None: + raise ValueError( + "DashScopeCloudIndex does not support nodes on initialization" + ) + + self.workspace_id = workspace_id or os.environ.get("DASHSCOPE_WORKSPACE_ID") + self._api_key = api_key or os.environ.get("DASHSCOPE_API_KEY") + self._base_url = os.environ.get("DASHSCOPE_BASE_URL", None) or base_url + 
self._timeout = timeout + self._show_progress = show_progress + self._service_context = None + self._callback_manager = callback_manager or Settings.callback_manager + + @classmethod + def from_documents( # type: ignore + cls: Type["DashScopeCloudIndex"], + documents: List[Document], + name: str, + transformations: Optional[List[TransformComponent]] = None, + workspace_id: Optional[str] = None, + api_key: Optional[str] = None, + base_url: Optional[str] = None, + timeout: int = 60, + verbose: bool = True, + **kwargs: Any, + ) -> "DashScopeCloudIndex": + """Build a DashScope index from a sequence of documents.""" + pipeline_create = get_pipeline_create( + name, transformations or default_transformations(), documents + ) + # for debug + json.dump( + pipeline_create, open("pipeline_create.json", "w"), ensure_ascii=False + ) + + workspace_id = workspace_id or os.environ.get("DASHSCOPE_WORKSPACE_ID") + api_key = api_key or os.environ.get("DASHSCOPE_API_KEY") + base_url = ( + base_url + or os.environ.get("DASHSCOPE_BASE_URL", None) + or DASHSCOPE_DEFAULT_BASE_URL + ) + headers = { + "Content-Type": "application/json", + "Accept-Encoding": "utf-8", + "X-DashScope-WorkSpace": workspace_id, + "Authorization": "Bearer " + api_key, + "X-DashScope-OpenAPISource": "CloudSDK", + # for debug + # 'X-DashScope-ApiKeyId': 'test_api_key_id_123456', + # 'X-DashScope-Uid': "test_uid_123456", + # "X-DashScope-SubUid": "test_sub_uid_123456" + } + print(base_url + UPSERT_PIPELINE_ENDPOINT) + print(json.dumps(pipeline_create)) + response = requests.put( + base_url + UPSERT_PIPELINE_ENDPOINT, + data=json.dumps(pipeline_create), + headers=headers, + ) + response_text = response.json() + pipeline_id = response_text.get("id", None) + + if response_text.get("code", "") != Status.SUCCESS.value or pipeline_id is None: + raise ValueError( + f"Failed to create index: {response_text.get('message', '')}\n{response_text}" + ) + if verbose: + print(f"Starting creating index {name}, pipeline_id: {pipeline_id}") + + print(base_url + START_PIPELINE_ENDPOINT.format(pipeline_id=pipeline_id)) + + response = requests.post( + base_url + START_PIPELINE_ENDPOINT.format(pipeline_id=pipeline_id), + headers=headers, + ) + response_text = response.json() + ingestion_id = response_text.get("ingestionId", None) + + if ( + response_text.get("code", "") != Status.SUCCESS.value + or ingestion_id is None + ): + raise ValueError( + f"Failed to start ingestion: {response_text.get('message', '')}\n{response_text}" + ) + if verbose: + print(f"Starting ingestion for index {name}, ingestion_id: {ingestion_id}") + + ingestion_status = "" + failed_docs = [] + + while True: + print( + base_url + + CHECK_INGESTION_ENDPOINT.format( + pipeline_id=pipeline_id, ingestion_id=ingestion_id + ) + ) + response = requests.get( + base_url + + CHECK_INGESTION_ENDPOINT.format( + pipeline_id=pipeline_id, ingestion_id=ingestion_id + ), + headers=headers, + ) + try: + response_text = response.json() + except Exception as e: + print(f"Failed to get response: \n{response.text}\nretrying...") + continue + + if response_text.get("code", "") != Status.SUCCESS.value: + print( + f"Failed to get ingestion status: {response_text.get('message', '')}\n{response_text}\nretrying..." 
+ ) + continue + ingestion_status = response_text.get("ingestion_status", "") + failed_docs = response_text.get("failed_docs", "") + if verbose: + print(f"Current status: {ingestion_status}") + if ingestion_status in ["COMPLETED", "FAILED"]: + break + time.sleep(5) + + if verbose: + print(f"ingestion_status {ingestion_status}") + print(f"failed_docs: {failed_docs}") + + if ingestion_status == "FAILED": + print("Index {name} created failed!") + return None + + if verbose: + print(f"Index {name} created successfully!") + + return cls( + name, + transformations=transformations, + workspace_id=workspace_id, + api_key=api_key, + base_url=base_url, + timeout=timeout, + **kwargs, + ) + + def as_retriever(self, **kwargs: Any) -> BaseRetriever: + """Return a Retriever for this managed index.""" + from llama_index.indices.managed.dashscope.retriever import ( + DashScopeCloudRetriever, + ) + + return DashScopeCloudRetriever( + self.name, + **kwargs, + ) + + def as_query_engine(self, **kwargs: Any) -> BaseQueryEngine: + from llama_index.core.query_engine.retriever_query_engine import ( + RetrieverQueryEngine, + ) + + kwargs["retriever"] = self.as_retriever(**kwargs) + return RetrieverQueryEngine.from_args(**kwargs) + + def _insert(self, nodes: Sequence[BaseNode], **insert_kwargs: Any) -> None: + """Insert a set of documents (each a node).""" + raise NotImplementedError("_insert not implemented.") + + def delete_ref_doc( + self, ref_doc_id: str, delete_from_docstore: bool = False, **delete_kwargs: Any + ) -> None: + """Delete a document and it's nodes by using ref_doc_id.""" + raise NotImplementedError("delete_ref_doc not implemented.") + + def update_ref_doc(self, document: Document, **update_kwargs: Any) -> None: + """Update a document and it's corresponding nodes.""" + raise NotImplementedError("update_ref_doc not implemented.") diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/constants.py b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/constants.py new file mode 100644 index 0000000000000..a2704b3aef5e1 --- /dev/null +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/constants.py @@ -0,0 +1,8 @@ +DASHSCOPE_DEFAULT_BASE_URL = "https://dashscope.aliyuncs.com" +UPSERT_PIPELINE_ENDPOINT = "/api/v1/indices/pipeline" +START_PIPELINE_ENDPOINT = "/api/v1/indices/pipeline/{pipeline_id}/managed_ingest" +CHECK_INGESTION_ENDPOINT = ( + "/api/v1/indices/pipeline/{pipeline_id}/managed_ingest/{ingestion_id}/status" +) +RETRIEVE_PIPELINE_ENDPOINT = "/api/v1/indices/pipeline/{pipeline_id}/retrieve" +PIPELINE_SIMPLE_ENDPOINT = "/api/v1/indices/pipeline_simple" diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/retriever.py b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/retriever.py new file mode 100644 index 0000000000000..3a51902103d3d --- /dev/null +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/retriever.py @@ -0,0 +1,195 @@ +import logging +from typing import List, Dict, Optional +import os + +from llama_index.core.base.base_retriever import BaseRetriever +from llama_index.core.callbacks.base import CallbackManager +from llama_index.core.callbacks.schema import CBEventType, EventPayload +from llama_index.core.schema 
import NodeWithScore, QueryBundle, TextNode, QueryType +from llama_index.core.instrumentation.events.retrieval import ( + RetrievalEndEvent, + RetrievalStartEvent, +) +import llama_index.core.instrumentation as instrument + +from llama_index.indices.managed.dashscope import utils +from llama_index.indices.managed.dashscope.constants import ( + DASHSCOPE_DEFAULT_BASE_URL, + RETRIEVE_PIPELINE_ENDPOINT, + PIPELINE_SIMPLE_ENDPOINT, +) + +dispatcher = instrument.get_dispatcher(__name__) + +logger = logging.getLogger(__name__) + + +class DashScopeCloudRetriever(BaseRetriever): + """Initialize the DashScopeCloud Retriever.""" + + def __init__( + self, + index_name: str, + api_key: Optional[str] = None, + workspace_id: Optional[str] = None, + dense_similarity_top_k: Optional[int] = 100, + sparse_similarity_top_k: Optional[int] = 100, + enable_rewrite: Optional[bool] = False, + rewrite_model_name: Optional[str] = "conv-rewrite-qwen-1.8b", + enable_reranking: Optional[bool] = True, + rerank_model_name: Optional[str] = "gte-rerank-hybrid", + rerank_min_score: Optional[float] = 0.0, + rerank_top_n: Optional[int] = 5, + callback_manager: Optional[CallbackManager] = None, + **kwargs, + ) -> None: + self.index_name = index_name + self.workspace_id = workspace_id or os.environ.get("DASHSCOPE_WORKSPACE_ID") + self._api_key = api_key or os.environ.get("DASHSCOPE_API_KEY") + self.dense_similarity_top_k = dense_similarity_top_k + self.sparse_similarity_top_k = sparse_similarity_top_k + self.enable_rewrite = enable_rewrite + self.rewrite_model_name = rewrite_model_name + self.enable_reranking = enable_reranking + self.rerank_model_name = rerank_model_name + self.rerank_min_score = rerank_min_score + self.rerank_top_n = rerank_top_n + + self.headers = { + "Content-Type": "application/json", + "Accept-Encoding": "utf-8", + "X-DashScope-WorkSpace": self.workspace_id, + "Authorization": self._api_key, + "X-DashScope-OpenAPISource": "CloudSDK", + } + + base_url = ( + os.environ.get("DASHSCOPE_BASE_URL", None) or DASHSCOPE_DEFAULT_BASE_URL + ) + self.pipeline_id = utils.get_pipeline_id( + base_url + PIPELINE_SIMPLE_ENDPOINT, + self.headers, + {"pipeline_name": self.index_name}, + ) + + self.base_url = base_url + RETRIEVE_PIPELINE_ENDPOINT.format( + pipeline_id=self.pipeline_id + ) + super().__init__(callback_manager) + + @dispatcher.span + def retrieve( + self, str_or_query_bundle: QueryType, query_history: List[Dict] = None + ) -> List[NodeWithScore]: + """Retrieve nodes given query. + + Args: + str_or_query_bundle (QueryType): Either a query string or + a QueryBundle object. 
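+            query_history (Optional[List[Dict]]): Prior dialogue turns used
+                for multi-turn query rewriting; forwarded to the DashScope
+                retrieval service alongside the query.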
+ + """ + dispatch_event = dispatcher.get_dispatch_event() + + self._check_callback_manager() + dispatch_event( + RetrievalStartEvent( + str_or_query_bundle=str_or_query_bundle, + ) + ) + if isinstance(str_or_query_bundle, str): + query_bundle = QueryBundle(str_or_query_bundle) + else: + query_bundle = str_or_query_bundle + with self.callback_manager.as_trace("query"): + with self.callback_manager.event( + CBEventType.RETRIEVE, + payload={EventPayload.QUERY_STR: query_bundle.query_str}, + ) as retrieve_event: + nodes = self._retrieve(query_bundle, query_history=query_history) + nodes = self._handle_recursive_retrieval(query_bundle, nodes) + retrieve_event.on_end( + payload={EventPayload.NODES: nodes}, + ) + dispatch_event( + RetrievalEndEvent( + str_or_query_bundle=str_or_query_bundle, + nodes=nodes, + ) + ) + return nodes + + async def _aretrieve( + self, query_bundle: QueryBundle, query_history: List[Dict] = None + ) -> List[NodeWithScore]: + return self._retrieve(query_bundle, query_history=query_history) + + @dispatcher.span + async def aretrieve( + self, str_or_query_bundle: QueryType, query_history: List[Dict] = None + ) -> List[NodeWithScore]: + self._check_callback_manager() + dispatch_event = dispatcher.get_dispatch_event() + + dispatch_event( + RetrievalStartEvent( + str_or_query_bundle=str_or_query_bundle, + ) + ) + if isinstance(str_or_query_bundle, str): + query_bundle = QueryBundle(str_or_query_bundle) + else: + query_bundle = str_or_query_bundle + with self.callback_manager.as_trace("query"): + with self.callback_manager.event( + CBEventType.RETRIEVE, + payload={EventPayload.QUERY_STR: query_bundle.query_str}, + ) as retrieve_event: + nodes = await self._aretrieve( + query_bundle=query_bundle, query_history=query_history + ) + nodes = await self._ahandle_recursive_retrieval( + query_bundle=query_bundle, nodes=nodes + ) + retrieve_event.on_end( + payload={EventPayload.NODES: nodes}, + ) + dispatch_event( + RetrievalEndEvent( + str_or_query_bundle=str_or_query_bundle, + nodes=nodes, + ) + ) + return nodes + + def _retrieve(self, query_bundle: QueryBundle, **kwargs) -> List[NodeWithScore]: + # init params + params = { + "query": query_bundle.query_str, + "dense_similarity_top_k": self.dense_similarity_top_k, + "sparse_similarity_top_k": self.sparse_similarity_top_k, + "enable_rewrite": self.enable_rewrite, + "rewrite": [ + { + "model_name": self.rewrite_model_name, + "class_name": "DashScopeTextRewrite", + } + ], + "enable_reranking": self.enable_reranking, + "rerank": [ + { + "model_name": self.rerank_model_name, + } + ], + "rerank_min_score": self.rerank_min_score, + "rerank_top_n": self.rerank_top_n, + } + # extract query_history for multi-turn query rewrite + if "query_history" in kwargs: + params["query_hisory"] = kwargs.get("query_history") + + response_data = utils.post(self.base_url, headers=self.headers, params=params) + nodes = [] + for ele in response_data["nodes"]: + text_node = TextNode.parse_obj(ele["node"]) + nodes.append(NodeWithScore(node=text_node, score=ele["score"])) + return nodes diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/transformations.py b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/transformations.py new file mode 100644 index 0000000000000..44329521ebfa3 --- /dev/null +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/transformations.py @@ -0,0 
+1,191 @@ +""" +This module maintains the list of transformations that are supported by the system. +""" + +from enum import Enum +from typing import Generic, Sequence, Type, TypeVar + +from llama_index.core.bridge.pydantic import ( + BaseModel, + Field, + GenericModel, + ValidationError, +) + +from llama_index.core.schema import BaseComponent, BaseNode, Document + + +# Transform Input/Output Types +class TransformationIOType(BaseModel): + name: str = Field(description="Name of the input/output type") + description: str = Field(description="Description of the input/output type") + python_type: str = Field(description="Python type of the input/output type") + + +class TransformationIOTypes(Enum): + DOCUMENTS = TransformationIOType( + name="Documents", + description="A sequence of Documents", + python_type=str(Sequence[Document]), + ) + NODES = TransformationIOType( + name="Nodes", + description="A sequence of Nodes from a sequence of Documents", + python_type=str(Sequence[BaseNode]), + ) + + +class TransformationCategory(BaseModel): + """A description for a category of transformation within a pipeline.""" + + name: str = Field(description="Unique name of the type of transformation") + description: str = Field(description="Description for the type of transformation") + input_type: TransformationIOType = Field( + description="Input type for the transformation type" + ) + output_type: TransformationIOType = Field( + description="Output type for the transformation type" + ) + + +class TransformationCategories(Enum): + """Supported transformation categories.""" + + NODE_PARSER = TransformationCategory( + name="NodeParser", + description="Applies a function to parse nodes from documents", + input_type=TransformationIOTypes.DOCUMENTS.value, + output_type=TransformationIOTypes.NODES.value, + ) + EMBEDDING = TransformationCategory( + name="Embedding", + description="Applies a function to embed nodes", + input_type=TransformationIOTypes.NODES.value, + output_type=TransformationIOTypes.NODES.value, + ) + + +class ConfigurableTransformation(BaseModel): + """ + A class containing metadata for a type of transformation that can be in a pipeline. + """ + + name: str = Field( + description="Unique and human-readable name for the type of transformation" + ) + transformation_category: TransformationCategories = Field( + description="Type of transformation" + ) + component_type: Type[BaseComponent] = Field( + description="Type of component that implements the transformation" + ) + + +def build_configurable_transformation_enum(): + """ + Build an enum of configurable transformations. + But conditional on if the corresponding component is available. + """ + + class ConfigurableComponent(Enum): + @classmethod + def from_component( + cls, component: BaseComponent + ) -> "ConfigurableTransformations": + component_class = type(component) + for component_type in cls: + if component_type.value.component_type == component_class: + return component_type + raise ValueError( + f"Component {component} is not a supported transformation component." 
+ ) + + def build_configured_transformation( + self, component: BaseComponent + ) -> "ConfiguredTransformation": + component_type = self.value.component_type + if not isinstance(component, component_type): + raise ValueError( + f"The enum value {self} is not compatible with component of " + f"type {type(component)}" + ) + return ConfiguredTransformation[component_type]( # type: ignore + component=component, name=self.value.name + ) + + enum_members = [] + + # Node parsers + try: + from llama_index.node_parser.relational.dashscope import DashScopeJsonNodeParser + + enum_members.append( + ( + "DASHSCOPE_JSON_NODE_PARSER", + ConfigurableTransformation( + name="DashScope Json Node Parser", + transformation_category=TransformationCategories.NODE_PARSER, + component_type=DashScopeJsonNodeParser, + ), + ) + ) + except (ImportError, ValidationError): + pass + + # Embeddings + try: + from llama_index.embeddings.dashscope import ( + DashScopeEmbedding, + ) # pants: no-infer-dep + + enum_members.append( + ( + "DASHSCOPE_EMBEDDING", + ConfigurableTransformation( + name="DashScope Embedding", + transformation_category=TransformationCategories.EMBEDDING, + component_type=DashScopeEmbedding, + ), + ) + ) + except (ImportError, ValidationError): + pass + + return ConfigurableComponent("ConfigurableTransformations", enum_members) + + +ConfigurableTransformations = build_configurable_transformation_enum() + +T = TypeVar("T", bound=BaseComponent) + + +class ConfiguredTransformation(GenericModel, Generic[T]): + """ + A class containing metadata & implementation for a transformation in a pipeline. + """ + + name: str + component: T = Field(description="Component that implements the transformation") + + @classmethod + def from_component(cls, component: BaseComponent) -> "ConfiguredTransformation": + """ + Build a ConfiguredTransformation from a component. + + This should be the preferred way to build a ConfiguredTransformation + as it will ensure that the component is supported as indicated by having a + corresponding enum value in ConfigurableTransformations. + + This has the added bonus that you don't need to specify the generic type + like ConfiguredTransformation[SentenceSplitter]. The return value of + this ConfiguredTransformation.from_component(simple_node_parser) will be + ConfiguredTransformation[SentenceSplitter] if simple_node_parser is + a SentenceSplitter. 
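+
+        Example (illustrative sketch; assumes the DashScope node parser
+        integration is installed so its enum member is registered):
+
+            node_parser = DashScopeJsonNodeParser()
+            configured = ConfiguredTransformation.from_component(node_parser)
+            # configured.configurable_transformation_type is then the
+            # DASHSCOPE_JSON_NODE_PARSER member of ConfigurableTransformations.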
+ """ + return ConfigurableTransformations.from_component( + component + ).build_configured_transformation(component) + + @property + def configurable_transformation_type(self) -> ConfigurableTransformations: + return ConfigurableTransformations.from_component(self.component) diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/utils.py b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/utils.py new file mode 100644 index 0000000000000..4b595e8f08231 --- /dev/null +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/utils.py @@ -0,0 +1,27 @@ +import requests + + +def post(base_url, headers, params): + response = requests.post(base_url, headers=headers, json=params) + if response.status_code != 200: + raise RuntimeError(response.text) + response_dict = response.json() + if response_dict["code"] != "Success": + raise RuntimeError(response_dict) + return response_dict + + +def get(base_url, headers, params): + response = requests.get(base_url, headers=headers, params=params) + if response.status_code != 200: + raise RuntimeError(response.text) + + response_dict = response.json() + if response_dict["code"] != "Success": + raise RuntimeError(response_dict) + return response_dict + + +def get_pipeline_id(base_url, headers, params): + response_dict = get(base_url, headers, params) + return response_dict.get("id", "") diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/pyproject.toml b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/pyproject.toml new file mode 100644 index 0000000000000..34ea61275e7a6 --- /dev/null +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/pyproject.toml @@ -0,0 +1,52 @@ +[build-system] +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] + +[tool.codespell] +check-filenames = true +check-hidden = true +# Feel free to un-skip examples, and experimental, you will just need to +# work through many typos (--write-changes and --interactive will help) +skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" + +[tool.mypy] +disallow_untyped_defs = true +# Remove venv skip when integrated with pre-commit +exclude = ["_static", "build", "examples", "notebooks", "venv"] +ignore_missing_imports = true +python_version = "3.8" + +[tool.poetry] +authors = ["Ruixue Ding "] +description = "llama-index indices managed-dashscope integration" +license = "MIT" +name = "llama-index-indices-managed-dashscope" +packages = [{include = "llama_index/"}] +readme = "README.md" +version = "0.1.0" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +llama-index-core = "^0.10.0" +llama-index-embeddings-dashscope = ">=0.1.3" +llama-index-readers-dashscope = ">=0.1.0" +llama-index-node-parser-relational-dashscope = ">=0.1.0" + +[tool.poetry.group.dev.dependencies] +black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"} +codespell = {extras = ["toml"], version = ">=v2.2.6"} +ipython = "8.10.0" +jupyter = "^1.0.0" +mypy = "0.991" +pre-commit = "3.2.0" +pylint = "2.15.10" +pytest = "7.2.1" +pytest-mock = "3.11.1" +ruff = "0.0.292" +tree-sitter-languages = "^1.8.0" +types-Deprecated = ">=0.1.0" +types-PyYAML = "^6.0.12.12" +types-protobuf = "^4.24.0.4" +types-redis = "4.5.5.0" +types-requests = "2.28.11.8" # TODO: unpin when mypy>0.991 +types-setuptools = "67.1.0.0" diff --git 
a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/tests/__init__.py b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/tests/test_indices_managed_dashscope.py b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/tests/test_indices_managed_dashscope.py new file mode 100644 index 0000000000000..2e3900ba2c064 --- /dev/null +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/tests/test_indices_managed_dashscope.py @@ -0,0 +1,7 @@ +from llama_index.indices.managed.dashscope import DashScopeCloudIndex +from llama_index.core.indices.managed.base import BaseManagedIndex + + +def test_class(): + names_of_base_classes = [b.__name__ for b in DashScopeCloudIndex.__mro__] + assert BaseManagedIndex.__name__ in names_of_base_classes diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py index 8df6c1edc142b..6175fa811c3fd 100644 --- a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py +++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py @@ -30,8 +30,11 @@ class DashScopeJsonNodeParser(BaseElementNodeParser): default=" |,|,|。|?|!|\n|\\?|\\!", description="Separator characters for splitting texts.", ) - pip: bool = Field(default=False, description="Flag to enable or disable PIP.") input_type: str = Field(default="idp", description="parse format type.") + language: str = Field( + default="cn", + description="language of tokenizor, accept cn, en, any. 
Notice that mode will be slow.", + ) @classmethod def class_name(cls) -> str: @@ -41,12 +44,12 @@ def get_nodes_from_node(self, node: TextNode) -> List[BaseNode]: """Get nodes from node.""" ftype = node.metadata.get("parse_fmt_type", self.input_type) assert ftype in [ - "DASHCOPE_DOCMIND", + "DASHSCOPE_DOCMIND", "idp", ], f"Unexpected parse_fmt_type: {node.metadata.get('parse_fmt_type', '')}" ftype_map = { - "DASHCOPE_DOCMIND": "idp", + "DASHSCOPE_DOCMIND": "idp", } my_input = { @@ -56,7 +59,6 @@ def get_nodes_from_node(self, node: TextNode) -> List[BaseNode]: "overlap_size": self.overlap_size, "language": "cn", "separator": self.separator, - "pip": self.pip, } try_count = 0 diff --git a/llama-index-integrations/readers/llama-index-readers-dashscope/.gitignore b/llama-index-integrations/readers/llama-index-readers-dashscope/.gitignore new file mode 100644 index 0000000000000..990c18de22908 --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-dashscope/.gitignore @@ -0,0 +1,153 @@ +llama_index/_static +.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +bin/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +etc/ +include/ +lib/ +lib64/ +parts/ +sdist/ +share/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +.ruff_cache + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints +notebooks/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+pyvenv.cfg
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# Jetbrains
+.idea
+modules/
+*.swp
+
+# VsCode
+.vscode
+
+# pipenv
+Pipfile
+Pipfile.lock
+
+# pyright
+pyrightconfig.json
diff --git a/llama-index-integrations/readers/llama-index-readers-dashscope/BUILD b/llama-index-integrations/readers/llama-index-readers-dashscope/BUILD
new file mode 100644
index 0000000000000..db46e8d6c978c
--- /dev/null
+++ b/llama-index-integrations/readers/llama-index-readers-dashscope/BUILD
@@ -0,0 +1 @@
+python_sources()
diff --git a/llama-index-integrations/readers/llama-index-readers-dashscope/Makefile b/llama-index-integrations/readers/llama-index-readers-dashscope/Makefile
new file mode 100644
index 0000000000000..b9eab05aa3706
--- /dev/null
+++ b/llama-index-integrations/readers/llama-index-readers-dashscope/Makefile
@@ -0,0 +1,17 @@
+GIT_ROOT ?= $(shell git rev-parse --show-toplevel)
+
+help:	## Show all Makefile targets.
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'
+
+format:	## Run code autoformatters (black).
+	pre-commit install
+	git ls-files | xargs pre-commit run black --files
+
+lint:	## Run linters: pre-commit (black, ruff, codespell) and mypy
+	pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files
+
+test:	## Run tests via pytest.
+	pytest tests
+
+watch-docs:	## Build and watch documentation.
+	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/
diff --git a/llama-index-integrations/readers/llama-index-readers-dashscope/README.md b/llama-index-integrations/readers/llama-index-readers-dashscope/README.md
new file mode 100644
index 0000000000000..8024144b92cb7
--- /dev/null
+++ b/llama-index-integrations/readers/llama-index-readers-dashscope/README.md
@@ -0,0 +1,44 @@
+# LlamaIndex Readers Integration: DashScope
+
+## Usage
+
+```python
+from llama_index.readers.dashscope.base import DashScopeParse
+from llama_index.readers.dashscope.utils import ResultType
+
+file_list = [
+    # your files (accepted types: doc, docx, pdf)
+]
+
+parse = DashScopeParse(result_type=ResultType.DASHSCOPE_DOCMIND)
+documents = parse.load_data(file_path=file_list)
+```
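A sketch of how the parsed documents are typically handed to the DashScope JSON node parser (assuming `llama-index-node-parser-relational-dashscope` is installed; the file name is a placeholder and `parse` is the instance from the snippet above):

```python
from llama_index.node_parser.relational.dashscope import DashScopeJsonNodeParser

documents = parse.load_data(file_path=["./example.pdf"])  # placeholder path

# load_data tags each Document with parse_fmt_type=DASHSCOPE_DOCMIND metadata,
# which is the format the DashScope JSON node parser expects.
node_parser = DashScopeJsonNodeParser()
nodes = node_parser.get_nodes_from_documents(documents)
```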
+
+## Reader Settings:
+
+A full list of reader settings/kwargs is below:
+
+- api_key: Optional[str] -- Your DashScope API key, which can be passed in through environment variables or parameters.
+  The parameter settings will override the results from the environment variables.
+- workspace_id: Optional[str] -- Your DashScope workspace_id, which can be passed in through environment variables or
+  parameters. The parameter settings will override the results from the environment variables.
+- base_url: Optional[str] -- The base URL for the DashScope API. The default value is "https://dashscope.aliyuncs.com".
+  The parameter settings will override the results from the environment variables.
+- result_type: Optional[ResultType] -- The result type for the parser. The default value is ResultType.DASHSCOPE_DOCMIND.
+- num_workers: Optional[int] -- The number of workers to use when sending API requests for parsing. The default value is 4;
+  it must be greater than 0 and less than 10.
+- check_interval: Optional[int] -- The interval in seconds to check if the parsing is done. The default value is 5.
+- max_timeout: Optional[int] -- The maximum timeout in seconds to wait for the parsing to finish. The default value is 3600.
+- verbose: Optional[bool] -- Whether to print the progress of the parsing. The default value is True.
+- show_progress: Optional[bool] -- Show progress when parsing multiple files. The default value is True.
+- ignore_errors: Optional[bool] -- Whether or not to ignore and skip errors raised during parsing. The default value is
+  True.
+
+## Reader Input:
+
+- file_path: Union[str, List[str]] -- The file path or list of file paths to parse.
+
+## Reader Output:
+
+- List[llama_index.core.schema.Document] -- The list of documents parsed from the file.
+  - text: str -- The text of the document from DASHSCOPE_DOCMIND.
diff --git a/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/__init__.py b/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/__init__.py
new file mode 100644
index 0000000000000..141dfbaa5c9ee
--- /dev/null
+++ b/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/__init__.py
@@ -0,0 +1,3 @@
+from llama_index.readers.dashscope.base import DashScopeParse, ResultType
+
+__all__ = ["DashScopeParse", "ResultType"]
diff --git a/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/base.py b/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/base.py
new file mode 100644
index 0000000000000..a4c4746486f51
--- /dev/null
+++ b/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/base.py
@@ -0,0 +1,418 @@
+import os
+import asyncio
+import httpx
+import time
+from pathlib import Path
+from tenacity import (
+    retry,
+    wait_exponential,
+    before_sleep_log,
+    after_log,
+    retry_if_exception_type,
+    stop_after_delay,
+)
+from typing import List, Optional, Union
+
+from llama_index.core.async_utils import run_jobs
+from llama_index.core.bridge.pydantic import Field, validator
+from llama_index.core.readers.base import BasePydanticReader
+from llama_index.core.schema import Document
+from llama_index.readers.dashscope.utils import *
+
+from llama_index.readers.dashscope.domain.lease_domains import (
+    DownloadFileLeaseResult,
+    UploadFileLeaseResult,
+    AddFileResult,
+    QueryFileResult,
+    DatahubDataStatusEnum,
+)
+
+DASHSCOPE_DEFAULT_BASE_URL = "https://dashscope.aliyuncs.com"
+DASHSCOPE_DEFAULT_DC_CATEGORY = os.getenv(
+    "DASHSCOPE_DEFAULT_DC_CATEGORY", default="default"
+)
+
+logger = get_stream_logger(name=__name__)
+
+
+class DashScopeParse(BasePydanticReader):
+    """A smart-parser for files."""
+
+    api_key: str = Field(default="", description="The API key for the DashScope API.")
+    workspace_id: str = Field(
+        default="",
+        description="The workspace for the DashScope API. If not set, "
+        "it will use the default workspace.",
+    )
+    base_url: str = Field(
+        default=DASHSCOPE_DEFAULT_BASE_URL,
+        description="The base URL of the DashScope Parsing API.",
+    )
+    result_type: ResultType = Field(
+        default=ResultType.DASHSCOPE_DOCMIND,
+        description="The result type for the parser.",
+    )
+    num_workers: int = Field(
+        default=4,
+        gt=0,
+        lt=10,
+        description="The number of workers to use when sending API requests for parsing.",
+    )
+    check_interval: int = Field(
+
default=5, + description="The interval in seconds to check if the parsing is done.", + ) + max_timeout: int = Field( + default=3600, + description="The maximum timeout in seconds to wait for the parsing to finish.", + ) + verbose: bool = Field( + default=True, description="Whether to print the progress of the parsing." + ) + show_progress: bool = Field( + default=True, description="Show progress when parsing multiple files." + ) + ignore_errors: bool = Field( + default=True, + description="Whether or not to ignore and skip errors raised during parsing.", + ) + parse_result: bool = Field( + default=True, + description="Whether or not to return parsed text content.", + ) + + @validator("api_key", pre=True, always=True) + def validate_api_key(cls, v: str) -> str: + """Validate the API key.""" + if not v: + import os + + api_key = os.getenv("DASHSCOPE_API_KEY", None) + if api_key is None: + raise ValueError("The API key [DASHSCOPE_API_KEY] is required.") + return api_key + + return v + + @validator("workspace_id", pre=True, always=True) + def validate_workspace_id(cls, v: str) -> str: + """Validate the Workspace.""" + if not v: + import os + + return os.getenv("DASHSCOPE_WORKSPACE_ID", "") + + return v + + @validator("base_url", pre=True, always=True) + def validate_base_url(cls, v: str) -> str: + """Validate the base URL.""" + if v and v != DASHSCOPE_DEFAULT_BASE_URL: + return v + else: + url = ( + os.getenv("DASHSCOPE_BASE_URL", None) + or "https://dashscope.aliyuncs.com" + ) + if url and not url.startswith(("http://", "https://")): + raise ValueError( + "The DASHSCOPE_BASE_URL must start with http or https. " + ) + return url or DASHSCOPE_DEFAULT_BASE_URL + + def _get_dashscope_header(self): + return { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + "X-DashScope-WorkSpace": f"{self.workspace_id}", + "X-DashScope-OpenAPISource": "CloudSDK", + } + + # upload a document and get back a job_id + async def _create_job( + self, file_path: str, extra_info: Optional[dict] = None + ) -> str: + file_path = str(file_path) + UploadFileLeaseResult.is_file_valid(file_path=file_path) + + headers = self._get_dashscope_header() + + # load data + with open(file_path, "rb") as f: + upload_file_lease_result = self.__upload_lease(file_path, headers) + + upload_file_lease_result.upload(file_path, f) + + url = f"{self.base_url}/api/v1/datacenter/category/{DASHSCOPE_DEFAULT_DC_CATEGORY}/add_file" + async with httpx.AsyncClient(timeout=self.max_timeout) as client: + response = await client.post( + url, + headers=headers, + json={ + "lease_id": upload_file_lease_result.lease_id, + "parser": ResultType.DASHSCOPE_DOCMIND.value, + }, + ) + add_file_result = dashscope_response_handler( + response, "add_file", AddFileResult, url=url + ) + + return add_file_result.file_id + + @retry( + stop=stop_after_delay(60), + wait=wait_exponential(multiplier=5, max=60), + before_sleep=before_sleep_log(logger, logging.INFO), + after=after_log(logger, logging.INFO), + reraise=True, + retry=retry_if_exception_type(RetryException), + ) + def __upload_lease(self, file_path, headers): + url = f"{self.base_url}/api/v1/datacenter/category/{DASHSCOPE_DEFAULT_DC_CATEGORY}/upload_lease" + try: + with httpx.Client(timeout=self.max_timeout) as client: + response = client.post( + url, + headers=headers, + json={ + "file_name": os.path.basename(file_path), + "size_bytes": os.path.getsize(file_path), + "content_md5": get_file_md5(file_path), + }, + ) + except httpx.ConnectTimeout: + raise RetryException("Connect 
timeout") + except httpx.ReadTimeout: + raise RetryException("Read timeout") + except httpx.NetworkError: + raise RetryException("Network error") + + upload_file_lease_result = dashscope_response_handler( + response, "upload_lease", UploadFileLeaseResult, url=url + ) + logger.info( + f"{file_path} upload lease result: {upload_file_lease_result.lease_id}" + ) + return upload_file_lease_result + + async def _get_job_result( + self, data_id: str, result_type: str, verbose: bool = False + ) -> dict: + result_url = f"{self.base_url}/api/v1/datacenter/category/{DASHSCOPE_DEFAULT_DC_CATEGORY}/file/{data_id}/download_lease" + status_url = f"{self.base_url}/api/v1/datacenter/category/{DASHSCOPE_DEFAULT_DC_CATEGORY}/file/{data_id}/query" + + headers = self._get_dashscope_header() + + start = time.time() + tries = 0 + while True: + await asyncio.sleep(1) + tries += 1 + query_file_result = await self._dashscope_query( + data_id, headers, status_url + ) + + status = query_file_result.status + if DatahubDataStatusEnum.PARSE_SUCCESS.value == status: + async with httpx.AsyncClient(timeout=self.max_timeout) as client: + response = await client.post( + result_url, headers=headers, json={"file_id": data_id} + ) + down_file_lease_result = dashscope_response_handler( + response, + "download_lease", + DownloadFileLeaseResult, + url=result_url, + ) + if self.parse_result: + return { + result_type: down_file_lease_result.download(escape=True), + "job_id": data_id, + } + else: + return {result_type: "{}", "job_id": data_id} + elif ( + DatahubDataStatusEnum.PARSING.value == status + or DatahubDataStatusEnum.INIT.value == status + ): + end = time.time() + if end - start > self.max_timeout: + raise Exception(f"Timeout while parsing the file: {data_id}") + if verbose and tries % 5 == 0: + print(".", end="", flush=True) + + await asyncio.sleep(self.check_interval) + + continue + else: + raise Exception( + f"Failed to parse the file: {data_id}, status: {status}" + ) + + @retry( + stop=stop_after_delay(60), + wait=wait_exponential(multiplier=5, max=60), + before_sleep=before_sleep_log(logger, logging.INFO), + after=after_log(logger, logging.INFO), + reraise=True, + retry=retry_if_exception_type(RetryException), + ) + async def _dashscope_query(self, data_id, headers, status_url): + try: + async with httpx.AsyncClient(timeout=self.max_timeout) as client: + response = await client.post( + status_url, headers=headers, json={"file_id": data_id} + ) + return dashscope_response_handler( + response, "query", QueryFileResult, url=status_url + ) + except httpx.ConnectTimeout: + raise RetryException("Connect timeout") + except httpx.ReadTimeout: + raise RetryException("Read timeout") + except httpx.NetworkError: + raise RetryException("Network error") + + async def _aload_data( + self, file_path: str, extra_info: Optional[dict] = None, verbose: bool = False + ) -> List[Document]: + """Load data from the input path.""" + try: + data_id = await self._create_job(file_path, extra_info=extra_info) + logger.info(f"Started parsing the file [{file_path}] under [{data_id}]") + + result = await self._get_job_result( + data_id, self.result_type.value, verbose=verbose + ) + + document = Document( + text=result[self.result_type.value], + metadata=extra_info or {}, + ) + document.id_ = data_id + + return [document] + + except Exception as e: + logger.error(f"Error while parsing the file '{file_path}':{e!s}") + if self.ignore_errors: + return [] + else: + raise + + async def aload_data( + self, file_path: Union[List[str], str], extra_info: 
Optional[dict] = None + ) -> List[Document]: + """Load data from the input path.""" + if isinstance(file_path, (str, Path)): + return await self._aload_data( + file_path, extra_info=extra_info, verbose=self.verbose + ) + elif isinstance(file_path, list): + jobs = [ + self._aload_data( + f, + extra_info=extra_info, + verbose=self.verbose and not self.show_progress, + ) + for f in file_path + ] + try: + results = await run_jobs( + jobs, + workers=self.num_workers, + desc="Parsing files", + show_progress=self.show_progress, + ) + + # return flattened results + return [item for sublist in results for item in sublist] + except RuntimeError as e: + if nest_asyncio_err in str(e): + raise RuntimeError(nest_asyncio_msg) + else: + raise + else: + raise ValueError( + "The input file_path must be a string or a list of strings." + ) + + def load_data( + self, file_path: Union[List[str], str], extra_info: Optional[dict] = None + ) -> List[Document]: + extra_info = {"parse_fmt_type": ResultType.DASHSCOPE_DOCMIND.value} + """Load data from the input path.""" + try: + return asyncio.run(self.aload_data(file_path, extra_info)) + except RuntimeError as e: + if nest_asyncio_err in str(e): + raise RuntimeError(nest_asyncio_msg) + else: + raise + + async def _aget_json( + self, file_path: str, extra_info: Optional[dict] = None + ) -> List[dict]: + """Load data from the input path.""" + try: + job_id = await self._create_job(file_path, extra_info=extra_info) + if self.verbose: + logger.info("Started parsing the file under job_id %s" % job_id) + + result = await self._get_job_result( + job_id, ResultType.DASHSCOPE_DOCMIND.value + ) + result["job_id"] = job_id + result["file_path"] = file_path + return [result] + + except Exception as e: + logger.info(f"Error while parsing the file '{file_path}':", e) + if self.ignore_errors: + return [] + else: + raise + + async def aget_json( + self, file_path: Union[List[str], str], extra_info: Optional[dict] = None + ) -> List[dict]: + """Load data from the input path.""" + if isinstance(file_path, (str, Path)): + return await self._aget_json(file_path, extra_info=extra_info) + elif isinstance(file_path, list): + jobs = [self._aget_json(f, extra_info=extra_info) for f in file_path] + try: + results = await run_jobs( + jobs, + workers=self.num_workers, + desc="Parsing files", + show_progress=self.show_progress, + ) + + # return flattened results + return [item for sublist in results for item in sublist] + except RuntimeError as e: + if nest_asyncio_err in str(e): + raise RuntimeError(nest_asyncio_msg) + else: + raise + else: + raise ValueError( + "The input file_path must be a string or a list of strings." 
+ ) + + def get_json_result( + self, file_path: Union[List[str], str], extra_info: Optional[dict] = None + ) -> List[dict]: + extra_info = {"parse_fmt_type": ResultType.DASHSCOPE_DOCMIND.value} + """Parse the input path.""" + try: + return asyncio.run(self.aget_json(file_path, extra_info)) + except RuntimeError as e: + if nest_asyncio_err in str(e): + raise RuntimeError(nest_asyncio_msg) + else: + raise + + def get_images(self, json_result: List[dict], download_path: str) -> List[dict]: + raise NotImplementedError diff --git a/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/domain/base_domains.py b/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/domain/base_domains.py new file mode 100644 index 0000000000000..d6e0e77cc32dc --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/domain/base_domains.py @@ -0,0 +1,7 @@ +from abc import ABC + + +class DictToObject(ABC): + @classmethod + def from_dict(cls, data: dict): + pass diff --git a/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/domain/lease_domains.py b/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/domain/lease_domains.py new file mode 100644 index 0000000000000..bfe0b56ab36d4 --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/domain/lease_domains.py @@ -0,0 +1,295 @@ +import os +import json +import requests +from enum import Enum +from llama_index.readers.dashscope.domain.base_domains import DictToObject +from llama_index.readers.dashscope.utils import get_stream_logger + +logger = get_stream_logger(name=__name__) + + +class FileUploadMethod(Enum): + OSS_PreSignedUrl = "OSS.PreSignedUrl" + + @classmethod + def from_value(cls, value): + for member in cls: + if member.value == value: + return member + raise ValueError(f"No enum member found for value '{value}'") + + +class UploadFileParameter: + def upload(self, file_name: str, file: object): + pass + + +class OssPreSignedUrlParameter(UploadFileParameter, DictToObject): + def __init__(self, url: str, method: str, headers: dict): + self.url = url + self.method = method + self.headers = headers + + @classmethod + def from_dict(cls, data: dict) -> "OssPreSignedUrlParameter": + if "method" not in data: + raise ValueError("OssPreSignedUrlParameter method key is required") + if "headers" not in data: + raise ValueError("OssPreSignedUrlParameter headers key is required") + if "url" not in data: + raise ValueError("OssPreSignedUrlParameter url key is required") + else: + return OssPreSignedUrlParameter( + data["url"], data["method"], data["headers"] + ) + + def upload(self, file_name: str, file: object): + logger.info(f"Start upload {file_name}.") + try: + if self.method == "PUT": + response = requests.put(self.url, data=file, headers=self.headers) + elif self.method == "POST": + response = requests.post(self.url, data=file, headers=self.headers) + else: + raise Exception(f"Upload {file_name} unsupported method: {self.method}") + if response.status_code != 200: + raise Exception( + f"Upload {file_name} failed with status code: {response.status_code} \n {self.url} \n {self.headers} \n {response.text}" + ) + logger.info(f"Upload {file_name} success.") + except requests.ConnectionError as ce: + logger.info(f"Upload {file_name} Error connecting to {self.url}: {ce}") + raise + except 
requests.RequestException as e: + logger.info( + f"Upload {file_name} An error occurred while uploading to {self.url}: {e}" + ) + raise + except Exception as e: + logger.info( + f"Upload {file_name} An error occurred while uploading to {self.url}: {e}" + ) + raise + + +class UploadFileLeaseResult(DictToObject): + def __init__(self, type: str, param: UploadFileParameter, lease_id: str): + self.type: str = type + self.param: UploadFileParameter = param + self.lease_id: str = lease_id + + @classmethod + def from_dict(cls, data: dict) -> "UploadFileLeaseResult": + if "lease_id" not in data: + raise ValueError("UploadFileLeaseResult lease_id key is required") + if "param" not in data: + raise ValueError("UploadFileLeaseResult param key is required") + if "type" not in data: + raise ValueError("UploadFileLeaseResult type key is required") + else: + if data["type"] == FileUploadMethod.OSS_PreSignedUrl.value: + return cls( + data["type"], + OssPreSignedUrlParameter.from_dict(data["param"]), + data["lease_id"], + ) + else: + raise ValueError(f"Unsupported upload type: {data['type']}") + + @staticmethod + def is_file_valid(file_path: str) -> None: + if file_path is None or file_path.strip() == "": + raise ValueError(f"file_path can't blank.") + file_path = str(file_path) + + # file_ext = os.path.splitext(file_path)[1] + # if file_ext not in SUPPORTED_FILE_TYPES: + # raise ValueError( + # f"Currently, only the following file types are supported: {SUPPORTED_FILE_TYPES} " + # f"Current file type: {file_ext}" + # ) + + if not os.path.exists(file_path): + raise FileNotFoundError(f"The file {file_path} does not exist.") + + def upload(self, file_name: str, file: object): + if self.type == FileUploadMethod.OSS_PreSignedUrl.value: + self.param.upload(file_name, file) + else: + raise ValueError(f"Invalid upload method: {self.type}") + + +class AddFileResult(DictToObject): + def __init__(self, file_id: str, parser: str): + self.file_id = file_id + self.parser = parser + + @classmethod + def from_dict(cls, data: dict): + default_values = {"file_id": "", "parser": ""} + + file_id = data.get("file_id", default_values["file_id"]) + parser = data.get("parser", default_values["parser"]) + + return cls(file_id, parser) + + +class QueryFileResult(DictToObject): + def __init__( + self, + file_id: str, + status: str, + file_name: str, + file_type: str, + parser: str, + size_bytes: int, + upload_time: str, + category: str, + ): + self.file_id = file_id + self.status = status + self.file_name = file_name + self.file_type = file_type + self.parser = parser + self.size_bytes = size_bytes + self.upload_time = upload_time + self.category = category + + @classmethod + def from_dict(cls, data: dict): + """ + Creates an instance of `QueryFileResult` from a dictionary. + + Args: + data (dict): A dictionary containing the necessary keys and values corresponding to the class attributes. + + Returns: + QueryFileResult: An instance of `QueryFileResult` populated with data from the input dictionary. 
+ """ + default_values = { + "file_id": "", + "status": "", + "file_name": "", + "file_type": "", + "parser": "", + "size_bytes": 0, + "upload_time": "", + "category": "", + } + + return cls( + file_id=data.get("file_id", default_values["file_id"]), + status=data.get("status", default_values["status"]), + file_name=data.get("file_name", default_values["file_name"]), + file_type=data.get("file_type", default_values["file_type"]), + parser=data.get("parser", default_values["parser"]), + size_bytes=data.get("size_bytes", default_values["size_bytes"]), + upload_time=data.get("upload_time", default_values["upload_time"]), + category=data.get("category", default_values["category"]), + ) + + +class FileDownloadType(Enum): + HTTP = "HTTP" + + @classmethod + def from_value(cls, value): + for member in cls: + if member.value == value: + return member + raise ValueError(f"No enum member found for value '{value}'") + + +class HttpDownloadParameter(DictToObject): + def __init__(self, url, method, headers) -> None: + self.url = url + self.method = method + self.headers = headers + + @classmethod + def from_dict(cls, data: dict): + """ + Creates an instance of `QueryFileResult` from a dictionary. + + Args: + data (dict): A dictionary containing the necessary keys and values corresponding to the class attributes. + + Returns: + QueryFileResult: An instance of `QueryFileResult` populated with data from the input dictionary. + """ + default_values = {"url": "", "method": "GET", "headers": {}} + + return cls( + url=data.get("url", default_values["url"]), + method=data.get("method", default_values["method"]), + headers=data.get("headers", default_values["headers"]), + ) + + +class DownloadFileLeaseResult(DictToObject): + def __init__(self, file_id, lease_id, file_name, type, param) -> None: + self.file_id = file_id + self.lease_id = lease_id + self.file_name = file_name + self.type = type + self.param = param + + @classmethod + def from_dict(cls, data: dict): + """ + Creates an instance of `QueryFileResult` from a dictionary. + + Args: + data (dict): A dictionary containing the necessary keys and values corresponding to the class attributes. + + Returns: + QueryFileResult: An instance of `QueryFileResult` populated with data from the input dictionary. 
+ """ + if "param" not in data: + raise ValueError("download_lease result param is required") + + default_values = { + "file_id": "", + "lease_id": "", + "file_name": "", + "type": FileDownloadType.HTTP.value, + "param": HttpDownloadParameter.from_dict(data["param"]), + } + + return cls( + file_id=data.get("file_id", default_values["file_id"]), + lease_id=data.get("lease_id", default_values["lease_id"]), + file_name=data.get("file_name", default_values["file_name"]), + type=FileDownloadType.from_value(data.get("type", default_values["type"])), + param=default_values["param"], + ) + + def download(self, escape: bool = False): + if self.type == FileDownloadType.HTTP: + if self.param.method == "GET": + json_bytes = requests.get( + url=self.param.url, headers=self.param.headers + ).content + json_str = json_bytes.decode("utf-8") + if escape: + return json.dumps(json_str, ensure_ascii=False) + else: + return json_str + else: + raise ValueError(f"Invalid download method: {self.param.method}") + else: + raise ValueError(f"Invalid download type: {self.type}") + + +class DatahubDataStatusEnum(Enum): + INIT = "INIT" + PARSING = "PARSING" + PARSE_SUCCESS = "PARSE_SUCCESS" + PARSE_FAILED = "PARSE_FAILED" + + @classmethod + def from_value(cls, value): + for member in cls: + if member.value == value: + return member + raise ValueError(f"No enum member found for value '{value}'") diff --git a/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/utils.py b/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/utils.py new file mode 100644 index 0000000000000..e27734878acb9 --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/utils.py @@ -0,0 +1,151 @@ +import hashlib +import logging + +from enum import Enum +from httpx._models import Response +from typing import Dict, Any, Type, TypeVar +from llama_index.readers.dashscope.domain.base_domains import DictToObject + +T = TypeVar("T", bound=DictToObject) + +# Asyncio error messages +nest_asyncio_err = "cannot be called from a running event loop" +nest_asyncio_msg = "The event loop is already running. Add `import nest_asyncio; nest_asyncio.apply()` to your code to fix this issue." 
+ + +def get_stream_logger(name="dashscope-parser", level=logging.INFO, format_string=None): + if not format_string: + format_string = "%(asctime)s %(name)s [%(levelname)s] %(thread)d : %(message)s" + logger = logging.getLogger(name) + logger.setLevel(level) + formatter = logging.Formatter(format_string) + fh = logging.StreamHandler() + fh.setLevel(level) + fh.setFormatter(formatter) + logger.addHandler(fh) + return logger + + +def get_file_md5(file_path): + with open(file_path, "rb") as f: + md5 = hashlib.md5() + while chunk := f.read(8192): + md5.update(chunk) + return md5.hexdigest() + + +def generate_request_id(): + """Generate a random request id.""" + import uuid + + return str(uuid.uuid4()) + + +def __is_response_successful(response_data: Dict[str, Any]) -> bool: + """Check if the response data indicates a successful operation.""" + return ("code" in response_data) and ( + response_data["code"] == "Success" or response_data["code"] == "success" + ) + + +def __raise_exception(response: Response, process: str) -> None: + """Log the error and raise a specific exception based on the response.""" + error_message = f"Failed to {process}: {response.text}" + raise ValueError(error_message) + + +class RetryException(Exception): + """ + Custom exception class to indicate a situation where an operation needs to be retried. + + This exception should be raised when an operation fails due to anticipated recoverable reasons, + suggesting to the caller that a retry logic might be appropriate. + """ + + def __init__( + self, message="Operation failed, requiring a retry", cause=None + ) -> None: + """ + Initialize a RetryException instance. + + :param message: Detailed information about the exception, a string by default set as "Operation failed, requiring a retry" + :param cause: The original exception object that caused this exception, optional + """ + super().__init__(message) + self.cause = cause + + def __str__(self) -> str: + """ + Return a string representation of the exception, including the original exception information if present. + + :return: String representation of the exception details + """ + if self.cause: + return f"{super().__str__()} caused by: {self.cause}" + else: + return super().__str__() + + +def __raise_exception_for_retry(response: Response, process: str) -> None: + """Log the error and raise a specific exception based on the response.""" + error_message = f"Failed to {process}: {response.text}" + raise RetryException(cause=error_message) + + +logger = get_stream_logger(name="DashScopeResponseHandler") + + +def dashscope_response_handler( + response: Response, process: str, result_class: Type[T], url: str = "" +) -> T: + """Handle the response from the DashScope API.""" + if response is None: + raise ValueError( + f"DashScopeParse {process} [URL:{url}] http response object is none." + ) + + if not isinstance(process, str) or not process: + raise ValueError( + "DashScopeParse func [dashscope_response_handler] process parameter is empty." + ) + + if response.status_code != 200: + logger.error( + f"DashScopeParse {process} [URL:{url}] response http status code is not 200: [{response.status_code}:{response.text}]" + ) + if response.status_code == 429: + __raise_exception_for_retry(response, process) + __raise_exception(response, process) + try: + response_data = response.json() + except Exception as e: + logger.error( + f"DashScopeParse {process} [URL:{url}] response data is not json: {response.text}." 
+ ) + __raise_exception(response, process) + + if not __is_response_successful(response_data): + logger.error( + f"DashScopeParse {process} [URL:{url}] response fail: {response.text}." + ) + __raise_exception(response, process) + + if "data" not in response_data: + logger.error( + f"DashScopeParse {process} [URL:{url}] response data does not contain 'data' key: {response_data}." + ) + __raise_exception(response, process) + if "request_id" in response_data and process != "query": + logger.info( + f"DashScopeParse {process} [URL:{url}] request_id: {response_data['request_id']}." + ) + return result_class.from_dict(response_data["data"]) + + +class ResultType(Enum): + """The result type for the parser.""" + + DASHSCOPE_DOCMIND = "DASHSCOPE_DOCMIND" + + +SUPPORTED_FILE_TYPES = [".pdf", ".doc", ".docx"] diff --git a/llama-index-integrations/readers/llama-index-readers-dashscope/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-dashscope/pyproject.toml new file mode 100644 index 0000000000000..f0527b3dcc5df --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-dashscope/pyproject.toml @@ -0,0 +1,53 @@ +[build-system] +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] + +[tool.codespell] +check-filenames = true +check-hidden = true +# Feel free to un-skip examples, and experimental, you will just need to +# work through many typos (--write-changes and --interactive will help) +skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" + +[tool.mypy] +disallow_untyped_defs = true +# Remove venv skip when integrated with pre-commit +exclude = ["_static", "build", "examples", "notebooks", "venv"] +ignore_missing_imports = true +python_version = "3.8" + +[tool.poetry] +authors = ["Your Name "] +description = "llama-index readers dashscope integration" +license = "MIT" +name = "llama-index-readers-dashscope" +packages = [{include = "llama_index/"}] +readme = "README.md" +version = "0.1.0" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +llama-index-core = "^0.10.0" +oss2 = "^2.18.4" +retrying = "^1.3.4" + +[tool.poetry.group.dev.dependencies] +black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"} +codespell = {extras = ["toml"], version = ">=v2.2.6"} +flask = "3.0.3" +ipython = "8.10.0" +jupyter = "^1.0.0" +mypy = "0.991" +pre-commit = "3.2.0" +pylint = "2.15.10" +pytest = "7.2.1" +pytest-mock = "3.11.1" +reportlab = "4.2.0" +ruff = "0.0.292" +tree-sitter-languages = "^1.8.0" +types-Deprecated = ">=0.1.0" +types-PyYAML = "^6.0.12.12" +types-protobuf = "^4.24.0.4" +types-redis = "4.5.5.0" +types-requests = "2.28.11.8" # TODO: unpin when mypy>0.991 +types-setuptools = "67.1.0.0" diff --git a/llama-index-integrations/readers/llama-index-readers-dashscope/tests/__init__.py b/llama-index-integrations/readers/llama-index-readers-dashscope/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/llama-index-integrations/readers/llama-index-readers-dashscope/tests/test_readers_dashscope.py b/llama-index-integrations/readers/llama-index-readers-dashscope/tests/test_readers_dashscope.py new file mode 100644 index 0000000000000..2f7f1686b1527 --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-dashscope/tests/test_readers_dashscope.py @@ -0,0 +1,7 @@ +from llama_index.readers.dashscope import DashScopeParse +from llama_index.core.readers.base import BasePydanticReader + + +def test_class(): + names_of_base_classes = [b.__name__ for b in DashScopeParse.__mro__] + assert 
BasePydanticReader.__name__ in names_of_base_classes From 49e9e43b74b593085c7453f073351ad172695c31 Mon Sep 17 00:00:00 2001 From: "ada.drx" Date: Tue, 7 May 2024 10:16:45 +0800 Subject: [PATCH 06/13] update tool.llamahub --- .../llama-index-indices-managed-dashscope/pyproject.toml | 7 +++++++ .../pyproject.toml | 7 +++++++ .../readers/llama-index-readers-dashscope/pyproject.toml | 7 +++++++ 3 files changed, 21 insertions(+) diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/pyproject.toml b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/pyproject.toml index 34ea61275e7a6..9dd0d96fa136e 100644 --- a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/pyproject.toml +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/pyproject.toml @@ -9,6 +9,13 @@ check-hidden = true # work through many typos (--write-changes and --interactive will help) skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" +[tool.llamahub] +contains_example = false +import_path = "llama_index.indices.managed.dashscope" + +[tool.llamahub.class_authors] +DashScopeEmbedding = "phantomgrapes" + [tool.mypy] disallow_untyped_defs = true # Remove venv skip when integrated with pre-commit diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/pyproject.toml b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/pyproject.toml index 71499bdcb119b..eff1a18850658 100644 --- a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/pyproject.toml +++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/pyproject.toml @@ -9,6 +9,13 @@ check-hidden = true # work through many typos (--write-changes and --interactive will help) skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" +[tool.llamahub] +contains_example = false +import_path = "llama_index.node_parser.relational.dashscope" + +[tool.llamahub.class_authors] +DashScopeEmbedding = "phantomgrapes" + [tool.mypy] disallow_untyped_defs = true # Remove venv skip when integrated with pre-commit diff --git a/llama-index-integrations/readers/llama-index-readers-dashscope/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-dashscope/pyproject.toml index f0527b3dcc5df..178e33aaa20cd 100644 --- a/llama-index-integrations/readers/llama-index-readers-dashscope/pyproject.toml +++ b/llama-index-integrations/readers/llama-index-readers-dashscope/pyproject.toml @@ -9,6 +9,13 @@ check-hidden = true # work through many typos (--write-changes and --interactive will help) skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" +[tool.llamahub] +contains_example = false +import_path = "llama_index.readers.dashscope.base" + +[tool.llamahub.class_authors] +DashScopeEmbedding = "phantomgrapes" + [tool.mypy] disallow_untyped_defs = true # Remove venv skip when integrated with pre-commit From 4c74a74c0d2ff10574d21e669b5349ca2f711b99 Mon Sep 17 00:00:00 2001 From: "ada.drx" Date: Wed, 8 May 2024 15:14:13 +0800 Subject: [PATCH 07/13] adapt for new reader output --- .../llama-index-indices-managed-dashscope/pyproject.toml | 2 +- .../llama_index/node_parser/relational/dashscope/base.py | 2 +- .../llama-index-node-parser-relational-dashscope/pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/pyproject.toml 
b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/pyproject.toml index 9dd0d96fa136e..e8ebc990801ae 100644 --- a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/pyproject.toml +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/pyproject.toml @@ -37,7 +37,7 @@ python = ">=3.8.1,<4.0" llama-index-core = "^0.10.0" llama-index-embeddings-dashscope = ">=0.1.3" llama-index-readers-dashscope = ">=0.1.0" -llama-index-node-parser-relational-dashscope = ">=0.1.0" +llama-index-node-parser-relational-dashscope = ">=0.1.1" [tool.poetry.group.dev.dependencies] black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"} diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py index 6175fa811c3fd..39197f2889d18 100644 --- a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py +++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py @@ -53,7 +53,7 @@ def get_nodes_from_node(self, node: TextNode) -> List[BaseNode]: } my_input = { - "text": node.get_content(), + "text": json.loads(node.get_content()), "file_type": ftype_map.get(ftype, ftype), "chunk_size": self.chunk_size, "overlap_size": self.overlap_size, diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/pyproject.toml b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/pyproject.toml index eff1a18850658..0be2f59b9c0c1 100644 --- a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/pyproject.toml +++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/pyproject.toml @@ -30,7 +30,7 @@ license = "MIT" name = "llama-index-node-parser-relational-dashscope" packages = [{include = "llama_index/"}] readme = "README.md" -version = "0.1.0" +version = "0.1.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" From 70b99edb6fc203624c9f10672466e1d75d428c6a Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Thu, 16 May 2024 20:49:46 -0600 Subject: [PATCH 08/13] build files --- .../indices/llama-index-indices-managed-dashscope/BUILD | 4 +++- .../llama_index/indices/managed/dashscope/BUILD | 1 + .../indices/llama-index-indices-managed-dashscope/tests/BUILD | 1 + .../llama-index-node-parser-relational-dashscope/BUILD | 4 +++- .../llama_index/node_parser/relational/dashscope/BUILD | 1 + .../llama-index-node-parser-relational-dashscope/tests/BUILD | 1 + .../readers/llama-index-readers-dashscope/BUILD | 4 +++- .../llama_index/readers/dashscope/BUILD | 1 + .../llama_index/readers/dashscope/domain/BUILD | 1 + .../readers/llama-index-readers-dashscope/tests/BUILD | 1 + 10 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/BUILD create mode 100644 llama-index-integrations/indices/llama-index-indices-managed-dashscope/tests/BUILD create mode 100644 
llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/BUILD create mode 100644 llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/BUILD create mode 100644 llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/BUILD create mode 100644 llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/domain/BUILD create mode 100644 llama-index-integrations/readers/llama-index-readers-dashscope/tests/BUILD diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/BUILD b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/BUILD index db46e8d6c978c..0896ca890d8bf 100644 --- a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/BUILD +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/BUILD @@ -1 +1,3 @@ -python_sources() +poetry_requirements( + name="poetry", +) diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/BUILD b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/BUILD new file mode 100644 index 0000000000000..db46e8d6c978c --- /dev/null +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/tests/BUILD b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/tests/BUILD new file mode 100644 index 0000000000000..dabf212d7e716 --- /dev/null +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/tests/BUILD @@ -0,0 +1 @@ +python_tests() diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/BUILD b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/BUILD index db46e8d6c978c..0896ca890d8bf 100644 --- a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/BUILD +++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/BUILD @@ -1 +1,3 @@ -python_sources() +poetry_requirements( + name="poetry", +) diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/BUILD b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/BUILD new file mode 100644 index 0000000000000..db46e8d6c978c --- /dev/null +++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/BUILD b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/BUILD new file mode 100644 index 0000000000000..dabf212d7e716 --- /dev/null +++ b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/BUILD @@ -0,0 +1 @@ +python_tests() diff --git a/llama-index-integrations/readers/llama-index-readers-dashscope/BUILD 
b/llama-index-integrations/readers/llama-index-readers-dashscope/BUILD index db46e8d6c978c..0896ca890d8bf 100644 --- a/llama-index-integrations/readers/llama-index-readers-dashscope/BUILD +++ b/llama-index-integrations/readers/llama-index-readers-dashscope/BUILD @@ -1 +1,3 @@ -python_sources() +poetry_requirements( + name="poetry", +) diff --git a/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/BUILD b/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/BUILD new file mode 100644 index 0000000000000..db46e8d6c978c --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/domain/BUILD b/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/domain/BUILD new file mode 100644 index 0000000000000..db46e8d6c978c --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/domain/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/readers/llama-index-readers-dashscope/tests/BUILD b/llama-index-integrations/readers/llama-index-readers-dashscope/tests/BUILD new file mode 100644 index 0000000000000..dabf212d7e716 --- /dev/null +++ b/llama-index-integrations/readers/llama-index-readers-dashscope/tests/BUILD @@ -0,0 +1 @@ +python_tests() From 796129469d0dee1f8ea96b2109502f5be37671d0 Mon Sep 17 00:00:00 2001 From: "ada.drx" Date: Tue, 21 May 2024 17:31:14 +0800 Subject: [PATCH 09/13] pre commit --- .../.gitignore | 3 + .../README.md | 6 + .../indices/managed/dashscope/api_utils.py | 57 ++++++- .../indices/managed/dashscope/base.py | 150 +++++++++++------- .../indices/managed/dashscope/constants.py | 2 + .../managed/dashscope/transformations.py | 103 ++---------- .../indices/managed/dashscope/utils.py | 37 ++++- .../pyproject.toml | 8 +- .../.gitignore | 0 .../BUILD | 0 .../Makefile | 0 .../README.md | 6 + .../node_parser/dashscope/__init__.py | 4 + .../node_parser}/dashscope/base.py | 7 +- .../pyproject.toml | 10 +- .../tests/__init__.py | 0 .../test_node_parser_relational_dashscope.py | 2 +- .../relational/dashscope/__init__.py | 4 - .../llama-index-readers-dashscope/README.md | 10 +- .../llama_index/readers/dashscope/base.py | 22 ++- .../pyproject.toml | 6 +- 21 files changed, 261 insertions(+), 176 deletions(-) rename llama-index-integrations/node_parser/{relational => }/llama-index-node-parser-relational-dashscope/.gitignore (100%) rename llama-index-integrations/node_parser/{relational => }/llama-index-node-parser-relational-dashscope/BUILD (100%) rename llama-index-integrations/node_parser/{relational => }/llama-index-node-parser-relational-dashscope/Makefile (100%) rename llama-index-integrations/node_parser/{relational => }/llama-index-node-parser-relational-dashscope/README.md (94%) create mode 100644 llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/llama_index/node_parser/dashscope/__init__.py rename llama-index-integrations/node_parser/{relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational => llama-index-node-parser-relational-dashscope/llama_index/node_parser}/dashscope/base.py (95%) rename llama-index-integrations/node_parser/{relational => }/llama-index-node-parser-relational-dashscope/pyproject.toml (85%) rename 
llama-index-integrations/node_parser/{relational => }/llama-index-node-parser-relational-dashscope/tests/__init__.py (100%) rename llama-index-integrations/node_parser/{relational => }/llama-index-node-parser-relational-dashscope/tests/test_node_parser_relational_dashscope.py (75%) delete mode 100644 llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/__init__.py diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/.gitignore b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/.gitignore index 990c18de22908..8ead961e42aed 100644 --- a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/.gitignore +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/.gitignore @@ -151,3 +151,6 @@ Pipfile.lock # pyright pyrightconfig.json + +# local test file +tests/test_local.py diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/README.md b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/README.md index 4da52939fb294..e2e9e3cd50231 100644 --- a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/README.md +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/README.md @@ -1,5 +1,11 @@ # LlamaIndex Indices Integration: Managed-Dashscope +## Installation + +```shell +pip install llama-index-indices-managed-dashscope +``` + ## Usage ```python diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/api_utils.py b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/api_utils.py index 699e74a9df65e..be6113c7d9a27 100644 --- a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/api_utils.py +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/api_utils.py @@ -2,7 +2,7 @@ from typing import List, Optional from llama_index.indices.managed.dashscope.transformations import ( - ConfiguredTransformation, + DashScopeConfiguredTransformation, ) from llama_index.core.schema import BaseNode, TransformComponent @@ -31,12 +31,12 @@ def get_pipeline_create( name: str, transformations: Optional[List[TransformComponent]] = None, documents: Optional[List[BaseNode]] = None, -) -> str: - configured_transformations: List[ConfiguredTransformation] = [] +) -> dict: + configured_transformations: List[DashScopeConfiguredTransformation] = [] for transformation in transformations: try: configured_transformations.append( - ConfiguredTransformation.from_component(transformation) + DashScopeConfiguredTransformation.from_component(transformation) ) except ValueError: raise ValueError(f"Unsupported transformation: {type(transformation)}") @@ -71,3 +71,52 @@ def get_pipeline_create( "data_type": "structured", "config_model": "recommend", } + + +def get_doc_insert( + transformations: Optional[List[TransformComponent]] = None, + documents: Optional[List[BaseNode]] = None, +) -> dict: + configured_transformations: List[DashScopeConfiguredTransformation] = [] + for transformation in transformations: + try: + configured_transformations.append( + DashScopeConfiguredTransformation.from_component(transformation) + ) + except ValueError: + raise ValueError(f"Unsupported transformation: {type(transformation)}") + + 
configured_transformation_items: List[Dict] = [] + for item in configured_transformations: + configured_transformation_items.append( + { + "component": json.loads(item.component.json()), + "configurable_transformation_type": item.configurable_transformation_type.name, + } + ) + data_sources = [ + { + "source_type": "DATA_CENTER_FILE", + "component": { + "doc_ids": [doc.node_id for doc in documents], + }, + } + ] + return { + "configured_transformations": configured_transformation_items, + "data_sources": data_sources, + } + + +def get_doc_delete(ref_doc_ids: List[str]) -> dict: + data_sources = [ + { + "source_type": "DATA_CENTER_FILE", + "component": { + "doc_ids": ref_doc_ids, + }, + } + ] + return { + "data_sources": data_sources, + } diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/base.py b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/base.py index e00b9f6adfaa0..09a2cca2ba38d 100644 --- a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/base.py +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/base.py @@ -5,10 +5,8 @@ """ import os -import time -from typing import Any, List, Optional, Sequence, Type +from typing import Any, List, Optional, Type, Union from enum import Enum -import time import requests import json @@ -22,13 +20,21 @@ from llama_index.indices.managed.dashscope.api_utils import ( get_pipeline_create, default_transformations, + get_doc_insert, + get_doc_delete, +) +from llama_index.indices.managed.dashscope.utils import ( + run_ingestion, + get_pipeline_id, ) - from llama_index.indices.managed.dashscope.constants import ( DASHSCOPE_DEFAULT_BASE_URL, UPSERT_PIPELINE_ENDPOINT, START_PIPELINE_ENDPOINT, CHECK_INGESTION_ENDPOINT, + PIPELINE_SIMPLE_ENDPOINT, + INSERT_DOC_ENDPOINT, + DELETE_DOC_ENDPOINT, ) @@ -70,6 +76,13 @@ def __init__( self.workspace_id = workspace_id or os.environ.get("DASHSCOPE_WORKSPACE_ID") self._api_key = api_key or os.environ.get("DASHSCOPE_API_KEY") self._base_url = os.environ.get("DASHSCOPE_BASE_URL", None) or base_url + self._headers = { + "Content-Type": "application/json", + "Accept-Encoding": "utf-8", + "X-DashScope-WorkSpace": self.workspace_id, + "Authorization": "Bearer " + self._api_key, + "X-DashScope-OpenAPISource": "CloudSDK", + } self._timeout = timeout self._show_progress = show_progress self._service_context = None @@ -92,10 +105,6 @@ def from_documents( # type: ignore pipeline_create = get_pipeline_create( name, transformations or default_transformations(), documents ) - # for debug - json.dump( - pipeline_create, open("pipeline_create.json", "w"), ensure_ascii=False - ) workspace_id = workspace_id or os.environ.get("DASHSCOPE_WORKSPACE_ID") api_key = api_key or os.environ.get("DASHSCOPE_API_KEY") @@ -110,13 +119,8 @@ def from_documents( # type: ignore "X-DashScope-WorkSpace": workspace_id, "Authorization": "Bearer " + api_key, "X-DashScope-OpenAPISource": "CloudSDK", - # for debug - # 'X-DashScope-ApiKeyId': 'test_api_key_id_123456', - # 'X-DashScope-Uid': "test_uid_123456", - # "X-DashScope-SubUid": "test_sub_uid_123456" } - print(base_url + UPSERT_PIPELINE_ENDPOINT) - print(json.dumps(pipeline_create)) + response = requests.put( base_url + UPSERT_PIPELINE_ENDPOINT, data=json.dumps(pipeline_create), @@ -132,8 +136,6 @@ def from_documents( # type: ignore if verbose: 
print(f"Starting creating index {name}, pipeline_id: {pipeline_id}") - print(base_url + START_PIPELINE_ENDPOINT.format(pipeline_id=pipeline_id)) - response = requests.post( base_url + START_PIPELINE_ENDPOINT.format(pipeline_id=pipeline_id), headers=headers, @@ -151,41 +153,14 @@ def from_documents( # type: ignore if verbose: print(f"Starting ingestion for index {name}, ingestion_id: {ingestion_id}") - ingestion_status = "" - failed_docs = [] - - while True: - print( - base_url - + CHECK_INGESTION_ENDPOINT.format( - pipeline_id=pipeline_id, ingestion_id=ingestion_id - ) - ) - response = requests.get( - base_url - + CHECK_INGESTION_ENDPOINT.format( - pipeline_id=pipeline_id, ingestion_id=ingestion_id - ), - headers=headers, - ) - try: - response_text = response.json() - except Exception as e: - print(f"Failed to get response: \n{response.text}\nretrying...") - continue - - if response_text.get("code", "") != Status.SUCCESS.value: - print( - f"Failed to get ingestion status: {response_text.get('message', '')}\n{response_text}\nretrying..." - ) - continue - ingestion_status = response_text.get("ingestion_status", "") - failed_docs = response_text.get("failed_docs", "") - if verbose: - print(f"Current status: {ingestion_status}") - if ingestion_status in ["COMPLETED", "FAILED"]: - break - time.sleep(5) + ingestion_status, failed_docs = run_ingestion( + base_url + + CHECK_INGESTION_ENDPOINT.format( + pipeline_id=pipeline_id, ingestion_id=ingestion_id + ), + headers, + verbose, + ) if verbose: print(f"ingestion_status {ingestion_status}") @@ -227,15 +202,78 @@ def as_query_engine(self, **kwargs: Any) -> BaseQueryEngine: kwargs["retriever"] = self.as_retriever(**kwargs) return RetrieverQueryEngine.from_args(**kwargs) - def _insert(self, nodes: Sequence[BaseNode], **insert_kwargs: Any) -> None: + def _insert( + self, + documents: List[Document], + transformations: Optional[List[TransformComponent]] = None, + verbose: bool = True, + **insert_kwargs: Any, + ) -> None: """Insert a set of documents (each a node).""" - raise NotImplementedError("_insert not implemented.") + pipeline_id = get_pipeline_id( + self._base_url + PIPELINE_SIMPLE_ENDPOINT, + self._headers, + {"pipeline_name": self.name}, + ) + doc_insert = get_doc_insert( + transformations or default_transformations(), + documents, + ) + response = requests.put( + self._base_url + INSERT_DOC_ENDPOINT.format(pipeline_id=pipeline_id), + data=json.dumps(doc_insert), + headers=self._headers, + ) + response_text = response.json() + ingestion_id = response_text.get("ingestionId", None) + if ( + response_text.get("code", "") != Status.SUCCESS.value + or ingestion_id is None + ): + raise ValueError( + f"Failed to insert documents: {response_text.get('message', '')}\n{response_text}" + ) + + ingestion_status, failed_docs = run_ingestion( + self._base_url + + CHECK_INGESTION_ENDPOINT.format( + pipeline_id=pipeline_id, ingestion_id=ingestion_id + ), + self._headers, + verbose, + ) + + if verbose: + print(f"ingestion_status {ingestion_status}") + print(f"failed_docs: {failed_docs}") def delete_ref_doc( - self, ref_doc_id: str, delete_from_docstore: bool = False, **delete_kwargs: Any + self, + ref_doc_ids: Union[str, List[str]], + verbose: bool = True, + **delete_kwargs: Any, ) -> None: - """Delete a document and it's nodes by using ref_doc_id.""" - raise NotImplementedError("delete_ref_doc not implemented.") + """Delete documents in index.""" + if isinstance(ref_doc_ids, str): + ref_doc_ids = [ref_doc_ids] + pipeline_id = get_pipeline_id( + self._base_url 
+ PIPELINE_SIMPLE_ENDPOINT, + self._headers, + {"pipeline_name": self.name}, + ) + doc_delete = get_doc_delete(ref_doc_ids) + response = requests.post( + self._base_url + DELETE_DOC_ENDPOINT.format(pipeline_id=pipeline_id), + json=doc_delete, + headers=self._headers, + ) + response_text = response.json() + if response_text.get("code", "") != Status.SUCCESS.value: + raise ValueError( + f"Failed to delete documents: {response_text.get('message', '')}\n{response_text}" + ) + if verbose: + print(f"Delete documents {ref_doc_ids} successfully!") def update_ref_doc(self, document: Document, **update_kwargs: Any) -> None: """Update a document and it's corresponding nodes.""" diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/constants.py b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/constants.py index a2704b3aef5e1..0e1f769fc2d39 100644 --- a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/constants.py +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/constants.py @@ -6,3 +6,5 @@ ) RETRIEVE_PIPELINE_ENDPOINT = "/api/v1/indices/pipeline/{pipeline_id}/retrieve" PIPELINE_SIMPLE_ENDPOINT = "/api/v1/indices/pipeline_simple" +INSERT_DOC_ENDPOINT = "/api/v1/indices/pipeline/{pipeline_id}/documents" +DELETE_DOC_ENDPOINT = "/api/v1/indices/pipeline/{pipeline_id}/delete" diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/transformations.py b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/transformations.py index 44329521ebfa3..a43822c1576c8 100644 --- a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/transformations.py +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/transformations.py @@ -1,87 +1,24 @@ """ -This module maintains the list of transformations that are supported by the system. +This module maintains the list of transformations that are supported by dashscope. 
""" from enum import Enum -from typing import Generic, Sequence, Type, TypeVar +from typing import Generic, TypeVar from llama_index.core.bridge.pydantic import ( - BaseModel, Field, GenericModel, ValidationError, ) -from llama_index.core.schema import BaseComponent, BaseNode, Document - - -# Transform Input/Output Types -class TransformationIOType(BaseModel): - name: str = Field(description="Name of the input/output type") - description: str = Field(description="Description of the input/output type") - python_type: str = Field(description="Python type of the input/output type") - - -class TransformationIOTypes(Enum): - DOCUMENTS = TransformationIOType( - name="Documents", - description="A sequence of Documents", - python_type=str(Sequence[Document]), - ) - NODES = TransformationIOType( - name="Nodes", - description="A sequence of Nodes from a sequence of Documents", - python_type=str(Sequence[BaseNode]), - ) - - -class TransformationCategory(BaseModel): - """A description for a category of transformation within a pipeline.""" - - name: str = Field(description="Unique name of the type of transformation") - description: str = Field(description="Description for the type of transformation") - input_type: TransformationIOType = Field( - description="Input type for the transformation type" - ) - output_type: TransformationIOType = Field( - description="Output type for the transformation type" - ) - - -class TransformationCategories(Enum): - """Supported transformation categories.""" - - NODE_PARSER = TransformationCategory( - name="NodeParser", - description="Applies a function to parse nodes from documents", - input_type=TransformationIOTypes.DOCUMENTS.value, - output_type=TransformationIOTypes.NODES.value, - ) - EMBEDDING = TransformationCategory( - name="Embedding", - description="Applies a function to embed nodes", - input_type=TransformationIOTypes.NODES.value, - output_type=TransformationIOTypes.NODES.value, - ) - - -class ConfigurableTransformation(BaseModel): - """ - A class containing metadata for a type of transformation that can be in a pipeline. - """ - - name: str = Field( - description="Unique and human-readable name for the type of transformation" - ) - transformation_category: TransformationCategories = Field( - description="Type of transformation" - ) - component_type: Type[BaseComponent] = Field( - description="Type of component that implements the transformation" - ) +from llama_index.core.schema import BaseComponent +from llama_index.core.ingestion.transformations import ( + TransformationCategories, + ConfigurableTransformation, +) -def build_configurable_transformation_enum(): +def dashscope_build_configurable_transformation_enum(): """ Build an enum of configurable transformations. But conditional on if the corresponding component is available. 
@@ -102,14 +39,14 @@ def from_component( def build_configured_transformation( self, component: BaseComponent - ) -> "ConfiguredTransformation": + ) -> "DashScopeConfiguredTransformation": component_type = self.value.component_type if not isinstance(component, component_type): raise ValueError( f"The enum value {self} is not compatible with component of " f"type {type(component)}" ) - return ConfiguredTransformation[component_type]( # type: ignore + return DashScopeConfiguredTransformation[component_type]( # type: ignore component=component, name=self.value.name ) @@ -117,7 +54,7 @@ def build_configured_transformation( # Node parsers try: - from llama_index.node_parser.relational.dashscope import DashScopeJsonNodeParser + from llama_index.node_parser.dashscope import DashScopeJsonNodeParser enum_members.append( ( @@ -154,14 +91,14 @@ def build_configured_transformation( return ConfigurableComponent("ConfigurableTransformations", enum_members) -ConfigurableTransformations = build_configurable_transformation_enum() +ConfigurableTransformations = dashscope_build_configurable_transformation_enum() T = TypeVar("T", bound=BaseComponent) -class ConfiguredTransformation(GenericModel, Generic[T]): +class DashScopeConfiguredTransformation(GenericModel, Generic[T]): """ - A class containing metadata & implementation for a transformation in a pipeline. + A class containing metadata & implementation for a transformation in a dashscope pipeline. """ name: str @@ -170,17 +107,7 @@ class ConfiguredTransformation(GenericModel, Generic[T]): @classmethod def from_component(cls, component: BaseComponent) -> "ConfiguredTransformation": """ - Build a ConfiguredTransformation from a component. - - This should be the preferred way to build a ConfiguredTransformation - as it will ensure that the component is supported as indicated by having a - corresponding enum value in ConfigurableTransformations. - - This has the added bonus that you don't need to specify the generic type - like ConfiguredTransformation[SentenceSplitter]. The return value of - this ConfiguredTransformation.from_component(simple_node_parser) will be - ConfiguredTransformation[SentenceSplitter] if simple_node_parser is - a SentenceSplitter. + Build a ConfiguredTransformation from a component in dashscope. 
""" return ConfigurableTransformations.from_component( component diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/utils.py b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/utils.py index 4b595e8f08231..46a0a5dd0dbc7 100644 --- a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/utils.py +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/utils.py @@ -1,7 +1,8 @@ import requests +import time -def post(base_url, headers, params): +def post(base_url: str, headers: dict, params: dict): response = requests.post(base_url, headers=headers, json=params) if response.status_code != 200: raise RuntimeError(response.text) @@ -11,7 +12,7 @@ def post(base_url, headers, params): return response_dict -def get(base_url, headers, params): +def get(base_url: str, headers: dict, params: dict): response = requests.get(base_url, headers=headers, params=params) if response.status_code != 200: raise RuntimeError(response.text) @@ -22,6 +23,36 @@ def get(base_url, headers, params): return response_dict -def get_pipeline_id(base_url, headers, params): +def get_pipeline_id(base_url: str, headers: dict, params: dict): response_dict = get(base_url, headers, params) return response_dict.get("id", "") + + +def run_ingestion(request_url: str, headers: dict, verbose: bool = False): + ingestion_status = "" + failed_docs = [] + + while True: + response = requests.get( + request_url, + headers=headers, + ) + try: + response_text = response.json() + except Exception as e: + print(f"Failed to get response: \n{response.text}\nretrying...") + continue + + if response_text.get("code", "") != "Success": + print( + f"Failed to get ingestion status: {response_text.get('message', '')}\n{response_text}\nretrying..." 
+ ) + continue + ingestion_status = response_text.get("ingestion_status", "") + failed_docs = response_text.get("failed_docs", "") + if verbose: + print(f"Current status: {ingestion_status}") + if ingestion_status in ["COMPLETED", "FAILED"]: + break + time.sleep(5) + return ingestion_status, failed_docs diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/pyproject.toml b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/pyproject.toml index e8ebc990801ae..a9d41f2113527 100644 --- a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/pyproject.toml +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/pyproject.toml @@ -14,7 +14,7 @@ contains_example = false import_path = "llama_index.indices.managed.dashscope" [tool.llamahub.class_authors] -DashScopeEmbedding = "phantomgrapes" +DashScopeCloudIndex = "phantomgrapes" [tool.mypy] disallow_untyped_defs = true @@ -30,14 +30,14 @@ license = "MIT" name = "llama-index-indices-managed-dashscope" packages = [{include = "llama_index/"}] readme = "README.md" -version = "0.1.0" +version = "0.1.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" llama-index-core = "^0.10.0" llama-index-embeddings-dashscope = ">=0.1.3" -llama-index-readers-dashscope = ">=0.1.0" -llama-index-node-parser-relational-dashscope = ">=0.1.1" +llama-index-readers-dashscope = ">=0.1.1" +llama-index-node-parser-relational-dashscope = ">=0.1.2" [tool.poetry.group.dev.dependencies] black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"} diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/.gitignore b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/.gitignore similarity index 100% rename from llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/.gitignore rename to llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/.gitignore diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/BUILD b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/BUILD similarity index 100% rename from llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/BUILD rename to llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/BUILD diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/Makefile b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/Makefile similarity index 100% rename from llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/Makefile rename to llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/Makefile diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/README.md b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/README.md similarity index 94% rename from llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/README.md rename to llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/README.md index 5cb5daf4a19cd..727f405fd25a4 100644 --- a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/README.md +++ 
b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/README.md @@ -2,6 +2,12 @@ Transform your documents into nodes with ease using the Dashscope integration for LlamaIndex. This tool allows for precise control over chunk size, overlap size, and more, tailored for the Dashscope reader output format. +## Installation + +```shell +pip install llama-index-node-parser-dashscope +``` + ## Quick Start Get up and running with just a few lines of code: diff --git a/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/llama_index/node_parser/dashscope/__init__.py b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/llama_index/node_parser/dashscope/__init__.py new file mode 100644 index 0000000000000..0e083796926d6 --- /dev/null +++ b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/llama_index/node_parser/dashscope/__init__.py @@ -0,0 +1,4 @@ +from llama_index.node_parser.dashscope.base import DashScopeJsonNodeParser + + +__all__ = ["DashScopeJsonNodeParser"] diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/llama_index/node_parser/dashscope/base.py similarity index 95% rename from llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py rename to llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/llama_index/node_parser/dashscope/base.py index 39197f2889d18..53558d6240e20 100644 --- a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/base.py +++ b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/llama_index/node_parser/dashscope/base.py @@ -72,7 +72,8 @@ def get_nodes_from_node(self, node: TextNode) -> List[BaseNode]: return self.parse_result(response_text, node) - def post_service(self, my_input): + # def post_service(self, my_input): + def post_service(self, my_input: Dict[str, Any]) -> Optional[Dict[str, Any]]: DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY", None) if DASHSCOPE_API_KEY is None: logging.error("DASHSCOPE_API_KEY is not set") @@ -104,7 +105,9 @@ def post_service(self, my_input): logging.error(f"{e}, try again.") return None - def parse_result(self, content_json, document): + def parse_result( + self, content_json: List[Dict[str, Any]], document: TextNode + ) -> List[BaseNode]: nodes = [] for data in content_json: text = "\n".join( diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/pyproject.toml b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/pyproject.toml similarity index 85% rename from llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/pyproject.toml rename to llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/pyproject.toml index 0be2f59b9c0c1..1a76dfcbae93a 100644 --- a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/pyproject.toml +++ b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/pyproject.toml @@ -11,10 +11,10 @@ skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" 
[tool.llamahub] contains_example = false -import_path = "llama_index.node_parser.relational.dashscope" +import_path = "llama_index.node_parser.dashscope" [tool.llamahub.class_authors] -DashScopeEmbedding = "phantomgrapes" +DashScopeJsonNodeParser = "phantomgrapes" [tool.mypy] disallow_untyped_defs = true @@ -25,12 +25,12 @@ python_version = "3.8" [tool.poetry] authors = ["Ruixue Ding "] -description = "llama-index node_parser relational dashscope integration" +description = "llama-index node_parser dashscope integration" license = "MIT" -name = "llama-index-node-parser-relational-dashscope" +name = "llama-index-node-parser-dashscope" packages = [{include = "llama_index/"}] readme = "README.md" -version = "0.1.1" +version = "0.1.2" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/__init__.py b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/tests/__init__.py similarity index 100% rename from llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/__init__.py rename to llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/tests/__init__.py diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/test_node_parser_relational_dashscope.py b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/tests/test_node_parser_relational_dashscope.py similarity index 75% rename from llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/test_node_parser_relational_dashscope.py rename to llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/tests/test_node_parser_relational_dashscope.py index c19021114499f..69ebb4bf0bd97 100644 --- a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/tests/test_node_parser_relational_dashscope.py +++ b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/tests/test_node_parser_relational_dashscope.py @@ -1,4 +1,4 @@ -from llama_index.node_parser.relational.dashscope import DashScopeJsonNodeParser +from llama_index.node_parser.dashscope import DashScopeJsonNodeParser from llama_index.core.node_parser.relational.base_element import BaseElementNodeParser diff --git a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/__init__.py b/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/__init__.py deleted file mode 100644 index dff0bac5358df..0000000000000 --- a/llama-index-integrations/node_parser/relational/llama-index-node-parser-relational-dashscope/llama_index/node_parser/relational/dashscope/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from llama_index.node_parser.relational.dashscope.base import DashScopeJsonNodeParser - - -__all__ = ["DashScopeJsonNodeParser"] diff --git a/llama-index-integrations/readers/llama-index-readers-dashscope/README.md b/llama-index-integrations/readers/llama-index-readers-dashscope/README.md index 8024144b92cb7..88fff9283b7d4 100644 --- a/llama-index-integrations/readers/llama-index-readers-dashscope/README.md +++ b/llama-index-integrations/readers/llama-index-readers-dashscope/README.md @@ -1,5 +1,11 @@ # LlamaIndex Readers Integration: 
Dashscope +## Installation + +```shell +pip install llama-index-readers-dashscope +``` + ## Usage ```python @@ -27,8 +33,8 @@ A full list of retriever settings/kwargs is below: - result_type: Optional[ResultType] -- The result type for the parser. The default value is ResultType.DASHCOPE_DOCMIND. - num_workers: Optional[int] -- The number of workers to use sending API requests for parsing. The default value is 4, greater than 0, less than 10. -- check_interval: Optional[int] -- The interval in seconds to check if the parsing is done. The default value is 2. -- max_timeout: Optional[int] -- The maximum timeout in seconds to wait for the parsing to finish. The default value is 20000. +- check_interval: Optional[int] -- The interval in seconds to check if the parsing is done. The default value is 5. +- max_timeout: Optional[int] -- The maximum timeout in seconds to wait for the parsing to finish. The default value is 3600. - verbose: Optional[bool] -- Whether to print the progress of the parsing. The default value is True. - show_progress: Optional[bool] -- Show progress when parsing multiple files. The default value is True. - ignore_errors: Optional[bool] -- Whether or not to ignore and skip errors raised during parsing. The default value is diff --git a/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/base.py b/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/base.py index a4c4746486f51..192e84f9c9d24 100644 --- a/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/base.py +++ b/llama-index-integrations/readers/llama-index-readers-dashscope/llama_index/readers/dashscope/base.py @@ -44,6 +44,11 @@ class DashScopeParse(BasePydanticReader): description="The Workspace for the DashScope API.If not set, " "it will use the default workspace.", ) + category_id: str = Field( + default=DASHSCOPE_DEFAULT_DC_CATEGORY, + description="The dc category for the DashScope API. If not set, " + "it will use the default dc category.", + ) base_url: str = Field( default=DASHSCOPE_DEFAULT_BASE_URL, description="The base URL of the DashScope Parsing API.", @@ -104,6 +109,15 @@ def validate_workspace_id(cls, v: str) -> str: return v + + @validator("category_id", pre=True, always=True) + def validate_category_id(cls, v: str) -> str: + """Validate the category.""" + if not v: + import os + + return os.getenv("DASHSCOPE_CATEGORY_ID", DASHSCOPE_DEFAULT_DC_CATEGORY) + return v + @validator("base_url", pre=True, always=True) def validate_base_url(cls, v: str) -> str: """Validate the base URL.""" @@ -143,7 +157,7 @@ async def _create_job( upload_file_lease_result.upload(file_path, f) - url = f"{self.base_url}/api/v1/datacenter/category/{DASHSCOPE_DEFAULT_DC_CATEGORY}/add_file" + url = f"{self.base_url}/api/v1/datacenter/category/{self.category_id}/add_file" async with httpx.AsyncClient(timeout=self.max_timeout) as client: response = await client.post( url, @@ -168,7 +182,7 @@ async def _create_job( retry=retry_if_exception_type(RetryException), ) def __upload_lease(self, file_path, headers): - url = f"{self.base_url}/api/v1/datacenter/category/{DASHSCOPE_DEFAULT_DC_CATEGORY}/upload_lease" + url = f"{self.base_url}/api/v1/datacenter/category/{self.category_id}/upload_lease" try: with httpx.Client(timeout=self.max_timeout) as client: response = client.post( @@ -198,8 +212,8 @@ def __upload_lease(self, file_path, headers): async def _get_job_result( self, data_id: str, result_type: str, verbose:
bool = False ) -> dict: - result_url = f"{self.base_url}/api/v1/datacenter/category/{DASHSCOPE_DEFAULT_DC_CATEGORY}/file/{data_id}/download_lease" - status_url = f"{self.base_url}/api/v1/datacenter/category/{DASHSCOPE_DEFAULT_DC_CATEGORY}/file/{data_id}/query" + result_url = f"{self.base_url}/api/v1/datacenter/category/{self.category_id}/file/{data_id}/download_lease" + status_url = f"{self.base_url}/api/v1/datacenter/category/{self.category_id}/file/{data_id}/query" headers = self._get_dashscope_header() diff --git a/llama-index-integrations/readers/llama-index-readers-dashscope/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-dashscope/pyproject.toml index 178e33aaa20cd..b875b64a79c72 100644 --- a/llama-index-integrations/readers/llama-index-readers-dashscope/pyproject.toml +++ b/llama-index-integrations/readers/llama-index-readers-dashscope/pyproject.toml @@ -14,7 +14,7 @@ contains_example = false import_path = "llama_index.readers.dashscope.base" [tool.llamahub.class_authors] -DashScopeEmbedding = "phantomgrapes" +DashScopeParse = "phantomgrapes" [tool.mypy] disallow_untyped_defs = true @@ -30,12 +30,12 @@ license = "MIT" name = "llama-index-readers-dashscope" packages = [{include = "llama_index/"}] readme = "README.md" -version = "0.1.0" +version = "0.1.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" llama-index-core = "^0.10.0" -oss2 = "^2.18.4" +oss2 = "^2.18.5" retrying = "^1.3.4" [tool.poetry.group.dev.dependencies] From 7bc9c0c709e536b103fb0263878d6f265b71bfe9 Mon Sep 17 00:00:00 2001 From: "ada.drx" Date: Tue, 21 May 2024 17:56:47 +0800 Subject: [PATCH 10/13] finish test --- .../llama_index/indices/managed/dashscope/api_utils.py | 2 +- .../llama-index-indices-managed-dashscope/pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/api_utils.py b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/api_utils.py index be6113c7d9a27..635a57a46ca3b 100644 --- a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/api_utils.py +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/llama_index/indices/managed/dashscope/api_utils.py @@ -9,7 +9,7 @@ def default_transformations() -> List[TransformComponent]: """Default transformations.""" - from llama_index.node_parser.relational.dashscope import DashScopeJsonNodeParser + from llama_index.node_parser.dashscope import DashScopeJsonNodeParser from llama_index.embeddings.dashscope import ( DashScopeEmbedding, DashScopeTextEmbeddingModels, diff --git a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/pyproject.toml b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/pyproject.toml index a9d41f2113527..e9be531a5a344 100644 --- a/llama-index-integrations/indices/llama-index-indices-managed-dashscope/pyproject.toml +++ b/llama-index-integrations/indices/llama-index-indices-managed-dashscope/pyproject.toml @@ -37,7 +37,7 @@ python = ">=3.8.1,<4.0" llama-index-core = "^0.10.0" llama-index-embeddings-dashscope = ">=0.1.3" llama-index-readers-dashscope = ">=0.1.1" -llama-index-node-parser-relational-dashscope = ">=0.1.2" +llama-index-node-parser-dashscope = ">=0.1.2" [tool.poetry.group.dev.dependencies] black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"} From 3262c97676a9b9cd2e33c9c281cd9cf6edfd9fa7 Mon 
Sep 17 00:00:00 2001 From: "ada.drx" Date: Thu, 23 May 2024 11:31:36 +0800 Subject: [PATCH 11/13] fix tailor problem --- .../llama_index/node_parser/dashscope/BUILD | 1 + .../llama-index-node-parser-relational-dashscope/tests/BUILD | 1 + 2 files changed, 2 insertions(+) create mode 100644 llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/llama_index/node_parser/dashscope/BUILD create mode 100644 llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/tests/BUILD diff --git a/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/llama_index/node_parser/dashscope/BUILD b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/llama_index/node_parser/dashscope/BUILD new file mode 100644 index 0000000000000..db46e8d6c978c --- /dev/null +++ b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/llama_index/node_parser/dashscope/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/tests/BUILD b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/tests/BUILD new file mode 100644 index 0000000000000..dabf212d7e716 --- /dev/null +++ b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/tests/BUILD @@ -0,0 +1 @@ +python_tests() From f60dc8d539ac2f5354e2f1bce91f1812b6a75dd9 Mon Sep 17 00:00:00 2001 From: "ada.drx" Date: Thu, 23 May 2024 13:35:23 +0800 Subject: [PATCH 12/13] remove duplicate code --- .../llama_index/node_parser/dashscope/base.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/llama_index/node_parser/dashscope/base.py b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/llama_index/node_parser/dashscope/base.py index 53558d6240e20..7b4f7221754ae 100644 --- a/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/llama_index/node_parser/dashscope/base.py +++ b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/llama_index/node_parser/dashscope/base.py @@ -87,10 +87,6 @@ def post_service(self, my_input: Dict[str, Any]) -> Optional[Dict[str, Any]]: os.getenv("DASHSCOPE_BASE_URL", "https://dashscope.aliyuncs.com") + "/api/v1/indices/component/configed_transformations/spliter" ) - response = requests.post( - service_url, data=json.dumps(my_input), headers=headers - ) - response_text = response.json() try: response = requests.post( service_url, data=json.dumps(my_input), headers=headers From 065ef48a3fb4bcfa73e533da0e749148a7e12d18 Mon Sep 17 00:00:00 2001 From: "ada.drx" Date: Thu, 23 May 2024 13:37:07 +0800 Subject: [PATCH 13/13] remove comment --- .../llama_index/node_parser/dashscope/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/llama_index/node_parser/dashscope/base.py b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/llama_index/node_parser/dashscope/base.py index 7b4f7221754ae..3ed7d9815a9e8 100644 --- a/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/llama_index/node_parser/dashscope/base.py +++ b/llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope/llama_index/node_parser/dashscope/base.py @@ -72,7 +72,6 @@ def get_nodes_from_node(self, node: 
TextNode) -> List[BaseNode]: return self.parse_result(response_text, node) - # def post_service(self, my_input): def post_service(self, my_input: Dict[str, Any]) -> Optional[Dict[str, Any]]: DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY", None) if DASHSCOPE_API_KEY is None: