From 2ddfca74eb0fc9711eb707c1e91f397745ee778e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9E=9C=E8=8C=97?= Date: Fri, 10 May 2024 22:36:52 +0800 Subject: [PATCH 1/4] add yuque(https://www.yuque.com) loader --- .../documentloaders/YuQueDocLoader.py | 46 +++++++++++ .../langflow/helpers/YuQueDocLoaderExec.py | 27 +++++++ .../base/langflow/helpers/YuQueTool.py | 76 +++++++++++++++++++ 3 files changed, 149 insertions(+) create mode 100644 src/backend/base/langflow/components/documentloaders/YuQueDocLoader.py create mode 100644 src/backend/base/langflow/helpers/YuQueDocLoaderExec.py create mode 100644 src/backend/base/langflow/helpers/YuQueTool.py diff --git a/src/backend/base/langflow/components/documentloaders/YuQueDocLoader.py b/src/backend/base/langflow/components/documentloaders/YuQueDocLoader.py new file mode 100644 index 0000000000..8353cff647 --- /dev/null +++ b/src/backend/base/langflow/components/documentloaders/YuQueDocLoader.py @@ -0,0 +1,46 @@ +from typing import List, Dict, Any + +from langchain_core.documents import Document + +from langflow.helpers.YuQueDocLoaderExec import YuQueDocLoaderExec +from langflow.interface.custom.custom_component import CustomComponent + + +class YuQueDocLoader(CustomComponent): + display_name: str = "YuQueDocLoader" + description: str = "Load from yuQue(https://www.yuque.com) URL" + + def build_config(self) -> Dict[str, Any]: + return { + "doc_type": { + "display_name": "Document Type", + "options": [ + "Knowledge", + "Document", + ], + "info": "Please select a single document or a knowledge base", + "required": True, + }, + "token": { + "display_name": "Token", + "required": True, + }, + "url": { + "display_name": "URL", + "required": True, + "info": "Please Enter the URL after [https://www.yuque.com/]" + }, + "code": {"show": "true"}, + } + + def build( + self, + doc_type: str, + token: str, + url: str, + ) -> Document: + return YuQueDocLoaderExec( + token=token, + url=url, + doc_type=doc_type, + ).load() diff --git 
a/src/backend/base/langflow/helpers/YuQueDocLoaderExec.py b/src/backend/base/langflow/helpers/YuQueDocLoaderExec.py new file mode 100644 index 0000000000..d63c1d871f --- /dev/null +++ b/src/backend/base/langflow/helpers/YuQueDocLoaderExec.py @@ -0,0 +1,27 @@ +import logging +from langchain_core.documents import Document +from langchain_community.document_loaders.base import BaseLoader + +from langflow.helpers.YuQueTool import get_doc_detail_by_code, get_knowledge_detail + +logger = logging.getLogger(__name__) + + +class YuQueDocLoaderExec(BaseLoader): + + def __init__( + self, + token: str = None, + url: str = None, + doc_type: str = None, + ): + self.token = token + self.url = url + self.doc_type = doc_type + + def load(self) -> Document: + if self.doc_type == "Document": + docs = get_doc_detail_by_code(self.token, self.url) + else: + docs = get_knowledge_detail(self.token, self.url) + return Document(page_content=docs) diff --git a/src/backend/base/langflow/helpers/YuQueTool.py b/src/backend/base/langflow/helpers/YuQueTool.py new file mode 100644 index 0000000000..22d0b7c2a4 --- /dev/null +++ b/src/backend/base/langflow/helpers/YuQueTool.py @@ -0,0 +1,76 @@ +from typing import List, Dict + +import requests + + +class YuQueCatalog: + def __init__(self, id: int, title: str, book_id: int): + self.id = id + self.title = title + self.book_id = book_id + + +base_url = 'https://www.yuque.com' + + +def get_doc_catalog(team: str, knowledge: str, doc_headers: Dict) -> List[YuQueCatalog]: + """Get the directory of Yuque knowledge base""" + url = f'{base_url}/api/v2/repos/{team}/{knowledge}/docs' + response = requests.get(url, headers=doc_headers) + json_data = response.json().get("data") + + catalog_list = [] + for item in json_data: + print(item['id']) + catalog = YuQueCatalog( + id=item['id'], + title=item['title'], + book_id=item['book_id'] + ) + catalog_list.append(catalog) + return catalog_list + + +def get_doc_detail(book_id: int, id: int, doc_headers: Dict): + """Get 
details of Yuque knowledge documents""" + url = f'{base_url}/api/v2/repos/{book_id}/docs/{id}' + response = requests.get(url, headers=doc_headers) + return response.json().get("data").get("body") + + +def get_doc_detail_by_code(token: str, url: str): + """Get details of a single document based on URL""" + if url.startswith("/"): + url = url[1:] + split_result = url.split("/") + team = split_result[0] + knowledge = split_result[1] + code = split_result[2] + full_url = f'{base_url}/api/v2/repos/{team}/{knowledge}/docs/{code}' + doc_headers = { + 'User-Agent': team, + 'X-Auth-Token': token, + 'Content-Type': 'application/json' + } + response = requests.get(full_url, headers=doc_headers) + return response.json().get("data").get("body") + + +def get_knowledge_detail(token: str, url: str) -> str: + """Get all document details under Yuque Knowledge Base""" + if url.startswith("/"): + url = url[1:] + split_result = url.split("/") + team = split_result[0] + knowledge = split_result[1] + doc_headers = { + 'User-Agent': team, + 'X-Auth-Token': token, + 'Content-Type': 'application/json' + } + catalog = get_doc_catalog(team, knowledge, doc_headers) + result = '' + for item in catalog: + doc = get_doc_detail(item.book_id, item.id, doc_headers) + result += '\n' + doc + return result From 5e1a3cec52a765aec1db5441f413b65eda1f57bd Mon Sep 17 00:00:00 2001 From: "liujichengs@gmail.com" Date: Fri, 24 May 2024 13:52:46 +0800 Subject: [PATCH 2/4] add yuque(https://www.yuque.com) loader:refactor move path --- .../documentloaders => base/loaders}/YuQueDocLoader.py | 0 src/backend/base/langflow/base/loaders/__init__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename src/backend/base/langflow/{components/documentloaders => base/loaders}/YuQueDocLoader.py (100%) create mode 100644 src/backend/base/langflow/base/loaders/__init__.py diff --git a/src/backend/base/langflow/components/documentloaders/YuQueDocLoader.py b/src/backend/base/langflow/base/loaders/YuQueDocLoader.py 
similarity index 100% rename from src/backend/base/langflow/components/documentloaders/YuQueDocLoader.py rename to src/backend/base/langflow/base/loaders/YuQueDocLoader.py diff --git a/src/backend/base/langflow/base/loaders/__init__.py b/src/backend/base/langflow/base/loaders/__init__.py new file mode 100644 index 0000000000..e69de29bb2 From fb24802c26b98487cb0a61b92104d3ef7167809c Mon Sep 17 00:00:00 2001 From: "liujichengs@gmail.com" Date: Fri, 24 May 2024 14:06:29 +0800 Subject: [PATCH 3/4] add yuque(https://www.yuque.com) loader:rename file --- .../base/langflow/base/loaders/{YuQueDocLoader.py => yuque.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename src/backend/base/langflow/base/loaders/{YuQueDocLoader.py => yuque.py} (97%) diff --git a/src/backend/base/langflow/base/loaders/YuQueDocLoader.py b/src/backend/base/langflow/base/loaders/yuque.py similarity index 97% rename from src/backend/base/langflow/base/loaders/YuQueDocLoader.py rename to src/backend/base/langflow/base/loaders/yuque.py index 8353cff647..076d1c7572 100644 --- a/src/backend/base/langflow/base/loaders/YuQueDocLoader.py +++ b/src/backend/base/langflow/base/loaders/yuque.py @@ -1,4 +1,4 @@ -from typing import List, Dict, Any +from typing import Dict, Any from langchain_core.documents import Document From 599de55d019c43aef37ad4aa25b7c99e6239241a Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Fri, 5 Jul 2024 16:36:04 +0000 Subject: [PATCH 4/4] [autofix.ci] apply automated fixes --- .../base/langflow/base/loaders/yuque.py | 10 +++--- .../langflow/helpers/YuQueDocLoaderExec.py | 9 +++--- .../base/langflow/helpers/YuQueTool.py | 32 ++++++------------- 3 files changed, 19 insertions(+), 32 deletions(-) diff --git a/src/backend/base/langflow/base/loaders/yuque.py b/src/backend/base/langflow/base/loaders/yuque.py index 076d1c7572..6d5a36a345 100644 --- a/src/backend/base/langflow/base/loaders/yuque.py +++ 
b/src/backend/base/langflow/base/loaders/yuque.py @@ -28,16 +28,16 @@ def build_config(self) -> Dict[str, Any]: "url": { "display_name": "URL", "required": True, - "info": "Please Enter the URL after [https://www.yuque.com/]" + "info": "Please Enter the URL after [https://www.yuque.com/]", }, "code": {"show": "true"}, } def build( - self, - doc_type: str, - token: str, - url: str, + self, + doc_type: str, + token: str, + url: str, ) -> Document: return YuQueDocLoaderExec( token=token, diff --git a/src/backend/base/langflow/helpers/YuQueDocLoaderExec.py b/src/backend/base/langflow/helpers/YuQueDocLoaderExec.py index d63c1d871f..a987f6d033 100644 --- a/src/backend/base/langflow/helpers/YuQueDocLoaderExec.py +++ b/src/backend/base/langflow/helpers/YuQueDocLoaderExec.py @@ -8,12 +8,11 @@ class YuQueDocLoaderExec(BaseLoader): - def __init__( - self, - token: str = None, - url: str = None, - doc_type: str = None, + self, + token: str = None, + url: str = None, + doc_type: str = None, ): self.token = token self.url = url diff --git a/src/backend/base/langflow/helpers/YuQueTool.py b/src/backend/base/langflow/helpers/YuQueTool.py index 22d0b7c2a4..5ae42cfebf 100644 --- a/src/backend/base/langflow/helpers/YuQueTool.py +++ b/src/backend/base/langflow/helpers/YuQueTool.py @@ -10,30 +10,26 @@ def __init__(self, id: int, title: str, book_id: int): self.book_id = book_id -base_url = 'https://www.yuque.com' +base_url = "https://www.yuque.com" def get_doc_catalog(team: str, knowledge: str, doc_headers: Dict) -> List[YuQueCatalog]: """Get the directory of Yuque knowledge base""" - url = f'{base_url}/api/v2/repos/{team}/{knowledge}/docs' + url = f"{base_url}/api/v2/repos/{team}/{knowledge}/docs" response = requests.get(url, headers=doc_headers) json_data = response.json().get("data") catalog_list = [] for item in json_data: - print(item['id']) - catalog = YuQueCatalog( - id=item['id'], - title=item['title'], - book_id=item['book_id'] - ) + print(item["id"]) + catalog = 
YuQueCatalog(id=item["id"], title=item["title"], book_id=item["book_id"]) catalog_list.append(catalog) return catalog_list def get_doc_detail(book_id: int, id: int, doc_headers: Dict): """Get details of Yuque knowledge documents""" - url = f'{base_url}/api/v2/repos/{book_id}/docs/{id}' + url = f"{base_url}/api/v2/repos/{book_id}/docs/{id}" response = requests.get(url, headers=doc_headers) return response.json().get("data").get("body") @@ -46,12 +42,8 @@ def get_doc_detail_by_code(token: str, url: str): team = split_result[0] knowledge = split_result[1] code = split_result[2] - full_url = f'{base_url}/api/v2/repos/{team}/{knowledge}/docs/{code}' - doc_headers = { - 'User-Agent': team, - 'X-Auth-Token': token, - 'Content-Type': 'application/json' - } + full_url = f"{base_url}/api/v2/repos/{team}/{knowledge}/docs/{code}" + doc_headers = {"User-Agent": team, "X-Auth-Token": token, "Content-Type": "application/json"} response = requests.get(full_url, headers=doc_headers) return response.json().get("data").get("body") @@ -63,14 +55,10 @@ def get_knowledge_detail(token: str, url: str) -> str: split_result = url.split("/") team = split_result[0] knowledge = split_result[1] - doc_headers = { - 'User-Agent': team, - 'X-Auth-Token': token, - 'Content-Type': 'application/json' - } + doc_headers = {"User-Agent": team, "X-Auth-Token": token, "Content-Type": "application/json"} catalog = get_doc_catalog(team, knowledge, doc_headers) - result = '' + result = "" for item in catalog: doc = get_doc_detail(item.book_id, item.id, doc_headers) - result += '\n' + doc + result += "\n" + doc return result