diff --git a/src/backend/base/langflow/base/loaders/__init__.py b/src/backend/base/langflow/base/loaders/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/backend/base/langflow/base/loaders/yuque.py b/src/backend/base/langflow/base/loaders/yuque.py
new file mode 100644
--- /dev/null
+++ b/src/backend/base/langflow/base/loaders/yuque.py
@@ -0,0 +1,63 @@
+from typing import Any
+
+from langchain_core.documents import Document
+
+from langflow.helpers.YuQueDocLoaderExec import YuQueDocLoaderExec
+from langflow.interface.custom.custom_component import CustomComponent
+
+
+class YuQueDocLoader(CustomComponent):
+    """Custom component that loads Yuque content into a langchain ``Document``."""
+
+    display_name: str = "YuQueDocLoader"
+    description: str = "Load from yuQue(https://www.yuque.com) URL"
+
+    def build_config(self) -> dict[str, Any]:
+        """Describe the input fields shown for this component."""
+        return {
+            "doc_type": {
+                "display_name": "Document Type",
+                "options": [
+                    "Knowledge",
+                    "Document",
+                ],
+                "info": "Please select a single document or a knowledge base",
+                "required": True,
+            },
+            "token": {
+                "display_name": "Token",
+                "required": True,
+                # The token is a credential, so mask it in the UI.
+                "password": True,
+            },
+            "url": {
+                "display_name": "URL",
+                "required": True,
+                "info": "Please Enter the URL after [https://www.yuque.com/]",
+            },
+            # A real boolean rather than the string "true".
+            "code": {"show": True},
+        }
+
+    def build(
+        self,
+        doc_type: str,
+        token: str,
+        url: str,
+    ) -> Document:
+        """Load the requested Yuque content.
+
+        Args:
+            doc_type: "Document" for a single document; any other value
+                loads every document of the knowledge base.
+            token: Yuque API token.
+            url: Path that follows ``https://www.yuque.com/``.
+
+        Returns:
+            A ``Document`` whose ``page_content`` is the fetched text.
+        """
+        return YuQueDocLoaderExec(
+            token=token,
+            url=url,
+            doc_type=doc_type,
+        ).load()
diff --git a/src/backend/base/langflow/helpers/YuQueDocLoaderExec.py b/src/backend/base/langflow/helpers/YuQueDocLoaderExec.py
new file mode 100644
--- /dev/null
+++ b/src/backend/base/langflow/helpers/YuQueDocLoaderExec.py
@@ -0,0 +1,55 @@
+import logging
+from typing import Optional
+
+from langchain_community.document_loaders.base import BaseLoader
+from langchain_core.documents import Document
+
+from langflow.helpers.YuQueTool import get_doc_detail_by_code, get_knowledge_detail
+
+logger = logging.getLogger(__name__)
+
+
+class YuQueDocLoaderExec(BaseLoader):
+    """Fetch a Yuque document or knowledge base as a single ``Document``."""
+
+    def __init__(
+        self,
+        token: Optional[str] = None,
+        url: Optional[str] = None,
+        doc_type: Optional[str] = None,
+    ):
+        """Remember what to load and with which credentials.
+
+        Args:
+            token: Yuque API token.
+            url: Path that follows ``https://www.yuque.com/``.
+            doc_type: "Document" for a single document; any other value
+                (including ``None``) loads the whole knowledge base.
+        """
+        self.token = token
+        self.url = url
+        self.doc_type = doc_type
+
+    def load(self) -> Document:
+        """Fetch the configured content and wrap it in a ``Document``.
+
+        Returns:
+            One ``Document`` holding the text returned by the Yuque helpers.
+
+        Raises:
+            ValueError: If ``token`` or ``url`` was not provided.
+        """
+        # NOTE(review): langchain loaders conventionally return a list of
+        # documents from load(); a single Document is kept because
+        # YuQueDocLoader.build returns this value as-is. TODO confirm.
+        if not self.token or not self.url:
+            msg = "Both 'token' and 'url' are required to load from Yuque"
+            raise ValueError(msg)
+
+        if self.doc_type == "Document":
+            logger.debug("Loading a single Yuque document from %s", self.url)
+            content = get_doc_detail_by_code(self.token, self.url)
+        else:
+            logger.debug("Loading a Yuque knowledge base from %s", self.url)
+            content = get_knowledge_detail(self.token, self.url)
+        return Document(page_content=content)
diff --git a/src/backend/base/langflow/helpers/YuQueTool.py b/src/backend/base/langflow/helpers/YuQueTool.py
new file mode 100644
--- /dev/null
+++ b/src/backend/base/langflow/helpers/YuQueTool.py
@@ -0,0 +1,125 @@
+import requests
+
+
+class YuQueCatalog:
+    """One entry of a knowledge base's document list."""
+
+    def __init__(self, id: int, title: str, book_id: int):
+        self.id = id
+        self.title = title
+        self.book_id = book_id
+
+
+base_url = "https://www.yuque.com"
+
+# Seconds to wait for the Yuque API; without a timeout a stalled connection
+# would block the caller indefinitely.
+REQUEST_TIMEOUT = 30
+
+
+def _build_headers(team: str, token: str) -> dict:
+    """Return the headers sent with every Yuque API request."""
+    return {"User-Agent": team, "X-Auth-Token": token, "Content-Type": "application/json"}
+
+
+def _split_path(url: str, min_parts: int) -> list[str]:
+    """Split a Yuque path into its non-empty segments.
+
+    The ``https://www.yuque.com`` prefix, surrounding slashes, and any query
+    string or fragment are ignored.
+
+    Raises:
+        ValueError: If the path has fewer than ``min_parts`` segments.
+    """
+    path = url.strip()
+    if path.startswith(base_url):
+        path = path[len(base_url) :]
+    path = path.split("?", 1)[0].split("#", 1)[0]
+    parts = [part for part in path.split("/") if part]
+    if len(parts) < min_parts:
+        msg = f"Expected at least {min_parts} path segments after {base_url}/, got {url!r}"
+        raise ValueError(msg)
+    return parts
+
+
+def _get_data(url: str, doc_headers: dict):
+    """GET ``url`` and return the ``data`` member of the JSON response.
+
+    Raises:
+        requests.HTTPError: If the API answers with an error status.
+        ValueError: If the response carries no ``data`` member.
+    """
+    response = requests.get(url, headers=doc_headers, timeout=REQUEST_TIMEOUT)
+    response.raise_for_status()
+    data = response.json().get("data")
+    if data is None:
+        msg = f"Yuque API response for {url} contains no 'data'"
+        raise ValueError(msg)
+    return data
+
+
+def get_doc_catalog(team: str, knowledge: str, doc_headers: dict) -> list[YuQueCatalog]:
+    """Get the directory of Yuque knowledge base
+
+    Args:
+        team: First path segment of the knowledge base URL.
+        knowledge: Second path segment of the knowledge base URL.
+        doc_headers: HTTP headers, including the auth token.
+
+    Returns:
+        One ``YuQueCatalog`` per item in the API response.
+    """
+    # NOTE(review): a single request is made; if the endpoint paginates, only
+    # the first page is returned. TODO confirm against the Yuque API docs.
+    url = f"{base_url}/api/v2/repos/{team}/{knowledge}/docs"
+    return [
+        YuQueCatalog(id=item["id"], title=item["title"], book_id=item["book_id"])
+        for item in _get_data(url, doc_headers)
+    ]
+
+
+def get_doc_detail(book_id: int, id: int, doc_headers: dict) -> str:
+    """Get details of Yuque knowledge documents
+
+    Returns:
+        The document's ``body``, or "" when the API returns none.
+    """
+    url = f"{base_url}/api/v2/repos/{book_id}/docs/{id}"
+    return _get_data(url, doc_headers).get("body") or ""
+
+
+def get_doc_detail_by_code(token: str, url: str) -> str:
+    """Get details of a single document based on URL
+
+    Args:
+        token: Yuque API token.
+        url: ``team/knowledge/code`` path of the document.
+
+    Returns:
+        The document's ``body``, or "" when the API returns none.
+
+    Raises:
+        ValueError: If ``url`` has fewer than three path segments.
+    """
+    team, knowledge, code = _split_path(url, 3)[:3]
+    full_url = f"{base_url}/api/v2/repos/{team}/{knowledge}/docs/{code}"
+    return _get_data(full_url, _build_headers(team, token)).get("body") or ""
+
+
+def get_knowledge_detail(token: str, url: str) -> str:
+    """Get all document details under Yuque Knowledge Base
+
+    Args:
+        token: Yuque API token.
+        url: ``team/knowledge`` path of the knowledge base.
+
+    Returns:
+        Every document body, each one preceded by a newline.
+
+    Raises:
+        ValueError: If ``url`` has fewer than two path segments.
+    """
+    team, knowledge = _split_path(url, 2)[:2]
+    doc_headers = _build_headers(team, token)
+    catalog = get_doc_catalog(team, knowledge, doc_headers)
+    return "".join("\n" + get_doc_detail(item.book_id, item.id, doc_headers) for item in catalog)