Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add yuque(https://www.yuque.com) loader #1874

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
46 changes: 46 additions & 0 deletions src/backend/base/langflow/base/loaders/yuque.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from typing import Any

from langchain_core.documents import Document

from langflow.helpers.YuQueDocLoaderExec import YuQueDocLoaderExec
from langflow.interface.custom.custom_component import CustomComponent


class YuQueDocLoader(CustomComponent):
    """Component that loads content from YuQue (https://www.yuque.com).

    The user chooses between a single document and a knowledge base and
    provides a token and a URL; fetching is delegated to
    ``YuQueDocLoaderExec``.
    """

    display_name: str = "YuQueDocLoader"
    description: str = "Load from yuQue(https://www.yuque.com) URL"

    def build_config(self) -> dict[str, Any]:
        """Return the field configuration for the inputs of ``build``."""
        doc_type_field: dict[str, Any] = {
            "display_name": "Document Type",
            "options": ["Knowledge", "Document"],
            "info": "Please select a single document or a knowledge base",
            "required": True,
        }
        token_field: dict[str, Any] = {
            "display_name": "Token",
            "required": True,
        }
        url_field: dict[str, Any] = {
            "display_name": "URL",
            "required": True,
            "info": "Please Enter the URL after [https://www.yuque.com/]",
        }
        # NOTE(review): "show" is the string "true", not the boolean True —
        # confirm this is what the framework expects.
        code_field: dict[str, Any] = {"show": "true"}

        return {
            "doc_type": doc_type_field,
            "token": token_field,
            "url": url_field,
            "code": code_field,
        }

    def build(self, doc_type: str, token: str, url: str) -> Document:
        """Fetch the requested YuQue content and return it as a ``Document``.

        Args:
            doc_type: The selected "Document Type" option.
            token: Token forwarded to the loader.
            url: URL value forwarded to the loader.
        """
        loader = YuQueDocLoaderExec(token=token, url=url, doc_type=doc_type)
        return loader.load()
27 changes: 27 additions & 0 deletions src/backend/base/langflow/helpers/YuQueDocLoaderExec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import logging

from langchain_community.document_loaders.base import BaseLoader
from langchain_core.documents import Document

from langflow.helpers.YuQueTool import get_doc_detail_by_code, get_knowledge_detail

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class YuQueDocLoaderExec(BaseLoader):
    """Loader that fetches YuQue content and wraps it in a ``Document``."""

    def __init__(self, token: str = None, url: str = None, doc_type: str = None):
        """Remember the settings that ``load`` will use.

        Args:
            token: Token passed to the YuQue helper functions.
            url: URL value passed to the YuQue helper functions.
            doc_type: ``"Document"`` selects a single-document fetch; any
                other value selects the knowledge-base fetch.
        """
        self.doc_type = doc_type
        self.url = url
        self.token = token

    def load(self) -> Document:
        """Fetch the configured content and return it as one ``Document``."""
        # Only the exact string "Document" picks the single-document helper;
        # everything else falls through to the knowledge-base helper.
        fetch = get_doc_detail_by_code if self.doc_type == "Document" else get_knowledge_detail
        content = fetch(self.token, self.url)
        return Document(page_content=content)
62 changes: 62 additions & 0 deletions src/backend/base/langflow/helpers/YuQueTool.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey @liujichengs
Great implementation. Sorry for the delayed response.

Could you move this to langflow/base/loaders/yuque.py?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hi, @ogabrielluiz I have modified it, thank you for your reply

Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import requests


class YuQueCatalog:
    """One entry of a YuQue knowledge-base document listing.

    The constructor stores its three arguments unchanged as the attributes
    ``id``, ``title`` and ``book_id``.
    """

    # NOTE: Ruff reports A002 here (argument `id` shadows a Python builtin).
    # The parameter name is kept because callers pass it by keyword
    # (``YuQueCatalog(id=...)``); renaming it would break them, so the check
    # is suppressed on this line only.
    def __init__(self, id: int, title: str, book_id: int) -> None:  # noqa: A002
        self.id = id
        self.title = title
        self.book_id = book_id


# Root of the YuQue site; the request URLs in this module are built by
# appending an "/api/v2/..." path to it.
base_url = "https://www.yuque.com"


def get_doc_catalog(
    team: str,
    knowledge: str,
    doc_headers: dict,
    timeout: float = 30.0,
) -> list[YuQueCatalog]:
    """Get the directory of Yuque knowledge base.

    Args:
        team: First path segment of the repository (used in the API URL).
        knowledge: Second path segment of the repository.
        doc_headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the caller indefinitely.

    Returns:
        One ``YuQueCatalog`` per item in the response's ``data`` list; an
        empty list when the response carries no ``data``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = f"{base_url}/api/v2/repos/{team}/{knowledge}/docs"
    response = requests.get(url, headers=doc_headers, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing error further down
    # when the payload of an error response lacks the expected fields.
    response.raise_for_status()
    entries = response.json().get("data") or []
    return [
        YuQueCatalog(id=entry["id"], title=entry["title"], book_id=entry["book_id"])
        for entry in entries
    ]


# NOTE: Ruff reports A002 on this function (argument `id` shadows a Python
# builtin). The name is kept so existing keyword callers keep working, and
# the check is suppressed on the parameter line only.
def get_doc_detail(
    book_id: int,
    id: int,  # noqa: A002
    doc_headers: dict,
    timeout: float = 30.0,
):
    """Get details of Yuque knowledge documents.

    Args:
        book_id: Repository id placed in the API URL.
        id: Document id placed in the API URL.
        doc_headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the caller indefinitely.

    Returns:
        The ``body`` field of the response's ``data`` object, or ``None``
        when the response has no ``data`` or no ``body``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = f"{base_url}/api/v2/repos/{book_id}/docs/{id}"
    response = requests.get(url, headers=doc_headers, timeout=timeout)
    response.raise_for_status()
    # A missing "data" object yields None rather than an AttributeError.
    data = response.json().get("data") or {}
    return data.get("body")


def _split_yuque_path(url: str, min_parts: int) -> list[str]:
    """Split a YuQue path (or full URL) into its non-empty path segments.

    Raises:
        ValueError: If fewer than ``min_parts`` segments are present.
    """
    from urllib.parse import urlsplit

    # urlsplit keeps a bare "team/knowledge/code" path as-is and, for a full
    # "https://host/team/..." URL, drops the scheme, host, query and fragment.
    path = urlsplit(url or "").path
    parts = [part for part in path.split("/") if part]
    if len(parts) < min_parts:
        raise ValueError(
            f"Expected a YuQue URL path with at least {min_parts} segments "
            f"(team/knowledge[/document]), got {url!r}"
        )
    return parts


def _yuque_headers(team: str, token: str) -> dict:
    """Build the HTTP headers sent with every YuQue API request."""
    return {"User-Agent": team, "X-Auth-Token": token, "Content-Type": "application/json"}


def get_doc_detail_by_code(token: str, url: str, timeout: float = 30.0):
    """Get details of a single document based on URL.

    Args:
        token: Value sent in the ``X-Auth-Token`` header.
        url: ``team/knowledge/code`` path, with or without a leading "/";
            a full URL is also accepted and reduced to its path.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``body`` field of the response's ``data`` object, or ``None``
        when the response has no ``data`` or no ``body``.

    Raises:
        ValueError: If ``url`` has fewer than three path segments.
        requests.HTTPError: If the server answers with an error status.
    """
    team, knowledge, code = _split_yuque_path(url, 3)[:3]
    full_url = f"{base_url}/api/v2/repos/{team}/{knowledge}/docs/{code}"
    response = requests.get(full_url, headers=_yuque_headers(team, token), timeout=timeout)
    response.raise_for_status()
    data = response.json().get("data") or {}
    return data.get("body")


def get_knowledge_detail(token: str, url: str) -> str:
    """Get all document details under Yuque Knowledge Base.

    Args:
        token: Value sent in the ``X-Auth-Token`` header.
        url: ``team/knowledge`` path, with or without a leading "/"; a full
            URL is also accepted and reduced to its path.

    Returns:
        The bodies of all listed documents, each preceded by a newline.
        Documents whose body is ``None`` are skipped.

    Raises:
        ValueError: If ``url`` has fewer than two path segments.
    """
    team, knowledge = _split_yuque_path(url, 2)[:2]
    doc_headers = _yuque_headers(team, token)
    catalog = get_doc_catalog(team, knowledge, doc_headers)
    bodies = (get_doc_detail(item.book_id, item.id, doc_headers) for item in catalog)
    # join avoids repeated string reallocation from += inside the loop.
    return "".join("\n" + body for body in bodies if body is not None)
Loading