Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Yuque (https://www.yuque.com) loader #1874

Open
wants to merge 3 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Empty file.
46 changes: 46 additions & 0 deletions src/backend/base/langflow/base/loaders/yuque.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from typing import Dict, Any

from langchain_core.documents import Document

from langflow.helpers.YuQueDocLoaderExec import YuQueDocLoaderExec
from langflow.interface.custom.custom_component import CustomComponent


class YuQueDocLoader(CustomComponent):
    """Custom component that loads content from Yuque (https://www.yuque.com).

    The component collects a document type, an API token and a URL path, and
    delegates the actual fetching to ``YuQueDocLoaderExec``.
    """

    display_name: str = "YuQueDocLoader"
    description: str = "Load from yuQue(https://www.yuque.com) URL"

    def build_config(self) -> Dict[str, Any]:
        """Describe the input fields of the component.

        Returns:
            A mapping from field name to its UI configuration.
        """
        return {
            "doc_type": {
                "display_name": "Document Type",
                "options": [
                    "Knowledge",
                    "Document",
                ],
                "info": "Please select a single document or a knowledge base",
                "required": True,
            },
            "token": {
                "display_name": "Token",
                "required": True,
                # The token is a credential: mask it in the UI instead of
                # showing it as plain text.
                "password": True,
            },
            "url": {
                "display_name": "URL",
                "required": True,
                "info": "Please Enter the URL after [https://www.yuque.com/]",
            },
            # Use a real boolean, like the other flags in this config,
            # rather than the string "true".
            "code": {"show": True},
        }

    def build(
        self,
        doc_type: str,
        token: str,
        url: str,
    ) -> Document:
        """Fetch the requested Yuque content.

        Args:
            doc_type: "Document" for a single document; the loader treats any
                other value as a knowledge base.
            token: Yuque API token.
            url: Path of the document or knowledge base on Yuque.

        Returns:
            The ``Document`` produced by ``YuQueDocLoaderExec.load()``.
        """
        loader = YuQueDocLoaderExec(
            token=token,
            url=url,
            doc_type=doc_type,
        )
        return loader.load()
27 changes: 27 additions & 0 deletions src/backend/base/langflow/helpers/YuQueDocLoaderExec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import logging
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseLoader

from langflow.helpers.YuQueTool import get_doc_detail_by_code, get_knowledge_detail

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


class YuQueDocLoaderExec(BaseLoader):
    """Loader that fetches Yuque content and wraps it in a ``Document``.

    The constructor only stores its arguments; all fetching happens in
    ``load()``.
    """

    def __init__(
        self,
        token: str = None,
        url: str = None,
        doc_type: str = None,
    ):
        self.token, self.url, self.doc_type = token, url, doc_type

    def load(self) -> Document:
        """Fetch the configured content and return it as one ``Document``.

        A ``doc_type`` of "Document" fetches a single document; every other
        value (including ``None``) fetches the whole knowledge base.

        NOTE(review): this returns a single ``Document`` rather than a list;
        confirm that this matches what callers of the base loader expect.
        """
        fetch = (
            get_doc_detail_by_code
            if self.doc_type == "Document"
            else get_knowledge_detail
        )
        return Document(page_content=fetch(self.token, self.url))
76 changes: 76 additions & 0 deletions src/backend/base/langflow/helpers/YuQueTool.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey @liujichengs
Great implementation. Sorry for the delayed response.

Could you move this to langflow/base/loaders/yuque.py?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @ogabrielluiz, I have made the change. Thank you for your reply.

Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from typing import List, Dict

import requests


class YuQueCatalog:
    """One entry of a Yuque knowledge-base catalog.

    Attributes:
        id: Identifier of the document.
        title: Title of the document.
        book_id: Identifier of the knowledge base that contains the document.
    """

    def __init__(self, id: int, title: str, book_id: int):
        # Plain value holder: store the three fields unchanged.
        self.id, self.title, self.book_id = id, title, book_id


# Root URL of the Yuque site; the API paths built in this module are appended to it.
base_url = 'https://www.yuque.com'


def get_doc_catalog(
    team: str,
    knowledge: str,
    doc_headers: Dict,
    timeout: float = 30.0,
) -> List[YuQueCatalog]:
    """Get the directory of a Yuque knowledge base.

    Args:
        team: Team (or user) segment of the knowledge-base path.
        knowledge: Knowledge-base segment of the path.
        doc_headers: HTTP headers to send, including the auth token.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        One ``YuQueCatalog`` per entry in the response's ``data`` list; an
        empty list when ``data`` is missing or null.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    url = f'{base_url}/api/v2/repos/{team}/{knowledge}/docs'
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, headers=doc_headers, timeout=timeout)
    # Report a bad token or wrong path as an HTTP error instead of failing
    # later while iterating a missing "data" field.
    response.raise_for_status()
    entries = response.json().get("data") or []

    return [
        YuQueCatalog(
            id=entry['id'],
            title=entry['title'],
            book_id=entry['book_id'],
        )
        for entry in entries
    ]


def get_doc_detail(book_id: int, id: int, doc_headers: Dict, timeout: float = 30.0):
    """Get the body of one document in a Yuque knowledge base.

    Args:
        book_id: Identifier of the knowledge base containing the document.
        id: Identifier of the document.
        doc_headers: HTTP headers to send, including the auth token.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``body`` field of the response's ``data`` object.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    url = f'{base_url}/api/v2/repos/{book_id}/docs/{id}'
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, headers=doc_headers, timeout=timeout)
    # Fail with a clear HTTP error rather than an AttributeError on a
    # payload that has no "data" object.
    response.raise_for_status()
    return response.json().get("data").get("body")


def get_doc_detail_by_code(token: str, url: str, timeout: float = 30.0):
    """Get details of a single document based on URL.

    Args:
        token: Yuque API token, sent as the ``X-Auth-Token`` header.
        url: Location of the document in the form ``team/knowledge/code``.
            A leading slash, a full ``https://...`` URL, a trailing slash
            and a query string or fragment are all tolerated.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``body`` field of the response's ``data`` object.

    Raises:
        ValueError: If ``url`` does not contain team, knowledge base and
            document segments.
        requests.HTTPError: If the API answers with an error status.
    """
    # Imported here so the block brings its own dependency into scope.
    from urllib.parse import urlsplit

    # Keep only the path: this drops any scheme/host as well as any query
    # string or fragment the user may have pasted along with the URL.
    path = urlsplit((url or "").strip()).path
    segments = [part for part in path.split("/") if part]
    if len(segments) < 3:
        raise ValueError(
            "Expected a document URL of the form 'team/knowledge/code', "
            f"got {url!r}"
        )
    team, knowledge, code = segments[:3]

    full_url = f'{base_url}/api/v2/repos/{team}/{knowledge}/docs/{code}'
    doc_headers = {
        'User-Agent': team,
        'X-Auth-Token': token,
        'Content-Type': 'application/json'
    }
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(full_url, headers=doc_headers, timeout=timeout)
    # Report a bad token or wrong path as an HTTP error instead of an
    # AttributeError on a payload without "data".
    response.raise_for_status()
    return response.json().get("data").get("body")


def get_knowledge_detail(token: str, url: str) -> str:
    """Get all document details under a Yuque knowledge base.

    Args:
        token: Yuque API token, sent as the ``X-Auth-Token`` header.
        url: Location of the knowledge base in the form ``team/knowledge``.
            A leading slash, a full ``https://...`` URL, extra trailing
            segments and a query string or fragment are all tolerated.

    Returns:
        The bodies of every document in the knowledge base, each preceded
        by a newline, concatenated in catalog order.

    Raises:
        ValueError: If ``url`` does not contain team and knowledge-base
            segments.
    """
    # Imported here so the block brings its own dependency into scope.
    from urllib.parse import urlsplit

    # Keep only the path: this drops any scheme/host as well as any query
    # string or fragment the user may have pasted along with the URL.
    path = urlsplit((url or "").strip()).path
    segments = [part for part in path.split("/") if part]
    if len(segments) < 2:
        raise ValueError(
            "Expected a knowledge base URL of the form 'team/knowledge', "
            f"got {url!r}"
        )
    team, knowledge = segments[:2]

    doc_headers = {
        'User-Agent': team,
        'X-Auth-Token': token,
        'Content-Type': 'application/json'
    }
    catalog = get_doc_catalog(team, knowledge, doc_headers)
    bodies = (
        get_doc_detail(item.book_id, item.id, doc_headers)
        for item in catalog
    )
    # Each body keeps its leading newline so the output format is unchanged.
    # Documents whose body is None are skipped instead of raising TypeError.
    return ''.join('\n' + body for body in bodies if body is not None)