In [9]:
import re
import os
import requests
from urllib import parse


def fetch_yuque_data(url, token=None, timeout=30):
    """
    GET a Yuque API listing endpoint and return the decoded JSON payload.

    The request is sent with ``offset=0`` and ``limit=100`` as query
    parameters, so callers should not repeat them in ``url``.

    Args:
        url: Yuque API endpoint to query.
        token: Value for the ``X-Auth-Token`` header. When omitted, the
            ``YUQUE_TOKEN`` environment variable is used instead.
        timeout: Seconds to wait for the server before the request is aborted.

    Returns:
        The JSON-decoded response body.

    Raises:
        RuntimeError: If no token is passed and ``YUQUE_TOKEN`` is not set.
        Exception: If the response status code is not 200.
    """
    # Credentials must not be committed with the notebook; take them from the
    # caller or from the environment.
    token = token or os.environ.get("YUQUE_TOKEN")
    if not token:
        raise RuntimeError(
            "Yuque API token missing: pass token=... or set the YUQUE_TOKEN environment variable"
        )

    headers = {
        'accept': 'application/json',
        'X-Auth-Token': token
    }
    params = {
        'offset': 0,
        'limit': 100
    }

    # Without a timeout a stalled connection would hang the cell indefinitely.
    response = requests.get(url, headers=headers, params=params, timeout=timeout)

    if response.status_code != 200:
        raise Exception(f"Request failed with status code {response.status_code}")
    return response.json()
    
def fetch_document_data(url, token=None, timeout=30):
    """
    GET a single Yuque document endpoint and return the decoded JSON payload.

    Args:
        url: Yuque API endpoint of the document.
        token: Value for the ``X-Auth-Token`` header. When omitted, the
            ``YUQUE_TOKEN`` environment variable is used instead.
        timeout: Seconds to wait for the server before the request is aborted.

    Returns:
        The JSON-decoded response body.

    Raises:
        RuntimeError: If no token is passed and ``YUQUE_TOKEN`` is not set.
        Exception: If the response status code is not 200.
    """
    # Credentials must not be committed with the notebook; take them from the
    # caller or from the environment.
    token = token or os.environ.get("YUQUE_TOKEN")
    if not token:
        raise RuntimeError(
            "Yuque API token missing: pass token=... or set the YUQUE_TOKEN environment variable"
        )

    headers = {
        'accept': 'application/json',
        'X-Auth-Token': token
    }

    # Without a timeout a stalled connection would hang the cell indefinitely.
    response = requests.get(url, headers=headers, timeout=timeout)

    if response.status_code != 200:
        raise Exception(f"Request failed with status code {response.status_code}")
    return response.json()
    
# One empty <a name="..."></a> anchor. The name is matched with [^"]* so that
# two anchors on the same line are removed separately instead of swallowing
# the text between them.
_YUQUE_ANCHOR_RE = re.compile(r'<a name="[^"]*"></a>')

# A markdown image hosted on the Yuque CDN. Every part is bounded ("]" for the
# alt text, ")" / whitespace for the URL) so that several images on one line
# are matched one at a time and text after the image is left alone.
#   img_src  - URL up to and including the file extension (fragment excluded)
#   slug     - the all-digit path segment directly before the file name
#   filename - last path segment, used as the local file name
_YUQUE_IMAGE_RE = re.compile(
    r"!\[(?P<img_name>[^\]]*)\]"
    r"\((?P<img_src>https://cdn\.nlark\.com/yuque[^)\s]*?/(?P<slug>\d+)/"
    r"(?P<filename>[^/)\s#?]+\.[a-zA-Z0-9]+))[^)]*\)"
)


def _to_local_image_src(body):
    """Point Yuque CDN images at ./assets/ and return (new_body, list of image group dicts)."""
    images = [match.groupdict() for match in _YUQUE_IMAGE_RE.finditer(body)]
    new_body = _YUQUE_IMAGE_RE.sub(r"![\g<img_name>](./assets/\g<filename>)", body)
    return new_body, images


def _download_image(image_info, save_dir, timeout=30):
    """Download one image into save_dir; failures are reported with print, not raised."""
    img_src = image_info['img_src']
    filename = image_info["filename"]

    try:
        response = requests.get(img_src, timeout=timeout)
        response.raise_for_status()  # Turn HTTP error statuses into exceptions.
        with open(os.path.join(save_dir, filename), 'wb') as f:
            f.write(response.content)
    except FileNotFoundError:
        print(f"Unable to download image: {filename}")
    except requests.RequestException as e:
        print(f"Failed to download image: {filename}, error: {e}")


def _check_safe_path(path):
    """Percent-encode the characters that cannot be used in a file name."""
    for char in r'/\<>?:"|*':
        path = path.replace(char, parse.quote_plus(char))
    return path


def process_yuque_data(data, docs_dir="./docs"):
    """
    Convert one Yuque document payload into a local markdown file.

    The body is cleaned of Yuque export artefacts, images hosted on the Yuque
    CDN are downloaded into ``<docs_dir>/assets`` and their links rewritten to
    ``./assets/<filename>``, and the result is written to
    ``<docs_dir>/<title>.md``.

    Args:
        data: Decoded API response; ``data["data"]["title"]`` and
            ``data["data"]["body"]`` are read.
        docs_dir: Directory that receives the markdown file and the
            ``assets`` sub-directory. It is created when missing.

    Returns:
        None.

    Raises:
        KeyError: If ``data`` lacks the ``data``/``title``/``body`` keys.
    """
    title = data["data"]["title"]
    # A null body is treated as an empty document instead of crashing re.sub.
    body = data["data"]["body"] or ""

    # Strip the <a> anchor tags and invisible characters that Yuque exports contain.
    body = _YUQUE_ANCHOR_RE.sub("", body)
    body = re.sub(r"\x00", "", body)
    body = re.sub(r"\x05", "", body)
    # Put images on their own line rather than behind/in front of a <br />.
    body = re.sub(r'\<br \/\>!\[image.png\]', "\n![image.png]", body)
    body = re.sub(r'\)\<br \/\>', ")\n", body)

    new_body, image_list = _to_local_image_src(body)

    # The output directory must exist even for documents without images.
    os.makedirs(docs_dir, exist_ok=True)

    if image_list:
        assets_dir = os.path.join(docs_dir, "assets")
        os.makedirs(assets_dir, exist_ok=True)
        for image_info in image_list:
            _download_image(image_info, assets_dir)

    # Encode the characters that cannot be used in a file name.
    save_path = os.path.join(docs_dir, f"{_check_safe_path(title)}.md")
    with open(save_path, "w", encoding="utf-8") as f:
        f.write(new_body)
    
    
# List the documents of the repository. fetch_yuque_data already sends
# offset/limit as query parameters, so they are not repeated in the URL.
url = 'https://www.yuque.com/api/v2/repos/52046891/docs'
ids = [d["id"] for d in fetch_yuque_data(url)["data"]]

# Fetch every document by id and export it to a local markdown file.
base_url = "https://www.yuque.com/api/v2/repos/52046891/docs/"
for doc_id in ids:  # not named `id`, which would shadow the builtin
    body = fetch_document_data(base_url + str(doc_id))
    process_yuque_data(body)

In [5]:
import os
import re

def get_markdown_files(directory):
    """
    Return the names of the entries in ``directory`` that end with ``.md``.

    Names are returned in the order ``os.listdir`` yields them.
    """
    markdown_names = []
    for entry_name in os.listdir(directory):
        if entry_name.endswith('.md'):
            markdown_names.append(entry_name)
    return markdown_names

def parse_filename(filename):
    """
    Split a ``"<major>.<minor> <title>.md"`` file name into its parts.

    Returns:
        ``(index, title)`` where ``index`` is the ``major.minor`` prefix and
        ``title`` is the whitespace-stripped text before ``.md``, or
        ``(None, None)`` when the name does not start with that layout.
    """
    parsed = re.match(r'(\d+\.\d+)\s+(.+)\.md', filename)
    if parsed is None:
        return None, None
    number, heading = parsed.groups()
    return number, heading.strip()

def generate_markdown_directory(directory):
    """
    Build a markdown table of contents for the numbered files in ``directory``.

    Files are discovered with ``get_markdown_files`` and split into
    ``(index, title)`` with ``parse_filename``; files that cannot be parsed
    are skipped. Entries are ordered by the numeric value of their
    ``major.minor`` index and grouped under one heading per major number.

    Args:
        directory: Directory whose markdown files are listed.

    Returns:
        The table of contents as a single markdown string.
    """
    sections = {
        "1": "前言介绍",
        "2": "背景知识",
        "3": "入门案例",
        "4": "高级功能",
        "5": "更多案例"
    }

    # Collect (major, minor, file, index, title) so the list can be sorted
    # numerically; a plain string sort would put 1.10 before 1.2 and 10.x
    # before 2.x.
    entries = []
    for file in get_markdown_files(directory):
        index, title = parse_filename(file)
        if index and title:
            major, minor = index.split('.')
            entries.append((int(major), int(minor), file, index, title))
    entries.sort()

    markdown_lines = ["# 目录"]
    current_section = None

    for _, _, file, index, title in entries:
        section_number = index.split('.')[0]
        # Emit a heading whenever the major number changes.
        if current_section != section_number:
            current_section = section_number
            section_title = sections.get(current_section, "未知章节")
            markdown_lines.append(f"\n### {current_section}. {section_title}")
        markdown_lines.append(f"* [{index} {title}](docs/{file})")

    return "\n".join(markdown_lines)

def write_directory_to_file(directory, output_file):
    """
    Render the table of contents for ``directory`` and save it to ``output_file``.

    The text is produced by ``generate_markdown_directory`` before the output
    file is opened, then written as UTF-8, replacing any existing content.
    """
    toc_text = generate_markdown_directory(directory)
    with open(output_file, 'w', encoding='utf-8') as handle:
        handle.write(toc_text)

# Configure the docs directory and the output file, then write the table of
# contents; any failure is printed instead of raised.
docs_directory = './docs'
output_file = 'SUMMARY.md'
try:
    write_directory_to_file(docs_directory, output_file)
except Exception as e:
    print(e)
else:
    print(f"目录已写入 {output_file}")

目录已写入 SUMMARY.md
