In [None]:
import boto3
import json

# Smoke test: send a single "Hello, world" user message to the Claude Sonnet 4
# model through the Bedrock runtime client and print the reply.
bedrock = boto3.client(service_name='bedrock-runtime', region_name='ap-northeast-1')
# JSON request payload: one user message, with max_tokens set to 256.
body = json.dumps({
    'max_tokens': 256,
    'messages': [{'role': 'user', 'content': 'Hello, world'}],
    'anthropic_version': 'bedrock-2023-05-31'
})

response = bedrock.invoke_model(body=body, modelId='apac.anthropic.claude-sonnet-4-20250514-v1:0')

# The 'body' entry is a readable object; read it fully and decode the JSON.
response_body = json.loads(response.get('body').read())
# Print whatever is stored under the 'content' key of the decoded response.
print(response_body.get('content'))

In [None]:
import requests
from markdownify import markdownify as md
import boto3
import json

# AWS Bedrock Runtime client
REGION_NAME = 'ap-northeast-1'  # change this to your AWS region
# NOTE(review): a later cell rebinds MODEL_ID to an Ollama model name. The
# Bedrock helpers read this global when they are called, so re-run this cell
# before using them if that later cell has already been executed.
MODEL_ID = 'apac.anthropic.claude-sonnet-4-20250514-v1:0'
bedrock = boto3.client('bedrock-runtime', region_name=REGION_NAME)

In [None]:
# Fetch a URL and convert its HTML to Markdown
def url_to_markdown(url, timeout=30):
    """Download ``url`` and return the page converted to Markdown.

    Args:
        url: Address of the HTML page to fetch.
        timeout: Seconds passed to ``requests.get`` so that a stalled server
            cannot hang the notebook indefinitely.

    Returns:
        The Markdown text produced by ``markdownify`` from the response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of silently converting an error page into Markdown.
    response.raise_for_status()
    # When the server declares no charset, requests decodes text/* bodies as
    # ISO-8859-1, which garbles UTF-8 pages; use the encoding detected from
    # the body instead.
    if 'charset' not in response.headers.get('Content-Type', '').lower():
        response.encoding = response.apparent_encoding
    return md(response.text)

# Save text to a file
def save_to_file(content, filename='tabs_output.md'):
    """Write ``content`` to ``filename`` as UTF-8, replacing any existing file.

    Args:
        content: Text to write.
        filename: Destination path; defaults to ``tabs_output.md``.
    """
    with open(filename, mode='w', encoding='utf-8') as handle:
        handle.write(content)

In [None]:
# Split Markdown into blocks of consecutive non-blank lines
def split_markdown_into_blocks(markdown_content):
    """Group consecutive non-blank lines of ``markdown_content`` into blocks.

    Lines that are empty or whitespace-only act as separators and are dropped.

    Returns:
        A list of strings, each the lines of one block joined with ``'\\n'``.
    """
    paragraphs = []
    pending = []
    # The trailing '' sentinel flushes the last block through the same path
    # as any other blank separator line.
    for raw_line in markdown_content.splitlines() + ['']:
        if raw_line.strip():
            pending.append(raw_line)
            continue
        if pending:
            paragraphs.append('\n'.join(pending))
            pending = []
    return paragraphs

# Generate short English / Chinese / Japanese titles with Claude Sonnet 4
def generate_block_titles(block):
    """Ask the Bedrock model for three short titles summarizing ``block``.

    Args:
        block: Markdown text to summarize.

    Returns:
        A tuple ``(english, chinese, japanese)`` of single-line title strings.

    Raises:
        ValueError: If no line of the reply consists of exactly three
            non-empty fields separated by ``'|'``.
    """
    prompt = f"""
Summarize the following Markdown block into a short English phrase, a short Chinese phrase, and a short Japanese phrase, separated by '|':
{block}
"""
    request_body = {
        'messages': [
            {'role': 'user', 'content': prompt}
        ],
        'max_tokens': 1024,
        'anthropic_version': 'bedrock-2023-05-31'
    }
    response = bedrock.invoke_model(
        modelId=MODEL_ID,
        body=json.dumps(request_body).encode('utf-8'),
        contentType='application/json',
        accept='application/json'
    )
    result_json = json.loads(response['body'].read())
    result = result_json.get("content")[0]['text'].strip()

    # Parse line by line: the reply may carry a preamble line or table-style
    # leading/trailing pipes. Splitting the whole reply on '|' would either
    # fail to unpack or yield multi-line titles that break the tab markers.
    for line in result.splitlines():
        parts = [part.strip() for part in line.strip().strip('|').split('|')]
        if len(parts) == 3 and all(parts):
            english, chinese, japanese = parts
            return english, chinese, japanese
    raise ValueError(
        f"Expected 'English | Chinese | Japanese' in model reply, got: {result!r}"
    )

# Translate a Markdown block
def translate_block(block, language='zh', max_tokens=1024):
    """Translate ``block`` into ``language`` using the Bedrock model.

    Args:
        block: Markdown text to translate.
        language: Target language, inserted verbatim into the prompt.
        max_tokens: Upper bound on generated tokens sent with the request.
            Raise it for long blocks whose translation would not fit.

    Returns:
        The text of the first content entry of the reply, stripped of
        surrounding whitespace.

    Warns:
        RuntimeWarning: If the reply reports ``stop_reason == 'max_tokens'``,
            meaning the returned translation was cut off.
    """
    import warnings

    prompt = f"""
Translate the following Markdown content into {language}:
{block}
"""
    request_body = {
        'messages': [
            {'role': 'user', 'content': prompt}
        ],
        'max_tokens': max_tokens,
        'anthropic_version': 'bedrock-2023-05-31'
    }
    response = bedrock.invoke_model(
        modelId=MODEL_ID,
        body=json.dumps(request_body).encode('utf-8'),
        contentType='application/json',
        accept='application/json'
    )
    result_json = json.loads(response['body'].read())
    # A truncated translation would otherwise be written to the output file
    # without any indication that part of the block is missing.
    if result_json.get('stop_reason') == 'max_tokens':
        warnings.warn(
            f"Translation into {language} stopped at the max_tokens limit "
            f"({max_tokens}) and is probably truncated; pass a larger max_tokens.",
            RuntimeWarning,
            stacklevel=2,
        )
    return result_json.get("content")[0]['text'].strip()

# Generate multilingual tabs, wrapping each block in {% tabs %}
def generate_tabs(markdown_content):
    """Yield one ``{% tabs %}`` section per block of ``markdown_content``.

    Each section holds three tabs: the original block under its English
    title, then its Chinese and Japanese translations under their titles.
    Sections are produced lazily, so the model is only called for a block
    when the consumer asks for it.
    """
    for block in split_markdown_into_blocks(markdown_content):
        english, chinese, japanese = generate_block_titles(block)
        # Translations are requested in the same order as the tabs appear.
        zh_translation = translate_block(block, 'Chinese')
        ja_translation = translate_block(block, 'Japanese')

        yield '\n'.join([
            f'{{% tabs {english} %}}',
            f'<!-- tab {english} -->\n{block}\n<!-- endtab -->',
            f'\n<!-- tab {chinese} -->\n{zh_translation}\n<!-- endtab -->',
            f'\n<!-- tab {japanese} -->\n{ja_translation}\n<!-- endtab -->',
            '{% endtabs %}',
        ])

In [None]:
import requests
import json

# Chat endpoint of the Ollama server used by the helpers defined in this cell.
OLLAMA_API_URL = "http://mac.dtype.info:11434/api/chat"
# NOTE(review): this rebinds the MODEL_ID global that an earlier cell set to a
# Bedrock model id. Helpers that read MODEL_ID at call time will see this
# value once this cell has run.
MODEL_ID = "gpt-oss:20b"

# Generate short English / Chinese / Japanese titles with gpt-oss:20b
def generate_block_titles(block, timeout=600):
    """Ask the Ollama chat endpoint for three short titles summarizing ``block``.

    Args:
        block: Markdown text to summarize.
        timeout: Seconds passed to ``requests.post`` so that an unresponsive
            server cannot hang the notebook indefinitely.

    Returns:
        A tuple ``(english, chinese, japanese)`` of single-line title strings.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        ValueError: If no line of the reply consists of exactly three
            non-empty fields separated by ``'|'``.
    """
    prompt = f"""
You are a helper that only outputs results without any explanation.
Summarize the following Markdown block into:
- One short English phrase
- One short Chinese phrase
- One short Japanese phrase

Output them directly in the format:
English | 中文 | 日本語

Markdown block:
{block}
"""
    request_body = {
        "model": MODEL_ID,
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "stream": False
    }
    response = requests.post(OLLAMA_API_URL, json=request_body, timeout=timeout)
    # Surface HTTP errors directly instead of failing later with a KeyError
    # on the missing "message" field.
    response.raise_for_status()
    result_json = response.json()
    result = result_json["message"]["content"].strip()

    # Parse line by line: the reply may carry a preamble line or table-style
    # leading/trailing pipes. Splitting the whole reply on '|' would either
    # fail to unpack or yield multi-line titles that break the tab markers.
    for line in result.splitlines():
        parts = [part.strip() for part in line.strip().strip('|').split('|')]
        if len(parts) == 3 and all(parts):
            english, chinese, japanese = parts
            return english, chinese, japanese
    raise ValueError(
        f"Expected 'English | 中文 | 日本語' in model reply, got: {result!r}"
    )

# Translate a Markdown block
def translate_block(block, language='zh', timeout=600):
    """Translate ``block`` into ``language`` through the Ollama chat endpoint.

    Args:
        block: Markdown text to translate.
        language: Target language, inserted verbatim into the prompt.
        timeout: Seconds passed to ``requests.post`` so that an unresponsive
            server cannot hang the notebook indefinitely.

    Returns:
        The reply's message content, stripped of surrounding whitespace.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    prompt = f"""
You are a translator that only outputs the translated text without any explanation.
Translate the following Markdown content into {language}.
Directly return the translated Markdown without extra notes.

Markdown block:
{block}
"""
    request_body = {
        "model": MODEL_ID,
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "stream": False
    }
    response = requests.post(OLLAMA_API_URL, json=request_body, timeout=timeout)
    # Surface HTTP errors directly instead of failing later with a KeyError
    # on the missing "message" field.
    response.raise_for_status()
    result_json = response.json()
    return result_json["message"]["content"].strip()

In [None]:
# Download the source page, convert it to Markdown and save a local copy.
url = 'https://llds.ling-phil.ox.ac.uk/llds/xmlui/bitstream/handle/20.500.14106/A89026/A89026.html'
markdown_content = url_to_markdown(url)
save_to_file(markdown_content, 'A89026.md')
print('Saved to A89026.md')

In [None]:
# Load the saved Markdown copy from disk into `markdown_content`.
with open('A89026.md', 'r', encoding='utf-8') as f:
    markdown_content = f.read()

In [None]:
# Write each generated tabs block to disk as soon as it is ready.
# The file is opened once with 'w' rather than re-opened with 'a' per block,
# so re-running this cell replaces the output instead of appending a second
# copy of every block to it.
with open('A89026-tabs.md', 'w', encoding='utf-8') as f:
    for tabs_block in generate_tabs(markdown_content):
        f.write(tabs_block + '\n\n')
        # Flush so finished blocks are on disk even if a later model call fails.
        f.flush()
        print('Saved to A89026-tabs.md')
print('Finished.')

Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-tabs.md
Saved to A89026-