In [1]:
import os
import re
from dataclasses import dataclass
from pathlib import Path

import nbformat
from openai import OpenAI

In [53]:
# Directory holding the source notebook; translation files are written here too.
folder = Path("scipybook")
# Base name of the notebook, without the ".ipynb" extension.
filename = "numpy-tmp"

In [54]:
# Read the source notebook and collect the (cell_type, source) pairs to translate.
with open(folder / f'{filename}.ipynb', 'r', encoding="utf-8") as f:
    notebook = nbformat.read(f, as_version=4)

cells = []
for nb_cell in notebook.cells:
    if nb_cell.cell_type in ("code", "markdown"):
        cells.append((nb_cell.cell_type, nb_cell.source))
    else:
        # Any other cell type is reported and left out of the translation input.
        print(f"Other Cell Type:\n{nb_cell.source}\n")

# Prefix every cell with a "$$$<id>-<type>" marker line. The id is the position
# within `cells` (code/markdown only), so it differs from the index in
# notebook.cells whenever another cell type is present.
# A comprehension keeps the loop names local, so the builtin `type` is not
# overwritten in the notebook namespace.
res = [
    f"$$${idx}-{cell_type}\n{source}"
    for idx, (cell_type, source) in enumerate(cells)
]

In [55]:
def get_unused_filename(folder, filename, limit=100):
    """Return the first path ``<folder>/<filename>_NN.txt`` that does not exist yet.

    Args:
        folder: Directory to look in; a ``str`` or ``Path``.
        filename: Base name placed before the numeric suffix.
        limit: How many suffixes to try, starting at 0 (default 100).

    Returns:
        A ``Path`` for the lowest-numbered candidate that is not on disk.

    Raises:
        FileExistsError: If all ``limit`` candidates already exist.
    """
    folder = Path(folder)
    for i in range(limit):
        candidate = folder / f"{filename}_{i:02d}.txt"
        if not candidate.exists():
            return candidate
    # Fail loudly instead of returning None, which would surface later as an
    # obscure error when the caller tries to open the result.
    raise FileExistsError(
        f"No unused name left for '{filename}' in '{folder}' after {limit} attempts"
    )

In [56]:
# Build the prompt from the tagged cells; the [:] slice currently sends all of them.
text = "\n".join(res[:])
# print(text)

# Read the credential from the environment so it is never stored in the notebook.
# NOTE(review): any key that was ever saved inside this notebook should be revoked.
key = os.environ.get("DEEPSEEK_API_KEY")
if not key:
    raise RuntimeError(
        "Set the DEEPSEEK_API_KEY environment variable before running this cell."
    )

client = OpenAI(api_key=key, base_url="https://api.deepseek.com")

request = """
You are a helpful Chinese to Japanese translator. 
Translate the text from Chinese into Japanese. 
Please use 丁寧語. 
Keep the original text format. Keep the line starts with $$$ unchanged.
don't change the source code, only translate the comments in it.
Do not add any unrelated words or comments to the translation.
"""

response = client.chat.completions.create(
    model="deepseek-chat",
    messages=[
        {"role": "system", "content": request},
        {"role": "user", "content": text},
    ],
    stream=False
)

result = response.choices[0].message.content
if not result:
    raise RuntimeError("The translation response contained no text.")

# Save the reply before inspecting it, so it is not lost if the check below fails.
with open(get_unused_filename(folder, filename), "w", encoding="utf-8") as f:
    f.write(result)

# Compare the last "$$$<id>" marker of the request and of the reply; a
# difference means the reply does not end on the same cell as the request.
source_ids = re.findall(r"\$\$\$(\d+)", text)
trans_ids = re.findall(r"\$\$\$(\d+)", result)
last_trans_id = int(trans_ids[-1]) if trans_ids else None
last_id = int(source_ids[-1]) if source_ids else None
print(last_id, last_trans_id)
if last_id != last_trans_id:
    print(
        f"WARNING: request ends at cell {last_id} but the translation ends at "
        f"{last_trans_id}; the reply may be truncated or its markers altered."
    )

14 14


In [57]:
@dataclass
class Cell:
    """A cell's type tag together with the text lines collected for it."""

    type: str      # cell kind, e.g. "code" or "markdown"
    content: list  # lines of the cell, each normally ending in a newline

    @property
    def text(self):
        """Concatenate the collected lines and drop trailing whitespace."""
        joined = "".join(self.content)
        return joined.rstrip()

# Marker line that opens a translated cell, e.g. "$$$12-markdown".
marker_re = re.compile(r"\$\$\$(\d+)-(\w+)\s*$")

# Maps marker id -> Cell built from the translation text files.
trans_cells = {}

# sorted() makes the override order deterministic: when an id appears in
# several files, the file that sorts last wins.
for fn in sorted(folder.glob(f"{filename}*.txt")):
    # Reset per file so lines before a file's first marker are dropped rather
    # than appended to the last cell of the previous file.
    current = None
    with open(fn, encoding="utf-8") as f:
        for line in f:
            marker = marker_re.match(line)
            if marker:
                current = Cell(type=marker.group(2), content=[])
                trans_cells[int(marker.group(1))] = current
            elif current is not None:
                current.content.append(line)

with open(folder / f"{filename}.ipynb", 'r', encoding="utf-8") as f:
    notebook = nbformat.read(f, as_version=4)

# Marker ids count only code and markdown cells, so enumerate that same subset
# here instead of every notebook cell.
translatable = [c for c in notebook.cells if c.cell_type in ("code", "markdown")]

skipped = []
for idx, nb_cell in enumerate(translatable):
    translated = trans_cells.get(idx)
    # Keep the original source when no translation exists or when its type tag
    # disagrees with the notebook cell (a sign of a damaged marker).
    if translated is None or translated.type != nb_cell.cell_type:
        skipped.append(idx)
        continue
    nb_cell.source = translated.text

if skipped:
    print(f"Kept original text for cells without a matching translation: {skipped}")

nbformat.write(notebook, folder / f"{filename}-jp.ipynb")