In [None]:
# arXiv identifier (here with its version suffix) of the paper to process.
# The cells below interpolate it into their /data/... paths.
ARXIV_ID = "2412.09737v1"

In [8]:
import sys

# Make the arxiv_translator package importable from the source checkout.
# sys.path entries are used literally (no glob expansion) and must name the
# directory that CONTAINS the package, so a ".../src/arxiv_translator/*" entry
# would never match anything; the parent "src" directory is what is needed.
ARXIV_TRANSLATOR_SRC = "/repos/arxiv-translator/src"
if ARXIV_TRANSLATOR_SRC not in sys.path:  # do not stack duplicates when the cell is re-run
    sys.path.append(ARXIV_TRANSLATOR_SRC)
# Stop Python from writing .pyc / __pycache__ files for modules imported from here on.
sys.dont_write_bytecode = True

from arxiv_translator.file_utils import download_arxiv_source, unfreeze_targz, copy_item, copy_pdf_file
from arxiv_translator.openai_chat import OpenAIChat
from arxiv_translator.tex_compiler import find_tex_files, find_main_tex, compile_tex
from arxiv_translator.tex_translator_utils import split_tex_to_chunks, insert_after_documentclass, extract_quoted_text

import yaml
from tqdm import tqdm
import concurrent.futures

### 0. ダウンロード

In [None]:
# Fetch the arXiv source for ARXIV_ID into /data/0_tar_gz_data/.
# NOTE(review): the extraction cell reads arxiv-{ARXIV_ID}.tar.gz from this directory,
# so the download presumably writes that file name — confirm in file_utils.
download_arxiv_source(arxiv_id=ARXIV_ID, output_dir="/data/0_tar_gz_data/")

### 1. tarの解凍

In [None]:
# Unpack the downloaded archive arxiv-{ARXIV_ID}.tar.gz into /data/1_raw_data/.
# NOTE(review): the copy cell below expects the result at /data/1_raw_data/arxiv-{ARXIV_ID};
# confirm that unfreeze_targz creates that sub-directory.
unfreeze_targz(targz_path=f"/data/0_tar_gz_data/arxiv-{ARXIV_ID}.tar.gz", output_dir="/data/1_raw_data/")

### 2. 作業

##### 作業場所へのコピー

In [None]:
# Copy the extracted sources to /data/2_working_data so that the in-place edits
# made by the following cells never touch the copy under /data/1_raw_data.
# NOTE(review): overwrite=True presumably replaces an existing working copy — confirm in file_utils.copy_item.
copy_item(src=f"/data/1_raw_data/arxiv-{ARXIV_ID}", dst=f"/data/2_working_data/arxiv-{ARXIV_ID}", overwrite=True)

#### 書き換え処理

#### 準備

In [None]:
# General settings: the TeX snippet to inject and the English-to-Japanese prompt template.
with open("/config/configs.yml", "r", encoding="utf-8") as config_file:
    data = yaml.safe_load(config_file)

# Every backslash in the snippet is doubled before it is passed on to
# insert_after_documentclass.
# NOTE(review): presumably because it ends up as a regex replacement string — confirm.
raw_pre_text = data['tex']['pre_text']
inserting_pre_text = raw_pre_text.replace("\\", "\\\\")
template = data["prompt"]["translate"]["en_to_ja"]

# The OpenAI key is read from its own file, separate from the general settings.
with open("/config/api_keys.yml", "r", encoding="utf-8") as keys_file:
    data = yaml.safe_load(keys_file)

api_key = data["OPENAI_API_KEY"]

In [None]:
# Chat client used by the translation cell in two ways: it is called directly on a
# TeX chunk to translate it, and its count_tokens method is handed to the splitter.
# The model is fixed to "gpt-4o"; `template` is the en_to_ja prompt read from configs.yml.
translator = OpenAIChat(api_key=api_key, model="gpt-4o", template=template)

#### 日本語化パッケージの追加

In [None]:
# Locate the document's main TeX file inside the working copy.
main_tex_path = find_main_tex(source_dir=f"/data/2_working_data/arxiv-{ARXIV_ID}")

# Read the current contents (the path is echoed once the file has been opened).
with open(main_tex_path, 'r', encoding='utf-8') as source:
    print(main_tex_path)
    original_main_tex = source.read()

# Inject the configured snippet; insert_after_documentclass decides where it goes.
main_tex_contents = insert_after_documentclass(
    contents=original_main_tex,
    inserting_pre_text=inserting_pre_text,
)

# Overwrite the main TeX file in place with the patched contents.
with open(main_tex_path, 'w', encoding='utf-8') as sink:
    sink.write(main_tex_contents)

#### テキスト分割と翻訳

In [None]:
# Every TeX file found in the working copy is translated and overwritten in place.
file_paths = find_tex_files(source_dir=f"/data/2_working_data/arxiv-{ARXIV_ID}")

for file_path in file_paths:
    with open(file_path, 'r', encoding='utf-8') as source:
        tex_contents = source.read()

    # Split the text into chunks, using the translator's own token counter.
    tex_chunks = split_tex_to_chunks(contents=tex_contents, token_counter=translator.count_tokens)

    # Translate chunk by chunk, in order; each raw reply is passed through
    # extract_quoted_text before it is kept.
    translated_chunks = [
        extract_quoted_text(translator(tex_chunk))
        for tex_chunk in tqdm(tex_chunks)
    ]

    # Chunks are concatenated with no separator and written back over the original file.
    translated_tex_contents = "".join(translated_chunks)
    with open(file_path, 'w', encoding='utf-8') as sink:
        sink.write(translated_tex_contents)

In [6]:
from pathlib import Path

# Scratch check of how pathlib renders a directory string that ends in a slash:
# the recorded output shows the trailing separator being dropped.
output_path = "/data/"
print(str(Path(output_path)))

/data


#### コンパイル

In [None]:
# Build the document from the main TeX file located earlier with find_main_tex.
# NOTE(review): judging by the error recorded further down in this notebook, compile_tex
# runs `latexmk -lualatex -interaction=nonstopmode <file>` and raises CalledProcessError
# when latexmk exits non-zero — confirm in tex_compiler.
compile_tex(source_file_path=main_tex_path)

In [11]:
# Ad-hoc recompile of a second paper, kept apart from the main pipeline.
# The id and the resolved TeX path live in their own names instead of rebinding
# ARXIV_ID / main_tex_path, so the cells that follow keep referring to the paper
# that was downloaded and translated above regardless of execution order.
RECOMPILE_ARXIV_ID = "2412.09871"
recompile_dir = f"/data/2_working_data/arxiv-{RECOMPILE_ARXIV_ID}"
recompile_main_tex_path = find_main_tex(source_dir=recompile_dir)
# The recorded run of this cell failed here: latexmk returned exit status 12 and
# compile_tex surfaced it as CalledProcessError, so the copy below was not reached.
compile_tex(source_file_path=recompile_main_tex_path)
copy_pdf_file(recompile_dir, f"/data/3_output_data/{RECOMPILE_ARXIV_ID}_ja.pdf")

CalledProcessError: Command '['latexmk', '-lualatex', '-interaction=nonstopmode', '/data/2_working_data/arxiv-2412.09871/paper.tex']' returned non-zero exit status 12.

### 3. 結果

In [None]:
# Export the result as /data/3_output_data/{ARXIV_ID}_ja.pdf, using the ARXIV_ID
# currently bound in the kernel.
# NOTE(review): the first argument is the working directory, not a file, so copy_pdf_file
# presumably locates the PDF inside it — confirm how it chooses when several PDFs exist.
copy_pdf_file(f"/data/2_working_data/arxiv-{ARXIV_ID}", f"/data/3_output_data/{ARXIV_ID}_ja.pdf")