In [1]:
import shutil
from pathlib import Path

def list_entries_recusive(path: Path):
    """
    Collect every entry below ``path``, descending into sub-directories.

    Entries are returned in pre-order: a directory appears first, immediately
    followed by everything found inside it. The order of siblings is whatever
    ``Path.iterdir()`` yields.

    Args:
        path (Path): Directory to walk.

    Returns:
        list: Paths of all files and directories found below ``path``
        (``path`` itself is not included).
    """
    def _walk(directory):
        # Yield the child first, then its contents, to keep pre-order.
        for child in directory.iterdir():
            yield child
            if child.is_dir():
                yield from _walk(child)

    return list(_walk(path))


In [None]:
# Source (English) and destination (Japanese) localization directories
# for the copy step, plus the listing of everything in the source tree.
data_dir = Path('data')
version = '1.0.3'
en_localization = data_dir.joinpath(version, 'Localization', 'en')
copy_to_dir = data_dir.joinpath(version, 'Localization', 'ja')

# Files and directories found below the English localization directory.
entries = list_entries_recusive(en_localization)
entries

In [3]:
# Number of entries (files and directories) collected from the English localization tree.
len(entries)

12

In [None]:
# Deliberate guard: this assertion always fails, so the cell stops here (and a
# "Run All" does not continue past it) until the line is commented out.
assert 1==2, 'comment out this line to rerun copy process'

# Target paths of files whose name did not match the '*_en.*' pattern and were
# therefore copied under their original file name.
fname_exceptions = []

for entry in entries:
    if entry.is_dir():
        # Directories are not copied themselves; the parent directories of
        # copied files are created on demand below.
        continue
    relative_path = entry.relative_to(en_localization)

    # rename filename '*_en.*' to '*_ja.*'
    if entry.stem.endswith('_en'):
        target_path = copy_to_dir / relative_path.with_stem(entry.stem[:-3] + '_ja')
    else:
        target_path = copy_to_dir / relative_path
        fname_exceptions.append(target_path)
        print(f"not '*_en.*' pattern: {entry} -> {target_path}")

    target_path.parent.mkdir(parents=True, exist_ok=True)
    # Path.copy() only exists on Python 3.14+; shutil.copyfile copies the file
    # contents on older versions too and replaces an existing target file.
    shutil.copyfile(entry, target_path)
    print(f'copy: {entry} -> {target_path}')

In [None]:
# Target paths that did not follow the '*_en.*' naming pattern during the copy step.
fname_exceptions

In [4]:
# Every file and directory below the Japanese localization directory.
# list_entries_recusive already returns a list, so no extra list() copy is needed.
ja_entries = list_entries_recusive(copy_to_dir)
len(ja_entries)

486

In [None]:
# Entries of the Japanese tree whose suffix is not ".loc", shown for inspection.
[item for item in ja_entries if item.suffix != ".loc"]

In [7]:
from pptx_docx_translator import TranslationService

# Translator instance used by the translation loop (sniff_writing_style / run_dict).
# NOTE(review): any configuration or credentials TranslationService needs are not visible here.
translator = TranslationService()

In [8]:
def read_data_to_dict(data: str) -> dict:
    """
    Parse ``KEY { value }`` lines into a dictionary.

    Each non-blank line is split at its first run of whitespace into a key and
    a value. Exactly one pair of enclosing braces is removed from the value, so
    braces that belong to the text itself (for example a trailing ``{name}``
    placeholder) are preserved. If a key occurs more than once, the last
    occurrence wins.

    Args:
        data (str): The input text, one ``KEY { value }`` entry per line.
            Values spanning several lines are not supported.

    Returns:
        dict: Mapping of key to value text, in order of first appearance.

    Raises:
        ValueError: If a non-blank line contains a key but no value.
    """
    result = {}

    for line_number, raw_line in enumerate(data.splitlines(), start=1):
        line = raw_line.strip()
        if not line:  # Skip blank lines between entries
            continue

        # Split on any whitespace (spaces or tabs) between key and value.
        parts = line.split(None, 1)
        if len(parts) != 2:
            raise ValueError(
                f"line {line_number}: expected 'KEY {{ value }}', got {line!r}"
            )
        key, value = parts

        # Remove only the single outer pair of braces; str.strip('{} ') would
        # also eat braces that are part of the value (e.g. "Hello {name}").
        value = value.strip()
        if len(value) >= 2 and value.startswith('{') and value.endswith('}'):
            value = value[1:-1].strip()

        result[key] = value

    return result

def write_dict_to_data(data_dict: dict) -> str:
    """
    Serialize a dictionary as ``key { value }`` entries.

    Args:
        data_dict (dict): Mapping whose items are rendered in iteration order.

    Returns:
        str: One ``key { value }`` entry per item, separated by a blank line;
        an empty string when the dictionary is empty.
    """
    return "\n\n".join(
        f"{name} {{ {text} }}" for name, text in data_dict.items()
    )

In [None]:
# Index of the first entry to process; raise it to resume after an interrupted run.
skip_until = 0

# Files that must not be translated: the UserReport file contains multi-line
# strings as values, which read_data_to_dict does not support yet.
# Compared as Path objects so the check does not depend on the OS path separator.
# NOTE(review): the first entry assumes the current version keeps the file under
# GUI/ like the older "data/202410" layout did — confirm.
unsupported_files = {
    copy_to_dir / "GUI" / "UserReport_ja.loc",
    Path("data/202410/ja/GUI/UserReport_ja.loc"),
}

for idx, file in enumerate(ja_entries):
    if idx < skip_until:
        print(f"Skipping file {idx}/{len(ja_entries)}: {file}")
        continue

    # skip directories and the unsupported files listed above
    if file.is_dir() or file in unsupported_files:
        continue

    print(f"Processing file {idx}/{len(ja_entries)}: {file}")

    # get the original data
    # Explicit encoding so the result does not depend on the platform default.
    # NOTE(review): assumes the .loc files are UTF-8 — confirm.
    original_kv = read_data_to_dict(file.read_text(encoding="utf-8"))

    # sniff the writing style
    writing_style = translator.sniff_writing_style(list(original_kv.values()))
    print(writing_style)

    # translate the data
    translated_kv = translator.run_dict(original_kv, writing_style=writing_style)
    text_data = write_dict_to_data(translated_kv)
    print(text_data)
    # The file is overwritten in place with the translated content.
    file.write_text(text_data, encoding="utf-8")
