In [1]:
from pathlib import Path
from helpers import read_data_to_dict
from helpers import list_entries_recusive

previous_ver = "1.0.4"
new_ver = "1.0.4.2"

previous_ver_en = Path("data") / previous_ver / "Localization" / "en"
previous_ver_ja = Path("data") / previous_ver / "Localization" / "ja"
new_en_dir = Path("data") / new_ver / "Localization" / "en"


def read_localizatoin_dir_to_dict(localization_dir_path: Path) -> tuple[dict, list]:
    """Read localization directory to dictionary

    Args:
        localization_dir_path (Path): Path to localization directory

    Returns:
        tuple[dict, list]: Tuple of dictionary and list of exceptions

        Dictionary like {"path/to/file": {"game text key": "game text"}, ...}
    """
    entries = list_entries_recusive(localization_dir_path)
    result = {}
    exceptions = []

    for entry in entries:
        if entry.is_dir() or not entry.name.endswith(".loc"):
            print(f"Skipping {entry}")
            continue

        key = entry.relative_to(localization_dir_path).as_posix()
        value = entry.read_text()

        try:
            value_dict = read_data_to_dict(value)
            result[key] = value_dict
        except Exception:
            exceptions.append(key)

    return result, exceptions


previous_ja_dict, previous_ja_exceptions = read_localizatoin_dir_to_dict(previous_ver_ja)
previous_en_dict, previous_en_exceptions = read_localizatoin_dir_to_dict(previous_ver_en)
new_en_dict, new_en_exceptions = read_localizatoin_dir_to_dict(new_en_dir)
new_ja_dict = previous_ja_dict.copy()


Skipping data/1.0.4/Localization/ja/Journal
Skipping data/1.0.4/Localization/ja/Dialogues
Skipping data/1.0.4/Localization/ja/Letters
Skipping data/1.0.4/Localization/ja/Dialog
Skipping data/1.0.4/Localization/ja/Dialog/Footprint
Skipping data/1.0.4/Localization/ja/Dialog/Perceptible
Skipping data/1.0.4/Localization/ja/Dialog/Objects
Skipping data/1.0.4/Localization/ja/Dialog/Corpses
Skipping data/1.0.4/Localization/ja/Dialogue
Skipping data/1.0.4/Localization/ja/NPC_Names
Skipping data/1.0.4/Localization/ja/GUI
Skipping data/1.0.4/Localization/ja/HUD
Skipping data/1.0.4/Localization/en/Journal
Skipping data/1.0.4/Localization/en/Dialogues
Skipping data/1.0.4/Localization/en/Letters
Skipping data/1.0.4/Localization/en/Dialog
Skipping data/1.0.4/Localization/en/Dialog/Footprint
Skipping data/1.0.4/Localization/en/Dialog/Perceptible
Skipping data/1.0.4/Localization/en/Dialog/Objects
Skipping data/1.0.4/Localization/en/Dialog/Corpses
Skipping data/1.0.4/Localization/en/Dialogue
Skipping d

In [2]:
game_strings = ""
for k,v in previous_ja_dict.items():
    for kk, vv in v.items():
        game_strings += vv

len(game_strings)


625527

In [3]:
previous_en_exceptions, new_en_exceptions

(['GUI/UserReport_en.loc'], ['GUI/UserReport_en.loc'])

In [4]:
def diff_dict_key_values(old: dict, new: dict) -> dict:
    difference = {
        "key_added": [],
        "key_removed": [],
        "value_updated": [],
    }
    for key, value in new.items():
        if key not in old:
            difference["key_added"].append(key)
        else:
            predict_value = old[key]
            if value != predict_value:
                difference["value_updated"].append(key)

    for key in old.keys():
        if key not in new:
            difference["key_removed"].append(key)

    return difference


# test
predict = {
    "a": "fooooo",
    "b": "removed!",
    "d": "removed!!",
}
actual = {
    "a": "foo, change!",
    "c": "new!",
}
res = diff_dict_key_values(predict, actual)
print(res)




def has_diff(old: dict, new: dict) -> bool:
    return diff_dict_key_values(old, new) != {"key_added": [], "key_removed": [], "value_updated": []}


# test
print(has_diff(predict, actual))
print(has_diff(predict, predict))

{'key_added': ['c'], 'key_removed': ['b', 'd'], 'value_updated': ['a']}
True
False


In [16]:
import difflib

def print_diff_hl(ground_truth, target):
    """
    Print diff result with colorful highlight
    """
    color_dic = {
        'red': '\033[31m',
        'green': '\033[32m',
        'end': '\033[0m'
    }

    d = difflib.Differ()
    diffs = list(d.compare(ground_truth, target))

    ground_truth_result = ''
    target_result = ''

    for diff in diffs:
        status = diff[0]
        character = diff[2]

        if status == '-':
            # ground truth の削除箇所を赤で表示
            ground_truth_result += color_dic['red'] + character + color_dic['end']
            target_result += ' '  # 削除部分はtargetに表示しない
        elif status == '+':
            # target の追加箇所を緑で表示
            ground_truth_result += ' '  # 追加部分はground truthに表示しない
            target_result += color_dic['green'] + character + color_dic['end']
        else:
            # 共通部分をそのまま表示
            ground_truth_result += character
            target_result += character

    print(f"ground truth : {ground_truth_result}")
    print(f"target       : {target_result}")

# テスト
ground_truth = "abcde"
target = "a ydef"
print_diff_hl(ground_truth, target)

ground truth : a[31mb[0m[31mc[0m  de 
target       : a  [32m [0m[32my[0mde[32mf[0m


In [14]:
# Search Differences
new_files = []
differences = {}


for new_file_key, new_file_content in new_en_dict.items():
    if new_file_key not in previous_en_dict.keys():
        print(f"New file found: {new_file_key}")
        new_files.append(new_file_key)
    else:
        previous_file_content = previous_en_dict[new_file_key]

        diff = diff_dict_key_values(previous_file_content, new_file_content)
        if diff == {"key_added": [], "key_removed": [], "value_updated": []}:
            # print(f"No diff found in {new_file}")
            continue
        else:
            print(f"Diff found in {new_file_key}")
            differences[new_file_key] = diff


# print results
print()
print("### New files\n", new_files)
print()
print("### Differences\n")
for new_file_key, diff in differences.items():
    print(f"{new_file_key}")

    if diff["key_added"]:
        print("Added keys\n", diff["key_added"], "\n")
    if diff["key_removed"]:
        print("Removed keys\n", diff["key_removed"], "\n")
    if diff["value_updated"]:
        print("Updated values\n")
        for key in diff["value_updated"]:
            print(f"--- Key: {key} ---")
            print_diff_hl(previous_en_dict[new_file_key][key], new_en_dict[new_file_key][key])
    print()


Diff found in Letters/Table_Brutus_en.loc
Diff found in Dialog/NPC_Roarke_en.loc

### New files
 []

### Differences

Letters/Table_Brutus_en.loc
Updated values

--- Key: Letter_1 ---
ground truth : Attempt number seven was... succes ful?<br>Drawing on the primal forces of the crystal, I was able to withstand the intense heat inside the vessel.<br><br>I found myself in a surreal world and met the spirits of nature.<br>I became nauseated. This may sound strange, but it was as though I could feel their mistrust in my veins.<br><br>How can I prove to them that I deserve their trust...?<br>Page 4
target       : Attempt number seven was... succes[32ms[0mful?<br>Drawing on the primal forces of the crystal, I was able to withstand the intense heat inside the vessel.<br><br>I found myself in a surreal world and met the spirits of nature.<br>I became nauseated. This may sound strange, but it was as though I could feel their mistrust in my veins.<br><br>How can I prove to them that I deserve t

In [17]:

from typing import Optional
from llm_utils import get_llm_response

def get_noun_dictionary_prompt():
    """
    Get noun dictionary
    """
    dictionary = {
        "Georgefarm": "ジョージファーム",
        "apple grove": "リンゴ園",
        "Nemeton": "ネメトン",
        "Morbid Moor": "病み沼",
        "Red Moor": "レッドムーア",
        "Bygones": "古き者",
        "Mighty Boar": "マイティボア",
        "Hewers": "鉱夫",
        "Remnant's Camp": "はぐれ者のキャンプ",
        "Remnant": "はぐれ者",
        "Molvina": "モルヴィナ",
        "Pit": "ピット",
        "Heath": "荒れ地",
        "Rustmoss": "ラストモス",
        "Rustroot": "ラストルート",
        "Georg Farm": "ゲオルグ農場",
        "Headless Ripper Inn": "首なしリッパー宿屋",
        "the Marked Ones": "印の一団",
        "Proximus": "プロキシマス",
        "Blades of Jero ": "ジェロの刃",
        "Apple grove" : "リンゴ園",
    }

    prompt = f"""
翻訳時には、以下の固有名詞については、変換先に従って翻訳を行ってください。
{"\n".join([f"{key} => {value}" for key, value in dictionary.items()])}
"""
    return prompt

print(get_noun_dictionary_prompt())



翻訳時には、以下の固有名詞については、変換先に従って翻訳を行ってください。
Georgefarm => ジョージファーム
apple grove => リンゴ園
Nemeton => ネメトン
Morbid Moor => 病み沼
Red Moor => レッドムーア
Bygones => 古き者
Mighty Boar => マイティボア
Hewers => 鉱夫
Remnant's Camp => はぐれ者のキャンプ
Remnant => はぐれ者
Molvina => モルヴィナ
Pit => ピット
Heath => 荒れ地
Rustmoss => ラストモス
Rustroot => ラストルート
Georg Farm => ゲオルグ農場
Headless Ripper Inn => 首なしリッパー宿屋
the Marked Ones => 印の一団
Proximus => プロキシマス
Blades of Jero  => ジェロの刃
Apple grove => リンゴ園



In [None]:
def update_translation(source_sentence:str, translated_source_sentence: str, updated_source_sentence: str, context: Optional[str] = None) -> str:
    """
    Update translation with context and previsous translation
    """
    system_prompt = """You are a world class English-Japanese translator."""
    user_prompt = f"""これは古風なRPGにおけるテキストです。元文が更新されたため、翻訳文を更新したいです。
    「更新前の元文」と「更新前の翻訳文」を参考にして、その翻訳の語感・雰囲気などを維持したまま、内容を更新してください。
    また、翻訳文の品質を向上させるため、翻訳文の修正も行っていただいて構いません。
    口調はですます調は使わず、タメ口でお願いします。
    No Copy & Paste, No 1:1 Translation, No Literal Translation, No Transliteration.
    Don't forget to keep the tone and atmosphere of the translation consistent with the original text.
    Only output the translation text. Don't output explanation or the source text, etc.
    あなたの翻訳力とセンスを信じています。

    { "# Context / Background" if context else ""}
    { context if context else ""}

    # 更新前の元文
    {source_sentence}

    # 更新前の翻訳文:
    {translated_source_sentence}

    # 更新後の元文
    {updated_source_sentence}
    """
    updated_translation = get_llm_response(
        "gpt-4o-mini",
        messages=[
            { "role": "system", "content": system_prompt },
            { "role": "user", "content": user_prompt },
        ],
        params_={
            "temperature": 0,
        }

    )
    print("Original translation:", translated_source_sentence)
    print("Updated translation :", updated_translation)
    return updated_translation

# test
update_translation(
    "Rura wants me to take Muc to her. He's supposed to help with the work. Well, he's got to be somewhere.",
    "ルーラは私にムクを彼女のところに連れて行ってほしいと言っています。彼は仕事を手伝うことになっています。さて、彼はどこかにいるはずです。",
    "Rura wants me to take Muc to her. He's supposed to help with the work. Rura said that he actually wanted to go to Georgefarm. But it's also possible that he's already finished there and is perhaps lazing around in the apple grove east of Nemeton. I should have a look around.",
    context=get_noun_dictionary_prompt()
)


Original translation: ルーラは私にムクを彼女のところに連れて行ってほしいと言っています。彼は仕事を手伝うことになっています。さて、彼はどこかにいるはずです。
Updated translation : ルーラはムクを彼女のところに連れて行ってほしいって言ってる。彼は仕事を手伝うことになってるんだ。ルーラによると、実はジョージファームに行きたがってたみたい。でも、もうそこでの仕事を終えて、ネメトンの東にあるリンゴ園でのんびりしてる可能性もある。ちょっと周りを見てみるべきだな。


'ルーラはムクを彼女のところに連れて行ってほしいって言ってる。彼は仕事を手伝うことになってるんだ。ルーラによると、実はジョージファームに行きたがってたみたい。でも、もうそこでの仕事を終えて、ネメトンの東にあるリンゴ園でのんびりしてる可能性もある。ちょっと周りを見てみるべきだな。'

In [107]:
def translate(source_sentence:str, context: Optional[str] = None, writing_style: str="") -> str:
    """
    Update translation with context and previsous translation
    """
    system_prompt = """You are a world class English-Japanese translator."""
    user_prompt = f"""これは古風なRPGにおけるテキストです。元文を日本語に翻訳してください。
    {writing_style}
    注意事項や背景情報は以下に記載しています。参考にして翻訳してください。
    No Copy & Paste, No 1:1 Translation, No Literal Translation, No Transliteration.
    Don't forget to keep the tone and atmosphere of the translation consistent with the original text.
    Only output the translation text. Don't output explanation or the source text, etc.
    あなたの翻訳力とセンスを信じています。

    { "# Context / Background" if context else ""}
    { context if context else ""}

    # 元文
    {source_sentence}
    """
    updated_translation = get_llm_response(
        "gpt-4o-mini",
        messages=[
            { "role": "system", "content": system_prompt },
            { "role": "user", "content": user_prompt },
        ],
        params_={
            "temperature": 0,
        }

    )
    print("Updated translation :", updated_translation)
    return updated_translation

translate(
    "Rura wants me to take Muc to her. He's supposed to help with the work. Rura said that he actually wanted to go to Georgefarm. But it's also possible that he's already finished there and is perhaps lazing around in the apple grove east of Nemeton. I should have a look around.",
    writing_style="口調はですます調は使わず、タメ口でお願いします。",
    context=get_noun_dictionary_prompt()
)

Updated translation : ルーラがムックを連れて行ってほしいって。彼は仕事を手伝うことになってるんだ。ルーラは、実は彼がジョージファームに行きたがってたって言ってたけど、もうそこでの仕事は終わってて、ネメトンの東にあるリンゴ園でのんびりしてる可能性もあるな。ちょっと周りを見てみるか。


'ルーラがムックを連れて行ってほしいって。彼は仕事を手伝うことになってるんだ。ルーラは、実は彼がジョージファームに行きたがってたって言ってたけど、もうそこでの仕事は終わってて、ネメトンの東にあるリンゴ園でのんびりしてる可能性もあるな。ちょっと周りを見てみるか。'

In [112]:
from helpers import write_dict_to_data
from helpers_llm import sniff_persona

print("# Show added translation keys")
for file, diff_dict in differences.items():

    npc_persona = None

    if diff_dict["key_added"]:
        for key in diff_dict["key_added"]:
            print(f"{key}")

            # TODO: create writing style class and use it from translation service
            writing_style = ""
            if "Journal" in str(file):
                writing_style = "口調はですます調でお願いします。"
            elif "NPC_" in str(file) and "Dialog" in str(file):
                if not npc_persona:
                    npc_name = Path(file).stem.split("_")[1]
                    print(f"NPC name: {npc_name}")
                    npc_persona = sniff_persona(
                        name = npc_name,
                        sentences=list(new_en_dict[file].values())
                    )
                    if npc_persona:
                        writing_style += f"""翻訳する文章はNPCのセリフです。次のNPCのペルソナに沿って翻訳文の雰囲気を決めてください: \n{npc_persona}"""
                else:
                    writing_style = npc_persona

            ja_file_key = file.replace("_en.", "_ja.")

            updated_traslation = translate(
                new_en_dict[file][key],
                context=get_noun_dictionary_prompt(),
                writing_style=writing_style
            )

            new_ja_dict[ja_file_key][key] = updated_traslation
            ja_file_path_obj = Path("data") / new_ver / "Localization" / "ja" / ja_file_key
            ja_file_path_obj.write_text(
                write_dict_to_data(new_ja_dict[ja_file_key])
            )

print("Done")

# Key Added
Entry_03_Cancel
Updated translation : 私はロボを叩きのめしましたので、彼はもう私と関わりたくないようです。
Plh_70
NPC name: Lobo
Updated translation : いじめっ子とは関わりたくないね。
Plh_183
NPC name: Rura
Updated translation : あのクソったれのムクがまたリンゴ園でゴロゴロしていないことを願うがな… *自分に言い聞かせるように*
Plh_231
NPC name: Bady
Updated translation : 斧を手に入れるまでは戦えぬ。今はただ、訓練に励ませてくれ。
Plh_232
Updated translation : 食事を摂るまでは戦うことはできない。それまでは、訓練をさせてくれ。
Plh_358
NPC name: Agilo
Updated translation : ゲロもはぐれ者のキャンプにいる。そこで彼に会ったんだ。
Plh_170
Updated translation : 傷んだ石の周りでは、沼がリンゴ園を再生しようとしているようだ。
Updated translation : アイアンモードを終了してもよろしいですか？もしそうする場合、ゲームの進行中にこのモードを再度有効にすることはできません。


In [103]:
from helpers import write_dict_to_data

assert 1==2, "comment out this line to regenerate translation"

print("# Value Updated")
for file, diff_dict in differences.items():
    if diff_dict["value_updated"]:
        print("------")
        print(f"{file.replace("_en.", "_ja.")}")
        for key in diff_dict["value_updated"]:
            print(f"{key}")
            print_diff_hl(previous_en_dict[file][key], new_en_dict[file][key])

            ja_file_key = file.replace("_en.", "_ja.")
            updated_traslation = update_translation(
                previous_en_dict[file][key],
                previous_ja_dict[ja_file_key][key],
                new_en_dict[file][key],
                context=get_noun_dictionary_prompt()
            )
            new_ja_dict[ja_file_key][key] = updated_traslation

            ja_file_path_obj = Path("data") / new_ver / "Localization" / "ja" / ja_file_key
            ja_file_path_obj.write_text(
                write_dict_to_data(new_ja_dict[ja_file_key])
            )

        print()

print("Done")

# Value Updated
------
Journal/WayIntoTheCraftsHouse_ja.loc
Entry_01
ground truth : Rura wants me to take Muc to her. He's supposed to help with the work.     [31mW[0m[31me[0m[31ml[0m[31ml[0m[31m,[0m       h    e                                            's [31mg[0m   o[31mt[0m [31mt[0m o[31m [0m   b e         s                                                o                                 me   [31mw[0m[31mh[0m[31me[0m[31mr[0m[31me[0m.                             
target       : Rura wants me to take Muc to her. He's supposed to help with the work. [32mR[0m[32mu[0m[32mr[0m[32ma[0m      [32ms[0m[32ma[0m[32mi[0m[32md[0m[32m [0m[32mt[0mh[32ma[0m[32mt[0m[32m [0m[32mh[0me[32m [0m[32ma[0m[32mc[0m[32mt[0m[32mu[0m[32ma[0m[32ml[0m[32ml[0m[32my[0m[32m [0m[32mw[0m[32ma[0m[32mn[0m[32mt[0m[32me[0m[32md[0m[32m [0m[32mt[0m[32mo[0m[32m [0m[32mg[0m[32mo[0m[32m [0m[32mt[0m[32mo[0m[32m [0m[32mG

In [74]:
print("--- Key Removed ---")

for file, diff_dict in differences.items():
    if diff_dict["key_removed"]:
        print(f"{file.replace("_en.", "_ja.")}")
        for key in diff_dict["key_removed"]:
            print(f"{key}")
            # TODO: remove from ja_dict and save
        print()

print('-- done ---')

--- Key Removed ---
-- done ---
