In [42]:
import xml.etree.ElementTree as ET
import os
import openai
import json
from tqdm import tqdm
from pydantic import BaseModel
from html.parser import HTMLParser

In [54]:
# Locale suffix of the .ts file to translate (for example 'es' or 'zh_CN').
target_locale = 'zh_CN'

In [55]:
# Load the API key from a private file and publish it through the
# OPENAI_API_KEY environment variable (surrounding whitespace removed).
with open('/Users/pauly/.private/openai_api_key_itksnap', mode='rt') as key_file:
    os.environ['OPENAI_API_KEY'] = key_file.read().strip()

In [56]:
# Create the API client used by translate(). No key is passed explicitly;
# NOTE(review): presumably the SDK reads OPENAI_API_KEY from the environment - confirm.
client = openai.OpenAI()

In [57]:
# One source string paired with its translation, as returned by the model.
class SingleTranslation(BaseModel):
    # The source string, echoed back so it can be matched against the input.
    original: str
    # The translation of `original`.
    translated: str

# Structured response passed as `text_format` when calling the model.
class TranslationResponse(BaseModel):
    # One entry per translated string.
    translations: list[SingleTranslation]

def translate(client, locale: str, input: list[str]) -> "list[SingleTranslation]":
    """Translate a batch of strings into ``locale`` with the stored prompt.

    Args:
        client: Object exposing ``responses.parse(...)``, e.g. ``openai.OpenAI()``.
        locale: Target locale code; passed to the prompt as ``target_locale``.
        input: Source strings; they are sent as one JSON array.

    Returns:
        The ``translations`` list of the parsed ``TranslationResponse``.
        An empty ``input`` yields ``[]`` without contacting the API.

    Raises:
        RuntimeError: If the response carries no parsed output.
    """
    # Nothing to translate: avoid a pointless (and billable) request.
    if not input:
        return []

    response = client.responses.parse(
      prompt={
        "id": "pmpt_686e17744b3c8190a634009a15a02eeb0258750c14426f72",
        "version": "7",
        "variables": {
          "target_locale": locale
        }
      },
      # ensure_ascii=False keeps non-ASCII characters readable in the request.
      input=json.dumps(input, ensure_ascii=False),
      text_format = TranslationResponse
    )

    parsed = response.output_parsed
    if parsed is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError(
            f"No parsed translation output received for locale {locale!r}"
        )
    return parsed.translations

In [58]:
# Text extractor for HTML strings - keeps the text nodes, drops the markup
class HTMLDataExtractor(HTMLParser):
    """Collect the non-blank text nodes of the HTML passed to ``feed()``.

    ``all_data`` lists each text node with surrounding whitespace stripped,
    in document order; whitespace-only nodes are skipped.
    """

    def __init__(self):
        super().__init__()
        self.all_data = []

    def handle_starttag(self, tag, attrs):
        """Ignore opening tags."""

    def handle_endtag(self, tag):
        """Ignore closing tags."""

    def handle_data(self, data):
        """Record ``data`` (stripped) unless it is blank."""
        text = data.strip()
        if text:
            self.all_data.append(text)

# Try the extractor on a sample rich-text string and show what it collects
sample_html = '<html><head/><body><p><span style=" font-weight:600;">Zoom Inspector</span></p><p>Control zoom factor in all slice views. </p></body></html>'
parser = HTMLDataExtractor()
parser.feed(sample_html)
parser.all_data

['Zoom Inspector', 'Control zoom factor in all slice views.']

In [59]:
# Run the translation prompt on the strings extracted from the sample above
translate(client, target_locale, parser.all_data)

[SingleTranslation(original='Zoom Inspector', translated='缩放检查器'),
 SingleTranslation(original='Control zoom factor in all slice views.', translated='控制所有切片视图的缩放比例。')]

In [60]:
# Initial parsing of the contexts and text elements
tree = ET.parse(f'/Users/pauly/tk/itksnap/itksnap/GUI/Qt/Translations/itksnap_{target_locale}.ts')
root = tree.getroot()

# all_con holds one entry per <context> that has something to translate:
#   {'context': <context element>, 'tran': [{'message', 'text', 'strings'}, ...]}
all_con = []
for context in root.findall('context'):
    all_tran = []
    for message in context.findall('message'):
        source = message.find('source')
        translation = message.find('translation')
        # Incomplete or empty messages cannot be translated; skip them
        # rather than failing on a missing element or a None text.
        if source is None or translation is None or not source.text:
            continue
        # Only messages still marked as unfinished need work
        if translation.attrib.get('type') != 'unfinished':
            continue
        # Resource references are not translatable text
        if 'qrc:/' in source.text:
            continue
        htmlextr = HTMLDataExtractor()
        htmlextr.feed(source.text)
        if not htmlextr.all_data:
            # No text was extracted. Queuing the message anyway would let a
            # later rebuild from translated strings produce an empty
            # translation, so leave it untouched.
            continue
        all_tran.append({'message':message, 'text':source.text, 'strings': htmlextr.all_data})
    if all_tran:
        all_con.append({'context': context, 'tran': all_tran})

In [61]:
# Translate context by context: all strings of a context go out in one request
for con in all_con:
    all_strings = [text for entry in con['tran'] for text in entry['strings']]
    if not all_strings:
        continue
    print(f"Translating {con['context'].find('name').text}")
    con['gpt'] = translate(client, target_locale, all_strings)

Translating CursorInspector
Translating FileChooserPanelWithHistory
Translating ImageIOWizard
Translating MainImageWindow
Translating QObject
Translating imageiowiz::SelectFilePage


In [62]:
# Translator for HTML strings - copies the markup through and swaps each
# text node for its translation
class HTMLDataTranslator(HTMLParser):
    """Rebuild an HTML string with its text nodes replaced by translations.

    Args:
        token_list: Sequence of objects with ``original`` and ``translated``
            attributes, in the order the text nodes appear.
        token_index: Position in ``token_list`` of the first token to use.

    Attributes:
        result: The rebuilt string, available after ``feed()``.
        token_index: Position of the next unused token.
        success: False once any text node could not be matched to a token;
            the untranslated text is kept in ``result`` in that case.

    Note:
        ``HTMLParser`` can hold back trailing text until ``close()`` is
        called, so call ``close()`` after the final ``feed()``.
    """

    def __init__(self, token_list, token_index):
        super().__init__()
        self.all_data = []  # never filled in; kept for existing attribute access
        self.token_list = token_list
        self.token_index = token_index
        self.result = ''
        self.success = True

    def handle_starttag(self, tag, attrs):
        # Copy the tag exactly as written. Re-synthesising it from `attrs`
        # loses entity escaping and turns valueless attributes into "None".
        self.result += self.get_starttag_text()

    def handle_startendtag(self, tag, attrs):
        # Keep self-closing tags such as <head/> as they are instead of
        # expanding them into an open/close pair.
        self.result += self.get_starttag_text()

    def handle_endtag(self, tag):
        self.result += f"</{tag}>"

    def handle_comment(self, data):
        self.result += f"<!--{data}-->"

    def handle_decl(self, decl):
        # e.g. <!DOCTYPE ...>
        self.result += f"<!{decl}>"

    def handle_pi(self, data):
        self.result += f"<?{data}>"

    def handle_data(self, data):
        string = data.strip()
        if not string:
            # Whitespace between elements is part of the markup; keep it.
            self.result += data
            return

        if self.token_index >= len(self.token_list):
            print('Replacement list size mismatch')
            self.success = False
            self.result += data
            return

        t = self.token_list[self.token_index]
        # The token is consumed whether or not it matches, so a single bad
        # token does not shift every following one.
        self.token_index += 1
        if t.original != string:
            print(f'Mismatch, original {t.original!r} != {string!r}')
            self.success = False
            self.result += data
            return

        # Swap only the text, keeping its leading and trailing whitespace.
        leading = data[:len(data) - len(data.lstrip())]
        trailing = data[len(data.rstrip()):]
        self.result += leading + t.translated + trailing

In [63]:
# Write the model's translations back into the <translation> elements
for con in all_con:
    gpt = con.get('gpt')
    if gpt is None:
        continue
    print(f"Updating {con['context'].find('name').text}")

    n_expected = sum(len(tran['strings']) for tran in con['tran'])
    if len(gpt) != n_expected:
        print(f'Warning: expected {n_expected} translations, received {len(gpt)}')

    gpt_index = 0
    for tran in con['tran']:
        # Translations are positional. Give each message exactly the slice
        # that belongs to its extracted strings, so a problem in one message
        # cannot shift the tokens of the messages after it.
        n_strings = len(tran['strings'])
        tokens = gpt[gpt_index:gpt_index + n_strings]
        gpt_index += n_strings
        if n_strings == 0:
            # Nothing was sent for this message; leave it unfinished instead
            # of replacing it with an empty translation.
            continue

        translation = tran['message'].find('translation')
        htmltran = HTMLDataTranslator(tokens, 0)
        htmltran.feed(tran['text'])
        # Flush text the parser may still be holding back (e.g. a trailing
        # "&Name" accelerator); text the extraction missed then counts as a
        # mismatch rather than silently vanishing from the result.
        htmltran.close()

        # Mark the message finished only when every string was replaced.
        if htmltran.success and htmltran.token_index == n_strings:
            translation.text = htmltran.result
            translation.attrib.pop('type', None)

Updating CursorInspector
Updating FileChooserPanelWithHistory
Updating ImageIOWizard
Updating MainImageWindow
Updating QObject
Updating imageiowiz::SelectFilePage


In [64]:
# Save the updated translations over the original .ts file.
# ElementTree leaves out the XML declaration for UTF-8 output unless asked,
# so request it explicitly.
# NOTE(review): a DOCTYPE line in the input (e.g. <!DOCTYPE TS>) is not part
# of the parsed tree and will not be written back - confirm the Qt tools
# accept the file without it.
tree.write(f'/Users/pauly/tk/itksnap/itksnap/GUI/Qt/Translations/itksnap_{target_locale}.ts', encoding='utf-8', xml_declaration=True)