In [2]:
from google.cloud import translate

In [3]:
import time
import re
import copy

In [4]:
# Google Cloud project the notebook talks to, and the resource path built
# from it. `parent` is what the client calls below receive as `parent=`.
project_id = "thematic-bloom-275007"
parent_template = 'projects/{}'
parent = parent_template.format(project_id)

In [5]:
# Translation client shared by every API call in this notebook.
# NOTE: a later cell defines a function that is also called `translate`,
# which rebinds the module name imported at the top. Re-running this cell
# after that definition fails with AttributeError -- restart the kernel and
# run the cells in order instead.
client = translate.TranslationServiceClient()

In [6]:
# Ask the service which languages it supports for this project and list
# their codes, one per line.
response = client.get_supported_languages(parent=parent)

print('Supported Languages:')
language_codes = [language.language_code for language in response.languages]
for code in language_codes:
    print('Language Code: {}'.format(code))

Supported Languages:
Language Code: af
Language Code: am
Language Code: ar
Language Code: az
Language Code: be
Language Code: bg
Language Code: bn
Language Code: bs
Language Code: ca
Language Code: ceb
Language Code: co
Language Code: cs
Language Code: cy
Language Code: da
Language Code: de
Language Code: el
Language Code: en
Language Code: eo
Language Code: es
Language Code: et
Language Code: eu
Language Code: fa
Language Code: fi
Language Code: fr
Language Code: fy
Language Code: ga
Language Code: gd
Language Code: gl
Language Code: gu
Language Code: ha
Language Code: haw
Language Code: he
Language Code: hi
Language Code: hmn
Language Code: hr
Language Code: ht
Language Code: hu
Language Code: hy
Language Code: id
Language Code: ig
Language Code: is
Language Code: it
Language Code: iw
Language Code: ja
Language Code: jw
Language Code: ka
Language Code: kk
Language Code: km
Language Code: kn
Language Code: ko
Language Code: ku
Language Code: ky
Language Code: la
Language Code: lb
Lang

In [7]:
def translate(text, lang_from, lang_to, batch_size=128):
    """Translate a batch of strings through the notebook's translation client.

    NOTE: this function rebinds the name `translate`, which the first cell
    imported as the `google.cloud.translate` module. The client must already
    have been created before this cell runs.

    Args:
        text: Strings to translate. A single ``str`` is treated as a
            one-item batch instead of being sliced character by character.
        lang_from: Source language code (sent as ``source_language_code``).
        lang_to: Target language code (sent as ``target_language_code``).
        batch_size: Maximum number of strings sent per request. Longer
            inputs are split into several requests rather than being
            silently cut off after the first ``batch_size`` items.

    Returns:
        A list with one translated string per input string, in input order.
        Empty input returns an empty list without calling the API.
    """
    if isinstance(text, str):
        text = [text]
    else:
        text = list(text)

    res = []
    for start in range(0, len(text), batch_size):
        response = client.translate_text(
            parent=parent,
            contents=text[start:start + batch_size],
            mime_type='text/plain',  # mime types: text/plain, text/html
            source_language_code=lang_from,
            target_language_code=lang_to)
        res.extend(line.translated_text for line in response.translations)

    return res

In [8]:
# Round-trip demo, hop 1: English -> Romanian on one annotated example.
# In the recorded output the service puts a space between `]` and `(`,
# so the `[value](code)` markup does not come back intact.
sample_en = ["I'm looking for [Onigiri - Japanese Rice Balls](001)"]
ro_res = translate(sample_en, "en", "ro")
ro_res

['Caut [Onigiri - Bile de orez japoneze] (001)']

In [9]:
# Round-trip demo, hop 2: feed the Romanian result on into Italian.
it_res = translate(text=ro_res, lang_from="ro", lang_to="it")
it_res

['Cerca [Onigiri - Polpette di riso giapponesi] (001)']

In [10]:
# Round-trip demo, hop 3: back to English. The recorded result is a
# paraphrase of the original sentence rather than an exact copy.
en_res = translate(text=it_res, lang_from="it", lang_to="en")
en_res

['Search for [Onigiri - Japanese rice balls] (001)']

In [11]:
# Read the training examples as a list of raw lines; each line keeps its
# trailing newline, which the parsing cell below strips off.
with open("example.md") as myfile:
    data = list(myfile)

In [12]:
def parse_entities(entity_names, txt):
    """Replace the entity name in every ``[value](name)`` annotation with a code.

    Each distinct entity name gets a numeric string code, starting at "100"
    and assigned in order of first appearance. Codes are stored in
    ``entity_names`` so repeated calls reuse them.

    Args:
        entity_names: dict mapping entity name -> code string. Mutated in
            place whenever a new entity name is encountered.
        txt: Text that may contain ``[value](name)`` annotations.

    Returns:
        ``txt`` with each annotation rewritten as ``[value](code)``. Text
        without annotations is returned unchanged.
    """
    def _encode(match):
        ent_name = match.group(2)
        code = entity_names.get(ent_name)
        if code is None:
            code = str(len(entity_names) + 100)
            entity_names[ent_name] = code
        return "[" + match.group(1) + "](" + str(code) + ")"

    # A single left-to-right pass rewrites each annotation exactly once.
    # Repeated str.replace calls could rewrite text that an earlier
    # replacement had already turned into a code.
    return re.sub(r'\[([^\]]+)\]\(([^\)]+)\)', _encode, txt)

In [13]:
# Parse the raw lines into {intent name: [example, ...]}.
# Lines starting with "##" open a new intent; lines starting with "- " are
# examples for the most recently opened intent. Everything else is ignored.
intents = {}
entity_names = {}  # entity name -> numeric code, filled by parse_entities
intent_name = None

for line in data:
    # Strip only the line terminator. Slicing with [:-1] would also eat the
    # last real character of a final line that has no trailing newline.
    line = line.rstrip("\n")
    if line[0:2] == "##":
        # The intent name starts at column 10 -- presumably after a
        # "## intent:" prefix; confirm against example.md.
        intent_name = line[10:]
        intents[intent_name] = []
    elif line[0:2] == "- ":
        example = parse_entities(entity_names, line[2:])
        intents[intent_name].append(example)

In [14]:
# Inspect the parsed intents; entity names appear as numeric codes here.
intents

{'yes_simple': ['Sure',
  'yes',
  'ok',
  "That's why I'm here",
  'yes sure',
  'absolutely',
  'for sure',
  'definitely',
  'ok, sure',
  'of course'],
 'yes_not_sure': ['what do you have?',
  'sure, what can I get?',
  'yes, but not sure what I want',
  'what can I get?',
  "what's in the menu?",
  "what's on the menu?",
  'not sure what I want',
  'can you help me choose?',
  "I'm not sure"],
 'yes_x_y': ['Yes, I want [a pizza](100)',
  'Yes, I want [a soup](100) and [a pizza](100)',
  'Do you have [pizza](100)?',
  'I want something [italian](100)',
  '[a pizza](100) and [something sweet](100)',
  'I want [a pizza](100) and some [soup](100)',
  'What do you have [sweet](100)?',
  'I want [a meal](100) and [a soup](100)',
  'What kind of [pizza](100) do you have?',
  'Give me something [japanese](100)',
  "Yes, I'm looking for something from [France](100)",
  'Do you have [mexican](100)?',
  "Sure, what do you have that's [vegan](100)?",
  'I need something [without gluten](100)'

In [15]:
# Augment every intent by back-translation:
#   English -> languages_1[k] -> languages_2[k] -> English
# and add each new English paraphrase to a copy of the original intents.
new_intents = copy.deepcopy(intents)
time_to_sleep = 2  # seconds to pause after each request in hops 1 and 2
languages_1 = ["ja", "de", "es", "it"]
languages_2 = ["ro", "id", "fy", "fr"]
for intent_name, lines in intents.items():
    print("Intent - {}".format(intent_name))

    # Hop 1: English into each first-stage language.
    languages_1_translations = []
    for lang in languages_1:
        languages_1_translations.append({
            'language_code': lang,
            'lines': translate(lines, 'en', lang)
        })
        time.sleep(time_to_sleep)
    print("step 1")

    # Hop 2: each first-stage result into the second-stage language at the
    # same position (languages_1[k] -> languages_2[k]).
    languages_2_translations = []
    for first_hop, lang in zip(languages_1_translations, languages_2):
        languages_2_translations.append({
            'language_code': lang,
            'lines': translate(first_hop['lines'], first_hop['language_code'], lang)
        })
        time.sleep(time_to_sleep)
    print("step 2")

    # Hop 3: everything back to English.
    res = []
    for second_hop in languages_2_translations:
        res.extend(translate(second_hop['lines'], second_hop['language_code'], 'en'))

    print("step 3")

    for r in res:
        # The translator returns "[value] (code)" with a space (see the
        # round-trip demo outputs above), which breaks the entity markup.
        # Re-join it before checking for duplicates.
        r = re.sub(r'(\[[^\]]+\])\s+(\([^\)]+\))', r'\1\2', r)
        if r not in new_intents[intent_name]:
            new_intents[intent_name].append(r)
    

Intent - yes_simple
step 1
step 2
step 3
Intent - yes_not_sure
step 1
step 2
step 3
Intent - yes_x_y
step 1
step 2
step 3
Intent - decline
step 1
step 2
step 3
