In [None]:
import configparser

# Read the translator service API key and URL from config.ini.
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed. Fail loudly here instead of hitting an opaque KeyError
# on the lookups below.
if not config.read('config.ini'):
    raise FileNotFoundError(
        "config.ini not found or unreadable; it must define ApiKey and Url under [DEFAULT]"
    )

apikey = config['DEFAULT']['ApiKey']
url = config['DEFAULT']['Url']
# Name of the CSV column whose values are sent for translation.
field_selection = 'Text Entry'
# Path of the input CSV file.
csv = 'var/translator/DiaryNotes_sample_Mar28.csv'

In [None]:
import json
import sys

import importlib
import subprocess


def _pip_install(*packages):
    """Install ``packages`` with pip into the interpreter running this kernel.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    subprocess.run([sys.executable, "-m", "pip", "install", *packages], check=True)
    # Make the import system notice modules installed while the process is running.
    importlib.invalidate_caches()


# Import each dependency, installing it first if the import fails. The retry
# lives in the ``except`` branch (not ``finally``) so that a failed install
# surfaces as CalledProcessError instead of being masked by a second ImportError.
try:
    import pandas as pd
except ImportError:
    _pip_install("pandas")
    import pandas as pd

try:
    from ibm_watson import LanguageTranslatorV3
    from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
except ImportError:
    _pip_install("ibm-watson", "ibm-cloud-sdk-core")
    from ibm_watson import LanguageTranslatorV3
    from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

# Build the authenticator from the API key, create the translator client for
# API version '2018-05-01', then point the client at the configured service URL.
authenticator = IAMAuthenticator(apikey)
translator = LanguageTranslatorV3(version='2018-05-01', authenticator=authenticator)
translator.set_service_url(url)

In [None]:
# Fetch the model listing from the service and pretty-print it as JSON.
models = translator.list_models().get_result()
models_json = json.dumps(models, indent=2)
print(models_json)

In [None]:
# Load the input CSV and echo the frame.
data = pd.read_csv(csv)
print(data)

# str() is applied to every cell of the selected column, so a float NaN would
# come out as the text 'nan'.
text_entries = list(map(str, data[field_selection].to_list()))

In [None]:
from math import ceil

def utf8len(s=''):
    """Return the number of bytes ``s`` occupies when encoded as UTF-8.

    ``s`` is passed through ``str()`` first, so a non-string value is measured
    by its string form.
    """
    encoded = str(s).encode('utf-8')
    return len(encoded)

def utf8_length_list(l=[]):
    """Return the combined UTF-8 byte length of every item in ``l``.

    Each item is measured with ``utf8len``; an empty iterable gives 0.
    """
    return sum(utf8len(item) for item in l)

def chunks(l, n):
    """Yield ``n`` consecutive slices of ``l`` whose lengths differ by at most one.

    The first ``len(l) % n`` slices hold one extra item. When ``n`` exceeds
    ``len(l)`` the trailing slices are empty.

    Args:
        l: A sliceable sequence (anything supporting ``len()`` and ``l[a:b]``).
        n: Number of slices to produce.

    Yields:
        Slices of ``l`` in order; concatenated they reproduce ``l``.

    Raises:
        ValueError: if ``n`` is not positive while ``l`` still holds items.
            Such a split would either divide by zero or silently drop data.
    """
    if n <= 0:
        # Callers that derive n from the data size get n == 0 for empty input;
        # there is nothing to split, so produce no chunks instead of failing.
        if len(l) == 0:
            return
        raise ValueError("n must be a positive integer when l is not empty")

    base, extra = divmod(len(l), n)
    start = 0
    for i in range(n):
        # The first `extra` chunks absorb the remainder, one item each.
        size = base + 1 if i < extra else base
        yield l[start:start + size]
        start += size

# Print the UTF-8 size of each chunk when the entries are split into
# ceil(total_bytes / 48000) pieces.
# NOTE(review): 48000 looks like a per-request byte budget — confirm.
total_bytes = utf8_length_list(text_entries)
for chunk in chunks(text_entries, ceil(total_bytes / 48000)):
    print(utf8_length_list(chunk))

In [None]:
# Send each chunk to the 'fr-CA-en' model. Results are appended one request
# at a time, so the list holds everything received so far if a call fails.
fr_en_translation_result = []
fr_en_chunk_count = ceil(utf8_length_list(text_entries) / 48000)
for chunk in chunks(text_entries, fr_en_chunk_count):
    response = translator.translate(text=chunk, model_id='fr-CA-en')
    fr_en_translation_result.append(response.get_result())
print(json.dumps(fr_en_translation_result, indent=2))

In [None]:
# Flatten the per-chunk responses into one list of translated strings,
# keeping the order in which they were returned.
fr_en_translations = [
    entry["translation"]
    for response_body in fr_en_translation_result
    for entry in response_body["translations"]
]
print(fr_en_translations)

In [None]:
# Send each chunk to the 'en-fr-CA' model. Results are appended one request
# at a time, so the list holds everything received so far if a call fails.
en_fr_translation_result = []
en_fr_chunk_count = ceil(utf8_length_list(text_entries) / 48000)
for chunk in chunks(text_entries, en_fr_chunk_count):
    response = translator.translate(text=chunk, model_id='en-fr-CA')
    en_fr_translation_result.append(response.get_result())
print(json.dumps(en_fr_translation_result, indent=2))

In [None]:
# Flatten the per-chunk responses into one list of translated strings,
# keeping the order in which they were returned.
en_fr_translations = [
    entry["translation"]
    for response_body in en_fr_translation_result
    for entry in response_body["translations"]
]
print(en_fr_translations)

In [None]:
# Attach both translation lists to the frame as new columns, write the frame
# to CSV without the index, then print it.
for column_name, column_values in (
    ('fr-CA-en_result', fr_en_translations),
    ('en-fr-CA_result', en_fr_translations),
):
    data[column_name] = column_values
data.to_csv('var/translator/translated_result.csv', index=False)
print(data)