# Backtranslate the samples

In [87]:
import ast
import json
import os
from pprint import pprint
from typing import Dict

from google.cloud import translate

In [62]:
# Data locations. Everything is derived from the shared data root so the
# paths cannot drift apart.
ROOT_FP = "../data/"
TRANSLATED_DATA_FP = os.path.join(ROOT_FP, "translations/")

# File of samples to backtranslate.
samples_to_translate_filename = "samples_to_translate.jsonl"
samples_to_translate_fp = os.path.join(
    ROOT_FP, samples_to_translate_filename
)

In [63]:
# Set GOOGLE_APPLICATION_CREDENTIALS (the variable Google's Application
# Default Credentials lookup reads) to the credentials file one directory up.
# This must run before the translation client is constructed.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "../credentials.json"

In [64]:
# Request scope for the Translation API, sent as the "parent" field of every
# translate_text request.
PROJECT_ID = "nlp2022-final-project"
LOCATION = "global"
PARENT = f"projects/{PROJECT_ID}/locations/{LOCATION}"

# One client instance, created once and reused by the helper functions.
GOOGLE_CLIENT = translate.TranslationServiceClient()

In [84]:
# Language codes passed to the translation helpers: the language the samples
# are written in, followed by the two pivot languages used for the
# backtranslation runs below.
ROOT_SOURCE_LANG = "en-US"
SPANISH_PIVOT_LANG = "es"
JAPANESE_PIVOT_LANG = "ja"

## Load samples

In [66]:
# Parse one sample dict per line of the samples file.
# NOTE(review): the file is named .jsonl but each line is parsed as a Python
# literal rather than strict JSON -- confirm the on-disk format before
# switching to json.loads.
with open(samples_to_translate_fp, "r", encoding="utf-8") as f:
    samples_to_translate_lst = [
        ast.literal_eval(line)
        for line in f
        # Skip blank lines (e.g. a trailing newline at end of file), which
        # ast.literal_eval would otherwise reject with a SyntaxError.
        if line.strip()
    ]

In [68]:
# Display the first loaded sample to check its structure.
samples_to_translate_lst[0]

{'id': '56de57394396321400ee2830',
 'title': 'Institute_of_technology',
 'context': 'In Turkey and the Ottoman Empire, the oldest technical university is Istanbul Technical University. Its graduates contributed to a wide variety of activities in scientific research and development. In 1950s, 2 technical universities were opened in Ankara and Trabzon. In recent years, Yildiz University is reorganized as Yildiz Technical University and 2 institutes of technology were founded in Kocaeli and Izmir. In 2010, another technical university named Bursa Technical University was founded in Bursa. Moreover, a sixth technical university is about to be opened in Konya named Konya Technical University.',
 'question': 'When Konya Technical University opens, how many total institutes of technology will there be in Turkey and the Ottoman Empire?',
 'answers': {'text': ['six'], 'answer_start': [520]}}

## Set up access to the Google Cloud Translation API

In [35]:
def translate_text(text="Hello, world!", project_id="nlp2022-final-project"):
    """Smoke-test the Translation API by translating ``text`` into Spanish.

    Creates a fresh client, sends ``text`` as plain text from ``en-US`` to
    ``es`` under the ``global`` location of ``project_id``, and prints every
    translation in the response. Nothing is returned.
    """
    location = "global"
    request = {
        "parent": f"projects/{project_id}/locations/{location}",
        "contents": [text],
        "mime_type": "text/plain",
        "source_language_code": "en-US",
        "target_language_code": "es",
    }

    api_client = translate.TranslationServiceClient()
    reply = api_client.translate_text(request=request)

    for item in reply.translations:
        print("Translated text: {}".format(item.translated_text))



In [39]:
# Run the smoke test with its default arguments.
translate_text()

Translated text: ¡Hola Mundo!


## Create helper functions

In [77]:
def translate_text(
    text: str, source_lang: str, target_lang: str
) -> str:
    """Translate ``text`` from ``source_lang`` into ``target_lang``.

    Sends one plain-text request through the shared ``GOOGLE_CLIENT`` under
    ``PARENT``, prints the translations in the response, and returns the
    text of the first one.
    """
    request = {
        "parent": PARENT,
        "contents": [text],
        "mime_type": "text/plain",
        "source_language_code": source_lang,
        "target_language_code": target_lang,
    }
    reply = GOOGLE_CLIENT.translate_text(request=request)
    candidates = reply.translations
    print(candidates)
    # Only one string is sent in "contents", so only the first entry is used.
    return candidates[0].translated_text

In [72]:
def backtranslate_text(
    text: str, source_lang: str, pivot_lang: str
) -> Dict[str, str]:
    """Translate ``text`` into ``pivot_lang`` and back into ``source_lang``.

    Args:
        text: Text written in ``source_lang``.
        source_lang: Language code of ``text`` and of the final result.
        pivot_lang: Language code of the intermediate translation.

    Returns:
        A dict with two keys: ``"translation"`` (``text`` rendered in the
        pivot language) and ``"backtranslation"`` (that rendering translated
        back into the source language).
    """
    source_to_pivot_translation = translate_text(
        text=text, source_lang=source_lang, target_lang=pivot_lang
    )
    pivot_to_source_translation = translate_text(
        text=source_to_pivot_translation,
        source_lang=pivot_lang,
        target_lang=source_lang,
    )
    return {
        "translation": source_to_pivot_translation,
        "backtranslation": pivot_to_source_translation,
    }

In [102]:
def translate_sample(
    object_dict: Dict, source_lang: str, pivot_lang: str
) -> Dict:
    """Backtranslate a sample's question and return an enriched copy.

    The result holds every entry of ``object_dict``, then the entries
    returned by ``backtranslate_text`` for ``object_dict["question"]``, then
    ``"source_lang"`` and ``"pivot_lang"``. When a key repeats, the later
    value wins. ``object_dict`` itself is not modified.
    """
    round_trip = backtranslate_text(
        text=object_dict["question"],
        source_lang=source_lang,
        pivot_lang=pivot_lang,
    )
    enriched = dict(object_dict)
    enriched.update(round_trip)
    enriched["source_lang"] = source_lang
    enriched["pivot_lang"] = pivot_lang
    return enriched


In [89]:
def write_json(object_dict: Dict, write_path: str):
    """Serialize ``object_dict`` to JSON and write it to ``write_path``.

    The file is opened in text write mode, so any existing content at
    ``write_path`` is replaced.
    """
    with open(write_path, 'w') as out_file:
        payload = json.dumps(object_dict)
        out_file.write(payload)


In [90]:
def process_sample(
    object_dict: Dict,
    source_lang: str,
    pivot_lang: str,
    write_dir: str
) -> Dict:
    """Backtranslate one sample and save the result as ``<id>.json``.

    Args:
        object_dict: Sample to process; its ``"id"`` value names the output
            file and it is passed on to ``translate_sample``.
        source_lang: Language code of the sample text.
        pivot_lang: Language code of the intermediate translation.
        write_dir: Directory that receives the output file. It is created
            if it does not exist yet.

    Returns:
        The enriched dict produced by ``translate_sample``, which is also
        what gets written to disk.
    """
    # Create the output directory before translating, so a bad path fails
    # before any API calls are made. An empty write_dir means the current
    # directory, which needs no creation (and os.makedirs("") would raise).
    if write_dir:
        os.makedirs(write_dir, exist_ok=True)

    processed_dict = translate_sample(
        object_dict=object_dict, source_lang=source_lang, pivot_lang=pivot_lang
    )
    filename = f"{object_dict['id']}.json"
    fp = os.path.join(write_dir, filename)
    write_json(processed_dict, fp)
    return processed_dict

## Backtranslate English questions using Spanish as the pivot language

In [95]:
# Number of samples, taken from the start of the loaded list, to run through
# the Spanish round trip.
SPANISH_TEST_COUNT = 2

In [96]:
# Spanish results go in a per-language folder under the shared translations
# directory. Deriving the path keeps it in sync with TRANSLATED_DATA_FP and
# the pivot language code instead of repeating both as a literal.
spanish_write_dir = os.path.join(TRANSLATED_DATA_FP, SPANISH_PIVOT_LANG)

In [97]:
# Backtranslate the first SPANISH_TEST_COUNT samples through Spanish.
# Iterating a slice (rather than indexing by position) means a sample list
# shorter than the count is processed in full instead of raising IndexError
# partway through the run.
for sample in samples_to_translate_lst[:SPANISH_TEST_COUNT]:
    process_sample(
        object_dict=sample,
        source_lang=ROOT_SOURCE_LANG,
        pivot_lang=SPANISH_PIVOT_LANG,
        write_dir=spanish_write_dir,
    )

[translated_text: "Cuando se abra la Universidad Técnica de Konya, ¿cuántos institutos de tecnología habrá en total en Turquía y el Imperio Otomano?"
]
[translated_text: "When Konya Technical University opens, how many institutes of technology will there be in total in Turkey and the Ottoman Empire?"
]
[translated_text: "¿Qué dos ciudades de Turquía adquirieron institutos de tecnología en la década de 1950?"
]
[translated_text: "Which two cities in Turkey acquired institutes of technology in the 1950s?"
]


## Backtranslate English questions using Japanese as the pivot language

In [98]:
# Number of samples, taken from the start of the loaded list, to run through
# the Japanese round trip.
JAPANESE_TEST_COUNT = 2

In [99]:
# Japanese results go in a per-language folder under the shared translations
# directory. Deriving the path keeps it in sync with TRANSLATED_DATA_FP and
# the pivot language code instead of repeating both as a literal.
japanese_write_dir = os.path.join(TRANSLATED_DATA_FP, JAPANESE_PIVOT_LANG)

In [101]:
# Backtranslate the first JAPANESE_TEST_COUNT samples through Japanese and
# pretty-print each enriched record. Iterating a slice (rather than indexing
# by position) means a sample list shorter than the count is processed in
# full instead of raising IndexError partway through the run.
for i, sample in enumerate(samples_to_translate_lst[:JAPANESE_TEST_COUNT]):
    print(f"Sample {i}")
    processed_dict = process_sample(
        object_dict=sample,
        source_lang=ROOT_SOURCE_LANG,
        pivot_lang=JAPANESE_PIVOT_LANG,
        write_dir=japanese_write_dir,
    )
    pprint(processed_dict)
    print('-' * 10)

Sample 0
[translated_text: "コンヤ工科大学が開校したとき、トルコとオスマン帝国には合計でいくつの技術研究所ができますか?"
]
[translated_text: "How many technical institutes in total will there be in Turkey and the Ottoman Empire when the Konya University of Technology opens?"
]
{'answers': {'answer_start': [520], 'text': ['six']},
 'backtranslation': 'How many technical institutes in total will there be in '
                    'Turkey and the Ottoman Empire when the Konya University '
                    'of Technology opens?',
 'context': 'In Turkey and the Ottoman Empire, the oldest technical university '
            'is Istanbul Technical University. Its graduates contributed to a '
            'wide variety of activities in scientific research and '
            'development. In 1950s, 2 technical universities were opened in '
            'Ankara and Trabzon. In recent years, Yildiz University is '
            'reorganized as Yildiz Technical University and 2 institutes of '
            'technology were founded in Kocaeli and 