**Load the excel file**

In [1]:
import pandas as pd

path_excel = r'/home/xian/Documents/SUITE_Trans/PostProcessing_JPN_CHN.xlsx'
data = pd.read_excel (path_excel, header=0)
df = pd.DataFrame(data)

#print(repr(df.columns[1])) # show header column name
#print(repr(df.iloc[53,0])) # access a field
source_en = df['English (US)\r\n[ S; en-US ]'].to_list()

**Save extracted data to file**

In [2]:
with open('/home/xian/Documents/SUITE_Trans/output.txt', 'w') as f:
    for item in source_en:
        item = repr(item)
        item = item[1:-1]
        f.write("%s\n" % item)

**Import Google Translate API**

Try one sentence translation

In [3]:
from google.cloud import translate

def translate_text(text="YOUR_TEXT_TO_TRANSLATE", project_id="YOUR_PROJECT_ID"):
    """Translating Text."""

    client = translate.TranslationServiceClient()

    parent = client.location_path(project_id, "global")

    # Detail on supported types can be found here:
    # https://cloud.google.com/translate/docs/supported-formats
    response = client.translate_text(
        parent=parent,
        contents=[text],
        mime_type="text/plain",  # mime types: text/plain, text/html
        source_language_code="en-US",
        target_language_code="zh-CN",
    )
    # Display the translation for each input text provided
    for translation in response.translations:
        print(u"Translated text: {}".format(translation.translated_text))

my_project_id = 'carbide-trees-283313'
translate_text(text=item, project_id=my_project_id)

Translated text: 停止


**Batch Translation**

In [4]:
from google.cloud import translate

def batch_translate_text(
    input_uri="gs://YOUR_BUCKET_ID/path/to/your/file.txt",
    output_uri="gs://YOUR_BUCKET_ID/path/to/save/results/",
    project_id="YOUR_PROJECT_ID"
):
    """Translates a batch of texts on GCS and stores the result in a GCS location."""

    client = translate.TranslationServiceClient()

    location = "us-central1"
    # Supported file types: https://cloud.google.com/translate/docs/supported-formats
    gcs_source = {"input_uri": input_uri}

    input_configs_element = {
        "gcs_source": gcs_source,
        "mime_type": "text/plain"  # Can be "text/plain" or "text/html".
    }
    gcs_destination = {"output_uri_prefix": output_uri}
    output_config = {"gcs_destination": gcs_destination}
    parent = client.location_path(project_id, location)

    # Supported language codes: https://cloud.google.com/translate/docs/language
    operation = client.batch_translate_text(
        parent=parent,
        source_language_code="en-US",
        target_language_codes=["zh-CN"],  # Up to 10 language codes here.
        input_configs=[input_configs_element],
        output_config=output_config)

    print(u"Waiting for operation to complete...")
    response = operation.result(180)

    print(u"Total Characters: {}".format(response.total_characters))
    print(u"Translated Characters: {}".format(response.translated_characters))

my_project_id = 'carbide-trees-283313'
my_inpurt_url = "gs://input_src/test/test.en.tsv"
my_output_url = "gs://output_tgt/test/"

batch_translate_text(input_uri=my_inpurt_url, output_uri=my_output_url, project_id=my_project_id)

Waiting for operation to complete...
Total Characters: 47823
Translated Characters: 47823


**Create a Glossar list**

In [19]:
from google.cloud import translate_v3 as translate

def create_glossary(
    project_id="YOUR_PROJECT_ID",
    input_uri="YOUR_INPUT_URI",
    glossary_id="YOUR_GLOSSARY_ID",
):
    """
    Create a equivalent term sets glossary. Glossary can be words or
    short phrases (usually fewer than five words).
    https://cloud.google.com/translate/docs/advanced/glossary#format-glossary
    """
    client = translate.TranslationServiceClient()

    # Supported language codes: https://cloud.google.com/translate/docs/languages
    source_lang_code = "en"
    target_lang_code = "zh"
    location = "us-central1"  # The location of the glossary

    name = client.glossary_path(project_id, location, glossary_id)
    language_codes_set = translate.types.Glossary.LanguageCodesSet(
        language_codes=[source_lang_code, target_lang_code]
    )

    gcs_source = translate.types.GcsSource(input_uri=input_uri)

    input_config = translate.types.GlossaryInputConfig(gcs_source=gcs_source)

    glossary = translate.types.Glossary(
        name=name, language_codes_set=language_codes_set, input_config=input_config
    )

    parent = client.location_path(project_id, location)
    # glossary is a custom dictionary Translation API uses
    # to translate the domain-specific terminology.
    operation = client.create_glossary(parent=parent, glossary=glossary)

    result = operation.result(timeout=180)
    print("Created: {}".format(result.name))
    print("Input Uri: {}".format(result.input_config.gcs_source.input_uri))

my_project_id = 'carbide-trees-283313'
my_inpurt_url = "gs://input_src/glossary/test_glossary.csv"
my_glossary_id = "my_en_zh_glossary"

create_glossary(project_id=my_project_id, input_uri=my_inpurt_url, glossary_id=my_glossary_id)

Created: projects/505142793087/locations/us-central1/glossaries/my_en_zh_glossary
Input Uri: gs://input_src/glossary/test_glossary.csv


**Deleting a glossary**

In [18]:
from google.cloud import translate_v3 as translate

def delete_glossary(
    project_id="YOUR_PROJECT_ID", glossary_id="YOUR_GLOSSARY_ID"
):
    """Delete a specific glossary based on the glossary ID."""
    client = translate.TranslationServiceClient()

    parent = client.glossary_path(project_id, "us-central1", glossary_id)

    operation = client.delete_glossary(parent)
    result = operation.result(timeout=180)
    print("Deleted: {}".format(result.name))

my_project_id = 'carbide-trees-283313'
my_glossary_id = "my_en_zh_glossary"

delete_glossary(project_id=my_project_id, glossary_id=my_glossary_id)

Deleted: projects/505142793087/locations/us-central1/glossaries/my_en_zh_glossary


**Batch Translation with Glossary**

In [20]:
from google.cloud import translate

def batch_translate_text_with_glossary(
    input_uri="gs://YOUR_BUCKET_ID/path/to/your/file.txt",
    output_uri="gs://YOUR_BUCKET_ID/path/to/save/results/",
    project_id="YOUR_PROJECT_ID",
    glossary_id="YOUR_GLOSSARY_ID",
):
    """Translates a batch of texts on GCS and stores the result in a GCS location.
    Glossary is applied for translation."""

    client = translate.TranslationServiceClient()

    # Supported language codes: https://cloud.google.com/translate/docs/languages
    location = "us-central1"

    # Supported file types: https://cloud.google.com/translate/docs/supported-formats
    gcs_source = {"input_uri": input_uri}

    input_configs_element = {
        "gcs_source": gcs_source,
        "mime_type": "text/plain"  # Can be "text/plain" or "text/html".
    }
    gcs_destination = {"output_uri_prefix": output_uri}
    output_config = {"gcs_destination": gcs_destination}

    parent = client.location_path(project_id, location)

    # glossary is a custom dictionary Translation API uses
    # to translate the domain-specific terminology.
    glossary_path = client.glossary_path(
        project_id, "us-central1", glossary_id  # The location of the glossary
    )

    glossary_config = translate.types.TranslateTextGlossaryConfig(
        glossary=glossary_path
    )

    glossaries = {"zh": glossary_config}  # target lang as key

    operation = client.batch_translate_text(
        parent=parent,
        source_language_code="en",
        target_language_codes=["zh"],  # Up to 10 language codes here.
        input_configs=[input_configs_element],
        glossaries=glossaries,
        output_config=output_config,
    )

    print(u"Waiting for operation to complete...")
    response = operation.result(180)

    print(u"Total Characters: {}".format(response.total_characters))
    print(u"Translated Characters: {}".format(response.translated_characters))

my_project_id = 'carbide-trees-283313'
my_inpurt_url = "gs://input_src/test/test.en.tsv"
my_output_url = "gs://output_tgt/test/"
my_glossary_id = "my_en_zh_glossary"

batch_translate_text_with_glossary(input_uri=my_inpurt_url, output_uri=my_output_url, project_id=my_project_id, glossary_id=my_glossary_id)


Waiting for operation to complete...
Total Characters: 47823
Translated Characters: 47823


**Write translated text back to the Excel table**

In [17]:
colnames = ['ind', 'en', 'zh']
hyp = pd.read_csv('/home/xian/Documents/SUITE_Trans/test/test_input_src_test_test.en_zh-CN_translations.tsv', names=colnames, sep='\t')

df['Chinese (Simplified/China) (.NET)\r\n[ zh-CHS ]']=hyp.zh
#print(df['Chinese (Simplified/China) (.NET)\r\n[ zh-CHS ]'])

df.to_excel("/home/xian/Documents/SUITE_Trans/test/output.xlsx")
