In [None]:
# Copyright 2024 Forusone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Translation - Glossary


In [1]:
# @title Install Vertex AI SDK for Python and other required packages

%pip install --upgrade --quiet google-cloud-translate

In [2]:
# @title Define constants
PROJECT_ID = "ai-hangsik"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}

In [3]:
# @title GCP Authentication

# Use OAuth to access the GCP environment.
import sys
if "google.colab" in sys.modules:
    from google.colab import auth
    auth.authenticate_user(project_id=PROJECT_ID)

In [None]:
# !gcloud auth application-default login
# !gcloud auth application-default set-quota-project ai-hangsik

In [4]:
from google.cloud import translate_v3 as translate

def create_glossary(
    input_uri: str = "YOUR_INPUT_URI",
    glossary_id: str = "YOUR_GLOSSARY_ID",
    timeout: int = 180,
) -> translate.Glossary:
    """
    Create a equivalent term sets glossary. Glossary can be words or
    short phrases (usually fewer than five words).
    https://cloud.google.com/translate/docs/advanced/glossary#format-glossary
    """
    client = translate.TranslationServiceClient()

    # Supported language codes: https://cloud.google.com/translate/docs/languages
    source_lang_code = "ko"
    target_lang_code = "en"

    name = client.glossary_path(PROJECT_ID, LOCATION, glossary_id)

    language_codes_set = translate.types.Glossary.LanguageCodesSet(
        language_codes=[source_lang_code, target_lang_code]
    )

    gcs_source = translate.types.GcsSource(input_uri=input_uri)

    input_config = translate.types.GlossaryInputConfig(gcs_source=gcs_source)

    glossary = translate.types.Glossary(
        name=name, language_codes_set=language_codes_set, input_config=input_config
    )

    parent = f"projects/{PROJECT_ID}/locations/{LOCATION}"
    operation = client.create_glossary(parent=parent, glossary=glossary)

    result = operation.result(timeout)
    print(f"Created: {result.name}")
    print(f"Input Uri: {result.input_config.gcs_source.input_uri}")

    return result


In [5]:
from google.cloud import translate_v3 as translate

def delete_glossary(
    glossary_id: str,
    timeout: int = 180,
) -> translate.Glossary:
    """Delete a specific glossary based on the glossary ID.

    Args:
        glossary_id: The ID of the glossary to delete.
        timeout: The timeout for this request.

    Returns:
        The glossary that was deleted.
    """
    client = translate.TranslationServiceClient()
    name = client.glossary_path(PROJECT_ID, LOCATION, glossary_id)

    operation = client.delete_glossary(name=name)
    result = operation.result(timeout)
    print(f"Deleted: {result.name}")

    return result

In [6]:
from google.cloud import translate_v3 as translate

def get_glossary(
    glossary_id: str
) -> translate.Glossary:
    """Get a particular glossary based on the glossary ID.

    Args:
        glossary_id: The ID of the glossary to retrieve.

    Returns:
        The glossary.
    """
    client = translate.TranslationServiceClient()
    name = client.glossary_path(PROJECT_ID, LOCATION, glossary_id)

    response = client.get_glossary(name=name)
    print(f"Glossary name: {response.name}")
    print(f"Entry count: {response.entry_count}")
    print(f"Input URI: {response.input_config.gcs_source.input_uri}")

    return response

In [22]:
from google.cloud import translate_v3 as translate

def list_glossaries() -> translate.Glossary:
    """List Glossaries.

    Args:

    Returns:
        The glossary.
    """
    glossary = None
    client = translate.TranslationServiceClient()
    parent = f"projects/{PROJECT_ID}/locations/{LOCATION}"

    # Iterate over all results
    for glossary in client.list_glossaries(parent=parent):
        print(f"Name: {glossary.name}")
        print(f"Entry count: {glossary.entry_count}")
        print(f"Input uri: {glossary.input_config.gcs_source.input_uri}")

        # Note: You can create a glossary using one of two modes:
        # language_code_set or language_pair. When listing the information for
        # a glossary, you can only get information for the mode you used
        # when creating the glossary.
        for language_code in glossary.language_codes_set.language_codes:
            print(f"Language code: {language_code}")

    return glossary

In [10]:
from google.cloud import translate_v3 as translate

def translate_text_with_glossary(
    text: str,
    glossary_id: str,
) -> translate.TranslateTextResponse:

    """Translates a given text using a glossary.

    Args:
        text: The text to translate.
        glossary_id: The ID of the glossary to use.

    Returns:
        The translated text."""

    client = translate.TranslationServiceClient()
    parent = f"projects/{PROJECT_ID}/locations/{LOCATION}"

    glossary = client.glossary_path(
        PROJECT_ID, LOCATION, glossary_id  # The location of the glossary
    )

    glossary_config = translate.TranslateTextGlossaryConfig(glossary=glossary)

    # Supported language codes: https://cloud.google.com/translate/docs/languages
    response = client.translate_text(
        request={
            "contents": [text],
            "source_language_code": "ko",
            "target_language_code": "en",
            "parent": parent,
            "glossary_config": glossary_config,
        }
    )

    return response

In [24]:
import uuid

GLOSSARY_INPUT_URI = "gs://translation-0106/glossary_ko.csv"
GLOSSARY_ID = f"translate-{uuid.uuid4()}"

result = create_glossary(
    GLOSSARY_INPUT_URI, GLOSSARY_ID
)

Created: projects/721521243942/locations/us-central1/glossaries/translate-c43e6872-98a0-482b-9405-1583bd4dd4b0
Input Uri: gs://translation-0106/glossary_ko.csv


In [13]:
text = "뭐라카노?"
glossary_id = GLOSSARY_ID

response = translate_text_with_glossary( text, glossary_id)

print("Translated text: \n")
for translation in response.glossary_translations:
    print(f"\t {translation.translated_text}")

Translated text: 

	 What did you say?


In [23]:
list_glossaries()

In [16]:
glossary_id = GLOSSARY_ID
get_glossary(glossary_id)

Glossary name: projects/721521243942/locations/us-central1/glossaries/translate-e69e69c3-f3ea-40fc-8123-e70ddf9d9890
Entry count: 7
Input URI: gs://translation-0106/glossary_ko.csv


name: "projects/721521243942/locations/us-central1/glossaries/translate-e69e69c3-f3ea-40fc-8123-e70ddf9d9890"
language_codes_set {
  language_codes: "ko"
  language_codes: "en"
}
input_config {
  gcs_source {
    input_uri: "gs://translation-0106/glossary_ko.csv"
  }
}
entry_count: 7
submit_time {
  seconds: 1736127867
  nanos: 138764937
}
end_time {
  seconds: 1736127870
  nanos: 904457000
}
display_name: "translate-e69e69c3-f3ea-40fc-8123-e70ddf9d9890"

In [19]:
delete_glossary(
    glossary_id="test-ce96c711-a5ef-4fd7-b440-54f0535e963b")


Deleted: projects/721521243942/locations/us-central1/glossaries/test-ce96c711-a5ef-4fd7-b440-54f0535e963b


name: "projects/721521243942/locations/us-central1/glossaries/test-ce96c711-a5ef-4fd7-b440-54f0535e963b"
submit_time {
  seconds: 1736128128
  nanos: 420655581
}
end_time {
  seconds: 1736128129
  nanos: 567402293
}