In [None]:
# Copyright 2024 Forusone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Adaptive translation


In [3]:
# @title Install Vertex AI SDK for Python and other required packages
%pip install --upgrade --quiet --user google-cloud-aiplatform

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m52.9 MB/s[0m eta [36m0:00:00[0m
[0m

In [5]:
# @title Define constants
PROJECT_ID = "ai-hangsik"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}

In [6]:
# @title GCP Authentication

# Use OAuth to access the GCP environment.
import sys
if "google.colab" in sys.modules:
    from google.colab import auth
    auth.authenticate_user(project_id=PROJECT_ID)

## Adaptive translations with a dataset

In [24]:
# @title Helper functions for translation with a adaptive dataset.

# Imports the Google Cloud Translation library
from google.cloud import translate_v3 as translate

def create_adaptive_mt_dataset(dataset_id):
  # Create a client
  client = translate.TranslationServiceClient()
  # Initialize request argument(s)
  adaptive_mt_dataset = translate.AdaptiveMtDataset()
  adaptive_mt_dataset.name = f"projects/{PROJECT_ID}/locations/{LOCATION}/adaptiveMtDatasets/{dataset_id}"
  adaptive_mt_dataset.display_name = "dataset_adaptive"
  adaptive_mt_dataset.source_language_code = "ko"
  adaptive_mt_dataset.target_language_code = "en"
  request = translate.CreateAdaptiveMtDatasetRequest(
      parent=f"projects/{PROJECT_ID}/locations/{LOCATION}",
      adaptive_mt_dataset=adaptive_mt_dataset,
  )
  # Make the request
  response = client.create_adaptive_mt_dataset(request=request)
  # Handle the response
  print(response)
  return response
  #-------------------------------------------------------------

def import_adaptive_mt_file(dataset_id:str):
  # Create a client
  client = translate.TranslationServiceClient()
  gcs_input_source = translate.GcsInputSource()
  gcs_input_source.input_uri = "gs://translation-0106/adaptive_proverb.tsv"
  # Initialize the request
  request = translate.ImportAdaptiveMtFileRequest(
      parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/adaptiveMtDatasets/{dataset_id}",
      gcs_input_source=gcs_input_source
  )
  # Make the request
  response = client.import_adaptive_mt_file(request)
  # Handle the response
  print(response)
  return response
#--------------------------------------------------------

def adaptive_mt_translate(text:str, dataset_id:str):
  # Create a client
  client = translate.TranslationServiceClient()
  # Initialize the request
  request = translate.AdaptiveMtTranslateRequest(
      parent=f"projects/{PROJECT_ID}/locations/{LOCATION}",
      dataset=f"projects/{PROJECT_ID}/locations/{LOCATION}/adaptiveMtDatasets/{dataset_id}",
      content=[text]
  )
  # Make the request
  response = client.adaptive_mt_translate(request)

  return response



In [8]:
# @title run

import uuid

dataset_id= f"adaptive-{uuid.uuid4()}"

response = create_adaptive_mt_dataset(dataset_id)
print(response)


name: "projects/ai-hangsik/locations/us-central1/adaptiveMtDatasets/adaptive-63db13aa-b7c6-4c6c-8ce2-140084e07f8b"
display_name: "dataset_adaptive"
source_language_code: "ko"
target_language_code: "en"

name: "projects/ai-hangsik/locations/us-central1/adaptiveMtDatasets/adaptive-63db13aa-b7c6-4c6c-8ce2-140084e07f8b"
display_name: "dataset_adaptive"
source_language_code: "ko"
target_language_code: "en"



In [9]:
dataset_id = "adaptive-a6d10929-2462-495a-86e0-4b55dd3ffa14"
response = import_adaptive_mt_file(dataset_id)


adaptive_mt_file {
  name: "projects/ai-hangsik/locations/us-central1/adaptiveMtDatasets/adaptive-a6d10929-2462-495a-86e0-4b55dd3ffa14/adaptiveMtFiles/13498504017393455548"
  display_name: "adaptive_proverb.tsv"
  entry_count: 62
}



In [10]:
text = "미꾸라지 한 마리가 온 웅덩이 흐린다."
dataset_id = "adaptive-a6d10929-2462-495a-86e0-4b55dd3ffa14"
adaptive_mt_translate(text, dataset_id)

translations {
  translated_text: "One bad apple spoils the whole bunch."
}
language_code: "en"

## Adaptive translations with reference sentence pairs

In [19]:
# @title Translate with reference sentence pairs.

def adaptive_mt_translate(text:str,
                          reference_sentence_config):
  # Create a client
  client = translate.TranslationServiceClient()
  # Initialize the request
  request = translate.AdaptiveMtTranslateRequest(

    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}",

    reference_sentence_config= reference_sentence_config,
    content=[text]
  )

  response = client.adaptive_mt_translate(request)
  return response


In [21]:
text = "돈이면 안되는 일 없다."

reference_sentence_config= {
        "reference_sentence_pair_lists": [ {
            "reference_sentence_pairs": [
            {
                "source_sentence": "돈이면 안되는 일 없다.",
                "target_sentence": "Money makes the mare go. Money is everything."
            },
            {
              "source_sentence": '미꾸라지 한 마리가 온 웅덩이 흐린다.',
              "target_sentence": 'One rotten apple spoils the barrel'
            },
            {
              "source_sentence": '남의 떡이 커 보인다.',
              "target_sentence": 'The grass is greener on the other side of the fence.'
            }
          ]

        }],
        "source_language_code": "ko",
        "target_language_code": "en"
    }



adaptive_mt_translate(text,reference_sentence_config)

translations {
  translated_text: "Money makes the mare go. Money is everything."
}
language_code: "en"

## Management

In [27]:

def list_adaptive_mt_datasets():
  """
  List datasets
  """
  # Create a client
  client = translate.TranslationServiceClient()
  # Initialize the request
  request = translate.ListAdaptiveMtDatasetsRequest(
      parent=f"projects/{PROJECT_ID}/locations/{LOCATION}",
  )
  # Make the request
  response = client.list_adaptive_mt_datasets(request)
  # Handle the response
  print(response)
#--------------------------------------------------------
def delete_adaptive_mt_dataset(dataset_id:str):
  # Create a client
  client = translate.TranslationServiceClient()
  # Initialize the request
  request = translate.DeleteAdaptiveMtDatasetRequest(
      name=f"projects/{PROJECT_ID}/locations/{LOCATION}/adaptiveMtDatasets/{dataset_id}"
  )
  # Make the request
  response = client.delete_adaptive_mt_dataset(request)
  # Handle the response
  print(response)


#--------------------------------------------------------
def list_adaptive_mt_files(dataset_id:str):
  """
  List files in dataset
  """
  # Create a client
  client = translate.TranslationServiceClient()
  # Initialize the request
  request = translate.ListAdaptiveMtFilesRequest(
      parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/adaptiveMtDatasets/{dataset_id}"
  )
  # Make the request
  response = client.list_adaptive_mt_files(request)
  # Handle the response
  print(response)

#--------------------------------------------------------
def delete_adaptive_mt_file(dataset_id:str, file_id):
  # Create a client
  client = translate.TranslationServiceClient()
  # Initialize the request
  request = translate.DeleteAdaptiveMtFileRequest(
      name=f"projects/{PROJECT_ID}/locations/{LOCATION}/adaptiveMtDatasets/{dataset_id}/adaptiveMtFiles/{file_id}"
  )
  # Make the request
  response = client.delete_adaptive_mt_file(request)
  # Handle the response
  print(response)



In [None]:
list_adaptive_mt_datasets()

In [30]:
dataset_id = "adaptive-a6d10929-2462-495a-86e0-4b55dd3ffa14"
list_adaptive_mt_files(dataset_id)

ListAdaptiveMtFilesPager<adaptive_mt_files {
  name: "projects/ai-hangsik/locations/us-central1/adaptiveMtDatasets/adaptive-a6d10929-2462-495a-86e0-4b55dd3ffa14/adaptiveMtFiles/13498504017393455548"
  display_name: "adaptive_proverb.tsv"
  entry_count: 62
}
adaptive_mt_files {
  name: "projects/ai-hangsik/locations/us-central1/adaptiveMtDatasets/adaptive-a6d10929-2462-495a-86e0-4b55dd3ffa14/adaptiveMtFiles/4592145638753982976"
  display_name: "adaptive_proverb.tsv"
  entry_count: 62
}
>


In [29]:
dataset_id = "adaptive-a6d10929-2462-495a-86e0-4b55dd3ffa14"
file_id = "615312462539334210"
delete_adaptive_mt_file(dataset_id, file_id)

None


In [31]:
dataset_id = "adaptive-a6d10929-2462-495a-86e0-4b55dd3ffa14"

delete_adaptive_mt_dataset(dataset_id)

None
