# Translate Documents in Batches

## Install Libraries

In [1]:
pip install --upgrade google-cloud-translate

Note: you may need to restart the kernel to use updated packages.


## Set variables

In [6]:
# --- Google Cloud project / region ---
project_id: str = "jo-vertex-ai-playground-ffyc"
location: str = "us-central1"

# --- Language pair (source -> target) ---
source_language_code, target_language_code = "en", "fr"

# --- Cloud Storage locations: file to translate and prefix for the results ---
input_uri: str = "gs://jo-translation-docs/input/KT_session_with_PwC.txt"
output_uri: str = "gs://jo-translation-docs/output/"

# --- How long to wait for the batch operation (presumably seconds) ---
timeout: int = 180

## Translate function

In [4]:
from google.cloud import translate


def batch_translate_text(
    input_uri: str,
    output_uri: str,
    project_id: str,
    timeout: float,
    source_language_code: str,
    target_language_code: str,
    location: str = "us-central1",
) -> "translate.BatchTranslateResponse":
    """Translate a plain-text file on GCS and store the result in a GCS location.

    Starts a batch translation operation, blocks until it finishes (or the
    timeout elapses), prints the character counts and returns the response.

    Args:
        input_uri: The GCS URI of the text to be translated.
        output_uri: The GCS URI prefix under which the translated output is
            written.
        project_id: The Google Cloud project ID used to build the request's
            parent resource path.
        timeout: How long to wait for the batch operation to complete; passed
            straight to ``operation.result``.
        source_language_code: Language code of the input text (e.g. "en").
        target_language_code: Language code to translate into (e.g. "fr").
        location: The region used in the parent resource path. Defaults to
            "us-central1". NOTE(review): batch translation is believed to
            require a regional (non-"global") location -- confirm against the
            Cloud Translation docs before changing.

    Returns:
        The batch translation response, which carries summary counters such
        as ``total_characters`` and ``translated_characters``. The translated
        text itself is written to ``output_uri``, not returned.

    Raises:
        Nothing is caught here: any exception raised by the client call or by
        waiting on the operation (including a timeout) propagates to the
        caller.
    """
    client = translate.TranslationServiceClient()

    # Supported file types: https://cloud.google.com/translate/docs/supported-formats
    input_config = {
        "gcs_source": {"input_uri": input_uri},
        "mime_type": "text/plain",  # Can be "text/plain" or "text/html".
    }
    output_config = {"gcs_destination": {"output_uri_prefix": output_uri}}
    parent = f"projects/{project_id}/locations/{location}"

    # Supported language codes: https://cloud.google.com/translate/docs/languages
    operation = client.batch_translate_text(
        request={
            "parent": parent,
            "source_language_code": source_language_code,
            # The API accepts up to 10 target language codes per request.
            "target_language_codes": [target_language_code],
            "input_configs": [input_config],
            "output_config": output_config,
        }
    )

    print("Waiting for operation to complete...")
    # Blocks until the long-running operation finishes or the timeout expires.
    response = operation.result(timeout=timeout)

    print(f"Total Characters: {response.total_characters}")
    print(f"Translated Characters: {response.translated_characters}")

    return response

## Call function

In [7]:
# Pass the language codes from the "Set variables" cell instead of repeating
# literals, so that editing the settings actually changes the translation.
result = batch_translate_text(
    input_uri=input_uri,
    output_uri=output_uri,
    project_id=project_id,
    timeout=timeout,
    source_language_code=source_language_code,
    target_language_code=target_language_code,
)

Waiting for operation to complete...
Total Characters: 194615
Translated Characters: 194615
