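# Translate Document

This notebook downloads a WebVTT transcript from Cloud Storage, converts it to a PDF, and translates that PDF with the Cloud Translation v3 document API.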

## Install Libraries

In [None]:
pip install --upgrade google-cloud-translate google-cloud-storage FPDF2 srt

## Set variables

In [21]:
from pathlib import Path

# Google Cloud project and the Translation API location to call.
project_id = "jo-vertex-ai-playground-ffyc"
location = "us-central1"

# Language pair for the translation request.
source_language_code = "en"
target_language_code = "fr"

# Bucket and the folders (object-name prefixes) used for each pipeline stage.
bucket_name = "jo-translation-docs"
input_folder_name = "input"
converted_folder_name = "converted"
output_folder_name = "output"

# Only the input file name needs editing; the converted and translated names are
# derived from it so the three can never drift apart.
input_file_name = "KT_session_with_PwC.vtt"
_input_stem = Path(input_file_name).stem
converted_file_name = f"{_input_stem}.pdf"
output_file_name = f"{_input_stem}_{target_language_code.upper()}.pdf"

: 

## Download VTT document

In [None]:
from google.cloud import storage

def read_gcs_file(bucket_name, input_folder_name, input_file_name):
    """Download a text object from Cloud Storage and return it as a string.

    Args:
        bucket_name: Name of the bucket that holds the object.
        input_folder_name: Folder (object-name prefix) inside the bucket.
        input_file_name: Name of the object inside that folder.

    Returns:
        The object's content decoded as UTF-8 text, without a leading
        byte-order mark if one was present.

    Raises:
        UnicodeDecodeError: If the object's bytes are not valid UTF-8.
    """
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(f"{input_folder_name}/{input_file_name}")

    # download_as_string() is deprecated in google-cloud-storage;
    # download_as_bytes() is its direct replacement.
    raw_bytes = blob.download_as_bytes()

    # "utf-8-sig" decodes plain UTF-8 and additionally drops a leading BOM,
    # so the marker does not end up as a stray character in the text.
    return raw_bytes.decode("utf-8-sig")

file_content = read_gcs_file(bucket_name, input_folder_name, input_file_name)
# Show only the beginning of the transcript: printing the whole file bloats the
# saved notebook and can expose the transcript's content when it is shared.
print(file_content[:1000])

## Convert VTT to PDF

In [23]:
from fpdf import FPDF
from google.cloud import storage

def convert_to_pdf(text_content, bucket_name, converted_folder_name, converted_file_name):
    """Render text into a single-font PDF and upload it to Cloud Storage.

    The PDF is built in memory and uploaded directly, so no temporary file is
    left behind in the working directory.

    Args:
        text_content: The text to place in the PDF.
        bucket_name: Name of the destination bucket.
        converted_folder_name: Folder (object-name prefix) inside the bucket.
        converted_file_name: Object name for the uploaded PDF.

    Returns:
        None.
    """
    pdf = FPDF()
    pdf.add_page()
    # NOTE(review): "helvetica" is a built-in core font; fpdf2 is expected to
    # reject characters outside its encoding - confirm the transcripts only
    # contain characters this font supports, or register a Unicode font.
    pdf.set_font("helvetica", size=12)
    pdf.multi_cell(0, 10, text_content)

    # Called without a file name, fpdf2's output() returns the document as a
    # bytearray; convert to bytes, which is what upload_from_string() accepts.
    pdf_bytes = bytes(pdf.output())

    # Upload to GCS
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(f"{converted_folder_name}/{converted_file_name}")
    # Set the content type explicitly; upload_from_string() defaults to text/plain.
    blob.upload_from_string(pdf_bytes, content_type="application/pdf")

# Example usage: convert the downloaded transcript text and upload the PDF to
# the "converted" folder of the bucket.
# NOTE(review): pdf_file_path is assigned here but is not passed to
# convert_to_pdf or read anywhere in the visible cells - confirm it is unused
# before removing it.
pdf_file_path = "output.pdf"
convert_to_pdf(file_content, bucket_name, converted_folder_name, converted_file_name)

## Translate function

In [63]:
# Imports the Google Cloud Translation library
from google.cloud import translate_v3

def translate_document(
    bucket_name,
    converted_folder_name,
    converted_file_name,
    location,
    project_id,
    source_language_code,
    target_language_code,
    output_folder_name,
    output_file_name
) -> translate_v3.TranslateDocumentResponse:
    """Translate a PDF stored in Cloud Storage with the Translation v3 API.

    Args:
        bucket_name: Bucket that holds the input PDF and receives the output.
        converted_folder_name: Folder (object-name prefix) of the input PDF.
        converted_file_name: Object name of the input PDF inside that folder.
        location: Translation API location used to build the parent resource.
        project_id: Google Cloud project used to build the parent resource.
        source_language_code: Language code of the input document.
        target_language_code: Language code to translate into.
        output_folder_name: Folder (object-name prefix) for the translated file.
        output_file_name: Kept for backward compatibility with existing callers;
            not sent to the API (see the note on the output config below).

    Returns:
        The response returned by ``TranslationServiceClient.translate_document``.
    """
    # Initialize Translation client
    client = translate_v3.TranslationServiceClient()

    parent = f"projects/{project_id}/locations/{location}"

    document_input_config = {
        "gcs_source": {
            "input_uri": f"gs://{bucket_name}/{converted_folder_name}/{converted_file_name}"
        },
        "mime_type": "application/pdf"
    }

    # NOTE(review): DocumentOutputConfig is understood to expose only
    # gcs_destination and mime_type; a "file_name" key is not part of the
    # message and is rejected when the request is built, so it is not sent.
    # The service picks the output object name under the prefix - confirm
    # against the API reference if a specific name is required.
    document_output_config = {
        "gcs_destination": {
            "output_uri_prefix": f"gs://{bucket_name}/{output_folder_name}/"
        }
    }

    response = client.translate_document(
        request={
            "parent": parent,
            "source_language_code": source_language_code,
            "target_language_code": target_language_code,
            "document_input_config": document_input_config,
            "document_output_config": document_output_config,
            "is_translate_native_pdf_only": True
        }
    )

    # The response may embed the translated document itself, so print a short
    # confirmation rather than the whole message; callers get the full response.
    print(f"Translation finished; output written under gs://{bucket_name}/{output_folder_name}/")

    return response

## Call function

In [None]:
# Run the translation with the notebook's configuration values; keyword
# arguments make the mapping onto the nine parameters explicit.
translated_document = translate_document(
    bucket_name=bucket_name,
    converted_folder_name=converted_folder_name,
    converted_file_name=converted_file_name,
    location=location,
    project_id=project_id,
    source_language_code=source_language_code,
    target_language_code=target_language_code,
    output_folder_name=output_folder_name,
    output_file_name=output_file_name,
)