In [20]:
import pandas as pd
from google.cloud import translate_v3 as translate
import os
import time

# Point the Google client libraries at the service-account key file.
# This is set before the translation client is created further down.
os.environ.update({'GOOGLE_APPLICATION_CREDENTIALS': '/content/key.json'})

# Project and location used to build the `parent` resource path of each request
project_id, location = 'daring-diode-408818', 'global'

# Cloud Translation (v3) client used by the translation helper
translate_client = translate.TranslationServiceClient()

# Load the captions spreadsheet into a DataFrame
df = pd.read_excel('/content/eng_captions.xlsx')

# Define a function to translate a batch of text
def translate_batch(texts, target_language):
    """Translate a group of strings in a single Cloud Translation v3 request.

    Args:
        texts: Sequence of strings sent as the ``contents`` of one
            ``translate_text`` call.
        target_language: Language code to translate into (for example ``'ur'``).

    Returns:
        A list holding the ``translated_text`` of every translation in the
        response, in response order. An empty (or ``None``) input yields an
        empty list and no request is made.
    """
    if not texts:
        # Nothing to translate: skip the network round trip entirely.
        return []

    response = translate_client.translate_text(
        contents=texts,
        target_language_code=target_language,
        parent=f"projects/{project_id}/locations/{location}",
        # Without an explicit MIME type the service treats the input as HTML,
        # so plain captions can come back with HTML-escaped characters
        # (e.g. "&#39;" for an apostrophe). These inputs are plain text.
        mime_type="text/plain",
    )
    return [translation.translated_text for translation in response.translations]

# Function to process the DataFrame in batches
def process_in_batches(df, column_name, target_language, batch_size=400, pause_seconds=15):
    """Translate one DataFrame column in fixed-size batches.

    Args:
        df: DataFrame holding the text to translate.
        column_name: Name of the column whose values are translated.
        target_language: Language code passed through to ``translate_batch``.
        batch_size: Maximum number of rows sent per ``translate_batch`` call.
        pause_seconds: Seconds to wait between consecutive batches; a falsy
            value disables the pause.

    Returns:
        A list of translated strings, one per row of ``df[column_name]``, in
        row order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Missing cells become empty strings; the remaining values are coerced to
    # str so that non-text cells (numbers, dates) do not break the request.
    texts = df[column_name].fillna("").astype(str).tolist()

    translated_texts = []
    for batch_counter, start in enumerate(range(0, len(texts), batch_size)):
        if batch_counter and pause_seconds:
            # Pause only *between* requests, never after the final one.
            time.sleep(pause_seconds)

        batch_translations = translate_batch(
            texts[start:start + batch_size], target_language
        )
        translated_texts.extend(batch_translations)

        # Show the start of the first item of each batch for a quick visual check
        sample = batch_translations[0][:50] if batch_translations else ""
        print(f"Batch {batch_counter} processed. Sample translation: {sample}")

    return translated_texts

# Run the 'comment' column through the batch translator (target language 'ur')
# and keep the results in a new column next to the originals
df['translated_captions'] = process_in_batches(
    df,
    column_name='comment',
    target_language='ur',
)

# Write the augmented table to a new Excel workbook, leaving out the index
df.to_excel(excel_writer='/content/translated_captions.xlsx', index=False)


Batch 0 processed. Sample translation:  گھنے بالوں والے دو نوجوان صحن میں گھومتے ہوئے اپن
Batch 1 processed. Sample translation:  ایک جوڑے اور ایک شیر خوار بچے، جو مرد کے ہاتھوں پ
Batch 2 processed. Sample translation:  ایک لڑکی ایک بڑے دریا میں پیدل چل رہی ہے، جیسا کہ
Batch 3 processed. Sample translation:  ایک سفید کتا کھلی آنکھوں کے ساتھ ٹائل والے فرش پر
Batch 4 processed. Sample translation:  ایروڈینامک گیئر میں ایک آدمی تیز گھماؤ کے گرد سڑک
Batch 5 processed. Sample translation:  ایک بھورا اور سفید اور ایک سیاہ اور سفید کتا سمند
Batch 6 processed. Sample translation:  ایک بوڑھا آدمی صحن کا کام کر رہا ہے۔
Batch 7 processed. Sample translation:  ایک بھورا کتا گھاس اور پھولوں میں سے دوڑ رہا ہے۔
Batch 8 processed. Sample translation:  پانچ بالغ اپنے گھر کے پچھواڑے میں پکنک کی میز پر 
Batch 9 processed. Sample translation:  دو آدمی، ایک اپنے ساتھ ڈرم کے ساتھ سکرین کو دیکھ 
Batch 10 processed. Sample translation:  گہرے کمبل میں لپٹی ہوئی ایک چھوٹی سی لڑکی ہجوم سے
Batch 11 processed. Sam

# **Previous Code**

In [10]:
# Disabled cell: the script below is wrapped in a string literal, so none of it
# executes when the cell is run. As written, it would create the Cloud Storage
# bucket `capurdusk` and upload /content/eng_captions.xlsx to it as
# `captions.xlsx`.
# NOTE(review): the output recorded under this cell presumably comes from a run
# made before the code was wrapped in quotes - confirm before relying on it.
"""import os
from google.cloud import storage
from google.cloud import translate_v3 as translate

# Set up authentication
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/content/key.json'

# Initialize the Cloud Storage client
storage_client = storage.Client()

# Initialize the Translation API client
translate_client = translate.TranslationServiceClient()

# Set your project ID and bucket details
project_id = 'daring-diode-408818'
bucket_name = 'capurdusk'
source_file_name = '/content/eng_captions.xlsx'
destination_blob_name = 'captions.xlsx'  # The name of the file in the bucket

# Create a new bucket (unique across all Google Cloud)
# Note: Bucket names must be unique. If you get an error, try a different name.
bucket = storage_client.create_bucket(bucket_name)
print(f"Bucket {bucket.name} created.")

# Upload the file to Google Cloud Storage
blob = bucket.blob(destination_blob_name)
blob.upload_from_filename(source_file_name)
print(f"File {source_file_name} uploaded to {destination_blob_name}.")

"""

Bucket capurdusk created.
File /content/eng_captions.xlsx uploaded to captions.xlsx.


In [11]:
# Disabled cell: the script below is wrapped in a string literal, so none of it
# executes when the cell is run. As written, it would submit a
# `batch_translate_document` request (English -> Urdu) for the uploaded
# workbook and then download the first blob found under the output prefix.
# NOTE(review): `gcs_destination_prefix` is assigned twice inside the string;
# only the second value (the full "gs://.../" URI) is in effect afterwards.
# NOTE(review): that full "gs://<bucket>/..." URI is also passed as the
# `prefix` to `bucket.list_blobs`; object names presumably do not include the
# scheme and bucket, so the listing may come back empty - verify before
# re-enabling.
# NOTE(review): `destination_folder` is never created in this script - confirm
# it exists before the download step.
"""# ... previous code ...
from google.protobuf.json_format import MessageToDict
import time

# Construct the resource name of the parent
location = 'us-central1'  # or other specific location
parent = f"projects/{project_id}/locations/{location}"

# Construct the request
gcs_source_uri = f"gs://{bucket_name}/{destination_blob_name}"
gcs_destination_prefix = f"translated_{destination_blob_name}"

# Specify the input configuration
input_configs_element = {
    "gcs_source": {"input_uri": gcs_source_uri}
}
input_configs = [input_configs_element]

# ... previous code ...

# Make sure the output_uri_prefix ends with a '/'
gcs_destination_prefix = f"gs://{bucket_name}/translated_{destination_blob_name}/"

# Specify the output configuration
gcs_destination = {"output_uri_prefix": gcs_destination_prefix}
output_config = {"gcs_destination": gcs_destination}

# Submit a batch translation request
operation = translate_client.batch_translate_document(
    parent=parent,
    source_language_code="en",
    target_language_codes=["ur"],  # Use Urdu as the target language
    input_configs=input_configs,
    output_config=output_config
)


response = operation.result()  # No need for timeout, as operation.done() is True
# Output the translated document location
print(f"Translated document written to {gcs_destination_prefix}")



# Output the translated document location
output_uri_prefix = response.output_config.gcs_destination.output_uri_prefix

# List objects with the given prefix.
bucket = storage_client.get_bucket(bucket_name)
blobs = list(bucket.list_blobs(prefix=gcs_destination_prefix))



# ... previous code ...

if blobs:
    # The folder where you want to save the translated file
    destination_folder = "/content/TranslatedCaptions"

    # Get the name of the first blob (file) in the list
    blob_name = blobs[0].name

    # The full path should include the filename, for example:
    destination_file_name = os.path.join(destination_folder, "translated_file.xlsx")

    # Download the translated file
    blob = bucket.blob(blob_name)
    blob.download_to_filename(destination_file_name)

    print(f"Translated file downloaded to {destination_file_name}")

else:
    print("No files found with the specified prefix.")
"""

InternalServerError: ignored