In [1]:
!pip install webvtt-py
!pip install googletrans



In [2]:
import webvtt
from googletrans import Translator
import glob

___

In [3]:
import webvtt
from googletrans import Translator
import os

def translate_and_save_vtt(file_path, output_dir='translate_subtitle', dest_language='fa'):
    """
    Reads a VTT file, translates its content, and saves it to a new file.

    Args:
        file_path (str): The path to the source VTT subtitle file.
        output_dir (str): The directory to save the translated file in.
        dest_language (str): The destination language (e.g., 'fa' for Persian).
    """
    # --- 1. Validate Input and Prepare Output Path ---
    if not os.path.exists(file_path):
        print(f"SKIPPING: Source file not found at '{file_path}'")
        return

    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    
    # Get the base name of the input file to create a new name
    base_name = os.path.basename(file_path)
    new_file_name = base_name.replace('.vtt', '_fa.vtt')
    new_file_name = new_file_name.replace("original_subtitle", "translate_subtitle")
    output_path = os.path.join(output_dir, new_file_name)

    # --- 2. Translate the File ---
    print(f"Translating '{file_path}'.")
    translator = Translator()
    try:
        # Read the entire VTT file into a vtt object
        vtt = webvtt.read(file_path)

        # Loop through each caption and translate its text
        for caption in vtt:
            if caption.text.strip():  # Only translate if there's text
                translated_text = translator.translate(caption.text, dest=dest_language).text
                caption.text = translated_text
        
        # --- 3. Save the Translated File Correctly ---
        vtt.save(output_path)
        print(f"SUCCESS: Saved translated file to '{output_path}'")

    except Exception as e:
        print(f"ERROR: Failed to process {file_path}. Reason: {e}")

___

### --- HOW TO USE ---

In [4]:
# 1. Define the list of files you want to translate

# Specify the directory path and pattern for .srt files
directory_path = 'original_subtitle/*.vtt'

# Find all .srt files
files_to_translate = glob.glob(directory_path)
files_to_translate

['original_subtitle/4. Feature Scaling in Machine Learning Normalization vs Standardization Explained.vtt',
 'original_subtitle/2. Machine Learning Workflow Importing, Modeling, and Evaluating Your ML Model.vtt',
 'original_subtitle/3. Data Preprocessing Importance of Training-Test Split in ML Model Evaluation.vtt']

In [5]:
# 2. Loop through the list and translate each file
for file in files_to_translate:
    translate_and_save_vtt(file)
    print("-" * 20) # Separator for clarity

Translating 'original_subtitle/4. Feature Scaling in Machine Learning Normalization vs Standardization Explained.vtt'.
SUCCESS: Saved translated file to 'translate_subtitle/4. Feature Scaling in Machine Learning Normalization vs Standardization Explained_fa.vtt'
--------------------
Translating 'original_subtitle/2. Machine Learning Workflow Importing, Modeling, and Evaluating Your ML Model.vtt'.
SUCCESS: Saved translated file to 'translate_subtitle/2. Machine Learning Workflow Importing, Modeling, and Evaluating Your ML Model_fa.vtt'
--------------------
Translating 'original_subtitle/3. Data Preprocessing Importance of Training-Test Split in ML Model Evaluation.vtt'.
SUCCESS: Saved translated file to 'translate_subtitle/3. Data Preprocessing Importance of Training-Test Split in ML Model Evaluation_fa.vtt'
--------------------
