In [6]:
import os
import re
from googletrans import Translator
from concurrent.futures import ThreadPoolExecutor, as_completed
import logging
import time

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Set retry delay and maximum retries
RETRY_DELAY = 2  # Time to wait before retrying
MAX_RETRIES = 3  # Maximum number of retries for translation failures

def translate_json_files(input_folder, output_folder, target_language):
    os.makedirs(output_folder, exist_ok=True)

    logging.info('Started translation process.')

    def translate_text(text, filename, retries=MAX_RETRIES, chunk_size=4000):
        """
        This function handles the translation of text with retries on failure.
        """
        translator = Translator()  # Initialize Translator for each file
        
        for attempt in range(retries):
            try:
                # Split the text into chunks for translation
                chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
                translated_chunks = [translator.translate(chunk, dest=target_language).text for chunk in chunks]
                return " ".join(translated_chunks)  # Return concatenated translated text
            except Exception as e:
                logging.error(f"Error translating '{filename}' on attempt {attempt + 1}/{retries}: {e}")
                if attempt < retries - 1:
                    logging.info(f"Retrying in {RETRY_DELAY} seconds...")
                    time.sleep(RETRY_DELAY)  # Wait before retrying
                else:
                    logging.error(f"Failed to translate '{filename}' after {retries} attempts.")
                    return False  # Return None if all retries fail

    def preserve_markdown_formatting(text, filename):
        """
        This function replaces parts of the markdown text with placeholders,
        translates the text, and then puts everything back together.
        """
        # Define regular expressions for Markdown elements to preserve
        patterns = {
            'latex_block': r'\\\[.*?\\\]',  # Skip LaTeX math block: \[ text \]
        }

        # Find all Markdown elements and replace them with placeholders
        placeholders = {}
        for key, pattern in patterns.items():
            matches = re.findall(pattern, text)
            for i, match in enumerate(matches):
                placeholder = f"__{key}_{i}__"
                placeholders[placeholder] = match
                text = text.replace(match, placeholder)

        # Translate the remaining plain text
        translated_text = translate_text(text, filename)

        # Replace the placeholders with the original Markdown elements
        if translated_text:
            for placeholder, original in placeholders.items():
                translated_text = translated_text.replace(placeholder, original)
        
        return translated_text

    def translate_file(filename):
        input_path = os.path.join(input_folder, filename)
        output_path = os.path.join(output_folder, filename)
        
        try:
            with open(input_path, 'r', encoding='utf-8') as f:
                data = f.read()
            
            logging.info(f"Translating {filename}")
            
            # Preserve Markdown formatting (including MathJax) and translate
            translated_data = preserve_markdown_formatting(data, filename)
            
            if translated_data:
                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write(translated_data)
                logging.info(f"{filename} translated and saved to {output_path}")
            else:
                logging.error(f"Failed to translate {filename}")
        except Exception as e:
            logging.error(f"Error reading or processing '{filename}': {e}")

    files_to_translate = [filename for filename in os.listdir(input_folder) if filename.endswith(".md")]

    # Set max_workers to a reasonable value (e.g., 10 or 20) to avoid overloading the system/API
    with ThreadPoolExecutor(max_workers=200) as executor:
        futures = []
        
        # Submit tasks for each file
        for filename in files_to_translate:
            future = executor.submit(translate_file, filename)
            futures.append(future)
        
        # Process completed futures as they finish
        for future in as_completed(futures):
            try:
                future.result()  # Retrieve result or catch exceptions
            except Exception as e:
                logging.error(f"An error occurred: {e}")

# Example usage
# input_folder = "/path/to/input/folder"
# output_folder = "/path/to/output/folder"
# target_language = "es"  # For Spanish
# translate_json_files(input_folder, output_folder, target_language)


In [None]:
import os

locales = ['ta', 'hi', 'es', 'de', 'ru', 'ja', 'fr', 'pt', 'zh-CN', 'ar', 'it', 'ko', 'te', 'kn']
locales = ['hi', 'es', 'de', 'ru', 'ja', 'fr', 'pt', 'zh-CN', 'ar', 'it', 'ko', 'te', 'kn']
base_folder = "/Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/unit-converter"
input_base_folder = os.path.join(base_folder, 'en')

# Function to process files
def process_file(input_file_path, output_folder_path, target_language):
    # Add your code to handle translation here
    translate_json_files(input_file_path, output_folder_path, target_language)

# Function to recursively navigate through directories
def process_folders(input_folder, output_folder, locale):
    for root, dirs, files in os.walk(input_folder):
        for file in files:
            input_file_path = os.path.join(root, file)
            # Create corresponding output folder if not exists
            relative_path = os.path.relpath(input_file_path, input_folder)
            output_file_path = os.path.join(output_folder, relative_path)
            os.makedirs(os.path.dirname(output_file_path), exist_ok=True)
            # Process each file
            process_file(input_file_path, output_file_path, locale)

for locale in locales:
    print(f'Started processing locale: {locale}')

    localefolder = 'zh' if locale == 'zh-CN' else locale
    # Process folders inside the 'en' directory
    for input_folder in os.listdir(input_base_folder):
        # Skip 'soldiers' folder (or any folder you wish to exclude)
        if input_folder.lower() in ['acceleration', 'angle']:
            print(f"Skipping folder: {input_folder}")
            continue

        # if input_folder.lower() in ['density']:
        input_folder_path = os.path.join(input_base_folder, input_folder)
            
            # Only process directories inside 'en'
        if os.path.isdir(input_folder_path):
            output_folder_path = os.path.join(base_folder, f'{localefolder}/{input_folder}')
            target_language = locale
            translate_json_files(input_folder_path, output_folder_path, target_language)
        # else:
        #     output_folder_path = os.path.join(base_folder, f'{localefolder}')
        #     target_language = locale
        #     translate_json_files(input_folder_path, output_folder_path, target_language)



    
    print(f'Finished processing locale: {locale}')


In [7]:
import os
locales = ['hi', 'es', 'de', 'ru', 'ja', 'fr', 'pt', 'zh-CN', 'ar', 'it', 'ko', 'te', 'kn']
base_folder = "/Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator"
for locale in locales:
    print('started ', locale)
    localefolder = 'zh' if locale == 'zh-CN' else locale

    input_folder = os.path.join(base_folder, 'en')
    output_folder = os.path.join(base_folder, f'{localefolder}')
    target_language = locale
    translate_json_files(input_folder, output_folder, target_language)
    print('started ', locale)



2025-03-22 10:30:58,464 - INFO - Started translation process.
2025-03-22 10:30:58,466 - INFO - Translating telepen.md
2025-03-22 10:30:58,467 - INFO - Translating plessey.md
2025-03-22 10:30:58,467 - INFO - Translating ultracode.md
2025-03-22 10:30:58,467 - INFO - Translating upce.md
2025-03-22 10:30:58,467 - INFO - Translating gs1datamatrixrectangular.md
2025-03-22 10:30:58,468 - INFO - Translating posicode.md
2025-03-22 10:30:58,468 - INFO - Translating qr-code-note.md
2025-03-22 10:30:58,468 - INFO - Translating swissqrcode.md
2025-03-22 10:30:58,468 - INFO - Translating pharmacode2.md
2025-03-22 10:30:58,469 - INFO - Translating japanpost.md
2025-03-22 10:30:58,470 - INFO - Translating hanxin.md
2025-03-22 10:30:58,470 - INFO - Translating upca.md
2025-03-22 10:30:58,470 - INFO - Translating interleaved2of5.md
2025-03-22 10:30:58,470 - INFO - Translating telepennumeric.md
2025-03-22 10:30:58,470 - INFO - Translating social-media-qr-code.md
2025-03-22 10:30:58,470 - INFO - Translati

started  hi


2025-03-22 10:30:58,535 - INFO - Translating ean8composite.md
2025-03-22 10:30:58,535 - INFO - Translating hibcazteccode.md
2025-03-22 10:30:58,536 - INFO - Translating rationalizedCodabar.md
2025-03-22 10:30:58,537 - INFO - Translating databarexpandedstacked.md
2025-03-22 10:30:58,538 - INFO - Translating 5_privacy_term.md
2025-03-22 10:30:58,539 - INFO - Translating gs1dotcode.md
2025-03-22 10:30:58,539 - INFO - Translating issn.md
2025-03-22 10:30:58,540 - INFO - Translating instagram-qr-code.md
2025-03-22 10:30:58,541 - INFO - Translating code49.md
2025-03-22 10:30:58,541 - INFO - Translating hibccode39.md
2025-03-22 10:30:58,542 - INFO - Translating databarexpandedstackedcomposite.md
2025-03-22 10:30:58,542 - INFO - Translating hibcmicropdf417.md
2025-03-22 10:30:58,543 - INFO - Translating leitcode.md
2025-03-22 10:30:58,543 - INFO - Translating databaromnicomposite.md
2025-03-22 10:30:58,544 - INFO - Translating ean2.md
2025-03-22 10:30:58,544 - INFO - Translating code39.md
2025

started  hi
started  es


2025-03-22 10:31:12,933 - INFO - telepennumeric.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/es/telepennumeric.md
2025-03-22 10:31:15,267 - INFO - 3_types.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/es/3_types.md
2025-03-22 10:31:15,682 - INFO - 2_intro_history.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/es/2_intro_history.md
2025-03-22 10:31:16,753 - ERROR - Error translating 'mailmark.md' on attempt 1/3: The read operation timed out
2025-03-22 10:31:16,753 - ERROR - Error translating 'swissqrcode.md' on attempt 1/3: The read operation timed out
2025-03-22 10:31:16,753 - INFO - Retrying in 2 seconds...
2025-03-22 10:31:16,753 - INFO - Retrying in 2 seconds...
2025-03-22 10:31:16,760 - ERROR - Error translating 'code39ext.md' on attempt 1/3: The read operation timed out
2025-03-22 10:31:16,760 - INFO - Retrying in 2 seconds...
202

started  es
started  de


2025-03-22 10:31:23,669 - INFO - Translating qr-code-app-store.md
2025-03-22 10:31:23,669 - INFO - Translating code93ext.md
2025-03-22 10:31:23,669 - INFO - Translating 1_tableof_content.md
2025-03-22 10:31:23,669 - INFO - Translating channelcode.md
2025-03-22 10:31:23,670 - INFO - Translating ean13composite.md
2025-03-22 10:31:23,670 - INFO - Translating databartruncated.md
2025-03-22 10:31:23,671 - INFO - Translating hibcqrcode.md
2025-03-22 10:31:23,671 - INFO - Translating datamatrix.md
2025-03-22 10:31:23,671 - INFO - Translating identcode.md
2025-03-22 10:31:23,672 - INFO - Translating symbol.md
2025-03-22 10:31:23,672 - INFO - Translating qrcode.md
2025-03-22 10:31:23,672 - INFO - Translating databarexpandedcomposite.md
2025-03-22 10:31:23,673 - INFO - Translating daft.md
2025-03-22 10:31:23,674 - INFO - Translating qr-code-vcard.md
2025-03-22 10:31:23,674 - INFO - Translating ean8composite.md
2025-03-22 10:31:23,674 - INFO - Translating hibcazteccode.md
2025-03-22 10:31:23,674 

started  de
started  ru


2025-03-22 10:31:39,064 - INFO - 3_types.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/ru/3_types.md
2025-03-22 10:31:39,541 - INFO - 4_uses.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/ru/4_uses.md
2025-03-22 10:31:40,120 - INFO - telepennumeric.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/ru/telepennumeric.md
2025-03-22 10:31:40,320 - INFO - qr-code-app-store.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/ru/qr-code-app-store.md
2025-03-22 10:31:40,853 - INFO - 5_privacy_term.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/ru/5_privacy_term.md
2025-03-22 10:31:41,159 - INFO - 2_intro_history.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/ru/2_intro_history.md
2025-03-22 10:31:41,187 - 

started  ru
started  ja


2025-03-22 10:31:50,622 - INFO - telepennumeric.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/ja/telepennumeric.md
2025-03-22 10:31:51,571 - INFO - 3_types.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/ja/3_types.md
2025-03-22 10:31:53,363 - ERROR - Error translating 'qr-code-note.md' on attempt 1/3: The read operation timed out
2025-03-22 10:31:53,363 - INFO - Retrying in 2 seconds...
2025-03-22 10:31:53,478 - ERROR - Error translating 'posicode.md' on attempt 1/3: The read operation timed out
2025-03-22 10:31:53,478 - INFO - Retrying in 2 seconds...
2025-03-22 10:31:53,488 - INFO - 1_tableof_content.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/ja/1_tableof_content.md
2025-03-22 10:31:53,624 - ERROR - Error translating 'code2of5.md' on attempt 1/3: The read operation timed out
2025-03-22 10:31:53,624 - INFO - Retrying in 2 seconds...

started  ja
started  fr


2025-03-22 10:32:03,758 - INFO - 3_types.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/fr/3_types.md
2025-03-22 10:32:04,186 - INFO - telepennumeric.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/fr/telepennumeric.md
2025-03-22 10:32:04,308 - INFO - 4_uses.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/fr/4_uses.md
2025-03-22 10:32:04,933 - INFO - qr-code-app-store.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/fr/qr-code-app-store.md
2025-03-22 10:32:05,576 - ERROR - Error translating 'postnet.md' on attempt 1/3: The read operation timed out
2025-03-22 10:32:05,576 - INFO - Retrying in 2 seconds...
2025-03-22 10:32:05,602 - ERROR - Error translating 'gs1datamatrixrectangular.md' on attempt 1/3: The read operation timed out
2025-03-22 10:32:05,602 - INFO - Retrying in 2 seconds...
2025-03

started  fr
started  pt


2025-03-22 10:32:12,621 - INFO - Translating gs1northamericancoupon.md
2025-03-22 10:32:12,621 - INFO - Translating azteccode.md
2025-03-22 10:32:12,623 - INFO - Translating isbn.md
2025-03-22 10:32:13,936 - INFO - telepennumeric.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/pt/telepennumeric.md
2025-03-22 10:32:16,893 - INFO - 3_types.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/pt/3_types.md
2025-03-22 10:32:17,261 - INFO - databartruncatedcomposite.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/pt/databartruncatedcomposite.md
2025-03-22 10:32:17,267 - INFO - upcecomposite.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/pt/upcecomposite.md
2025-03-22 10:32:17,282 - INFO - msi.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/pt/msi.md
2025

started  pt
started  zh-CN


2025-03-22 10:32:24,029 - INFO - Translating qr-code-vcard.md
2025-03-22 10:32:24,030 - INFO - Translating daft.md
2025-03-22 10:32:24,030 - INFO - Translating hibcazteccode.md
2025-03-22 10:32:24,031 - INFO - Translating ean8composite.md
2025-03-22 10:32:24,031 - INFO - Translating databarexpandedstacked.md
2025-03-22 10:32:24,032 - INFO - Translating rationalizedCodabar.md
2025-03-22 10:32:24,032 - INFO - Translating 5_privacy_term.md
2025-03-22 10:32:24,032 - INFO - Translating code49.md
2025-03-22 10:32:24,033 - INFO - Translating gs1dotcode.md
2025-03-22 10:32:24,033 - INFO - Translating issn.md
2025-03-22 10:32:24,034 - INFO - Translating instagram-qr-code.md
2025-03-22 10:32:24,035 - INFO - Translating hibccode39.md
2025-03-22 10:32:24,035 - INFO - Translating databarexpandedstackedcomposite.md
2025-03-22 10:32:24,035 - INFO - Translating hibcmicropdf417.md
2025-03-22 10:32:24,036 - INFO - Translating leitcode.md
2025-03-22 10:32:24,037 - INFO - Translating databaromnicomposite.

started  zh-CN
started  ar


2025-03-22 10:32:38,401 - INFO - telepennumeric.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/ar/telepennumeric.md
2025-03-22 10:32:39,952 - INFO - 3_types.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/ar/3_types.md
2025-03-22 10:32:41,008 - ERROR - Error translating 'swissqrcode.md' on attempt 1/3: The read operation timed out
2025-03-22 10:32:41,010 - INFO - Retrying in 2 seconds...
2025-03-22 10:32:41,057 - ERROR - Error translating 'databarstacked.md' on attempt 1/3: The read operation timed out
2025-03-22 10:32:41,057 - INFO - Retrying in 2 seconds...
2025-03-22 10:32:41,095 - ERROR - Error translating 'social-media-qr-code.md' on attempt 1/3: The read operation timed out
2025-03-22 10:32:41,096 - INFO - Retrying in 2 seconds...
2025-03-22 10:32:41,097 - ERROR - Error translating 'gs1dldatamatrix.md' on attempt 1/3: The read operation timed out
2025-03-22 10:32:41,097 - ERROR - Er

started  ar
started  it


2025-03-22 10:32:47,084 - INFO - Translating qr-code-vcard.md
2025-03-22 10:32:47,084 - INFO - Translating qrcode.md
2025-03-22 10:32:47,084 - INFO - Translating ean8composite.md
2025-03-22 10:32:47,084 - INFO - Translating daft.md
2025-03-22 10:32:47,085 - INFO - Translating hibcazteccode.md
2025-03-22 10:32:47,085 - INFO - Translating rationalizedCodabar.md
2025-03-22 10:32:47,085 - INFO - Translating databarexpandedstacked.md
2025-03-22 10:32:47,087 - INFO - Translating code49.md
2025-03-22 10:32:47,089 - INFO - Translating 5_privacy_term.md
2025-03-22 10:32:47,090 - INFO - Translating gs1dotcode.md
2025-03-22 10:32:47,092 - INFO - Translating issn.md
2025-03-22 10:32:47,094 - INFO - Translating hibccode39.md
2025-03-22 10:32:47,095 - INFO - Translating instagram-qr-code.md
2025-03-22 10:32:47,096 - INFO - Translating hibcmicropdf417.md
2025-03-22 10:32:47,096 - INFO - Translating databarexpandedstackedcomposite.md
2025-03-22 10:32:47,097 - INFO - Translating databaromnicomposite.md

started  it
started  ko


2025-03-22 10:32:59,764 - INFO - telepennumeric.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/ko/telepennumeric.md
2025-03-22 10:32:59,967 - ERROR - Error translating 'japanpost.md' on attempt 1/3: the JSON object must be str, bytes or bytearray, not NoneType
2025-03-22 10:32:59,967 - INFO - Retrying in 2 seconds...
2025-03-22 10:33:00,540 - INFO - pharmacode2.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/ko/pharmacode2.md
2025-03-22 10:33:01,383 - INFO - 4_uses.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/ko/4_uses.md
2025-03-22 10:33:01,422 - INFO - 3_types.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/ko/3_types.md
2025-03-22 10:33:02,479 - INFO - 2_intro_history.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/ko/2_intro_history.md
2

started  ko
started  te


2025-03-22 10:33:11,154 - INFO - Translating databaromni.md
2025-03-22 10:33:11,154 - INFO - Translating micropdf417.md
2025-03-22 10:33:11,154 - INFO - Translating code32.md
2025-03-22 10:33:11,155 - INFO - Translating pdf417compact.md
2025-03-22 10:33:11,156 - INFO - Translating databarstackedomnicomposite.md
2025-03-22 10:33:11,156 - INFO - Translating gs1-128.md
2025-03-22 10:33:11,157 - INFO - Translating pdf417.md
2025-03-22 10:33:11,157 - INFO - Translating databarlimited.md
2025-03-22 10:33:11,158 - INFO - Translating datamatrixrectangular.md
2025-03-22 10:33:11,158 - INFO - Translating flattermarken.md
2025-03-22 10:33:11,160 - INFO - Translating raw.md
2025-03-22 10:33:11,160 - INFO - Translating ean14.md
2025-03-22 10:33:11,160 - INFO - Translating pzn.md
2025-03-22 10:33:11,160 - INFO - Translating industrial2of5.md
2025-03-22 10:33:11,161 - INFO - Translating rectangularmicroqrcode.md
2025-03-22 10:33:11,161 - INFO - Translating azteccodecompact.md
2025-03-22 10:33:11,164 

started  te
started  kn


2025-03-22 10:33:26,652 - INFO - telepennumeric.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/kn/telepennumeric.md
2025-03-22 10:33:27,018 - ERROR - Error translating 'databarstacked.md' on attempt 1/3: the JSON object must be str, bytes or bytearray, not NoneType
2025-03-22 10:33:27,018 - ERROR - Error translating 'japanpost.md' on attempt 1/3: the JSON object must be str, bytes or bytearray, not NoneType
2025-03-22 10:33:27,018 - INFO - Retrying in 2 seconds...
2025-03-22 10:33:27,019 - INFO - Retrying in 2 seconds...
2025-03-22 10:33:27,027 - INFO - interleaved2of5.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/kn/interleaved2of5.md
2025-03-22 10:33:27,066 - INFO - upca.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inayam-doc/scanner-code-generator/kn/upca.md
2025-03-22 10:33:27,555 - INFO - telepen.md translated and saved to /Users/rakesh.c/rakesh/my/am/inayam/inaya

started  kn
