In [7]:
import os
import re
import google.generativeai as genai

In [9]:
# ✅ Initialize Gemini client
api_key = os.getenv('GEMINI_API_KEY')
genai.configure(api_key=api_key)

In [10]:
# Input & Output directories
input_dir = "converted_html"
output_dir = "converted_html_arabic_gemini"
os.makedirs(output_dir, exist_ok=True)

def extract_and_replace_images(html_content):
    """Replace <img> tags with placeholders and return (processed_html, images_dict)."""
    images = {}
    def replacer(match):
        placeholder = f"[[IMG_TAG_{len(images)}]]"
        images[placeholder] = match.group(0)
        return placeholder
    processed_html = re.sub(r"<img[^>]*>", replacer, html_content, flags=re.IGNORECASE)
    return processed_html, images

def restore_images(translated_html, images):
    """Put back the original <img> tags in the translated content."""
    for placeholder, img_tag in images.items():
        translated_html = translated_html.replace(placeholder, img_tag)
    return translated_html

def enforce_rtl(html):
    """Force <html> tag to have lang='ar' and dir='rtl'."""
    html = re.sub(
        r"<html[^>]*>",
        '<html lang="ar" dir="rtl">',
        html,
        count=1,
        flags=re.IGNORECASE,
    )
    return html

def translate_to_arabic(text):
    """Send text to Gemini for Arabic translation with RTL enforcement."""
    prompt = f"""
Translate the following HTML content into **Arabic**.  
⚠️ Important formatting rules:
- Keep the full HTML structure unchanged.  
- Translate ONLY visible text inside tags.  
- Do not touch HTML attributes, placeholders like [[IMG_TAG_#]], or CSS.  
- Ensure the <html> tag becomes: <html lang="ar" dir="rtl">.  
- Keep tables, lists, and inline formatting intact.  

Example of expected output:
<!DOCTYPE html>
<html lang="ar" dir="rtl">
...
</html>

Content to translate:
{text}
"""
    model = genai.GenerativeModel("gemini-1.5-flash")
    response = model.generate_content(prompt)
    return response.text.strip()

# 🚀 Walk through chapters
for chapter in sorted(os.listdir(input_dir)):
    chapter_path = os.path.join(input_dir, chapter)
    if os.path.isdir(chapter_path):  # Only process folders (chap0, chap1, ...)
        out_chapter_path = os.path.join(output_dir, chapter)
        os.makedirs(out_chapter_path, exist_ok=True)

        for filename in os.listdir(chapter_path):
            if filename.endswith(".html"):
                input_file = os.path.join(chapter_path, filename)
                output_file = os.path.join(out_chapter_path, filename)

                with open(input_file, "r", encoding="utf-8") as f:
                    html_content = f.read()

                # 🚨 Extract and replace <img> tags
                safe_html, images = extract_and_replace_images(html_content)

                # 🌍 Translate with Gemini
                translated_html = translate_to_arabic(safe_html)

                # 🔄 Restore <img> tags
                final_html = restore_images(translated_html, images)

                # ✅ Ensure RTL alignment in <html>
                final_html = enforce_rtl(final_html)

                # Save Arabic version
                with open(output_file, "w", encoding="utf-8") as f:
                    f.write(final_html)

                print(f"✅ Translated: {input_file} → {output_file}")

print("🎉 All files translated into Arabic (RTL enforced, images preserved) using Gemini!")

✅ Translated: converted_html/chap0/Introduction to Forex Markets.html → converted_html_arabic_gemini/chap0/Introduction to Forex Markets.html
✅ Translated: converted_html/chap0/Key Concepts.html → converted_html_arabic_gemini/chap0/Key Concepts.html
✅ Translated: converted_html/chap0/Market Participants.html → converted_html_arabic_gemini/chap0/Market Participants.html
✅ Translated: converted_html/chap0/Global Market Influences.html → converted_html_arabic_gemini/chap0/Global Market Influences.html
✅ Translated: converted_html/chap02/Demo Accounts and Real Accounts.html → converted_html_arabic_gemini/chap02/Demo Accounts and Real Accounts.html
✅ Translated: converted_html/chap02/Choosing a Broker.html → converted_html_arabic_gemini/chap02/Choosing a Broker.html
✅ Translated: converted_html/chap02/Types of Forex Brokers.html → converted_html_arabic_gemini/chap02/Types of Forex Brokers.html
✅ Translated: converted_html/chap02/Trading Platforms and Tools.html → converted_html_arabic_gemin

ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 18
}
]