# Paddle OCR

In [None]:
import os
from pathlib import Path
from paddleocr import PaddleOCR
from groq import Groq
from playsound import playsound
import requests

# --- Configuration ---
# Groq credentials and endpoint come from the environment. The endpoint falls
# back to the chat-completions URL below when GROQ_API_URL is not set;
# GROQ_API_KEY is None when the variable is missing.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
GROQ_API_URL = os.environ.get(
    "GROQ_API_URL",
    "https://api.groq.com/openai/v1/chat/completions",
)

# Folder scanned for input images, and the (lower-case) file suffixes that
# are treated as images.
images_folder = "part_2_images"
image_extensions = [
    ".jpg",
    ".jpeg",
    ".png",
    ".bmp",
    ".tiff",
]

# Two PaddleOCR engines, one English and one Arabic. The Arabic engine is
# also the one relied on for Urdu text.
ocr_en = PaddleOCR(lang="en", use_angle_cls=True)
ocr_ar = PaddleOCR(lang="ar", use_angle_cls=True)

def extract_text(image_path):
    """Run both OCR engines on an image and return the longer transcript.

    The image is passed to the English engine (``ocr_en``) first and then to
    the Arabic engine (``ocr_ar``). For each engine, element ``[1][0]`` of
    every entry in the first item of the result (presumably the recognized
    text of one detected line) is joined with single spaces. An empty or
    missing result yields an empty string.

    Args:
        image_path: Path of the image file handed to ``PaddleOCR.ocr``.

    Returns:
        The Arabic transcript when it is strictly longer than the English
        one; otherwise the English transcript (ties go to English).
    """

    def _transcribe(engine):
        # Collapse one engine's output into a single space-separated string.
        pages = engine.ocr(image_path)
        if not pages or not pages[0]:
            return ""
        return " ".join(entry[1][0] for entry in pages[0])

    english = _transcribe(ocr_en)
    arabic = _transcribe(ocr_ar)
    if len(arabic) > len(english):
        return arabic
    return english

def detect_language_groq(text, timeout=30):
    """Ask the Groq chat-completions endpoint which language ``text`` is in.

    Sends a single user message to ``GROQ_API_URL`` asking the model to reply
    with only an ISO 639-1 code, and returns the reply with surrounding
    whitespace stripped. The reply is model-generated, so callers should not
    assume it is always a well-formed two-letter code.

    Args:
        text: The text whose language should be detected.
        timeout: Value passed to ``requests.post`` as its ``timeout``
            (seconds). Without it a stalled connection would block forever.

    Returns:
        The stripped content of the first choice's message.

    Raises:
        RuntimeError: If ``GROQ_API_KEY`` is not set.
        requests.exceptions.RequestException: On connection problems,
            timeouts, or a non-success HTTP status (``raise_for_status``).
        KeyError, IndexError: If the response JSON lacks the expected
            ``choices[0].message.content`` structure.
    """
    if not GROQ_API_KEY:
        raise RuntimeError("Set your GROQ_API_KEY environment variable!")
    prompt = (
        "Detect the language of the following text and respond with only "
        f"the ISO 639-1 language code:\n\n{text}"
    )
    headers = {"Authorization": f"Bearer {GROQ_API_KEY}"}
    data = {
        "model": "llama3-8b-8192",
        "messages": [{"role": "user", "content": prompt}],
    }
    # Always bound the request: requests applies no timeout by default.
    response = requests.post(
        GROQ_API_URL, headers=headers, json=data, timeout=timeout
    )
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"].strip()

def tts_play(text, wav_path="tts_outputs/speech.wav"):
    """Synthesize ``text`` with Groq TTS, save it as a WAV file, and play it.

    A new ``Groq`` client is created with ``GROQ_API_KEY`` on every call. The
    speech is requested from the ``playai-tts`` model with the
    ``Adelaide-PlayAI`` voice in WAV format, written to ``wav_path`` (parent
    directories are created when missing), and then played with
    ``playsound``.

    Args:
        text: The text to be spoken.
        wav_path: Destination of the generated WAV file.
    """
    print("Speaking with Groq TTS (Adelaide-PlayAI)...")
    speech = Groq(api_key=GROQ_API_KEY).audio.speech.create(
        input=text,
        model="playai-tts",
        voice="Adelaide-PlayAI",
        response_format="wav",
    )
    target = Path(wav_path)
    # The output folder may not exist yet on the first run.
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("wb") as out:
        out.write(speech.content)
    playsound(str(target))

# Process every image in the input folder: OCR -> language detection -> TTS.
# sorted() makes the processing order deterministic; os.listdir returns
# entries in arbitrary order.
for img_file in sorted(os.listdir(images_folder)):
    # str.endswith accepts a tuple, so one call checks all extensions.
    if not img_file.lower().endswith(tuple(image_extensions)):
        continue
    img_path = os.path.join(images_folder, img_file)
    print(f"\n--- Processing: {img_file} ---")
    text = extract_text(img_path)
    if not text.strip():
        print("No text detected.")
        continue
    try:
        lang = detect_language_groq(text)
    except Exception as e:
        # Language detection is best-effort; fall back to a placeholder.
        print(f"Language detection failed: {e}")
        lang = "unknown"
    print(f"Detected language (Groq): {lang}")
    print(f"Extracted text: {text}")
    # The language code is a free-form model reply, so it may contain spaces,
    # path separators or a whole sentence. Keep only filename-safe characters
    # (bounded in length) before embedding it in the output path.
    safe_lang = (
        "".join(ch for ch in lang if ch.isalnum() or ch in "-_")[:16]
        or "unknown"
    )
    # Save each speech file with a unique name per image
    wav_path = f"tts_outputs/{Path(img_file).stem}_{safe_lang}.wav"
    try:
        tts_play(text, wav_path=wav_path)
    except Exception as e:
        # One failed synthesis/playback should not abort the remaining
        # images, mirroring how language-detection failures are handled.
        print(f"TTS failed: {e}")