# 1. With Python Script 

In [1]:
import os
import cv2
import pytesseract
from langdetect import detect
import requests


# --- CONFIGURATION ---
# The LiveKit endpoint and key were referenced below without being defined in
# this cell, so every TTS call failed with a NameError. Read them from the
# environment; the placeholder defaults must be replaced before a real run.
LIVEKIT_TTS_URL = os.environ.get("LIVEKIT_TTS_URL", "https://your-livekit-server.com/tts")
LIVEKIT_API_KEY = os.environ.get("LIVEKIT_API_KEY", "your_livekit_api_key")
REQUEST_TIMEOUT_SECONDS = 30  # requests blocks indefinitely without a timeout

# --- PROCESSING ---
# For every image in img_folder: OCR the text, detect its language, send the
# text to the TTS endpoint and store the returned audio in output_folder.
img_folder = "part_2_images"
output_folder = "tts_outputs"
os.makedirs(output_folder, exist_ok=True)

for img_file in os.listdir(img_folder):
    if not img_file.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue

    img_path = os.path.join(img_folder, img_file)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports an unreadable or corrupt file by returning None
        # instead of raising; skip it rather than crash inside the OCR call.
        print(f"Could not read image {img_file}; skipping.")
        continue

    # OCR stays outside the try block on purpose: a missing Tesseract install
    # is a setup error that should stop the run, not be repeated per file.
    text = pytesseract.image_to_string(img)
    print(f"\nText from {img_file}:\n{text}")

    if not text.strip():
        print(f"No text detected in {img_file}.")
        continue

    try:
        lang = detect(text)
        print(f"Detected language: {lang}")

        # --- LiveKit TTS API Call ---
        tts_payload = {
            "text": text,
            "voice": "default",  # or specify a voice if LiveKit supports it
            "lang": lang,
        }
        headers = {"Authorization": f"Bearer {LIVEKIT_API_KEY}"}
        response = requests.post(
            LIVEKIT_TTS_URL,
            json=tts_payload,
            headers=headers,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        if response.status_code == 200:
            audio_path = os.path.join(output_folder, f"{os.path.splitext(img_file)[0]}_{lang}.mp3")
            with open(audio_path, "wb") as f:
                f.write(response.content)
            print(f"TTS audio saved: {audio_path}")
        else:
            print(f"LiveKit TTS error: {response.status_code} {response.text}")

    except Exception as e:
        # Best-effort batch: report the failure and move on to the next image.
        print(f"Error processing {img_file}: {e}")

TesseractNotFoundError: tesseract is not installed or it's not in your PATH. See README file for more information.

# 2. With Groq + LiveKit

In [None]:
import os
import cv2
import pytesseract
import requests

# Load secrets from the environment (for example, variables exported from a
# .env file). The placeholder defaults keep the cell runnable as before, but
# must be replaced with real values for the API calls to succeed.
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "your_groq_api_key")
LIVEKIT_TTS_URL = os.environ.get("LIVEKIT_TTS_URL", "https://your-livekit-server.com/tts")
LIVEKIT_API_KEY = os.environ.get("LIVEKIT_API_KEY", "your_livekit_api_key")

# Input images are read from img_folder; generated audio goes to output_folder.
img_folder = "part_2_images"
output_folder = "tts_outputs"
os.makedirs(output_folder, exist_ok=True)

def detect_language_groq(text):
    """Ask the Groq chat-completions endpoint which language *text* is in.

    Args:
        text: The text whose language should be identified.

    Returns:
        The model's reply with surrounding whitespace removed. The prompt
        requests a bare ISO 639-1 code, but the reply is not validated here.

    Raises:
        requests.RequestException: On a network failure, a timeout, or a
            non-success HTTP status from the API.
        KeyError, IndexError: If the response JSON lacks the expected
            ``choices[0].message.content`` structure.
    """
    prompt = f"Detect the language of the following text and respond with only the ISO 639-1 language code:\n\n{text}"
    headers = {"Authorization": f"Bearer {GROQ_API_KEY}"}
    data = {
        "model": "llama3-8b-8192",  # Or another Groq-supported model
        "messages": [{"role": "user", "content": prompt}],
    }
    # A timeout keeps one stalled request from hanging the whole batch.
    response = requests.post(GROQ_API_URL, headers=headers, json=data, timeout=30)
    # Surface auth/quota/server errors as an HTTP error rather than as an
    # opaque KeyError when the error payload has no "choices" entry.
    response.raise_for_status()
    lang_code = response.json()["choices"][0]["message"]["content"].strip()
    return lang_code

# For every image in img_folder: OCR the text, ask Groq for its language, send
# the text to the TTS endpoint and store the returned audio in output_folder.
for img_file in os.listdir(img_folder):
    if not img_file.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue

    img_path = os.path.join(img_folder, img_file)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports an unreadable or corrupt file by returning None
        # instead of raising; skip it rather than crash inside the OCR call.
        print(f"Could not read image {img_file}; skipping.")
        continue

    # OCR stays outside the try block on purpose: a missing Tesseract install
    # is a setup error that should stop the run, not be repeated per file.
    text = pytesseract.image_to_string(img)
    print(f"\nText from {img_file}:\n{text}")

    if not text.strip():
        print(f"No text detected in {img_file}.")
        continue

    try:
        lang = detect_language_groq(text)
        print(f"Detected language (Groq): {lang}")

        # --- LiveKit TTS API Call ---
        tts_payload = {
            "text": text,
            "voice": "default",
            "lang": lang,
        }
        headers = {"Authorization": f"Bearer {LIVEKIT_API_KEY}"}
        # A timeout keeps one stalled request from hanging the whole batch.
        response = requests.post(LIVEKIT_TTS_URL, json=tts_payload, headers=headers, timeout=30)
        if response.status_code == 200:
            # The language tag is free-form model output; keep only characters
            # that are safe in a file name so a chatty or malformed reply
            # cannot produce an invalid path or escape output_folder.
            lang_tag = "".join(ch for ch in lang if ch.isalnum() or ch in "-_") or "unknown"
            audio_path = os.path.join(output_folder, f"{os.path.splitext(img_file)[0]}_{lang_tag}.mp3")
            with open(audio_path, "wb") as f:
                f.write(response.content)
            print(f"TTS audio saved: {audio_path}")
        else:
            print(f"LiveKit TTS error: {response.status_code} {response.text}")

    except Exception as e:
        # Best-effort batch: report the failure and move on to the next image.
        print(f"Error processing {img_file}: {e}")