In [None]:
# Install the pytesseract wrapper. `%pip` (rather than a bare `pip`, which only
# works through IPython automagic) installs into the running kernel's environment.
%pip install pytesseract

In [None]:
# Install the Tesseract engine plus the Bengali language data.
# `-y` answers the confirmation prompt (a notebook cell has no interactive stdin),
# and `apt-get` is used because `apt` does not offer a stable CLI for scripted use.
!sudo apt-get install -y tesseract-ocr tesseract-ocr-ben

In [None]:
# Install the Python dependencies used by this notebook into the kernel's
# environment (`%pip` instead of relying on automagic for a bare `pip`).
%pip install pytesseract google-generativeai opencv-python-headless

In [61]:
import os
import io
import json
import logging
from typing import List, Dict
import cv2
import numpy as np
from PIL import Image
import google.generativeai as genai
import pytesseract

In [62]:
# Read the Gemini API key from the environment only; do not hardcode a real key here.
API_KEY = os.getenv("GOOGLE_API_KEY", "").strip()
# Reject a missing key as well as any unreplaced "<...>" template placeholder.
# (The previous check compared against "<API-KEY>" while the default was
# "<your api key>", so an unset key was never detected.)
if not API_KEY or API_KEY.startswith("<"):
    raise RuntimeError("Please set your GOOGLE_API_KEY environment variable.")
# Write the stripped value back so the environment variable matches API_KEY.
os.environ["GOOGLE_API_KEY"] = API_KEY

genai.configure(api_key=API_KEY)
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")

In [63]:
def validate_image_path(img_path: str) -> None:
    """Check that ``img_path`` exists and can be opened and verified by Pillow.

    Args:
        img_path: Filesystem path of the image to check.

    Raises:
        FileNotFoundError: If nothing exists at ``img_path``.
        ValueError: If Pillow fails to open or verify the file; the original
            exception is attached as ``__cause__``.
    """
    if not os.path.exists(img_path):
        raise FileNotFoundError(f"Image file not found: {img_path}")
    try:
        # The context manager closes the underlying file handle, which the
        # bare `Image.open(...).verify()` call left open.
        with Image.open(img_path) as img:
            img.verify()
    except Exception as e:
        raise ValueError(f"Invalid image file: {e}") from e

def safe_resize(img: np.ndarray, scale: float = 1.5) -> np.ndarray:
    """Resize ``img`` by a uniform ``scale`` factor using bicubic interpolation.

    Args:
        img: Image array; its first two dimensions are read as (height, width).
        scale: Multiplicative factor applied to both width and height.
            Must be a positive, finite number.

    Returns:
        The array returned by ``cv2.resize``.

    Raises:
        ValueError: If ``img`` is ``None`` or empty, or if ``scale`` is not a
            positive finite number.
    """
    if img is None or img.size == 0:
        raise ValueError("Invalid image input for resizing")
    # `0 < scale` is False for NaN as well, so NaN is rejected here too.
    if not (0 < scale < float("inf")):
        raise ValueError(f"Scale must be a positive finite number, got {scale!r}")

    height, width = img.shape[:2]
    # Clamp to at least one pixel: a small scale would otherwise truncate a
    # dimension to 0, which cv2.resize rejects.
    new_size = (max(1, int(width * scale)), max(1, int(height * scale)))
    return cv2.resize(img, new_size, interpolation=cv2.INTER_CUBIC)

In [64]:
def preprocess_image(img_path: str) -> Image.Image:
    """Build a binarized, contrast-enhanced version of the image at ``img_path``.

    Steps: read with OpenCV, upscale 1.5x via ``safe_resize``, apply CLAHE to
    the L channel in LAB space, convert to grayscale, then apply Gaussian
    adaptive thresholding (block size 11, constant 2).

    Args:
        img_path: Path of the image to preprocess.

    Returns:
        A PIL image of the thresholded result. If any step fails, the error is
        logged and the image is opened with ``Image.open`` and returned as-is.
    """
    try:
        bgr = cv2.imread(img_path)
        if bgr is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise ValueError("Failed to read image with OpenCV")
        upscaled = safe_resize(bgr, 1.5)

        # Equalize only the lightness channel; the a/b channels are merged back unchanged.
        lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(upscaled, cv2.COLOR_BGR2LAB))
        equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
        lab_boosted = cv2.merge([equalizer.apply(lightness), chan_a, chan_b])
        bgr_boosted = cv2.cvtColor(lab_boosted, cv2.COLOR_LAB2BGR)

        binary = cv2.adaptiveThreshold(
            cv2.cvtColor(bgr_boosted, cv2.COLOR_BGR2GRAY),
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            11,
            2,
        )
        return Image.fromarray(binary)

    except Exception as err:
        # Best-effort fallback: report the failure and hand back the raw image.
        logging.error(f"Preprocessing failed: {err}")
        return Image.open(img_path)

In [65]:
def _strip_code_fences(text: str) -> str:
    """Remove one Markdown code fence (optionally tagged ``json``) wrapping ``text``."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned[3:]
        # Drop the optional language tag that follows the opening fence.
        if cleaned[:4].lower() == "json":
            cleaned = cleaned[4:]
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def extract_qa_from_image(img_path: str, out_json: str = "qa_output.json",
                          model_name: str = 'models/gemini-2.0-flash') -> Dict:
    """Send an exam-sheet image to a Gemini model and save the extracted Q&A as JSON.

    Args:
        img_path: Path of the image to send to the model.
        out_json: Path of the UTF-8 JSON file the parsed result is written to.
        model_name: Name passed to ``genai.GenerativeModel``.

    Returns:
        The parsed response: a dict whose ``"questions"`` value is a list.

    Raises:
        ValueError: If the image cannot be opened.
        RuntimeError: If the model call fails, the reply is not valid JSON, or
            the JSON lacks a ``"questions"`` list. The underlying exception is
            attached as ``__cause__``.
    """
    prompt = (
        "Extract Bengali text from this exam sheet with exact formatting. Follow:\n"
        "1. Identify question numbers (Bengali digits)\n"
        "2. Preserve original Bengali text for questions\n"
        "3. Options labeled with ক, খ, গ, ঘ\n"
        "Return STRICT JSON format:\n"
        "{\"questions\": [{\"question\": \"...\", \"options\": [\"ক. ...\", ...]}]}\n"
        "No English translations. Maintain original Bengali punctuation."
    )

    try:
        img = Image.open(img_path)
    except IOError as e:
        raise ValueError(f"Invalid image file: {e}") from e

    # `with img` guarantees the image's file handle is closed, even when the
    # model call or the response parsing fails.
    with img:
        model = genai.GenerativeModel(model_name)
        try:
            response = model.generate_content([prompt, img])
            # Only a fence wrapping the whole reply is removed, so backticks
            # inside the JSON text itself are left untouched.
            data = json.loads(_strip_code_fences(response.text))

            # The payload must be a JSON object; a bare list or string is rejected
            # explicitly instead of failing on the key lookup.
            if not isinstance(data, dict) or not isinstance(data.get("questions"), list):
                raise ValueError("Invalid response structure")

        except Exception as e:
            raise RuntimeError(f"Extraction failed: {e}") from e

    with open(out_json, 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps the Bengali text readable in the output file.
        json.dump(data, f, ensure_ascii=False, indent=2)
    return data

In [66]:
if __name__ == "__main__":
    # Driver: run the extraction on one sample sheet and report how many
    # questions came back. The parsed result is also written to `output_file`.
    image_path, output_file = "/content/1.jpg", "qa_output.json"
    result = extract_qa_from_image(img_path=image_path, out_json=output_file)
    print(f"Successfully extracted {len(result['questions'])} questions")

Successfully extracted 25 questions
