#Cell 1: Set up the API and imports

!pip install -q -U google-generativeai

In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output, Markdown
import os
import google.generativeai as genai
from PIL import Image
import traceback
import cv2
import numpy as np

# Prefer the GOOGLE_API_KEY environment variable so a real key never has to be
# saved inside the notebook; fall back to the original placeholder when the
# variable is unset or empty.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY") or "your api key"

try:
    genai.configure(api_key=GOOGLE_API_KEY)
    # `model` is the module-level handle the OCR cell uses to call Gemini.
    model = genai.GenerativeModel('gemini-2.0-flash')
    print("Setup Complete.")
except Exception as e:
    # Notebook-level boundary: report the failure instead of aborting the cell.
    # `model` is not assigned by this cell when setup fails.
    print(f"Error: {e}")

Setup Complete.


#Cell 2: Function that searches for image files

In [49]:
def get_all_image_paths():
    """Return a sorted list of image files found under the current directory.

    The tree rooted at "." is walked recursively. A file is included when its
    name ends (case-insensitively) with .png, .jpg, .jpeg, .tiff or .bmp.
    Directories whose name contains ".ipynb_checkpoints" are not searched.

    Returns:
        Sorted list of paths relative to the current directory, without a
        leading "./" (or ".\\") prefix.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.tiff', '.bmp')
    files_found = []
    for root, dirs, files in os.walk("."):
        # Prune checkpoint folders in place so os.walk never descends into them.
        dirs[:] = [d for d in dirs if '.ipynb_checkpoints' not in d]
        # normpath removes only the leading "./"; a plain str.replace("./", "")
        # would also eat the separator after a directory whose name ends in "."
        # and produce a path that does not exist.
        files_found.extend(
            os.path.normpath(os.path.join(root, file))
            for file in files
            if file.lower().endswith(image_extensions)
        )
    return sorted(files_found)


# Snapshot of the available images; the UI cell filters and refreshes this list.
all_files = get_all_image_paths()

#Cell 3: Processing function (send image to Gemini)

In [50]:
def _cv_to_rgb_pil(cv_array):
    """Convert an OpenCV pixel array (gray, BGR or BGRA) to an 8-bit RGB PIL image."""
    if cv_array.dtype == np.uint16:
        # Scale 16-bit samples down to 8 bits (65535 // 257 == 255).
        cv_array = (cv_array // 257).astype(np.uint8)
    if cv_array.ndim == 2:
        conversion = cv2.COLOR_GRAY2RGB
    elif cv_array.shape[2] == 4:
        conversion = cv2.COLOR_BGRA2RGB
    else:
        conversion = cv2.COLOR_BGR2RGB
    return Image.fromarray(cv2.cvtColor(cv_array, conversion))


def manual_crop_image(image_path):
    """Let the user draw a rectangle on an image and return the cropped region.

    The file is read as raw bytes and decoded with ``cv2.imdecode``
    (presumably so paths that ``cv2.imread`` cannot open, such as non-ASCII
    file names, still work -- confirm). Images taller than 800 px are scaled
    down for the selection window only; the crop itself is taken from the
    full-resolution pixels.

    Args:
        image_path: Path of the image file to open.

    Returns:
        ``(image, cropped)``. ``image`` is a PIL image. ``cropped`` is True
        when a region was selected and cut out (the image is then RGB), and
        False when the unmodified file is returned instead: the image could
        not be decoded, the selection was cancelled or empty, or an error
        occurred.
    """
    try:
        # The context manager closes the file even if reading raises.
        with open(image_path, "rb") as stream:
            raw = stream.read()
        # IMREAD_UNCHANGED keeps the file's own channel layout, so the result
        # may be grayscale, BGR or BGRA.
        cv_img = cv2.imdecode(np.frombuffer(raw, dtype=np.uint8), cv2.IMREAD_UNCHANGED)

        if cv_img is None:
            return Image.open(image_path), False

        height, width = cv_img.shape[:2]
        max_height = 800
        scale_ratio = 1.0
        display_img = cv_img.copy()

        if height > max_height:
            scale_ratio = max_height / height
            # Clamp to at least 1 px so extremely narrow images can be resized.
            new_width = max(1, int(width * scale_ratio))
            new_height = max(1, int(height * scale_ratio))
            display_img = cv2.resize(cv_img, (new_width, new_height))

        rect = cv2.selectROI("Select Area (Enter to Confirm)", display_img, showCrosshair=True, fromCenter=False)
        cv2.destroyAllWindows()

        # Map the rectangle from window coordinates back to the original image.
        x, y, w, h = (int(value / scale_ratio) for value in rect)
        x = max(0, x)
        y = max(0, y)
        w = min(w, width - x)
        h = min(h, height - y)

        # A cancelled selection is (0, 0, 0, 0); a zero-width or zero-height
        # rectangle is equally unusable.
        if w <= 0 or h <= 0:
            return Image.open(image_path), False

        cropped = cv_img[y:y + h, x:x + w]
        return _cv_to_rgb_pil(cropped), True

    except Exception as exc:
        # Best effort: never let a GUI or decode problem stop the OCR run.
        # Close any selection window left open, then fall back to the full image.
        try:
            cv2.destroyAllWindows()
        except Exception:
            pass
        print(f"Manual crop failed, using the full image: {exc}")
        return Image.open(image_path), False

#cell4(Process Logic)

In [None]:
def process_images_with_gemini(file_names_str, enable_manual_crop=False, custom_topic=""):
    """Run OCR on each image with Gemini, show the results and save them to a file.

    For every file the image is opened (optionally cropped by hand first),
    previewed at 600 px width, and sent with the prompt to the module-level
    ``model``. The returned text is rendered as Markdown. All results are
    written to ``gemini_ocr_output.txt``, which is overwritten on each call
    that produces at least one result.

    Args:
        file_names_str: File names as one string separated by "," or ";", or
            a list/tuple of names (use this for names that themselves contain
            "," or ";"). ``None`` is treated as no files.
        enable_manual_crop: When True, ``manual_crop_image`` is used to let
            the user select the region that is sent.
        custom_topic: Optional extra instruction appended to the OCR prompt.

    Returns:
        None.
    """
    clear_output(wait=True)
    if file_names_str is None:
        candidates = []
    elif isinstance(file_names_str, str):
        candidates = file_names_str.replace(';', ',').split(',')
    else:
        candidates = list(file_names_str)
    file_names = [name.strip() for name in candidates if name.strip()]
    sections = []
    output_file = "gemini_ocr_output.txt"


    base_prompt = """
    Act as an OCR expert. Read all text in this image and convert it to text.
    Rules:
    1. If there are multiple columns, read from left to right.
    2. Preserve formatting (headings, paragraphs) using Markdown.
    3. Return only the text content.
    """

    if custom_topic and custom_topic.strip():
        final_prompt = f"{base_prompt}\n\nIMPORTANT - Focus on this topic/instruction: {custom_topic}"
        print(f"🎯 Focus Topic: {custom_topic}")
    else:
        final_prompt = base_prompt

    for file_name in file_names:
        print(f"Processing: {file_name}")

        if not os.path.exists(file_name):
            # Tell the user instead of skipping the file without a trace.
            print(f"File not found, skipped: {file_name}")
            continue

        try:
            if enable_manual_crop:
                img, _ = manual_crop_image(file_name)
                # Presumably lets the OpenCV GUI process the pending
                # window-close event before continuing -- confirm.
                cv2.waitKey(1)
            else:
                img = Image.open(file_name)

            # Show the image, scaled to a fixed preview width
            display_width = 600
            w_percent = display_width / float(img.size[0])
            # Keep the preview at least 1 px tall for extremely wide images.
            h_size = max(1, int(float(img.size[1]) * w_percent))
            display(img.resize((display_width, h_size)))

            print("Sending to Gemini...")
            response = model.generate_content([final_prompt, img])
            result_text = response.text

            print("\nResult:")
            display(Markdown(result_text))
            sections.append(f"\n=== File: {file_name} ===\nInstructions: {custom_topic}\n{result_text}\n")

        except Exception as e:
            # Per-file boundary: report the problem and continue with the next file.
            print(f"Error: {e}")
            traceback.print_exc()

    merged_text = "".join(sections)
    if merged_text:
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(merged_text)
        print(f"Saved to {output_file}")

#Cell 5: (UI Dashboard)

In [None]:
# --- Widgets ---------------------------------------------------------------
# User-facing Thai text and emoji are stored as real UTF-8 characters so the
# dashboard renders them correctly.

# Free-text filter for the file list.
search_box = widgets.Text(
    value='', placeholder='ค้นหาไฟล์...', description='🔍 Search:', layout=widgets.Layout(width='98%')
)

# Multi-select list of the images found by get_all_image_paths().
file_selector = widgets.SelectMultiple(
    options=all_files, value=[], rows=10, description='📂 Select:', layout=widgets.Layout(width='98%')
)

refresh_btn = widgets.Button(
    description='Refresh File List', button_style='warning', icon='refresh', layout=widgets.Layout(width='98%')
)


# Optional instruction that is appended to the OCR prompt.
topic_input = widgets.Text(
    value='',
    placeholder='(ไม่บังคับ) พิมพ์สิ่งที่ต้องการให้อ่าน เช่น "ยอดรวม", "ชื่อนามสกุล", "สรุป"',
    description='📝 Topic:',
    layout=widgets.Layout(width='98%')
)

manual_crop_checkbox = widgets.Checkbox(
    value=True, description='✂️ Manual Crop (เลือกพื้นที่เอง)', indent=False
)

run_btn = widgets.Button(
    description='▶ Run OCR', button_style='success', icon='play', layout=widgets.Layout(width='98%')
)

# Output area that receives everything printed or displayed during a run.
out = widgets.Output()


# --- Event handlers --------------------------------------------------------

def _apply_search_filter(search_term):
    """Show only the files whose path contains search_term (case-insensitive)."""
    needle = search_term.lower()
    file_selector.options = [f for f in all_files if needle in f.lower()]


def on_search_change(change):
    """Re-filter the file list whenever the search box value changes."""
    _apply_search_filter(change['new'])


def on_refresh_click(b):
    """Rescan the directory tree and update the file list."""
    global all_files
    # Calls the function from Cell 2 (Cell 2 must have been run first)
    all_files = get_all_image_paths()
    # Keep the list consistent with whatever is typed in the search box;
    # an empty search term matches every file.
    _apply_search_filter(search_box.value)


def on_run_click_gemini(b):
    """Run OCR on the selected files and show the output in the `out` widget."""
    with out:
        clear_output(wait=True)
        selected_files = file_selector.value
        if not selected_files:
            print("❌ กรุณาเลือกไฟล์ก่อนครับ")
            return
        files_str = ",".join(selected_files)

        # Pass the Topic and Crop settings on for processing
        process_images_with_gemini(
            files_str,
            enable_manual_crop=manual_crop_checkbox.value,
            custom_topic=topic_input.value
        )


search_box.observe(on_search_change, names='value')
refresh_btn.on_click(on_refresh_click)
run_btn.on_click(on_run_click_gemini)

# --- Layout ----------------------------------------------------------------
ui_layout = widgets.VBox([
    widgets.HBox([refresh_btn]),
    search_box,
    file_selector,
    widgets.HTML("<hr>"),
    topic_input,
    manual_crop_checkbox,
    widgets.HTML("<br>"),
    run_btn,
    out
])

display(ui_layout)

