In [None]:
# ✅ No need to install these again if already installed

# 📂 Imports
import cv2
import numpy as np
import tensorflow as tf
from tensorflow import keras
from transformers import pipeline
from PIL import Image
import pdfkit
import tkinter as tk
from tkinter import filedialog

# Paths: hard-coded absolute Windows locations of the trained OCR model (input)
# and of the PDF that the export step writes (output).
model_path = r"C:\Users\sagni\Downloads\HandWriting to Digital Notes\ocr_model.h5"
pdf_output_path = r"C:\Users\sagni\Downloads\HandWriting to Digital Notes\digital_note.pdf"

# Load the OCR model once, up front; `model` is reused below for prediction.
# load_model raises if the file at model_path is missing or unreadable.
model = keras.models.load_model(model_path)
print("✅ OCR model loaded.")

# Character vocabulary. Per the original note this must stay identical to the
# one used at training time, otherwise decoded labels map to the wrong glyphs.
# Labels are 1-based (index 0 is deliberately left unassigned), and the
# characters are ordered by code point because of sorted().
all_chars = sorted(set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 .,-"))
# Forward lookup: character -> integer label (1 .. len(all_chars)).
char_to_num = dict(zip(all_chars, range(1, len(all_chars) + 1)))
# Reverse lookup: integer label -> character, derived from the forward map.
num_to_char = {label: char for char, label in char_to_num.items()}

# Decode CTC predictions
def decode_ctc(pred):
    """Greedy-decode a batch of CTC model outputs into strings.

    Args:
        pred: Model output whose first two dimensions are read as
            (batch, time steps); it is passed to ``keras.backend.ctc_decode``
            unchanged.

    Returns:
        A list with one decoded string per batch item. Labels equal to -1
        are skipped, and labels missing from ``num_to_char`` contribute
        nothing to the output.
    """
    batch_size, time_steps = pred.shape[0], pred.shape[1]
    # Every sample is decoded over the full time axis.
    seq_lengths = np.ones(batch_size) * time_steps
    results, _ = keras.backend.ctc_decode(pred, input_length=seq_lengths, greedy=True)

    texts = []
    # With greedy decoding only the first (best-path) result is used.
    for label_seq in results[0]:
        chars = []
        for label in label_seq:
            label_id = int(label)
            if label_id == -1:
                # -1 carries no character (presumably ctc_decode's filler value).
                continue
            chars.append(num_to_char.get(label_id, ""))
        texts.append("".join(chars))
    return texts

# Load and preprocess image
def preprocess_image(img_path):
    """Read an image from disk and shape it into a single-item model batch.

    The image is loaded as grayscale, resized to 128 wide by 32 high,
    scaled into the range [0, 1] and reshaped to ``(1, 32, 128, 1)``.

    Args:
        img_path: Path of the image file to load.

    Returns:
        A float array of shape ``(1, 32, 128, 1)``.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file (``cv2.imread``
            returns ``None`` instead of raising).
    """
    gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        raise FileNotFoundError(f"❌ Image not found or cannot be read: {img_path}")

    # cv2.resize takes (width, height); the resulting array is (height, width).
    resized = cv2.resize(gray, (128, 32))
    normalized = resized / 255.0
    # Add the leading batch axis and the trailing channel axis.
    return normalized.reshape(1, 32, 128, 1)

# File picker for user
def select_image():
    """Ask the user to pick a handwritten-note image via a file dialog.

    A hidden Tk root window is created only to host the dialog and is
    always destroyed afterwards, so no Tk instance lingers once the
    function returns or raises.

    Returns:
        The path of the selected image file.

    Raises:
        ValueError: If the dialog is closed without selecting a file.
    """
    root = tk.Tk()
    root.withdraw()  # Hide the main window; only the dialog should be visible
    try:
        file_path = filedialog.askopenfilename(
            title="Select Handwritten Image",
            filetypes=[("Image Files", "*.jpg *.png *.jpeg *.bmp")]
        )
    finally:
        # Release the Tk root even if the dialog itself fails.
        root.destroy()

    # askopenfilename returns an empty value when the user cancels.
    if not file_path:
        raise ValueError("❌ No image selected.")
    print(f"📂 Selected Image: {file_path}")
    return file_path

# Process the selected image: pick file -> preprocess -> predict -> decode.
# Any failure in these steps (cancelled dialog, unreadable image, model or
# decoder error) is reported and leaves predicted_text empty, which makes the
# export step below skip writing a PDF.
try:
    image_path = select_image()
    img = preprocess_image(image_path)
    preds = model.predict(img)
    # decode_ctc returns one string per batch item; the batch holds one image.
    predicted_text = decode_ctc(preds)[0]
    print("📝 Raw OCR Output:", predicted_text)
except Exception as e:
    print(f"❌ Error: {e}")
    predicted_text = ""

# Optional: summarize the OCR text, then export the result as a PDF.
# Nothing is written when OCR produced no text.
if predicted_text:
    try:
        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        # A summarization pipeline returns [{"summary_text": ...}]. Reading
        # 'generated_text' (the text-generation key) raised KeyError on every
        # run, so the summary was silently discarded by the handler below.
        cleaned_text = summarizer(predicted_text, max_length=512, min_length=50, do_sample=False)[0]['summary_text']
        print("✨ Cleaned Digital Note:", cleaned_text)
    except Exception as e:
        # Best effort: summarization is optional, so fall back to the raw text.
        print(f"⚠️ Summarizer failed ({e}). Using raw OCR text.")
        cleaned_text = predicted_text

    # Save as PDF: the note goes through a text file in the current working
    # directory, which pdfkit then converts. The file is left in place.
    with open("temp_note.txt", "w", encoding="utf-8") as f:
        f.write(cleaned_text)

    pdfkit.from_file("temp_note.txt", pdf_output_path)
    print(f"📄 Digital Note saved to PDF at: {pdf_output_path}")
else:
    print("⚠️ No text detected. Skipping PDF save.")




✅ OCR model loaded.
