### Reading Children's Handwriting

This Jupyter notebook explores methods for accurately reading children's handwriting while preserving all errors without any autocorrection. The goal is to identify and analyze mistakes in handwriting to support learning and improvement.

In [None]:
!pip install -r requirements.txt

In [None]:
import cv2
import numpy as np
from PIL import Image#
import base64

The `preprocess_image()` function converts the source image to grayscale, reduces noise, enhances contrast, and binarizes it to black text on a white background, then saves the result as `preprocessed_image.png`. This step prepares the image for further analysis.

In [None]:
def preprocess_image(image_path):
    """
    Clean up a handwriting photo and save a binarized copy for OCR.

    Pipeline: grayscale -> bilateral filter -> CLAHE contrast enhancement ->
    inverted adaptive threshold -> morphological open and close -> invert
    back to black text on white -> 3x3 median blur.

    Args:
        image_path: Path of the source image to read.

    Returns:
        The path of the written image,
        "files/preprocessed/preprocessed_image.png". A side-by-side debug
        strip (grayscale | enhanced | final) is also written to
        "files/preprocessed/debug_preprocessing.png".

    Raises:
        FileNotFoundError: If image_path is not an existing file.
        ValueError: If OpenCV cannot decode the file as an image.
        OSError: If the processed image cannot be written.
    """
    import os  # local import so this cell does not depend on another cell

    # cv2.imread() signals failure by returning None instead of raising, so
    # validate up front to get a clear error rather than a cvtColor failure.
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Source image not found: {image_path}")

    # Read image using opencv
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Could not decode image: {image_path}")

    # Convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Apply bilateral filter to reduce noise while preserving edges
    smoothed = cv2.bilateralFilter(gray, d=9, sigmaColor=75, sigmaSpace=75)

    # Enhance contrast
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(smoothed)

    # Apply adaptive thresholding (inverted: text becomes white on black so
    # the morphological operations below act on the strokes)
    binary = cv2.adaptiveThreshold(
        enhanced,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        23,  # block size
        8,   # C constant subtracted from the mean or weighted sum
    )

    # Remove small noise using morphological operations
    kernel = np.ones((3, 3), np.uint8)
    opened = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)

    # Clean up isolated pixels
    kernel_clean = np.ones((2, 2), np.uint8)
    cleaned = cv2.morphologyEx(opened, cv2.MORPH_CLOSE, kernel_clean)

    # Invert back to black text on white background
    final = cv2.bitwise_not(cleaned)

    # One final pass of median blur to clean up any remaining specks
    final = cv2.medianBlur(final, 3)

    # cv2.imwrite() returns False (it does not raise) when the target
    # directory is missing, so create it and check the result.
    os.makedirs("files/preprocessed", exist_ok=True)
    output_path = "files/preprocessed/preprocessed_image.png"
    if not cv2.imwrite(output_path, final):
        raise OSError(f"Could not write preprocessed image to {output_path}")

    # Save debug image (best effort; a failure here does not affect OCR)
    debug_images = np.hstack([gray, enhanced, final])
    cv2.imwrite("files/preprocessed/debug_preprocessing.png", debug_images)

    return output_path

In [None]:
def encode_image_to_base64(image_path):
    """
    Read a file in binary mode and return its contents as a base64 text string.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()

    # b64encode yields bytes; decode so the result can be embedded in a URL/JSON.
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

## Optical Character Recognition - OCR

In [None]:
# Source image handed to the preprocessing and OCR calls in this notebook.
# To try another sample, comment out the active line and uncomment one below.
image_path = "files/source/text.jpg"
# image_path = "files/source/text2.jpg"
# image_path = "files/source/text3.jpg"

Running `preprocess_image` will create a cleaned-up black-and-white image and store it in `files/preprocessed/`.

In [None]:
# Run the preprocessing on its own so the written images can be inspected on disk.
preprocess_image(image_path)

### OCR using OpenAI

https://platform.openai.com/docs/guides/vision

In [None]:
!pip install openai

In [None]:
from openai import OpenAI

def perform_ocr_openai(image_path, model="gpt-4o"):
    """
    Transcribe handwriting with an OpenAI vision-capable chat model.

    The image is run through preprocess_image(), base64-encoded and sent as a
    PNG data URL together with a prompt asking for a verbatim transcription
    (spelling mistakes kept, unclear letters replaced by *).

    Args:
        image_path: Path of the source image.
        model: Name of the chat model to query. Defaults to "gpt-4o".

    Returns:
        The stripped transcription; an empty string when the response carries
        no text content; or "An error occurred: <message>" if any step fails.
        NOTE: failures are reported through the return value, not raised, so
        callers cannot distinguish an error message from transcribed text
        without inspecting the string.
    """
    prompt = "Transcribe the text from this image exactly as it appears, preserving all spelling mistakes. Return only the text without modifications, explanations, or formatting. If any letters are unclear, replace them with *."

    try:
        # Preprocess the image
        preprocessed_image = preprocess_image(image_path)

        # Encode the preprocessed image
        base64_image = encode_image_to_base64(preprocessed_image)

        # Initialize OpenAI client
        client = OpenAI()  # Make sure OPENAI_API_KEY is set in your environment

        # Create the API request
        response = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{base64_image}"
                            }
                        }
                    ]
                }
            ],
            max_tokens=1000,
            # temperature 0 keeps the transcription as repeatable as possible
            temperature=0
        )

        # Extract the text from the response. The content may be None (for
        # example when the model returns no text), so fall back to "".
        text = response.choices[0].message.content

        return (text or "").strip()

    except Exception as e:
        return f"An error occurred: {str(e)}"


In [None]:
# Transcribe the selected image with the OpenAI backend and show the result.
extracted_text = perform_ocr_openai(image_path)

print("Extracted Text:", "--------------", extracted_text, sep="\n")

### OCR using pytesseract

https://pypi.org/project/pytesseract/

In [None]:
!pip install pytesseract

In [None]:
import pytesseract

def perform_ocr_pytesseract(image_path):
    """
    Perform OCR on the preprocessed image with a custom Tesseract configuration.

    The image is run through preprocess_image() and recognized with the Czech
    ('ces') language data, restricted to a character whitelist.

    Args:
        image_path: Path of the source image.

    Returns:
        The stripped recognized text, or "An error occurred: <message>" if
        preprocessing, loading or recognition fails.
    """
    # Characters Tesseract is allowed to emit. The recognition language is
    # Czech, so the Czech diacritics must be listed as well; with an
    # ASCII-only whitelist they could never appear in the output, which
    # defeats the goal of reading the child's text exactly as written.
    whitelist = (
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
        "ÁČĎÉĚÍŇÓŘŠŤÚŮÝŽáčďéěíňóřšťúůýž"
        "0123456789.,!? "
    )

    # Configure tesseract parameters
    custom_config = f'--oem 3 --psm 6 -c tessedit_char_whitelist="{whitelist}"'

    try:
        # Preprocess inside the try block so every failure is reported the
        # same way (as an error string), matching perform_ocr_openai().
        preprocessed_image = preprocess_image(image_path)

        # Load the preprocessed image; the context manager closes the file
        # handle once recognition is done.
        with Image.open(preprocessed_image) as img:
            # Extract text from image
            text = pytesseract.image_to_string(
                img,
                lang='ces',
                config=custom_config
            )

        return text.strip()

    except Exception as e:
        return f"An error occurred: {str(e)}"

In [None]:
# Transcribe the selected image with the pytesseract backend and show the result.
extracted_text = perform_ocr_pytesseract(image_path)

print("Extracted Text:", "--------------", extracted_text, sep="\n")

### OCR using EasyOCR

https://github.com/JaidedAI/EasyOCR

In [None]:
!pip install easyocr

In [None]:
import easyocr
def perform_ocr_easyocr(image_path):
    """Recognize text in the image with EasyOCR and return it as one string.

    The image at image_path is passed to EasyOCR directly (it is not run
    through preprocess_image). Recognized fragments are joined with single
    spaces.
    """
    languages = ['en', 'cs']  # English and Czech
    ocr_reader = easyocr.Reader(languages, gpu=True)

    # detail=0 makes readtext return only the recognized strings
    fragments = ocr_reader.readtext(image_path, detail=0)
    return " ".join(fragments)


In [None]:
# Transcribe the selected image with the EasyOCR backend and show the result.
extracted_text = perform_ocr_easyocr(image_path)

print("Extracted Text:", "--------------", extracted_text, sep="\n")

### OCR using PaddleOCR

https://paddlepaddle.github.io/PaddleOCR/main/en/index.html

In [None]:
!pip install paddleocr
!pip install paddlepaddle-gpu

# No GPU
# !pip install paddlepaddle

In [None]:
from paddleocr import PaddleOCR

def perform_ocr_paddleocr(image_path):
    """Perform OCR using PaddleOCR.

    The image at image_path is passed to PaddleOCR directly (it is not run
    through preprocess_image).

    Args:
        image_path: Path of the image to recognize.

    Returns:
        The recognized text fragments joined with single spaces, or an empty
        string when nothing was recognized.
    """
    ocr = PaddleOCR(lang='cs')  # Use 'cs' for Czech
    result = ocr.ocr(image_path, cls=True)

    # The result holds one entry per image, each a list of
    # (box, (text, confidence)) items. The result, or an entry in it, can be
    # None when no text is detected (NOTE(review): confirm for the installed
    # PaddleOCR version), so skip empty entries instead of iterating over None.
    recognized = [
        word_info[1][0]
        for line in (result or [])
        if line
        for word_info in line
    ]
    return " ".join(recognized)

In [None]:
# Transcribe the selected image with the PaddleOCR backend and show the result.
extracted_text = perform_ocr_paddleocr(image_path)

print("Extracted Text:", "--------------", extracted_text, sep="\n")