In [3]:
import cv2

# Purpose: isolate the largest blob of handwriting in the sample image, save
# the crop to disk, and show the intermediate images for visual inspection.

# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable, so fail here with a clear message instead of inside cvtColor.
image = cv2.imread('data/handwritten_sample1.jpg')
if image is None:
    raise FileNotFoundError("Image not found. Check the path.")

# Binarise: grayscale -> median blur -> inverted adaptive threshold.
# THRESH_BINARY_INV turns pixels darker than their neighbourhood white (255).
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.medianBlur(gray, 5)
thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 8)

# Dilate so that nearby strokes merge into connected blobs.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
dilate = cv2.dilate(thresh, kernel, iterations=6)

# findContours returns 2 values on some OpenCV versions and 3 on others;
# the length check picks the contour list in either case.
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)

# Without this guard an image with no contours left ROI undefined and the
# imshow call below failed with a NameError.
if not cnts:
    raise ValueError("No contours found; cannot extract a region of interest.")

# The list is sorted by area (descending), so the first entry is the largest.
x, y, w, h = cv2.boundingRect(cnts[0])
ROI = image[y:y+h, x:x+w]

# cv2.imwrite signals failure through its return value rather than raising.
if not cv2.imwrite('temp/ROI.png', ROI):
    print("Warning: could not write 'temp/ROI.png' (does the 'temp' directory exist?)")

cv2.imshow('thresh', thresh)
cv2.imshow('dilate', dilate)
cv2.imshow('ROI', ROI)
try:
    cv2.waitKey()
finally:
    # Close the preview windows even if the wait is interrupted.
    cv2.destroyAllWindows()

-1

Updated code: adds Tesseract OCR on the extracted region


In [1]:
import cv2
import pytesseract

# Optional: Set tesseract path if not in PATH (for Windows users)
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Purpose: crop the largest text-like region from the image and run Tesseract
# OCR on it.

# Step 1: Load the image
# cv2.imread returns None instead of raising when the file cannot be read.
image = cv2.imread('data/intro.jpg')
if image is None:
    raise FileNotFoundError("Image not found. Check the path.")

# Step 2: Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Step 3: Preprocessing
# Median blur, then an inverted adaptive threshold so that pixels darker
# than their neighbourhood become white (255).
blur = cv2.medianBlur(gray, 5)
thresh = cv2.adaptiveThreshold(
    blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    cv2.THRESH_BINARY_INV, 11, 8
)

# Step 4: Find contours to isolate the handwritten region
# Heavy dilation merges neighbouring strokes into connected blobs.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
dilate = cv2.dilate(thresh, kernel, iterations=6)

# findContours returns 2 values on some OpenCV versions and 3 on others;
# the length check picks the contour list in either case.
contours = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
contours = sorted(contours, key=cv2.contourArea, reverse=True)

# Step 5: Crop the largest region of interest
# Guard first: with no contours ROI used to stay undefined and Step 6 failed
# with a NameError.
if not contours:
    raise ValueError("No contours found; cannot extract a region of interest.")

# Sorted by area in descending order, so index 0 is the largest contour.
x, y, w, h = cv2.boundingRect(contours[0])
ROI = image[y:y+h, x:x+w]

# Step 6: OCR on the extracted region
# Convert ROI to grayscale and threshold again (Otsu) for better OCR
roi_gray = cv2.cvtColor(ROI, cv2.COLOR_BGR2GRAY)
_, roi_thresh = cv2.threshold(roi_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# Run OCR
extracted_text = pytesseract.image_to_string(roi_thresh, lang='eng')

# Step 7: Show output
# sep="" stops print from inserting a stray space in front of the text.
print("Extracted Text:\n", extracted_text, sep="")

# Optional: Visual debug
cv2.imshow('Thresholded ROI', roi_thresh)
try:
    cv2.waitKey(0)
finally:
    # Close the preview window even if the wait is interrupted.
    cv2.destroyAllWindows()


Extracted Text:
  

 

 

My Nayve ts Saakely Wags,

 

Th is an Open CV baton .



In [2]:
import cv2
import pytesseract
import numpy as np

# Optional: Set Tesseract path on Windows
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Purpose: load the image, binarise it, and collect the external contours of
# the lightly dilated text for per-region OCR.

# Load the image
# cv2.imread returns None instead of raising when the file cannot be read.
image = cv2.imread('data/intro.jpg')
if image is None:
    raise FileNotFoundError("Image not found. Check the path.")

# Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Preprocessing
# Median blur, then an inverted adaptive threshold so that pixels darker
# than their neighbourhood become white (255).
blur = cv2.medianBlur(gray, 3)
thresh = cv2.adaptiveThreshold(
    blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    cv2.THRESH_BINARY_INV, 11, 6
)

# Dilation to connect components
# The kernel is wider (5) than tall (3), so blobs grow more horizontally
# than vertically.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
dilate = cv2.dilate(thresh, kernel, iterations=2)

# Find contours
# findContours returns 2 values on some OpenCV versions and 3 on others, so
# pick the contour list by length instead of unpacking a fixed 2-tuple. This
# also avoids rebinding `_`, which IPython uses for the last cell output.
found = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = found[0] if len(found) == 2 else found[1]

# Sort contours top-to-bottom, left-to-right
def get_contour_precedence(contour, cols, tolerance_factor=10):
    """Return a sort key that orders contours top-to-bottom, then left-to-right.

    The top edge of the contour's bounding box is snapped down to a multiple
    of ``tolerance_factor`` so that boxes whose tops fall in the same band are
    treated as one row; within a row the left edge decides the order.

    Args:
        contour: A contour accepted by ``cv2.boundingRect``.
        cols: Multiplier applied to the row band. Pass the image width so
            that a difference in row band outweighs any x offset.
        tolerance_factor: Height of a row band in pixels (default 10, the
            value that used to be hard-coded). Must be positive.

    Returns:
        ``row_band * cols + x`` for the contour's bounding box.

    Raises:
        ValueError: If ``tolerance_factor`` is not positive.
    """
    if tolerance_factor <= 0:
        raise ValueError("tolerance_factor must be a positive number of pixels")
    # boundingRect yields (x, y, w, h); only the top-left corner is needed.
    left, top = cv2.boundingRect(contour)[:2]
    row_band = (top // tolerance_factor) * tolerance_factor
    return row_band * cols + left

# Purpose: OCR every sufficiently large region in reading order, print the
# joined text, save it to output.txt, and show the regions that produced text.

# Put the regions in reading order; the image width is passed as the row
# multiplier of the sort key.
contours = sorted(contours, key=lambda ctr: get_contour_precedence(ctr, image.shape[1]))

# OCR config: --oem 1 selects the LSTM engine, --psm 6 treats each crop as a
# single uniform block of text.
custom_config = r'--oem 1 --psm 6'

# Loop-invariant settings, named once instead of being re-created per region.
min_region_size = 25  # regions narrower or shorter than this are skipped as noise
scale = 2             # upscaling factor applied to each crop before OCR

# Loop through all contours
results = []
debug_image = image.copy()

for c in contours:
    x, y, w, h = cv2.boundingRect(c)
    if w < min_region_size or h < min_region_size:
        continue  # Skip small noise

    # Crop the region and binarise it with Otsu's automatic threshold.
    roi = image[y:y+h, x:x+w]
    roi_gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    _, roi_thresh = cv2.threshold(roi_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Resize for better OCR
    roi_resized = cv2.resize(roi_thresh, (w * scale, h * scale), interpolation=cv2.INTER_LINEAR)

    # OCR
    text = pytesseract.image_to_string(roi_resized, lang='eng', config=custom_config).strip()
    if text:
        results.append(text)

        # Draw rectangle on debug image (only regions that yielded text)
        cv2.rectangle(debug_image, (x, y), (x + w, y + h), (0, 255, 0), 2)

# Print and save final extracted text
extracted_text = "\n".join(results)
# sep="" stops print from inserting a stray space in front of the text.
print("Extracted Text:\n", extracted_text, sep="")

with open("output.txt", "w", encoding="utf-8") as f:
    f.write(extracted_text)

# Optional visual debug
cv2.imshow('Detected Regions', debug_image)
try:
    cv2.waitKey(0)
finally:
    # Close the preview window even if the wait is interrupted.
    cv2.destroyAllWindows()


Extracted Text:
 —_— >
1 m

>

O

<= ,
©
ae
om
ee
ae
E65
=
44 er
OS
RET
Eee
. _
=
.
Ba
i |
es
a tee
jahea
