In [1]:
import os
import shutil

import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytesseract
# Prefer whichever tesseract binary is on PATH so the notebook is not tied to
# one machine; fall back to the original Homebrew location when none is found.
pytesseract.pytesseract.tesseract_cmd = shutil.which('tesseract') or '/opt/homebrew/bin/tesseract'

## 01/04 Text Recognition
**Summary of Previous Work Done**     
    
Edits to `notebooks/2022-1221_NT_Image-Preprocessing.ipynb` were corrupted (edge detection and failed scalebar recognition). Tight-to-"mid" Canny threshold ranges gave decent detection on well-defined images (Test 2).  
     
[**Watershed segmentation (ref)**](https://docs.opencv.org/4.x/d3/db4/tutorial_py_watershed.html)    
proved unhelpful (as expected): the distance transform's separation between foreground and background ignores the heavy occlusion of particles.     

[**OCR Tesseract Text Extraction (ref)**](https://www.geeksforgeeks.org/text-detection-and-extraction-using-opencv-and-ocr/ )   
Worked well on left hand text, extracted lot number and magnification accurately. Could not identify the 50um scaling on the lower right hand side, despite cropping input image to highlight the region. Kernel sizes [(10,10), (18, 18), (20, 20)] performed similarly, with text extracts still saved in notebook folder.     
   
Bottom right cropping `image[750:, :750]`    

### Thoughts
1. Generalizable function to identify text and scalebar
2. CANNY Edge Detection > Circle Recognition for Demo
3. Found similar existing work (morphology-independent SEM segmentation) published as two students' theses.  
i.  Lab Notebook: Scalebar recognition for Histology [no github, post](https://h-ra.github.io/26-AreaScaleBar/)  
ii. Thermal spray (occluded circles) segmentation [paper](https://onlinelibrary.wiley.com/doi/pdf/10.1002/qre.2689)    
iii. Lost thesis...

### Image Loading

How to generalize root to be parent of git repo?

In [17]:
def _find_repo_root(start):
    """Return the nearest ancestor of `start` that contains a `.git` entry, or None."""
    current = os.path.abspath(start)
    while True:
        if os.path.exists(os.path.join(current, '.git')):
            return current
        parent = os.path.dirname(current)
        if parent == current:  # reached the filesystem root without finding a repo
            return None
        current = parent


# Resolve the data folder relative to the enclosing git repo when possible.
# NOTE(review): assumes the images live under <repo>/data/source - confirm layout.
# If that folder is not found, fall back to the original absolute path.
_repo_root = _find_repo_root(os.getcwd())
_repo_data = os.path.join(_repo_root, 'data', 'source') if _repo_root else None
if _repo_data and os.path.isdir(_repo_data):
    data_root = _repo_data
else:
    data_root = "/Users/nicolelrtin/vision-test/data/source"
internal = data_root + '/internal/'

# Load every readable image in the folder (no 10-image cap is applied).
# Sorting makes positional indexing such as image[1] reproducible across runs
# and machines, since os.listdir returns entries in arbitrary order.
file_names = []
image = []
for name in sorted(os.listdir(internal)):
    loaded = cv.imread(internal + name)
    if loaded is None:
        # cv.imread signals an unreadable/non-image file (e.g. .DS_Store) with
        # None instead of raising; skip it so later cells never index a None.
        continue
    file_names.append(name)
    image.append(loaded)

### Helper Functions
Image Display/Saving

In [3]:
# Preview an image in an OpenCV window; press 's' to also save it as a PNG.
def preview(img, savename = 'preview_image'):
    """Show `img` in a window and block until a key is pressed.

    Parameters
    ----------
    img
        Image passed straight to ``cv.imshow`` / ``cv.imwrite``.
    savename : str
        Window title, and the file stem used when saving.

    Pressing ``s`` writes the image to ``savename + '.png'``; any other key
    only dismisses the window. Returns None.
    """
    cv.imshow(savename, img)
    try:
        # Keep only the low byte of the key code so the comparison with
        # ord('s') still holds if the backend sets modifier/high bits.
        k = cv.waitKey(0) & 0xFF

        if k == ord('s'):
            cv.imwrite(savename + '.png', img)
    finally:
        # Close the window once a key was handled; without this the window
        # stays open (and unresponsive) for the rest of the kernel session.
        cv.destroyWindow(savename)
        cv.waitKey(1)  # give the GUI event loop a tick to process the close

## Text Recognition+Extraction

01/04: OCR recognizes the left-side text on the plain image. Scale recognition requires cropping the image before the text can be read/extracted.

In [26]:
# -- Finding Optimal Edge Detection
# Threshold pairs compared: wide (10, 200), mid (30, 150), tight (50, 150).
# Only the mid pair is computed here; the other two are kept for reference.
mid = cv.Canny(image[1], threshold1=30, threshold2=150)

def crop_to_text(img, top = 750):
    """Return the bottom strip of `img`: every row from `top` down, full width.

    Parameters
    ----------
    img
        2-D (or higher) array indexed as ``[row, column, ...]``.
    top : int
        First row to keep. Defaults to 750, the offset previously hard-coded
        here, so existing calls behave exactly as before.

    Returns
    -------
    The slice ``img[top:, :]``. If `img` has `top` rows or fewer the result
    has zero rows.
    """
    return img[top:, :]

def dilate(img, kernel_size = (5, 5), iterations = 1):
    """Dilate `img` with a rectangular all-ones kernel.

    Parameters
    ----------
    img
        Image to dilate; passed to ``cv.dilate`` unchanged.
    kernel_size : tuple of int
        Shape of the all-ones uint8 structuring element. Defaults to (5, 5).
    iterations : int
        Number of times the dilation is applied. Defaults to 1.

    Returns
    -------
    Whatever ``cv.dilate`` returns for the given input.
    """
    kernel = np.ones(kernel_size, np.uint8)
    # Operate on the argument: the previous version passed the notebook-level
    # `image` list here, so the function ignored its input entirely.
    return cv.dilate(img, kernel, iterations=iterations)

def canny(img, thresh_a = 30, thresh_b = 150):
    """Thin wrapper around ``cv.Canny`` with this notebook's default thresholds.

    `thresh_a` and `thresh_b` are forwarded, in that order, as the two
    threshold arguments of ``cv.Canny``; the edge map it produces is returned.
    """
    edge_map = cv.Canny(img, thresh_a, thresh_b)
    return edge_map

def box_image_text(image, name = 'bounded_text'):
    """Outline every character Tesseract detects and show the result in a window.

    Parameters
    ----------
    image
        NumPy image array, either colour ``(h, w, c)`` or single-channel
        ``(h, w)``. It is copied first, so the caller's array is not drawn on.
    name : str
        Title of the OpenCV window.

    Blocks until a key is pressed, then closes the window. Returns None.
    """
    img = image.copy()
    # Only the height is needed; indexing shape[0] also works for single-channel
    # images (e.g. Canny output), which have no channel axis to unpack.
    h = img.shape[0]
    boxes = pytesseract.image_to_boxes(img)

    if img.ndim == 2:
        # Promote to 3 channels so the green outline is actually visible.
        img = cv.cvtColor(img, cv.COLOR_GRAY2BGR)

    for line in boxes.splitlines():
        fields = line.split(' ')
        if len(fields) < 5:
            continue  # blank or truncated line: nothing to draw
        try:
            left, bottom, right, top = (int(value) for value in fields[1:5])
        except ValueError:
            continue  # non-numeric coordinates: skip rather than abort the preview
        # The y values are subtracted from the image height to flip them into
        # OpenCV's top-left-origin row coordinates.
        img = cv.rectangle(img, (left, h - bottom),
              (right, h - top), (0, 255, 0), 2)

    cv.imshow(name, img)
    try:
        cv.waitKey(0)
    finally:
        # Close the window after the key press so it does not linger, frozen,
        # for the rest of the kernel session.
        cv.destroyWindow(name)
        cv.waitKey(1)  # give the GUI event loop a tick to process the close

In [None]:
base_image = image[1]
# Use the shared helper so the crop offset is defined in exactly one place.
cropped_img = crop_to_text(base_image)

# miscellaneous preprocessing: smooth -> edge map -> dilate the edges
blur_img = cv.GaussianBlur(cropped_img, (3,3), 0)
canny_img = canny(blur_img)
dilated_img = dilate(canny_img)

In [5]:
# Outline the detected characters on the uncropped image, then let the cell
# display the recognised text as its output.
box_image_text(base_image, name='box_base_img')
pytesseract.image_to_string(base_image)

'GS-3972 Lot 10775-47\n\n'

In [30]:
# Keep only rows from 900 down and columns from 950 rightwards, then run the
# same box preview + OCR on that sub-image.
cropped = base_image[900:, 950:]
box_image_text(cropped, name='cropped')
pytesseract.image_to_string(cropped)

'50.0um\n'