# Check Image Reading Workshop: Remote Deposit Capture

#### Install and load packages
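- `pytesseract` is a Python wrapper around the Tesseract optical character recognition (OCR) engine, which extracts text from an image; the Tesseract program itself must be installed separately and be on your `PATH`
- `cv2` (OpenCV) provides many image-processing functions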

In [8]:
import pytesseract
import shutil
import os
import random
import cv2
import numpy as np
try:
    from PIL import Image
except ImportError:
    import Image

#### Read the text from the raw image:

In [9]:
# Baseline: run OCR on the unprocessed check image.
# The `with` block closes the image explicitly once OCR is done instead of
# relying on implicit cleanup of the object returned by Image.open().
with Image.open('check1.png') as raw_check:
    extractedInformation = pytesseract.image_to_string(raw_check)
print(extractedInformation)

TesseractNotFoundError: tesseract is not installed or it's not in your PATH. See README file for more information.

#### Read the text from an image whose quality was improved with GIMP:
More information about GIMP [here](https://www.gimp.org/)

In [None]:
# Run OCR on the copy of the check that was cleaned up in GIMP beforehand.
# The `with` block closes the image explicitly once OCR is done instead of
# relying on implicit cleanup of the object returned by Image.open().
with Image.open('GIMPcheck.png') as gimp_check:
    extractedInformationGIMP = pytesseract.image_to_string(gimp_check)
print(extractedInformationGIMP)

#### Read the text from an image preprocessed with the `cv2` package:

In [None]:
# Preprocess the check image with OpenCV, then run OCR on the result.
# `img` is left holding the final preprocessed image so later cells can reuse it.

# read image
img = cv2.imread('check1.png')
# cv2.imread reports a missing or undecodable file by returning None rather
# than raising, so fail here with a clear message instead of inside cv2.resize.
if img is None:
    raise OSError("Could not read 'check1.png': the file is missing or is not a readable image")
# resize image: enlarge 2x in both directions using bicubic interpolation
img = cv2.resize(img, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
# change to grayscale
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# dilate, then erode, with the same kernel
# NOTE(review): a 1x1 kernel presumably leaves the image unchanged; try a
# larger kernel (e.g. 2x2 or 3x3) if this step is meant to remove noise.
kernel = np.ones((1,1), np.uint8)
img = cv2.dilate(img, kernel, iterations=1)
img = cv2.erode(img, kernel, iterations=1)
# apply thresholding: median blur (aperture 3), then binary + Otsu threshold;
# cv2.threshold returns a pair, and [1] keeps the thresholded image
img = cv2.threshold(cv2.medianBlur(img, 3), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
# convert image to text
txt = pytesseract.image_to_string(img, lang='eng')
print(txt)

#### Adjust page segmentation mode (PSM):
- Run `tesseract --help-psm` on the command line to see what each mode does

In [None]:
# Run OCR on the preprocessed image (`img` from the cv2 cell) once for each
# page segmentation mode from 6 through 13 and print every result so the
# reads can be compared side by side.
for psm in range(6, 14):
    config = '--oem 3 --psm {:d}'.format(psm)
    txt = pytesseract.image_to_string(
        img,
        lang='eng',
        config=config,
    )
    print('psm ', psm, ':', txt)

### Try it yourself with a handwritten check!
- Write a check by hand
- Scan it or take a picture of it
- Make sure the image is saved as a PNG file
- Upload the file to the Google Colab file browser
- Copy the code above and try adjusting the preprocessing options to get a better read


In [None]:
#using example handwritten check
# Note: this reuses (and overwrites) the `extractedInformation` variable.
# The `with` block closes the image explicitly once OCR is done instead of
# relying on implicit cleanup of the object returned by Image.open().
with Image.open('check2.png') as handwritten_check:
    extractedInformation = pytesseract.image_to_string(handwritten_check)
print(extractedInformation)