# 1.  Import Dependencies

In [None]:
!pip install opencv-python

In [15]:
# Import opencv, os and time
import cv2
import os
import time

# 2. Define Document Properties

In [16]:
# Give info to start collecting images from document.
# doc_name becomes a folder name and the prefix of every saved image file, so surrounding
# whitespace is dropped and an empty name is rejected.
doc_name = input("Document name: ").strip()
if not doc_name:
    raise ValueError("Document name must not be empty.")

# start_page stays a string (it is converted with int() when collection starts), but it is
# validated here so that a typo fails right away instead of in the capture cell.
start_page = input("Starting page: ").strip()
try:
    int(start_page)
except ValueError:
    raise ValueError(f"Starting page must be a whole number, got {start_page!r}.") from None

Document name: PoemadeChile
Starting page: 122


# 3. Setup Folders

In [17]:
# Root folder, relative to the working directory, below which each document gets its own
# sub-folder for the images collected from it.
workspace_dir = "workspace"
documents_subdir = "documents_to_translate"
document_path = os.path.join(workspace_dir, documents_subdir)

In [18]:
# Setup folder structure for chosen document:
#   <document_path>/<doc_name>/collection   - page images written by the capture cell
#   <document_path>/<doc_name>/translation  - presumably for translated output (not used in the cells shown here)
path = os.path.join(document_path, doc_name)
path_collection = os.path.join(path, "collection")
path_translation = os.path.join(path, "translation")

# os.makedirs also creates the missing parent folders (document_path and path), and
# exist_ok=True makes re-running this cell harmless. Unlike a shell mkdir it behaves the
# same on every operating system and copes with names that contain spaces.
for folder in (path_collection, path_translation):
    os.makedirs(folder, exist_ok=True)

# 4. Collect Images from Document

In [19]:
# Address for IP Webcam, in order to use phone as webcam for higher resolution images
address = "http://192.168.178.32:8080/video"

# Size (width, height) each frame is scaled to so the preview fits on a laptop screen.
# The scaled frame is also the one written to disk, so raise this for higher quality pages.
frame_size = (540, 960)

# Set start page for correct page numbering
current_page = int(start_page)

# Open the stream ONCE. Creating/re-opening the capture on every loop iteration is very
# slow and leaves the previously opened handles unreleased.
cap = cv2.VideoCapture(address)
if not cap.isOpened():
    cap.release()
    raise RuntimeError(f"Could not open video stream at {address}")

print("Press \"p\" to collect page, press \"q\" when ready...")
print(f"Collecting page {current_page}...")

try:
    # While loop for webcam streaming and image collection
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (stream dropped or ended): stop cleanly instead of
            # letting cv2.resize fail on an empty frame.
            print("No frame received from the camera, stopping.")
            break

        frame = cv2.resize(frame, frame_size)
        cv2.imshow('Webcam feed', frame)

        # Give options for keystrokes - "p" for printing/collecting current frame, "q" to quit and break off loop
        k = cv2.waitKey(1) & 0xFF
        if k == ord("p"):
            # Zero-padded page number keeps the saved files in page order when sorted by name
            pagenum = str(current_page).zfill(5)
            imgname = os.path.join(path_collection, f"{doc_name}_{pagenum}.jpg")
            # cv2.imwrite reports failure through its return value, so only count the
            # page as collected when the file was really written.
            if cv2.imwrite(imgname, frame):
                print(f"Page {current_page} saved!")
                current_page += 1
            else:
                print(f"Could not save page {current_page} to {imgname}, try again.")
            # Print which page to collect next
            print(f"Collecting page {current_page}...")
        elif k == ord("q"):
            break
finally:
    # Always free the camera and close the preview window, even if an error occurred
    cap.release()
    cv2.destroyAllWindows()

print("Finished collecting images.")

Press "p" to collect page, press "q" when ready...
Collecting page 122...
Page 122 saved!
Collecting page 123...
Page 123 saved!
Collecting page 124...
Finished collecting images.


# 5. Use Tesseract for OCR

Note: Tesseract must be installed for this section to work. An installer for Windows (the platform used here) is available at: https://github.com/UB-Mannheim/tesseract/wiki

In [20]:
!pip install pytesseract



In [21]:
# Import pytesseract
import pytesseract

In [22]:
# Location of the Tesseract executable on the Windows machine this notebook was written on
tesseract_path = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# Only override pytesseract's command when that file really exists; otherwise pytesseract's
# own default stays in effect, so the notebook also works where Tesseract is found on PATH.
if os.path.isfile(tesseract_path):
    pytesseract.pytesseract.tesseract_cmd = tesseract_path

In [26]:
# Show every collected page, one at a time; press any key in the image window to continue.
# sorted() gives a stable order (os.listdir returns entries in arbitrary order), so the
# image left in `page` after the loop is always the last file by name.
try:
    for pg in sorted(os.listdir(path_collection)):
        pagepath = os.path.join(path_collection, pg)
        loaded = cv2.imread(pagepath)
        if loaded is None:
            # cv2.imread returns None for files it cannot decode; cv2.imshow would fail on that
            print(f"Skipping {pg}: not a readable image.")
            continue
        # `page` keeps the most recently shown image once the loop has finished
        page = loaded
        cv2.imshow(pg, page)
        cv2.waitKey(0)
        # Close the window of this page before the next one is opened
        cv2.destroyAllWindows()
finally:
    # Make sure no window is left behind if something goes wrong while displaying
    cv2.destroyAllWindows()

In [27]:
# Run Tesseract OCR on `page` and keep the recognised text in `text`.
# NOTE(review): `page` is whatever image the display loop in the previous cell loaded last,
# so only that single page is recognised — confirm this is intended.
# NOTE(review): no `lang` argument is passed; the recorded output for this Spanish text has
# lost its accents and "ñ" (e.g. "mafianas") — consider lang="spa" if that language data is installed.
text = pytesseract.image_to_string(page)

In [28]:
# Display the OCR result; as a bare expression it is shown in repr form, with line breaks as \n
text

'M\nONTE Aconcagua\n\nYo he visto, yo he visto\nmi monte Aconcagua.\nMe dura en los ojos\n\nla blanca Ilamarada\n\ny como ya lo vi\n\nla muerte no me mata.\n\nLe crecen los espinos\n\nen la primer jornada,\n\nlo abrazan bien los boldos\nde reveses de plata,\n\na mas y mas que sube\n\nel pecho se le aclara\narrebatado Elias,\n\njElohim Aconcagua}\n\nManda la noche grande,\nsuelta las mafianas,\n\nse esconde en nubes,\n\nse borra, se acaba\n\ny sigue pastoreando\n\ndetras de la nubada\n\namor dulce y tremen\nMonte Aconcagua:\n\ndo,\n\n122\n\x0c'