# 1.  Import Dependencies

In [None]:
!pip install opencv-python

In [1]:
# Import opencv, os and time
import cv2
import os
import time

# 2. Define Document Properties

In [2]:
# Give info to start collecting images from document
doc_name = input("Document name: ")
start_page = input("Starting page: ")

Document name: PoemadeChile
Starting page: 9


# 3. Setup Folders

In [3]:
# Set path to save images from document
document_path = os.path.join("workspace", "documents_to_translate")

In [4]:
# Setup folder structure for chosen document
if not os.path.exists(document_path):
    !mkdir {document_path}
path = os.path.join(document_path, doc_name)
path_collection = os.path.join(path, "collection")
path_translation = os.path.join(path, "translation")
if not os.path.exists(path):
    !mkdir {path}
if not os.path.exists(path_collection):
    !mkdir {path_collection}
if not os.path.exists(path_translation):
    !mkdir {path_translation}

# 4. Collect Images from Document

In [19]:
# Address for IP Webcam, in order to use phone as webcam for higher resolution images
address = "http://192.168.178.32:8080/video"

# Set start page for correct page numbering
current_page = int(start_page)

print("Press \"p\" to collect page, press \"q\" when ready...")
print(f"Collecting page {current_page}...")

# While loop for webcam streaming and image collection
while True:
    cap = cv2.VideoCapture(0)
    cap.open(address)
    ret, frame = cap.read()
    # Resize frame in order to fit on laptop screen, can be altered for higher quality image collection
    frame = cv2.resize(frame, (540, 960))
    cv2.imshow('Webcam feed', frame)
    # Set pagenum in order to save with correct numbering
    pagenum = str(current_page).zfill(5)
    # Set imgname for current page
    imgname = os.path.join(path_collection, f"{doc_name}_{pagenum}.jpg")
    # Give options for keystrokes - "p" for printing/collecting current frame, "q" to quit and break off loop
    k = cv2.waitKey(1) & 0xFF
    if k == ord("p"):
        cv2.imwrite(imgname, frame)
        print(f"Page {current_page} saved!")
        current_page += 1
        # Print which page to collect next
        print(f"Collecting page {current_page}...")
        
    elif k == ord("q"):
        break
        
cap.release()
cv2.destroyAllWindows()
print("Finished collecting images.")

Press "p" to collect page, press "q" when ready...
Collecting page 122...
Page 122 saved!
Collecting page 123...
Page 123 saved!
Collecting page 124...
Finished collecting images.


# 5. Use Tesseract for OCR

Note: Tesseract should be installed for this to work. For the Windows (which is what I use) installer, click the following link: https://github.com/UB-Mannheim/tesseract/wiki

In [20]:
!pip install pytesseract



In [11]:
# Import pytesseract
import pytesseract

In [12]:
tesseract_path = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
pytesseract.pytesseract.tesseract_cmd = tesseract_path

In [13]:
for pg in os.listdir(path_collection):
    pagepath = os.path.join(path_collection, pg)
    page = cv2.imread(pagepath)
    cv2.imshow(pg, page)
    gray = cv2.cvtColor(page, cv2.COLOR_BGR2GRAY)
    adaptive_threshold = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 85, 11)
    cv2.waitKey(0)

In [14]:
text = pytesseract.image_to_string(adaptive_threshold)

In [15]:
print(text)

>

Monte Aconcagua

Yo he visto, yo he visto
mi monte Aconcagua.
Me dura en los ojos
la blanca llamarada

y como ya lo vi

la muerte no me mata.

Le crecen los espinos

en la primer jornada,

lo abrazan bien los boldos
de reveses de plata,

a mas y mas que sube

el pecho se le aclara
arrebatado Elias,

Elohim Aconcagua!

Manda la noche grande,
suelta las mafianas,

se esconde en nubes,

se borra, se acaba

y sigue pastoreando
detrds de la nubada
amor dulce ¥ tremendo,
Monte Aconcagua

 

aa



# 6. Translate

In [64]:
!pip install google_trans_new

Collecting google_trans_new
  Downloading google_trans_new-1.1.9-py3-none-any.whl (9.2 kB)
Installing collected packages: google-trans-new
Successfully installed google-trans-new-1.1.9


In [16]:
from google_trans_new import google_translator

In [19]:
cv2.imshow(pg, page)
cv2.waitKey(0)

-1

In [18]:
translator = google_translator()
print(translator.translate(text))

>  Mount Aconcagua.  I've seen, I've seen  My Mount Aconcagua.  It lasts me in the eyes  The white flare  and as I already saw it  Death does not kill me.  The thorns grow  In the first day,  They hug the Bolds well  of silver setbacks,  to more and more that goes up  the chest is clarified  ELIAS,  Elohim Aconcagua!  Send the big night,  release the mafiians,  It is hidden in clouds,  It is erased, it's over  And keep grazing  detachment  love sweet ¥ tremendous,  Mount Aconcagua.  AA. 
