# Tesseract OCR for Non-English Languages

In [None]:
# install the Tesseract binary plus the Python packages used below;
# -y answers apt-get's confirmation prompt so the cell cannot stall
!sudo apt-get install -y tesseract-ocr
!pip install pytesseract
# TextBlob 0.18 removed TextBlob.translate(), which the translation cell
# of this notebook calls, so stay on the last release line that has it
!pip install "textblob<0.18"

### Downloading and Adding Language Packs to Tesseract OCR

In [2]:
# download the Tesseract language packs by cloning the tessdata
# repository into ./tessdata (a multi-gigabyte download)
!git clone https://github.com/tesseract-ocr/tessdata

Cloning into 'tessdata'...
remote: Enumerating objects: 769, done.
remote: Counting objects: 100% (1/1), done.
remote: Total 769 (delta 0), reused 1 (delta 0), pack-reused 768
Receiving objects: 100% (769/769), 3.17 GiB | 17.69 MiB/s, done.
Resolving deltas: 100% (178/178), done.
Checking out files: 100% (172/172), done.


In [1]:
import os

# point Tesseract at the directory holding the cloned language packs
os.environ.update({"TESSDATA_PREFIX": "/content/tessdata"})

In [2]:
# move into the project folder so relative paths such as
# images/german.png resolve; assumes the folder already exists under the
# current directory (it is not created in the visible cells)
%cd tesseract-non-english

/content/tesseract-non-english


### Import Packages

In [3]:
# import the necessary packages
from matplotlib import pyplot as plt
from textblob import TextBlob
import pytesseract
import argparse
import cv2

### Function to display images in Jupyter Notebooks and Google Colab

In [4]:
def plt_imshow(title, image):
	"""Render an OpenCV image inline using matplotlib.

	Args:
		title: text shown above the figure.
		image: image in BGR channel order; it is converted to RGB
			before being handed to matplotlib.
	"""
	# swap the channel order from BGR to RGB for display
	rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

	# draw the frame with its title and no grid overlay
	plt.imshow(rgb_frame)
	plt.title(title)
	plt.grid(False)
	plt.show()

### Implementing Our Script for Tesseract with Non-English Languages

In [5]:
# a notebook has no command line, so the values that argument parsing
# would normally collect are *hard coded* here instead
args = dict(
	image="images/german.png",  # path to the image to OCR
	lang="deu",                 # language passed to Tesseract via -l
	to="en",                    # language to translate the text into
	psm=13,                     # value passed to Tesseract via --psm
)

In [6]:
# load the input image; cv2.imread reports a missing or unreadable file
# by returning None rather than raising, so fail early with a clear
# message instead of an opaque OpenCV assertion in cvtColor below
image = cv2.imread(args["image"])
if image is None:
	raise FileNotFoundError(
		"could not read image: {}".format(args["image"]))

# convert the image from BGR to RGB channel ordering
rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# OCR the image, supplying the language code and the page segmentation
# mode through Tesseract's command line style options
options = "-l {} --psm {}".format(args["lang"], args["psm"])
text = pytesseract.image_to_string(rgb, config=options)

# show the original OCR'd text
print("ORIGINAL")
print("========")
print(text)
print("")

ORIGINAL
========
Ich brauche ein Bier!




In [7]:
# wrap the OCR'd text in a TextBlob and translate it into the target
# language selected in args
# NOTE(review): TextBlob.translate() was deprecated in TextBlob 0.16 and
# removed in 0.18 -- confirm the installed version still provides it
tb = TextBlob(text)
translated = tb.translate(to=args["to"])

# show the translated text beneath an underlined header
print("TRANSLATED", "==========", translated, sep="\n")

TRANSLATED
==========
I need a beer!
