Skip to content

Commit

Permalink
Added function to check pre-requisites and checks for Windows
Browse files Browse the repository at this point in the history
  • Loading branch information
avsaditya authored and prabhakar267 committed Oct 20, 2018
1 parent b7afef9 commit e4d764f
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 4 deletions.
2 changes: 2 additions & 0 deletions constants.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Name of the default output directory (imported by main.py).
DEFAULT_OUTPUT_DIRECTORY_NAME = "converted-text"

# Lookup utility that main.py runs with the argument 'tesseract' to find out
# whether the tesseract executable is installed.
DEFAULT_CHECK_COMMAND = "which"
# NOTE(review): presumably the lookup utility selected on Windows -- confirm in main.py's get_command().
WINDOWS_CHECK_COMMAND = "where"
# Name of the environment variable main.py validates on Windows: it must be
# set to a readable directory holding the Tesseract data.
TESSERACT_DATA_PATH_VAR = 'TESSDATA_PREFIX'

# File extensions accepted as images (imported by main.py).
# NOTE(review): the code consuming this list is not visible here -- confirm how matching is done.
VALID_IMAGE_EXTENSIONS = [".jpg", ".jpeg", ".gif", ".png", ".tga", ".tif", ".bmp"]
43 changes: 39 additions & 4 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import sys

from constants import DEFAULT_OUTPUT_DIRECTORY_NAME, VALID_IMAGE_EXTENSIONS, WINDOWS_CHECK_COMMAND, \
DEFAULT_CHECK_COMMAND
DEFAULT_CHECK_COMMAND, TESSERACT_DATA_PATH_VAR


def create_directory(path):
Expand Down Expand Up @@ -37,16 +37,51 @@ def get_command():
return DEFAULT_CHECK_COMMAND


def main(input_path, output_path):
# Check if tesseract is installed or not
def check_pre_requisites_tesseract():
    """
    Check if the pre-requisites required for running the tesseract application are satisfied or not.

    The command returned by `get_command()` is run with the argument
    `tesseract`; empty output is treated as "tesseract is not installed".
    On Windows the environment variable named by `TESSERACT_DATA_PATH_VAR`
    must additionally be set to a readable directory.

    :param : NA
    :return: boolean - True when every check passes, False otherwise
    """
    check_command = get_command()
    logging.debug("Running `{}` to check if tesseract is installed or not.".format(check_command))
    try:
        result = subprocess.run([check_command, 'tesseract'], stdout=subprocess.PIPE)
    except OSError:
        # The lookup utility itself could not be started (e.g. it is not
        # installed); report a failed pre-requisite instead of crashing.
        logging.error("Unable to run `{}` to check if tesseract is installed or not.".format(check_command))
        return False
    if not result.stdout:
        logging.error("tesseract-ocr missing, use install `tesseract` to resolve.")
        return False
    logging.debug("Tesseract correctly installed!\n")

    # The data-path check below only applies to Windows.
    if not sys.platform.startswith('win'):
        return True

    logging.debug("Checking if the Tesseract Data path is set correctly or not.\n")
    path = os.environ.get(TESSERACT_DATA_PATH_VAR)
    if path is None:
        logging.error(
            "Tesseract Data path Environment variable '{}' needs to be configured to point to "
            "the tessdata!".format(TESSERACT_DATA_PATH_VAR)
        )
        return False
    if not path:
        logging.error(
            "Tesseract Data path Environment variable '{}' configured to an empty string!".format(
                TESSERACT_DATA_PATH_VAR
            )
        )
        return False

    # Adjacent string literals are used instead of a backslash inside the
    # literal so that no source-file whitespace leaks into the message.
    logging.debug(
        "Checking if the path configured for Tesseract Data Environment variable `{}` "
        "as `{}` is valid or not.".format(TESSERACT_DATA_PATH_VAR, path)
    )
    if not (os.path.isdir(path) and os.access(path, os.R_OK)):
        logging.error("Configured path for Tesseract data is not accessible!")
        return False

    logging.debug("All set to go!")
    return True


def main(input_path, output_path):
# Check if tesseract is installed or not
if not check_pre_requisites_tesseract():
return

# Check if a valid input directory is given or not
if not check_path(input_path):
logging.error("No directory found at `{}`".format(input_path))
Expand Down

0 comments on commit e4d764f

Please sign in to comment.