Skip to content

Commit

Permalink
make binary paths configurable
Browse files Browse the repository at this point in the history
  • Loading branch information
ciur committed Aug 11, 2020
1 parent ac7f2ae commit 5edd196
Show file tree
Hide file tree
Showing 11 changed files with 168 additions and 42 deletions.
9 changes: 9 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@

# Changelog


## [1.2.6] - 11 August 2020

### Added

- mglib.conf.settings module. The point is to get rid of hardcoded binary paths. Binary paths are now provided as configuration settings.


## [1.2.3] - 25 July 2020

### Changed
Expand Down
8 changes: 8 additions & 0 deletions mglib/conf/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from .settings import (
DefaultSettings,
MgLibSettings
)

# Package-level settings object: an MgLibSettings wrapper around a
# freshly built DefaultSettings instance.
settings = MgLibSettings(DefaultSettings())
28 changes: 28 additions & 0 deletions mglib/conf/default_settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@


# Default absolute paths of the external binaries, grouped by the
# package that provides them.

# --- file utility ---
# Used to find out the mime type of a file.
BINARY_FILE = "/usr/bin/file"

# --- ImageMagick ---
# convert: used for resizing images.
BINARY_CONVERT = "/usr/bin/convert"
# identify: used to get the number of pages in a TIFF file.
BINARY_IDENTIFY = "/usr/bin/identify"

# --- Poppler Utils ---
# pdftoppm: used to extract images from a PDF file.
BINARY_PDFTOPPM = "/usr/bin/pdftoppm"
# pdfinfo: used to get the page count of a PDF file.
BINARY_PDFINFO = "/usr/bin/pdfinfo"

# --- tesseract ---
# Used to extract text from images/PDF files.
BINARY_OCR = "/usr/bin/tesseract"

# --- pdftk ---
# Used to reorder, cut/paste and delete pages within a PDF document.
BINARY_PDFTK = "/usr/bin/pdftk"
55 changes: 55 additions & 0 deletions mglib/conf/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import importlib

try:
from django.conf import settings as django_settings
except ImportError:
# Operating outside django, use own settings module
django_settings = None


class DefaultSettings:
    """Settings container filled from a Python settings module.

    At construction time the module named by ``settings_module`` is
    imported and each of its upper-case attributes is copied onto the
    instance. The module name itself is kept in ``SETTINGS_MODULE``.
    """

    def __init__(self, settings_module="mglib.conf.default_settings"):
        self.SETTINGS_MODULE = settings_module
        source = importlib.import_module(settings_module)

        # Only upper-case names are treated as settings; everything else
        # in the module (dunders, helpers, imports) is ignored.
        upper_names = (key for key in dir(source) if key.isupper())
        for key in upper_names:
            setattr(self, key, getattr(source, key))

    def configure(self, **options):
        """Set or override settings from the given keyword arguments."""
        for name in options:
            setattr(self, name, options[name])


class MgLibSettings:
    """Attribute-style access to mglib settings.

    Upper-case attributes are looked up in Django settings when Django is
    importable and its settings can be loaded. Any name Django does not
    define, and every lookup made outside Django, is served from
    ``default_settings``.
    """

    def __init__(self, default_settings):
        # Object holding the fallback values; it must also provide
        # ``configure(**options)``.
        self.default_settings = default_settings

    @staticmethod
    def _django_is_usable():
        """Return True when Django settings can be read without raising.

        Touching an unconfigured lazy Django settings object raises
        ``ImproperlyConfigured``, so the check avoids any attribute access
        that would trigger loading.
        """
        if django_settings is None:
            return False

        import os

        # ``configured`` does not trigger settings loading. Objects without
        # that attribute (e.g. a plain stand-in) are treated as usable.
        if getattr(django_settings, "configured", True):
            return True

        # Not loaded yet, but Django will be able to load on first access.
        return bool(os.environ.get("DJANGO_SETTINGS_MODULE"))

    def __getattr__(self, name):
        """Resolve the setting ``name``.

        Raises:
            AttributeError: if ``name`` is not upper-case, or if neither
                Django settings nor the defaults define it.
        """
        if not name.isupper():
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )

        # When operating within Django, its settings take precedence.
        if self._django_is_usable():
            try:
                return getattr(django_settings, name)
            except AttributeError:
                # Django does not define this setting: use mglib's default.
                pass

        return getattr(self.default_settings, name)

    def configure(self, **options):
        """Override default values; delegates to ``default_settings``."""
        self.default_settings.configure(**options)
3 changes: 2 additions & 1 deletion mglib/mime.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import logging
from . import wrapper

from .conf import settings

logger = logging.getLogger(__name__)


class Mime(wrapper.Wrapper):
def __init__(self, filepath):
super().__init__(exec_name="file")
super().__init__(exec_name=settings.BINARY_FILE)
self.filepath = filepath

def get_cmd(self):
Expand Down
11 changes: 7 additions & 4 deletions mglib/pdfinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import subprocess
import logging

from .conf import settings

"""
Uses command line pdfinfo utility (from poppler package) for various
small operations (e.g. get pdf page count).
Expand All @@ -13,7 +15,7 @@

def get_tiff_pagecount(filepath):
cmd = [
"/usr/bin/identify",
settings.BINARY_IDENTIFY,
"-format",
"%n\n",
filepath
Expand Down Expand Up @@ -76,10 +78,11 @@ def get_pagecount(filepath):
"Only jpeg, png, pdf and tiff are handlerd by this"
" method"
)

# pdfinfo "${PDFFILE}" | grep Pages

cmd = ["/usr/bin/pdfinfo", filepath]
cmd = [
settings.BINARY_PDFINFO,
filepath
]
compl = subprocess.run(
cmd,
stdout=subprocess.PIPE,
Expand Down
10 changes: 6 additions & 4 deletions mglib/pdftk.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from mglib.runcmd import run
from mglib.pdfinfo import get_pagecount

from .conf import settings

logger = logging.getLogger(__name__)

#
Expand Down Expand Up @@ -183,7 +185,7 @@ def paste_pages_into_existing_doc(
)

cmd = [
"pdftk",
settings.BINARY_PDFTK,
]
# add A=doc1_path, B=doc2_path
cmd.extend(letters_2_doc_map)
Expand Down Expand Up @@ -272,7 +274,7 @@ def paste_pages(
)

cmd = [
"pdftk",
settings.BINARY_PDFTK,
]
# add A=doc1_path, B=doc2_path
cmd.extend(letters_2_doc_map)
Expand Down Expand Up @@ -315,7 +317,7 @@ def reorder_pages(
)

cmd = [
"pdftk",
settings.BINARY_PDFTK,
src,
"cat"
]
Expand All @@ -338,7 +340,7 @@ def delete_pages(src, dst, page_numbers):
)

cmd = [
"pdftk",
settings.BINARY_PDFTK,
src,
"cat"
]
Expand Down
36 changes: 5 additions & 31 deletions mglib/shortcuts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging

from mglib.runcmd import run
from .conf import settings

logger = logging.getLogger(__name__)

Expand All @@ -28,7 +29,7 @@ def resize_img(page_path, media_root):
logger.debug(f"PPMROOT {ppmroot_dirname} already exists.")

cmd = (
"convert",
settings.BINARY_CONVERT,
"-resize",
f"{width}x",
local_abspath,
Expand Down Expand Up @@ -61,7 +62,7 @@ def extract_img(page_path, media_root):
else:
logger.debug(f"PPMROOT {ppmroot_dirname} already exists.")
cmd = (
"pdftoppm",
settings.BINARY_PDFTOPPM,
"-jpeg",
"-f",
str(page_num),
Expand Down Expand Up @@ -89,7 +90,7 @@ def extract_hocr(page_url, lang, media_root):
os.path.join(media_root, page_url.hocr_url())
)
cmd = (
"tesseract",
settings.BINARY_OCR,
"-l",
lang,
page_abspath,
Expand All @@ -112,37 +113,10 @@ def extract_txt(page_url, lang, media_root):
)
)
cmd = (
"tesseract",
settings.BINARY_OCR,
"-l",
lang,
page_abspath,
txt_root
)
run(cmd)


#def text_from_pdf(filepath, lang, dry_run=False):
#
# # suffix .tiff in file name is required by conver utility, otherwise
# # it won't convert to tiff format!
# tiff = tempfile.NamedTemporaryFile(suffix=".tiff")
# conv = convert.Convert(dry_run=dry_run)
# conv(filepath=filepath, fout=tiff)
# try:
# tsact = tesseract.Tesseract()
# text = tsact(filepath=tiff.name, lang=lang)
# except subprocess.CalledProcessError as e:
# print(e)
# print(e.stderr)
# return
#
# return text
#
#
#def text_from_image(filepath, lang, dry_run=False):
#
# tsact = tesseract.Tesseract(dry_run=dry_run)
# text = tsact(filepath=filepath, lang=lang)
#
# return text
#
3 changes: 2 additions & 1 deletion mglib/tiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging

from mglib.runcmd import run
from .conf import settings

logger = logging.getLogger(__name__)

Expand All @@ -21,7 +22,7 @@ def convert_tiff2pdf(doc_url):
)

cmd = (
"convert",
settings.BINARY_CONVERT,
doc_url,
new_doc_url,
)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name="mglib",
version="1.2.5",
version="1.2.6",
author="Eugen Ciur",
author_email="eugen@papermerge.com",
url="https://github.com/papermerge/mglib",
Expand Down
45 changes: 45 additions & 0 deletions test/test_settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import os
from pathlib import Path
import unittest

from mglib.conf.settings import (
MgLibSettings,
DefaultSettings
)

# Path of the "data" directory located next to this test module.
DATA_DIR = os.path.join(Path(__file__).parent, "data")


class TestMgLibSettings(unittest.TestCase):
    """Checks MgLibSettings when it is backed by DefaultSettings."""

    def setUp(self):
        defaults = DefaultSettings()
        self.settings = MgLibSettings(defaults)

    def test_settings_outside_django_should_work(self):
        """
        Outside of django, settings resolve to their default values.
        """
        # pdfinfo path as shipped in the default settings
        expected = "/usr/bin/pdfinfo"
        self.assertEqual(expected, self.settings.BINARY_PDFINFO)

    def test_settings_are_configurable(self):
        """
        Defaults can be changed at runtime through configure().
        """
        # override the pdfinfo path, then read it back
        new_path = "/usr/bin/xyz"
        self.settings.configure(BINARY_PDFINFO=new_path)
        self.assertEqual(new_path, self.settings.BINARY_PDFINFO)

0 comments on commit 5edd196

Please sign in to comment.