Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up imports #8

Merged
merged 1 commit into from Feb 25, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
46 changes: 21 additions & 25 deletions opentaxforms/Form.py
@@ -1,15 +1,23 @@
from __future__ import absolute_import
import re
try:
from urllib2 import urlopen, URLError, HTTPError
except ImportError:
from urllib.request import urlopen
from urllib.error import URLError, HTTPError
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import (
LTTextBox, LTTextLine, LTTextBoxHorizontal, LAParams, LTChar,
LTTextLineHorizontal
)
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfpage import PDFPage
from pdfminer.pdftypes import resolve1
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from six.moves.urllib.request import urlopen
from six.moves.urllib.error import URLError, HTTPError

import opentaxforms.ut as ut
from opentaxforms.ut import (log, ntuple, logg, stdout, Qnty, NL,
pathjoin)
import opentaxforms.irs as irs
from opentaxforms.config import cfg
from . import ut, irs
from .ut import log, ntuple, logg, stdout, Qnty, NL, pathjoin
from .config import cfg
from .xmp import xmp_to_dict
from .cmds import CommandParser, normalize, adjustNegativeField, CannotParse

# global so that they're pickle-able
PageInfo = ntuple('PageInfo', 'pagenum pagewidth pageheight textpoz')
Expand Down Expand Up @@ -133,16 +141,11 @@ def download(self, year, failurls, dirName='forms'):

def pdfInfo(self):
# collect metadata from pdf file at document and page levels
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfpage import PDFPage
with open(self.fpath, 'rb') as fp:
parser = PDFParser(fp)
doc = PDFDocument(parser)
docinfo = {}
if 'Metadata' in doc.catalog:
from pdfminer.pdftypes import resolve1
from opentaxforms.xmp import xmp_to_dict
metadata = resolve1(doc.catalog['Metadata']).get_data()
xmpdict = xmp_to_dict(metadata)
docinfo['titl'] = xmpdict['dc']['title']['x-default']
Expand Down Expand Up @@ -233,8 +236,6 @@ def orderDependencies(self):
def computeMath(self):
# determines which fields are computed from others
# 'dep' means dependency
from opentaxforms.cmds import (
CommandParser, normalize, adjustNegativeField, CannotParse)
fields = self.fields if 'm' in cfg.steps else []
for field in fields:
math = CommandParser(field, self)
Expand All @@ -258,9 +259,6 @@ def computeMath(self):

class Renderer(object):
def __init__(self):
from pdfminer.layout import LAParams
from pdfminer.converter import PDFPageAggregator
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
# Create a PDF resource manager object that stores shared resources.
rsrcmgr = PDFResourceManager()
# la=layout analysis
Expand All @@ -270,21 +268,19 @@ def __init__(self):
self.textPoz = None

def renderPage(self, page):
from pdfminer.layout import LTTextBox, LTTextLine, LTTextBoxHorizontal
self.interpreter.process_page(page)
layout = self.device.get_result()
# http://denis.papathanasiou.org/2010/08/04/extracting-text-images-
# from-pdf-files/
textPoz = TextPoz()
for lt in layout:
if lt.__class__ in (LTTextBoxHorizontal, LTTextBox, LTTextLine):
if isinstance(lt, (LTTextBoxHorizontal, LTTextBox, LTTextLine)):
textPoz.add(lt)
return textPoz


class TextPoz(object):
# text positions
from pdfminer.layout import LTChar, LTTextLineHorizontal
FormPos = ntuple('FormPos', 'itxt ichar chrz bbox')

def __init__(self):
Expand All @@ -296,13 +292,13 @@ def quantify(tupl, unit):

def accum(ltobj, ltchars, chars):
for lto in ltobj:
if isinstance(lto, self.LTChar):
if isinstance(lto, LTChar):
ltchartext = lto.get_text()
ltchars.append(
(ltchartext,
ut.Bbox(*quantify(lto.bbox, 'printers_point'))))
chars.append(ltchartext)
elif isinstance(lto, self.LTTextLineHorizontal):
elif isinstance(lto, LTTextLineHorizontal):
accum(lto, ltchars, chars)
ltchars = []
chars = []
Expand Down
7 changes: 4 additions & 3 deletions opentaxforms/cmds.py
@@ -1,8 +1,9 @@
from __future__ import absolute_import
import re
import six
import opentaxforms.ut as ut
from opentaxforms.ut import log, jj, numerify
import opentaxforms.irs as irs

from . import ut, irs
from .ut import log, jj, numerify


def normalize(s):
Expand Down
25 changes: 13 additions & 12 deletions opentaxforms/config.py
@@ -1,11 +1,17 @@
from __future__ import print_function
import sys
from __future__ import print_function, absolute_import
import os
import os.path
import re
import shutil
import sys
from argparse import ArgumentParser
import opentaxforms.ut as ut
from opentaxforms.ut import log, Bag, setupLogging, logg, NL, pathjoin
from opentaxforms.version import appname, appversion
from os.path import isfile, join as joinpath
from six.moves.urllib.request import urlopen
from six.moves.urllib.error import URLError

from . import ut
from .ut import log, Bag, setupLogging, logg, NL, pathjoin
from .version import appname, appversion

RecurseInfinitely = -1
RecursionRootLevel = 0
Expand Down Expand Up @@ -123,15 +129,13 @@ def getFileList(dirName):
except Exception as e:
log.warn('cannot symlink %s to %s because %s, copying instead'%(
allpdfpath, allpdfLink, e, ))
import shutil
shutil.copy(allpdfpath,allpdfLink)
elif not cfg.okToDownload:
msg = 'allPdfNames file [%s] not found but dontDownload' % (
allpdfpath)
raise Exception(msg)
else:
# todo why did this stop working? my own env?
from urllib2 import urlopen, URLError
try:
# could use https://www.irs.gov/pub/irs-pdf/pdfnames.txt but
# this way we avoid errors in that file
Expand All @@ -141,7 +145,6 @@ def getFileList(dirName):
fin.getcode(), ))
allpdffiles_html = fin.read()
fin.close()
import re
allpdfnames = re.findall(r'f[\w\d-]+\.pdf', allpdffiles_html)
allpdfnames = ut.uniqify(sorted(allpdfnames))
with open(allpdfpath, 'w') as f:
Expand Down Expand Up @@ -212,12 +215,12 @@ def setup(**overrideArgs):
log.warn('commandline: %s at %s', ' '.join(sys.argv), ut.now())

if dirName is not None:
from opentaxforms.Form import Form
# deferred import to avoid circular reference
from .Form import Form
if rootForms:
cfg.formsRequested = [
Form(rootForm, RecursionRootLevel) for rootForm in rootForms]
else:
from os.path import isfile, join as joinpath
cfg.formsRequested = [
Form(f, RecursionRootLevel)
for f in os.listdir(dirName)
Expand All @@ -235,15 +238,13 @@ def setup(**overrideArgs):
ut.ensure_dir(dirName)
staticDir = ut.Resource(appname, 'static').path()
staticLink = pathjoin(dirName, 'static')
import os.path
try:
if not os.path.lexists(staticLink):
os.symlink(staticDir, staticLink)
except Exception as e:
log.warn('cannot symlink %s to %s because %s, copying instead'%(
staticDir, staticLink, e))
try:
import shutil
shutil.copytree(staticDir, staticLink)
except Exception as e:
log.warn('cannot copy %s to %s because %s,'
Expand Down
17 changes: 9 additions & 8 deletions opentaxforms/db.py
@@ -1,13 +1,16 @@
from __future__ import absolute_import
import os
import six
import sys
from itertools import chain
import opentaxforms.ut as ut
import opentaxforms.config as config
from opentaxforms.ut import log
from opentaxforms.config import cfg
from sqlalchemy import MetaData, create_engine, select
from sqlalchemy import UniqueConstraint
from sqlalchemy import (
MetaData, create_engine, select, UniqueConstraint)
# from sqlalchemy.exc import ProgrammingError

from . import ut, config
from .ut import log
from .config import cfg

engine, metadata, conn = None, None, None


Expand All @@ -23,7 +26,6 @@ def connect(appname, **kw):
user = pw = 'user'
dbname = appname.lower()
# optionally override defaults
import os
user = os.environ.get(appname.upper() + '_DBUSER', user)
pw = os.environ.get(appname.upper() + '_DBPASS', pw)
dbname = os.environ.get(appname.upper() + '_DBNAME', dbname)
Expand Down Expand Up @@ -240,7 +242,6 @@ def stripifstring(s):


if __name__ == "__main__":
import sys
args = sys.argv
if any([arg in args for arg in '-t --testing'.split()]):
import doctest
Expand Down
28 changes: 19 additions & 9 deletions opentaxforms/html.py
@@ -1,11 +1,14 @@
from __future__ import print_function
from os import remove as removeFile
from __future__ import print_function, absolute_import
import os.path
import re
import traceback
from os import remove as removeFile
from itertools import chain
from opentaxforms.config import cfg,setup
from opentaxforms.irs import computeTitle, computeFormId, sortableFieldname
import opentaxforms.ut as ut
from opentaxforms.ut import log, jdb, Qnty, NL, pathjoin

from . import ut
from .config import cfg, setup
from .irs import computeTitle, computeFormId, sortableFieldname
from .ut import log, jdb, Qnty, NL, pathjoin


def computeFormFilename(form):
Expand Down Expand Up @@ -44,7 +47,6 @@ def computePageTitle(titlebase, npage, npages):

def createSvgFile(dirName, prefix, npage):
ipage = npage - 1
import os.path
infpath = os.path.join(dirName,'{}.pdf'.format(prefix))
print('infpath', infpath)
outfpath = os.path.join(dirName,'{}-p{}-fixedDims.svg'.format(prefix, ipage))
Expand Down Expand Up @@ -75,6 +77,15 @@ def createSvgFile(dirName, prefix, npage):
removeFile(outfpath)


def readImgSize(fname, dirName):
    """Return the (width, height) size, in pixels, of an image file.

    The file is located by joining `dirName` and `fname` with `pathjoin`
    and is opened in binary mode before being handed to PIL.
    """
    # deferred import: PIL/Pillow isn't a hard dependency
    from PIL import Image
    imgpath = pathjoin(dirName, fname)
    with open(imgpath, 'rb') as imgfile:
        # (width, height) in pixels
        return Image.open(imgfile).size


def createGifFile(dirName, prefix, npage):
ipage = npage - 1
imgfname = prefix + '.gif'
Expand All @@ -92,7 +103,7 @@ def createGifFile(dirName, prefix, npage):
try:
imgw, imgh = ut.readImgSize(imgfname, dirName)
except:
log.error('err re file ' + imgfname + ' in dir ' + dirName)
log.error('err re file %s in dir %s', imgfname, dirName)
raise
return imgw, imgh

Expand Down Expand Up @@ -260,7 +271,6 @@ def titleValue(f):
shorten(f.xpos),
shorten(f.ypos)) if tooltip else '')
except Exception:
import traceback
log.warn(ut.jj('caughtError:', traceback.format_exc()))
return ''

Expand Down
6 changes: 4 additions & 2 deletions opentaxforms/link.py
@@ -1,7 +1,9 @@
from __future__ import absolute_import
import six
import re
from opentaxforms.ut import log, jj, ddict
import opentaxforms.irs as irs

from . import irs
from .ut import log, jj, ddict


def findLineAndUnit(speak):
Expand Down
6 changes: 3 additions & 3 deletions opentaxforms/main.py
Expand Up @@ -10,14 +10,16 @@
pos,poz=position,positions
'''
from __future__ import print_function, absolute_import
import json
import sys
from os import remove as removeFile
import traceback
from os import remove as removeFile

from . import ut, irs, link, schema, html, refs as references
from .ut import log, jj, Bag, logg, stdout, Qnty, pathjoin
from .config import cfg, setup, RecurseInfinitely
from .extractFillableFields import extractFields
from .Form import Form


def cleanup_files(form):
Expand All @@ -33,7 +35,6 @@ def cleanup_files(form):


def addFormsTodo(form, formsdone, formstodo, formsfail):
from opentaxforms.Form import Form
recurselevel = form.recurselevel
refs = form.refs
if cfg.recurse and (cfg.maxrecurselevel == RecurseInfinitely or
Expand Down Expand Up @@ -129,7 +130,6 @@ def logRunStatus(formsdone, formsfail, status):
msg = 'failed to process %d forms: %s' % (
len(formsfail), [irs.computeFormId(f) for f in formsfail])
logg(msg, [log.error, stdout])
import json
status.update({'f' + irs.computeFormId(f).lower(): None
for f in formsfail})
statusStr = json.dumps(status.__dict__)
Expand Down
9 changes: 5 additions & 4 deletions opentaxforms/refs.py
@@ -1,9 +1,10 @@
from __future__ import absolute_import
import re
import six
import opentaxforms.ut as ut
from opentaxforms.ut import log, jj, pathjoin
from opentaxforms.config import cfg
import opentaxforms.irs as irs

from . import ut, irs
from .ut import log, jj, pathjoin
from .config import cfg

# nonforms are numbers that don't represent forms
nonforms = [str(yr) for yr in range(2000, 2050)]
Expand Down