From 39f7d0eb4ba5f8eb5476868de6631feff4d0c75b Mon Sep 17 00:00:00 2001
From: Ross Reedstrom <reedstrm@gmail.com>
Date: Tue, 27 Oct 2015 17:06:44 -0500
Subject: [PATCH] flake8 compliance, and a couple bug fixes

---
 cnxarchive/database.py             | 99 ++++++++++++++++++------------
 cnxarchive/transforms/resolvers.py | 12 ++--
 cnxarchive/views.py                | 68 ++++++++++----------
 3 files changed, 103 insertions(+), 76 deletions(-)

diff --git a/cnxarchive/database.py b/cnxarchive/database.py
index 24a57261..10a09acb 100644
--- a/cnxarchive/database.py
+++ b/cnxarchive/database.py
@@ -5,12 +5,10 @@
 # Public License version 3 (AGPLv3).
 # See LICENCE.txt for details.
 # ###
-"""Database models and utilities"""
-import datetime
+"""Database models and utilities."""
 import os
 import json
 import psycopg2
-import re
 
 from . import config
 from .transforms import (
@@ -19,13 +17,18 @@
     )
 from .utils import split_ident_hash
 
-
 here = os.path.abspath(os.path.dirname(__file__))
 SQL_DIRECTORY = os.path.join(here, 'sql')
 DB_SCHEMA_DIRECTORY = os.path.join(SQL_DIRECTORY, 'schema')
 SCHEMA_MANIFEST_FILENAME = 'manifest.json'
 
 
+class ContentNotFound(Exception):
+    """Used when database retrival fails."""
+
+    pass
+
+
 def _read_sql_file(name):
     path = os.path.join(SQL_DIRECTORY, '{}.sql'.format(name))
     with open(path, 'r') as fp:
@@ -73,7 +76,8 @@ def _read_schema_manifest(manifest_filepath):
 
 
 def _compile_manifest(manifest, content_modifier=None):
-    """Compiles a given ``manifest`` into a sequence of schema items.
+    """Compile a given ``manifest`` into a sequence of schema items.
+
     Apply the optional ``content_modifier`` to each file's contents.
     """
     items = []
@@ -90,6 +94,7 @@ def _compile_manifest(manifest, content_modifier=None):
 
 
 def get_schema():
+    """Return the current schema."""
     manifest_filepath = os.path.join(DB_SCHEMA_DIRECTORY,
                                      SCHEMA_MANIFEST_FILENAME)
     schema_manifest = _read_schema_manifest(manifest_filepath)
@@ -111,8 +116,9 @@ def initdb(settings):
 
 
 def get_module_ident_from_ident_hash(ident_hash, cursor):
-    """Returns the moduleid for a given ``ident_hash``."""
-    uuid, (mj_ver, mn_ver), id_type = split_ident_hash(ident_hash, split_version=True)
+    """Return the moduleid for a given ``ident_hash``."""
+    uuid, (mj_ver, mn_ver), id_type = split_ident_hash(ident_hash,
+                                                       split_version=True)
     args = [uuid]
     stmt = "SELECT module_ident FROM {} WHERE uuid = %s"
     table_name = 'modules'
@@ -135,9 +141,7 @@ def get_module_ident_from_ident_hash(ident_hash, cursor):
 
 
 def get_tree(ident_hash, cursor):
-    """Given an ``ident_hash``, return a JSON representation
-    of the binder tree.
-    """
+    """Return a JSON representation of the binder tree for ``ident_hash``."""
     uuid, version, id_type = split_ident_hash(ident_hash)
     cursor.execute(SQL['get-tree-by-uuid-n-version'],
                    (uuid, version,))
@@ -153,6 +157,7 @@ def get_tree(ident_hash, cursor):
 
 
 def get_module_uuid(db_connection, moduleid):
+    """Retrieve page uuid from legacy moduleid."""
     with db_connection.cursor() as cursor:
         cursor.execute("SELECT uuid FROM modules WHERE moduleid = %s;",
                        (moduleid,))
@@ -164,6 +169,11 @@ def get_module_uuid(db_connection, moduleid):
 
 
 def get_current_module_ident(moduleid, cursor):
+    """Retrieve module_ident for a given moduleid.
+
+    Note that module_ident is used only for internal database relational
+    associations, and is equivalent to a uuid@version for a given document.
+    """
     sql = '''SELECT m.module_ident FROM modules m
         WHERE m.moduleid = %s ORDER BY revised DESC'''
     cursor.execute(sql, [moduleid])
@@ -173,6 +183,7 @@ def get_current_module_ident(moduleid, cursor):
 
 
 def get_minor_version(module_ident, cursor):
+    """Retrieve minor version only given module_ident."""
     sql = '''SELECT m.minor_version
             FROM modules m
             WHERE m.module_ident = %s
@@ -183,13 +194,16 @@ def get_minor_version(module_ident, cursor):
 
 
 def next_version(module_ident, cursor):
+    """Determine next minor version for a given module_ident.
+
+    Note potential race condition!
+    """
     minor = get_minor_version(module_ident, cursor)
     return minor + 1
 
 
 def get_collections(module_ident, cursor):
-    """Get all the collections that the module is part of
-    """
+    """Get all the collections that the module is part of."""
     sql = '''
     WITH RECURSIVE t(node, parent, path, document) AS (
         SELECT tr.nodeid, tr.parent_id, ARRAY[tr.nodeid], tr.documentid
@@ -210,8 +224,9 @@ def get_collections(module_ident, cursor):
 
 
 def rebuild_collection_tree(old_collection_ident, new_document_id_map, cursor):
-    """Create a new tree for the collection based on the old tree but with
-    new document ids
+    """Create a new tree for the collection based on the old tree.
+
+    This usesnew document ids, replacing old ones.
     """
     sql = '''
     WITH RECURSIVE t(node, parent, document, title, childorder, latest, path)
@@ -264,8 +279,9 @@ def build_tree(node, parent):
 
 def republish_collection(next_minor_version, collection_ident, cursor,
                          revised=None):
-    """Insert a new row for collection_ident with a new version and return
-    the module_ident of the row inserted
+    """Insert a new row for collection_ident with a new version.
+
+    Returns the module_ident of the row inserted.
     """
     sql = '''
     INSERT INTO modules (portal_type, moduleid, uuid, version, name, created,
@@ -304,10 +320,7 @@ def republish_collection(next_minor_version, collection_ident, cursor,
 
 
 def set_version(portal_type, legacy_version, td):
-    """Sets the major_version and minor_version if they are not set
-    """
-    major = td['new']['major_version']
-    minor = td['new']['minor_version']
+    """Set the major_version and minor_version if they are not set."""
     modified = 'OK'
     legacy_major, legacy_minor = legacy_version.split('.')
 
@@ -330,8 +343,10 @@ def set_version(portal_type, legacy_version, td):
 
 
 def republish_module(td, cursor, db_connection):
-    """When a module is republished, the versions of the collections that it is
-    part of will need to be updated (a minor update).
+    """When a module is republished, create new minor versions of collections.
+
+    All collections that this module is contained in part of will need to be
+    updated (a minor update).
 
 
     e.g. there is a collection c1 v2.1, which contains module m1 v3
@@ -378,7 +393,7 @@ def republish_module(td, cursor, db_connection):
 
 
 def republish_module_trigger(plpy, td):
-    """Postgres database trigger for republishing a module
+    """Trigger called from postgres database when republishing a module.
 
     When a module is republished, the versions of the collections that it is
     part of will need to be updated (a minor update).
@@ -417,8 +432,7 @@ def republish_module_trigger(plpy, td):
 
 
 def assign_moduleid_default_trigger(plpy, td):
-    """A compatibilty trigger to fill in legacy ``moduleid`` field when
-    defined while inserting publications.
+    """Trigger to fill in legacy ``moduleid`` when publishing.
 
     This correctly assigns ``moduleid`` value to
     cnx-publishing publications. This does NOT include
@@ -434,8 +448,6 @@ def assign_moduleid_default_trigger(plpy, td):
     uuid = td['new']['uuid']
     moduleid = td['new']['moduleid']
     version = td['new']['version']
-    major_version = td['new']['major_version']
-    minor_version = td['new']['minor_version']
 
     # Is this an insert from legacy? Legacy always supplies the version.
     is_legacy_publication = version is not None
@@ -478,7 +490,9 @@ def assign_moduleid_default_trigger(plpy, td):
 
 
 def assign_version_default_trigger(plpy, td):
-    """A compatibilty trigger to fill in legacy data fields that are not
+    """Trigger to fill in legacy data fields.
+
+    A compatibilty trigger to fill in legacy data fields that are not
     populated when inserting publications from cnx-publishing.
 
     If this is not a legacy publication the ``version`` will be set
@@ -506,7 +520,9 @@ def assign_version_default_trigger(plpy, td):
 
 
 def assign_document_controls_default_trigger(plpy, td):
-    """A compatibilty trigger to fill in ``uuid`` and ``licenseid`` columns
+    """Trigger to fill in document_controls when legacy publishes.
+
+    A compatibilty trigger to fill in ``uuid`` and ``licenseid`` columns
     of the ``document_controls`` table that are not
     populated when inserting publications from legacy.
 
@@ -531,7 +547,9 @@ def assign_document_controls_default_trigger(plpy, td):
 
 
 def upsert_document_acl_trigger(plpy, td):
-    """A compatibility trigger to upsert authorization control entries (ACEs)
+    """Trigger for filling in acls when legacy publishes.
+
+    A compatibility trigger to upsert authorization control entries (ACEs)
     for legacy publications.
     """
     modified_state = "OK"
@@ -566,10 +584,8 @@ def upsert_document_acl_trigger(plpy, td):
 
 
 def upsert_users_from_legacy_publication_trigger(plpy, td):
-    """A compatibility trigger to upsert users from the legacy persons table.
-    """
+    """A compatibility trigger to upsert users from legacy persons table."""
     modified_state = "OK"
-    uuid_ = td['new']['uuid']
     authors = td['new']['authors'] and td['new']['authors'] or []
     maintainers = td['new']['maintainers'] and td['new']['maintainers'] or []
     licensors = td['new']['licensors'] and td['new']['licensors'] or []
@@ -602,7 +618,9 @@ def upsert_users_from_legacy_publication_trigger(plpy, td):
 
 
 def insert_users_for_optional_roles_trigger(plpy, td):
-    """A compatibility trigger to insert users from moduleoptionalroles
+    """Trigger to update users from optional roles entries.
+
+    A compatibility trigger to insert users from moduleoptionalroles
     records. This is primarily for legacy compatibility, but it is not
     possible to tell whether the entry came from legacy or cnx-publishing.
     Therefore, we only insert into users.
@@ -627,7 +645,7 @@ def insert_users_for_optional_roles_trigger(plpy, td):
 
 
 def add_module_file(plpy, td):
-    """Postgres database trigger for adding a module file
+    """Database trigger for adding a module file.
 
     When a legacy ``index.cnxml`` is added, this trigger
     transforms it into html and stores it as ``index.cnxml.html``.
@@ -642,12 +660,13 @@ def add_module_file(plpy, td):
     import plpydbapi
 
     module_ident = td['new']['module_ident']
-    fileid = td['new']['fileid']
     filename = td['new']['filename']
     msg = "produce {}->{} for module_ident = {}"
 
     def check_for(filenames, module_ident):
-        """Check for a file at ``filename`` associated with
+        """Find filenames associated with module_ident.
+
+        Check for a file at ``filename`` associated with
         module at ``module_ident``.
         """
         stmt = plpy.prepare("""\
@@ -700,7 +719,9 @@ def check_for(filenames, module_ident):
 
 
 def _transform_abstract(cursor, module_ident):
-    """Transforms an abstract using one of content columns
+    """Transform abstract, bi-directionally.
+
+    Transforms an abstract using one of content columns
     ('abstract' or 'html') to determine which direction the transform
     will go (cnxml->html or html->cnxml).
     A transform is done on either one of them to make
@@ -739,6 +760,7 @@ def _transform_abstract(cursor, module_ident):
 
 
 def get_collection_tree(collection_ident, cursor):
+    """Build and retrieve json tree representation of a book."""
     cursor.execute('''
     WITH RECURSIVE t(node, parent, document, path) AS (
         SELECT tr.nodeid, tr.parent_id, tr.documentid, ARRAY[tr.nodeid]
@@ -757,6 +779,7 @@ def get_collection_tree(collection_ident, cursor):
 
 
 def get_module_can_publish(cursor, id):
+    """Return userids allowed to publish this book."""
     cursor.execute("""
 SELECT DISTINCT user_id
 FROM document_acl
diff --git a/cnxarchive/transforms/resolvers.py b/cnxarchive/transforms/resolvers.py
index 0b24b720..53691f6e 100644
--- a/cnxarchive/transforms/resolvers.py
+++ b/cnxarchive/transforms/resolvers.py
@@ -13,6 +13,7 @@
 
 from lxml import etree
 
+from ..utils import split_ident_hash
 
 __all__ = (
     'MODULE_REFERENCE', 'RESOURCE_REFERENCE',
@@ -253,6 +254,7 @@ def apply_xpath(self, xpath):
 
     def _should_ignore_reference(self, ref):
         """Given an href string, determine if it should be ignored.
+
         For example, external links and mailto references should be ignored.
         """
         ref = ref.strip()
@@ -332,7 +334,8 @@ def get_resource_info(self, filename, document_id=None, version=None):
 
     def get_page_ident_hash(self, page_uuid, page_version,
                             book_uuid, book_version, latest=None):
-        """Returns the uuid of the page and full ident_hash of the page
+        """Return the uuid of the page and full ident_hash of the page.
+
         which may or may not include the book uuid depending on whether
         the page is within the book.
         """
@@ -484,7 +487,8 @@ def get_mid_n_version(self, id, version):
         return module_id, version
 
     def fix_module_id(self):
-        """Assigns the module-id to the document element.
+        """Assign the module-id to the document element.
+
         This is something the transforms are unable to do because they lack
         this piece of information.
         """
@@ -564,8 +568,8 @@ def fix_link_references(self):
                 id, version, bound_document, url_frag = payload
                 mid, version = self.get_mid_n_version(id, version)
                 if mid is None:
-                    bound_ident_hash = split_ident_hash(bound_document)
-                    cid, cversion = self.get_mid_n_version(*bound_ident_hash)
+                    id, version, id_type = split_ident_hash(bound_document)
+                    cid, cversion = self.get_mid_n_version(id, version)
                     if cid is None:
                         # Binder ref doesn't exist, but Document does.
                         # Assign the document specific attributes.
diff --git a/cnxarchive/views.py b/cnxarchive/views.py
index 0b047945..512fa2a2 100644
--- a/cnxarchive/views.py
+++ b/cnxarchive/views.py
@@ -5,14 +5,13 @@
 # Public License version 3 (AGPLv3).
 # See LICENCE.txt for details.
 # ###
+"""All the views."""
 import os
 import json
 import logging
 import psycopg2
-import urlparse
 
 from lxml import etree
-from cnxquerygrammar.query_parser import grammar, DictFormater
 from cnxepub.models import flatten_tree_to_ident_hashes
 from datetime import datetime, timedelta
 from pytz import timezone
@@ -49,6 +48,8 @@
 
 
 class ExportError(Exception):
+    """Used as catchall for other export errors."""
+
     pass
 
 
@@ -58,8 +59,9 @@ class ExportError(Exception):
 
 def redirect_to_canonical(cursor, id, version, id_type,
                           route_name='content', route_args=None):
-    """Redirect to latest version of a module / collection using the provided
-    path
+    """Redirect to latest version of a module / collection.
+
+    Looks up path associated with the provided router.
     """
     if id_type == CNXHash.SHORTID:
         cursor.execute(SQL['get-module-uuid'], {'id': id})
@@ -91,8 +93,7 @@ def redirect_to_canonical(cursor, id, version, id_type,
 
 
 def get_content_metadata(id, version, cursor):
-    """Return metadata related to the content from the database
-    """
+    """Return metadata related to the content from the database."""
     # Do the module lookup
     args = dict(id=id, version=version)
     # FIXME We are doing two queries here that can hopefully be
@@ -117,6 +118,7 @@ def get_content_metadata(id, version, cursor):
 
 
 def is_latest(cursor, id, version):
+    """Determine if this is the latest version of this content."""
     cursor.execute(SQL['get-module-versions'], {'id': id})
     try:
         latest_version = cursor.fetchone()[0]
@@ -130,6 +132,7 @@ def is_latest(cursor, id, version):
 
 
 def get_type_info():
+    """Lookup type info from app configuration."""
     if TYPE_INFO:
         return
     settings = get_current_registry().settings
@@ -148,8 +151,7 @@ def get_type_info():
 
 
 def get_export_allowable_types(cursor, exports_dirs, id, version):
-    """Return export types
-    """
+    """Return export types."""
     get_type_info()
     request = get_current_request()
 
@@ -168,12 +170,13 @@ def get_export_allowable_types(cursor, exports_dirs, id, version):
                     'export', ident_hash=join_ident_hash(id, version),
                     type=type_name, ignore=u'/{}'.format(filename))
                 }
-        except ExportError as e:
+        except ExportError as e:  # noqa
             # Some other problem, skip it
             pass
 
 
 def get_export_file(cursor, id, version, type, exports_dirs):
+    """Retrieve file associated with document."""
     get_type_info()
     type_info = dict(TYPE_INFO)
 
@@ -244,6 +247,7 @@ def get_export_file(cursor, id, version, type, exports_dirs):
 
 
 def html_listify(tree, root_ul_element):
+    """Recursively construct HTML nested list version of book tree."""
     request = get_current_request()
     for node in tree:
         li_elm = etree.SubElement(root_ul_element, 'li')
@@ -258,6 +262,7 @@ def html_listify(tree, root_ul_element):
 
 
 def tree_to_html(tree):
+    """Return html list version of book tree."""
     ul = etree.Element('ul')
     html_listify([tree], ul)
     return HTML_WRAPPER.format(etree.tostring(ul))
@@ -300,7 +305,8 @@ def _get_content_json(request=None, ident_hash=None, reqtype=None):
                 if reqtype:
                     route_name = 'content-{}'.format(reqtype)
                 redirect_to_canonical(cursor, id, version, id_type,
-                                      route_name=route_name, route_args=route_args)
+                                      route_name=route_name,
+                                      route_args=route_args)
 
             result = get_content_metadata(id, version, cursor)
             if result['mediaType'] == COLLECTION_MIMETYPE:
@@ -334,13 +340,16 @@ def _get_content_json(request=None, ident_hash=None, reqtype=None):
 
 def html_date(datetime):
     """
-    Returns the HTTP-date format of python's datetime time as per:
+    Return the HTTP-date format of python's datetime time.
+
+    Based on:
     http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html
     """
     return datetime.strftime("%a, %d %b %Y %X %Z")
 
 
 def notblocked(page):
+    """Determine if given url is a page that should be in sitemap."""
     for blocked in PAGES_TO_BLOCK:
         if blocked[0] != '*':
             blocked = '*' + blocked
@@ -357,9 +366,7 @@ def notblocked(page):
 
 @view_config(route_name='content-json', request_method='GET')
 def get_content_json(request):
-    """Retrieve a piece of content as JSON using
-    the ident-hash (uuid@version).
-    """
+    """Retrieve content as JSON using the ident-hash (uuid@version)."""
     result = _get_content_json(request=request, reqtype='json')
 
     result = json.dumps(result)
@@ -372,9 +379,7 @@ def get_content_json(request):
 
 @view_config(route_name='content-html', request_method='GET')
 def get_content_html(request):
-    """Retrieve a piece of content as HTML using
-    the ident-hash (uuid@version).
-    """
+    """Retrieve content as HTML using the ident-hash (uuid@version)."""
     result = _get_content_json(request=request, reqtype='html')
 
     media_type = result['mediaType']
@@ -392,7 +397,8 @@ def get_content_html(request):
 
 @view_config(route_name='content', request_method='GET')
 def get_content(request):
-    """Retrieve a piece of content using the ident-hash (uuid@version).
+    """Retrieve content using the ident-hash (uuid@version).
+
     Depending on the HTTP_ACCEPT header return HTML or JSON.
     """
     if 'application/xhtml+xml' in request.headers.get('ACCEPT', ''):
@@ -407,7 +413,8 @@ def get_content(request):
 @view_config(route_name='legacy-redirect-w-version', request_method='GET')
 def redirect_legacy_content(request):
     """Redirect from legacy /content/id/version to new /contents/uuid@version.
-       Handles collection context (book) as well
+
+    Handles collection context (book) as well.
     """
     settings = get_current_registry().settings
     db_conn_string = settings[config.CONNECTION_STRING]
@@ -490,8 +497,7 @@ def get_resource(request):
 
 @view_config(route_name='content-extras', request_method='GET')
 def get_extra(request):
-    """Return information about a module / collection that cannot be cached
-    """
+    """Return information about a module / collection that cannot be cached."""
     settings = get_current_registry().settings
     exports_dirs = settings['exports-directories'].split()
     args = request.matchdict
@@ -543,8 +549,7 @@ def get_export(request):
 
 @view_config(route_name='search', request_method='GET')
 def search(request):
-    """Search API
-    """
+    """Search API."""
     empty_response = json.dumps({
         u'query': {
             u'limits': [],
@@ -684,8 +689,7 @@ def search(request):
 
 
 def _get_subject_list(cursor):
-    """Return all subjects (tags) in the database except "internal" scheme
-    """
+    """Return all subjects (tags) in the database except "internal" scheme."""
     subject = None
     last_tagid = None
     cursor.execute(SQL['get-subject-list'])
@@ -710,13 +714,13 @@ def _get_subject_list(cursor):
 
 
 def _get_featured_links(cursor):
-    """Return featured books for the front page"""
+    """Return featured books for the front page."""
     cursor.execute(SQL['get-featured-links'])
     return [i[0] for i in cursor.fetchall()]
 
 
 def _get_service_state_messages(cursor):
-    """Returns a list of service messages."""
+    """Return a list of service messages."""
     cursor.execute(SQL['get-service-state-messages'])
     return [i[0] for i in cursor.fetchall()]
 
@@ -729,8 +733,7 @@ def _get_licenses(cursor):
 
 @view_config(route_name='extras', request_method='GET')
 def extras(request):
-    """Return a dict with archive metadata for webview
-    """
+    """Return a dict with archive metadata for webview."""
     settings = get_current_registry().settings
     with psycopg2.connect(settings[config.CONNECTION_STRING]) as db_connection:
         with db_connection.cursor() as cursor:
@@ -750,8 +753,7 @@ def extras(request):
 
 @view_config(route_name='sitemap', request_method='GET')
 def sitemap(request):
-    """Return a sitemap xml file for search engines
-    """
+    """Return a sitemap xml file for search engines."""
     settings = get_current_registry().settings
     xml = Sitemap()
     connection_string = settings[config.CONNECTION_STRING]
@@ -786,9 +788,7 @@ def sitemap(request):
 
 @view_config(route_name='robots', request_method='GET')
 def robots(request):
-    """
-    Returns a robots.txt file
-    """
+    """Return a robots.txt file."""
     robots_dot_txt = Robots(sitemap=request.route_url('sitemap'))
 
     bot_delays = {