From 39f7d0eb4ba5f8eb5476868de6631feff4d0c75b Mon Sep 17 00:00:00 2001 From: Ross Reedstrom Date: Tue, 27 Oct 2015 17:06:44 -0500 Subject: [PATCH] flake8 compliance, and a couple bug fixes --- cnxarchive/database.py | 99 ++++++++++++++++++------------ cnxarchive/transforms/resolvers.py | 12 ++-- cnxarchive/views.py | 68 ++++++++++---------- 3 files changed, 103 insertions(+), 76 deletions(-) diff --git a/cnxarchive/database.py b/cnxarchive/database.py index 24a57261..10a09acb 100644 --- a/cnxarchive/database.py +++ b/cnxarchive/database.py @@ -5,12 +5,10 @@ # Public License version 3 (AGPLv3). # See LICENCE.txt for details. # ### -"""Database models and utilities""" -import datetime +"""Database models and utilities.""" import os import json import psycopg2 -import re from . import config from .transforms import ( @@ -19,13 +17,18 @@ ) from .utils import split_ident_hash - here = os.path.abspath(os.path.dirname(__file__)) SQL_DIRECTORY = os.path.join(here, 'sql') DB_SCHEMA_DIRECTORY = os.path.join(SQL_DIRECTORY, 'schema') SCHEMA_MANIFEST_FILENAME = 'manifest.json' +class ContentNotFound(Exception): + """Used when database retrival fails.""" + + pass + + def _read_sql_file(name): path = os.path.join(SQL_DIRECTORY, '{}.sql'.format(name)) with open(path, 'r') as fp: @@ -73,7 +76,8 @@ def _read_schema_manifest(manifest_filepath): def _compile_manifest(manifest, content_modifier=None): - """Compiles a given ``manifest`` into a sequence of schema items. + """Compile a given ``manifest`` into a sequence of schema items. + Apply the optional ``content_modifier`` to each file's contents. """ items = [] @@ -90,6 +94,7 @@ def _compile_manifest(manifest, content_modifier=None): def get_schema(): + """Return the current schema.""" manifest_filepath = os.path.join(DB_SCHEMA_DIRECTORY, SCHEMA_MANIFEST_FILENAME) schema_manifest = _read_schema_manifest(manifest_filepath) @@ -111,8 +116,9 @@ def initdb(settings): def get_module_ident_from_ident_hash(ident_hash, cursor): - """Returns the moduleid for a given ``ident_hash``.""" - uuid, (mj_ver, mn_ver), id_type = split_ident_hash(ident_hash, split_version=True) + """Return the moduleid for a given ``ident_hash``.""" + uuid, (mj_ver, mn_ver), id_type = split_ident_hash(ident_hash, + split_version=True) args = [uuid] stmt = "SELECT module_ident FROM {} WHERE uuid = %s" table_name = 'modules' @@ -135,9 +141,7 @@ def get_module_ident_from_ident_hash(ident_hash, cursor): def get_tree(ident_hash, cursor): - """Given an ``ident_hash``, return a JSON representation - of the binder tree. - """ + """Return a JSON representation of the binder tree for ``ident_hash``.""" uuid, version, id_type = split_ident_hash(ident_hash) cursor.execute(SQL['get-tree-by-uuid-n-version'], (uuid, version,)) @@ -153,6 +157,7 @@ def get_tree(ident_hash, cursor): def get_module_uuid(db_connection, moduleid): + """Retrieve page uuid from legacy moduleid.""" with db_connection.cursor() as cursor: cursor.execute("SELECT uuid FROM modules WHERE moduleid = %s;", (moduleid,)) @@ -164,6 +169,11 @@ def get_module_uuid(db_connection, moduleid): def get_current_module_ident(moduleid, cursor): + """Retrieve module_ident for a given moduleid. + + Note that module_ident is used only for internal database relational + associations, and is equivalent to a uuid@version for a given document. + """ sql = '''SELECT m.module_ident FROM modules m WHERE m.moduleid = %s ORDER BY revised DESC''' cursor.execute(sql, [moduleid]) @@ -173,6 +183,7 @@ def get_current_module_ident(moduleid, cursor): def get_minor_version(module_ident, cursor): + """Retrieve minor version only given module_ident.""" sql = '''SELECT m.minor_version FROM modules m WHERE m.module_ident = %s @@ -183,13 +194,16 @@ def get_minor_version(module_ident, cursor): def next_version(module_ident, cursor): + """Determine next minor version for a given module_ident. + + Note potential race condition! + """ minor = get_minor_version(module_ident, cursor) return minor + 1 def get_collections(module_ident, cursor): - """Get all the collections that the module is part of - """ + """Get all the collections that the module is part of.""" sql = ''' WITH RECURSIVE t(node, parent, path, document) AS ( SELECT tr.nodeid, tr.parent_id, ARRAY[tr.nodeid], tr.documentid @@ -210,8 +224,9 @@ def get_collections(module_ident, cursor): def rebuild_collection_tree(old_collection_ident, new_document_id_map, cursor): - """Create a new tree for the collection based on the old tree but with - new document ids + """Create a new tree for the collection based on the old tree. + + This usesnew document ids, replacing old ones. """ sql = ''' WITH RECURSIVE t(node, parent, document, title, childorder, latest, path) @@ -264,8 +279,9 @@ def build_tree(node, parent): def republish_collection(next_minor_version, collection_ident, cursor, revised=None): - """Insert a new row for collection_ident with a new version and return - the module_ident of the row inserted + """Insert a new row for collection_ident with a new version. + + Returns the module_ident of the row inserted. """ sql = ''' INSERT INTO modules (portal_type, moduleid, uuid, version, name, created, @@ -304,10 +320,7 @@ def republish_collection(next_minor_version, collection_ident, cursor, def set_version(portal_type, legacy_version, td): - """Sets the major_version and minor_version if they are not set - """ - major = td['new']['major_version'] - minor = td['new']['minor_version'] + """Set the major_version and minor_version if they are not set.""" modified = 'OK' legacy_major, legacy_minor = legacy_version.split('.') @@ -330,8 +343,10 @@ def set_version(portal_type, legacy_version, td): def republish_module(td, cursor, db_connection): - """When a module is republished, the versions of the collections that it is - part of will need to be updated (a minor update). + """When a module is republished, create new minor versions of collections. + + All collections that this module is contained in part of will need to be + updated (a minor update). e.g. there is a collection c1 v2.1, which contains module m1 v3 @@ -378,7 +393,7 @@ def republish_module(td, cursor, db_connection): def republish_module_trigger(plpy, td): - """Postgres database trigger for republishing a module + """Trigger called from postgres database when republishing a module. When a module is republished, the versions of the collections that it is part of will need to be updated (a minor update). @@ -417,8 +432,7 @@ def republish_module_trigger(plpy, td): def assign_moduleid_default_trigger(plpy, td): - """A compatibilty trigger to fill in legacy ``moduleid`` field when - defined while inserting publications. + """Trigger to fill in legacy ``moduleid`` when publishing. This correctly assigns ``moduleid`` value to cnx-publishing publications. This does NOT include @@ -434,8 +448,6 @@ def assign_moduleid_default_trigger(plpy, td): uuid = td['new']['uuid'] moduleid = td['new']['moduleid'] version = td['new']['version'] - major_version = td['new']['major_version'] - minor_version = td['new']['minor_version'] # Is this an insert from legacy? Legacy always supplies the version. is_legacy_publication = version is not None @@ -478,7 +490,9 @@ def assign_moduleid_default_trigger(plpy, td): def assign_version_default_trigger(plpy, td): - """A compatibilty trigger to fill in legacy data fields that are not + """Trigger to fill in legacy data fields. + + A compatibilty trigger to fill in legacy data fields that are not populated when inserting publications from cnx-publishing. If this is not a legacy publication the ``version`` will be set @@ -506,7 +520,9 @@ def assign_version_default_trigger(plpy, td): def assign_document_controls_default_trigger(plpy, td): - """A compatibilty trigger to fill in ``uuid`` and ``licenseid`` columns + """Trigger to fill in document_controls when legacy publishes. + + A compatibilty trigger to fill in ``uuid`` and ``licenseid`` columns of the ``document_controls`` table that are not populated when inserting publications from legacy. @@ -531,7 +547,9 @@ def assign_document_controls_default_trigger(plpy, td): def upsert_document_acl_trigger(plpy, td): - """A compatibility trigger to upsert authorization control entries (ACEs) + """Trigger for filling in acls when legacy publishes. + + A compatibility trigger to upsert authorization control entries (ACEs) for legacy publications. """ modified_state = "OK" @@ -566,10 +584,8 @@ def upsert_document_acl_trigger(plpy, td): def upsert_users_from_legacy_publication_trigger(plpy, td): - """A compatibility trigger to upsert users from the legacy persons table. - """ + """A compatibility trigger to upsert users from legacy persons table.""" modified_state = "OK" - uuid_ = td['new']['uuid'] authors = td['new']['authors'] and td['new']['authors'] or [] maintainers = td['new']['maintainers'] and td['new']['maintainers'] or [] licensors = td['new']['licensors'] and td['new']['licensors'] or [] @@ -602,7 +618,9 @@ def upsert_users_from_legacy_publication_trigger(plpy, td): def insert_users_for_optional_roles_trigger(plpy, td): - """A compatibility trigger to insert users from moduleoptionalroles + """Trigger to update users from optional roles entries. + + A compatibility trigger to insert users from moduleoptionalroles records. This is primarily for legacy compatibility, but it is not possible to tell whether the entry came from legacy or cnx-publishing. Therefore, we only insert into users. @@ -627,7 +645,7 @@ def insert_users_for_optional_roles_trigger(plpy, td): def add_module_file(plpy, td): - """Postgres database trigger for adding a module file + """Database trigger for adding a module file. When a legacy ``index.cnxml`` is added, this trigger transforms it into html and stores it as ``index.cnxml.html``. @@ -642,12 +660,13 @@ def add_module_file(plpy, td): import plpydbapi module_ident = td['new']['module_ident'] - fileid = td['new']['fileid'] filename = td['new']['filename'] msg = "produce {}->{} for module_ident = {}" def check_for(filenames, module_ident): - """Check for a file at ``filename`` associated with + """Find filenames associated with module_ident. + + Check for a file at ``filename`` associated with module at ``module_ident``. """ stmt = plpy.prepare("""\ @@ -700,7 +719,9 @@ def check_for(filenames, module_ident): def _transform_abstract(cursor, module_ident): - """Transforms an abstract using one of content columns + """Transform abstract, bi-directionally. + + Transforms an abstract using one of content columns ('abstract' or 'html') to determine which direction the transform will go (cnxml->html or html->cnxml). A transform is done on either one of them to make @@ -739,6 +760,7 @@ def _transform_abstract(cursor, module_ident): def get_collection_tree(collection_ident, cursor): + """Build and retrieve json tree representation of a book.""" cursor.execute(''' WITH RECURSIVE t(node, parent, document, path) AS ( SELECT tr.nodeid, tr.parent_id, tr.documentid, ARRAY[tr.nodeid] @@ -757,6 +779,7 @@ def get_collection_tree(collection_ident, cursor): def get_module_can_publish(cursor, id): + """Return userids allowed to publish this book.""" cursor.execute(""" SELECT DISTINCT user_id FROM document_acl diff --git a/cnxarchive/transforms/resolvers.py b/cnxarchive/transforms/resolvers.py index 0b24b720..53691f6e 100644 --- a/cnxarchive/transforms/resolvers.py +++ b/cnxarchive/transforms/resolvers.py @@ -13,6 +13,7 @@ from lxml import etree +from ..utils import split_ident_hash __all__ = ( 'MODULE_REFERENCE', 'RESOURCE_REFERENCE', @@ -253,6 +254,7 @@ def apply_xpath(self, xpath): def _should_ignore_reference(self, ref): """Given an href string, determine if it should be ignored. + For example, external links and mailto references should be ignored. """ ref = ref.strip() @@ -332,7 +334,8 @@ def get_resource_info(self, filename, document_id=None, version=None): def get_page_ident_hash(self, page_uuid, page_version, book_uuid, book_version, latest=None): - """Returns the uuid of the page and full ident_hash of the page + """Return the uuid of the page and full ident_hash of the page. + which may or may not include the book uuid depending on whether the page is within the book. """ @@ -484,7 +487,8 @@ def get_mid_n_version(self, id, version): return module_id, version def fix_module_id(self): - """Assigns the module-id to the document element. + """Assign the module-id to the document element. + This is something the transforms are unable to do because they lack this piece of information. """ @@ -564,8 +568,8 @@ def fix_link_references(self): id, version, bound_document, url_frag = payload mid, version = self.get_mid_n_version(id, version) if mid is None: - bound_ident_hash = split_ident_hash(bound_document) - cid, cversion = self.get_mid_n_version(*bound_ident_hash) + id, version, id_type = split_ident_hash(bound_document) + cid, cversion = self.get_mid_n_version(id, version) if cid is None: # Binder ref doesn't exist, but Document does. # Assign the document specific attributes. diff --git a/cnxarchive/views.py b/cnxarchive/views.py index 0b047945..512fa2a2 100644 --- a/cnxarchive/views.py +++ b/cnxarchive/views.py @@ -5,14 +5,13 @@ # Public License version 3 (AGPLv3). # See LICENCE.txt for details. # ### +"""All the views.""" import os import json import logging import psycopg2 -import urlparse from lxml import etree -from cnxquerygrammar.query_parser import grammar, DictFormater from cnxepub.models import flatten_tree_to_ident_hashes from datetime import datetime, timedelta from pytz import timezone @@ -49,6 +48,8 @@ class ExportError(Exception): + """Used as catchall for other export errors.""" + pass @@ -58,8 +59,9 @@ class ExportError(Exception): def redirect_to_canonical(cursor, id, version, id_type, route_name='content', route_args=None): - """Redirect to latest version of a module / collection using the provided - path + """Redirect to latest version of a module / collection. + + Looks up path associated with the provided router. """ if id_type == CNXHash.SHORTID: cursor.execute(SQL['get-module-uuid'], {'id': id}) @@ -91,8 +93,7 @@ def redirect_to_canonical(cursor, id, version, id_type, def get_content_metadata(id, version, cursor): - """Return metadata related to the content from the database - """ + """Return metadata related to the content from the database.""" # Do the module lookup args = dict(id=id, version=version) # FIXME We are doing two queries here that can hopefully be @@ -117,6 +118,7 @@ def get_content_metadata(id, version, cursor): def is_latest(cursor, id, version): + """Determine if this is the latest version of this content.""" cursor.execute(SQL['get-module-versions'], {'id': id}) try: latest_version = cursor.fetchone()[0] @@ -130,6 +132,7 @@ def is_latest(cursor, id, version): def get_type_info(): + """Lookup type info from app configuration.""" if TYPE_INFO: return settings = get_current_registry().settings @@ -148,8 +151,7 @@ def get_type_info(): def get_export_allowable_types(cursor, exports_dirs, id, version): - """Return export types - """ + """Return export types.""" get_type_info() request = get_current_request() @@ -168,12 +170,13 @@ def get_export_allowable_types(cursor, exports_dirs, id, version): 'export', ident_hash=join_ident_hash(id, version), type=type_name, ignore=u'/{}'.format(filename)) } - except ExportError as e: + except ExportError as e: # noqa # Some other problem, skip it pass def get_export_file(cursor, id, version, type, exports_dirs): + """Retrieve file associated with document.""" get_type_info() type_info = dict(TYPE_INFO) @@ -244,6 +247,7 @@ def get_export_file(cursor, id, version, type, exports_dirs): def html_listify(tree, root_ul_element): + """Recursively construct HTML nested list version of book tree.""" request = get_current_request() for node in tree: li_elm = etree.SubElement(root_ul_element, 'li') @@ -258,6 +262,7 @@ def html_listify(tree, root_ul_element): def tree_to_html(tree): + """Return html list version of book tree.""" ul = etree.Element('ul') html_listify([tree], ul) return HTML_WRAPPER.format(etree.tostring(ul)) @@ -300,7 +305,8 @@ def _get_content_json(request=None, ident_hash=None, reqtype=None): if reqtype: route_name = 'content-{}'.format(reqtype) redirect_to_canonical(cursor, id, version, id_type, - route_name=route_name, route_args=route_args) + route_name=route_name, + route_args=route_args) result = get_content_metadata(id, version, cursor) if result['mediaType'] == COLLECTION_MIMETYPE: @@ -334,13 +340,16 @@ def _get_content_json(request=None, ident_hash=None, reqtype=None): def html_date(datetime): """ - Returns the HTTP-date format of python's datetime time as per: + Return the HTTP-date format of python's datetime time. + + Based on: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html """ return datetime.strftime("%a, %d %b %Y %X %Z") def notblocked(page): + """Determine if given url is a page that should be in sitemap.""" for blocked in PAGES_TO_BLOCK: if blocked[0] != '*': blocked = '*' + blocked @@ -357,9 +366,7 @@ def notblocked(page): @view_config(route_name='content-json', request_method='GET') def get_content_json(request): - """Retrieve a piece of content as JSON using - the ident-hash (uuid@version). - """ + """Retrieve content as JSON using the ident-hash (uuid@version).""" result = _get_content_json(request=request, reqtype='json') result = json.dumps(result) @@ -372,9 +379,7 @@ def get_content_json(request): @view_config(route_name='content-html', request_method='GET') def get_content_html(request): - """Retrieve a piece of content as HTML using - the ident-hash (uuid@version). - """ + """Retrieve content as HTML using the ident-hash (uuid@version).""" result = _get_content_json(request=request, reqtype='html') media_type = result['mediaType'] @@ -392,7 +397,8 @@ def get_content_html(request): @view_config(route_name='content', request_method='GET') def get_content(request): - """Retrieve a piece of content using the ident-hash (uuid@version). + """Retrieve content using the ident-hash (uuid@version). + Depending on the HTTP_ACCEPT header return HTML or JSON. """ if 'application/xhtml+xml' in request.headers.get('ACCEPT', ''): @@ -407,7 +413,8 @@ def get_content(request): @view_config(route_name='legacy-redirect-w-version', request_method='GET') def redirect_legacy_content(request): """Redirect from legacy /content/id/version to new /contents/uuid@version. - Handles collection context (book) as well + + Handles collection context (book) as well. """ settings = get_current_registry().settings db_conn_string = settings[config.CONNECTION_STRING] @@ -490,8 +497,7 @@ def get_resource(request): @view_config(route_name='content-extras', request_method='GET') def get_extra(request): - """Return information about a module / collection that cannot be cached - """ + """Return information about a module / collection that cannot be cached.""" settings = get_current_registry().settings exports_dirs = settings['exports-directories'].split() args = request.matchdict @@ -543,8 +549,7 @@ def get_export(request): @view_config(route_name='search', request_method='GET') def search(request): - """Search API - """ + """Search API.""" empty_response = json.dumps({ u'query': { u'limits': [], @@ -684,8 +689,7 @@ def search(request): def _get_subject_list(cursor): - """Return all subjects (tags) in the database except "internal" scheme - """ + """Return all subjects (tags) in the database except "internal" scheme.""" subject = None last_tagid = None cursor.execute(SQL['get-subject-list']) @@ -710,13 +714,13 @@ def _get_subject_list(cursor): def _get_featured_links(cursor): - """Return featured books for the front page""" + """Return featured books for the front page.""" cursor.execute(SQL['get-featured-links']) return [i[0] for i in cursor.fetchall()] def _get_service_state_messages(cursor): - """Returns a list of service messages.""" + """Return a list of service messages.""" cursor.execute(SQL['get-service-state-messages']) return [i[0] for i in cursor.fetchall()] @@ -729,8 +733,7 @@ def _get_licenses(cursor): @view_config(route_name='extras', request_method='GET') def extras(request): - """Return a dict with archive metadata for webview - """ + """Return a dict with archive metadata for webview.""" settings = get_current_registry().settings with psycopg2.connect(settings[config.CONNECTION_STRING]) as db_connection: with db_connection.cursor() as cursor: @@ -750,8 +753,7 @@ def extras(request): @view_config(route_name='sitemap', request_method='GET') def sitemap(request): - """Return a sitemap xml file for search engines - """ + """Return a sitemap xml file for search engines.""" settings = get_current_registry().settings xml = Sitemap() connection_string = settings[config.CONNECTION_STRING] @@ -786,9 +788,7 @@ def sitemap(request): @view_config(route_name='robots', request_method='GET') def robots(request): - """ - Returns a robots.txt file - """ + """Return a robots.txt file.""" robots_dot_txt = Robots(sitemap=request.route_url('sitemap')) bot_delays = {