Skip to content

Commit

Permalink
Merge pull request #125 from Connexions/collate
Browse files Browse the repository at this point in the history
Collation
  • Loading branch information
reedstrm committed Apr 26, 2016
2 parents 2e250f1 + 3732481 commit acd7554
Show file tree
Hide file tree
Showing 12 changed files with 558 additions and 25 deletions.
7 changes: 4 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,12 @@ before_install:
# * Install rhaptos.cnxmlutils
- git clone https://github.com/Connexions/rhaptos.cnxmlutils.git
- cd rhaptos.cnxmlutils && python setup.py install && cd ..
# * Install bug-fixes branch of plpydbapi
- git clone -b bug-fixes https://github.com/Connexions/plpydbapi.git
- cd plpydbapi && python setup.py install && cd ..

# Installation for cnx-publishing
# Install cssselect2 (unreleased), required by cnx-easybake
- pip install git+https://github.com/Connexions/cssselect2.git#egg=cssselect2
# Install cnx-easybake
- pip install git+https://github.com/Connexions/cnx-easybake.git#egg=cnx-easybake
# * Install cnx-epub
- git clone https://github.com/Connexions/cnx-epub.git
- cd cnx-epub && python setup.py install && cd ..
Expand Down
47 changes: 47 additions & 0 deletions cnxpublishing/collation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
# ###
# Copyright (c) 2016, Rice University
# This software is subject to the provisions of the GNU Affero General
# Public License version 3 (AGPLv3).
# See LICENCE.txt for details.
# ###
"""Provides a means of collating a binder and persisting it to the archive."""
import cnxepub
from cnxepub.collation import collate as collate_models

from .db import with_db_cursor
from .publish import (
publish_collated_document,
publish_collated_tree,
publish_composite_model,
)


@with_db_cursor
def collate(binder, publisher, message, cursor):
    """Collate the contents of `binder` and persist the results
    alongside the already-published content.
    """
    collate_models(binder)

    def is_composite(model):
        return isinstance(model, cnxepub.CompositeDocument)

    def is_plain_document(model):
        return (isinstance(model, cnxepub.Document)
                and not is_composite(model))

    # Publish the newly generated composite documents first...
    for composite_doc in cnxepub.flatten_to(binder, is_composite):
        publish_composite_model(cursor, composite_doc, binder,
                                publisher, message)

    # ...then persist the collated content of the existing documents.
    for document in cnxepub.flatten_to(binder, is_plain_document):
        publish_collated_document(cursor, document, binder)

    publish_collated_tree(cursor, cnxepub.model_to_tree(binder))

    return []


__all__ = ('collate',)
3 changes: 2 additions & 1 deletion cnxpublishing/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
)
from .utils import (parse_archive_uri, parse_user_uri, join_ident_hash,
split_ident_hash)
from .publish import publish_model, republish_binders


here = os.path.abspath(os.path.dirname(__file__))
Expand Down Expand Up @@ -949,6 +948,7 @@ def publish_pending(cursor, publication_id):

all_models = []

from .publish import publish_model
# Commit documents one at a time...
type_ = cnxepub.Document.__name__
cursor.execute("""\
Expand Down Expand Up @@ -995,6 +995,7 @@ def publish_pending(cursor, publication_id):
all_models.append(binder)

# Republish binders containing shared documents.
from .publish import republish_binders
republished_ident_hashes = republish_binders(cursor, all_models)

# Lastly, update the publication status.
Expand Down
1 change: 1 addition & 0 deletions cnxpublishing/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def declare_api_routes(config):
'/publications/{id}/license-acceptances/{uid}')
add_route('publication-role-acceptance',
'/publications/{id}/role-acceptances/{uid}')
add_route('collate-content', '/contents/{ident_hash}/collate-content')

# Moderation routes
add_route('moderation', '/moderations')
Expand Down
129 changes: 117 additions & 12 deletions cnxpublishing/publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,23 @@
"""\
Functions used to commit publication works to the archive.
"""
import collections
import hashlib

import cnxepub
import psycopg2
from cnxepub import Document, Binder
from cnxepub import (
Binder,
CompositeDocument,
Document,
)

from .utils import parse_user_uri, join_ident_hash, split_ident_hash
from .utils import (
issequence,
join_ident_hash,
parse_user_uri,
split_ident_hash,
)


ATTRIBUTED_ROLE_KEYS = (
Expand Down Expand Up @@ -92,16 +102,18 @@
TREE_NODE_INSERT = """
INSERT INTO trees
(nodeid, parent_id, documentid,
title, childorder, latest)
title, childorder, latest, is_collated)
VALUES
(DEFAULT, %(parent_id)s, %(document_id)s,
%(title)s, %(child_order)s, %(is_latest)s)
%(title)s, %(child_order)s, %(is_latest)s, %(is_collated)s)
RETURNING nodeid
"""


def _model_to_portaltype(model):
if isinstance(model, Document):
if isinstance(model, CompositeDocument):
type_ = 'CompositeModule'
elif isinstance(model, Document):
type_ = 'Module'
elif isinstance(model, Binder):
type_ = 'Collection'
Expand Down Expand Up @@ -241,7 +253,7 @@ def _insert_resource_file(cursor, module_ident, resource):
VALUES (%s, %s, %s)""", args)


def _insert_tree(cursor, tree, parent_id=None, index=0):
def _insert_tree(cursor, tree, parent_id=None, index=0, is_collated=False):
"""Inserts a binder tree into the archive."""
if isinstance(tree, dict):
if tree['id'] == 'subcol':
Expand All @@ -268,14 +280,15 @@ def _insert_tree(cursor, tree, parent_id=None, index=0):
cursor.execute(TREE_NODE_INSERT,
dict(document_id=document_id, parent_id=parent_id,
title=title, child_order=index,
is_latest=is_latest))
is_latest=is_latest, is_collated=is_collated))
node_id = cursor.fetchone()[0]
if 'contents' in tree:
_insert_tree(cursor, tree['contents'], parent_id=node_id)
_insert_tree(cursor, tree['contents'], parent_id=node_id,
is_collated=is_collated)
elif isinstance(tree, list):
for tree_node in tree:
_insert_tree(cursor, tree_node, parent_id=parent_id,
index=tree.index(tree_node))
index=tree.index(tree_node), is_collated=is_collated)


def publish_model(cursor, model, publisher, message):
Expand Down Expand Up @@ -322,6 +335,94 @@ def publish_model(cursor, model, publisher, message):
return ident_hash


def publish_composite_model(cursor, model, parent_model, publisher, message):
    """Publish the ``model`` and return its ident_hash.

    :param cursor: database cursor used for all inserts
    :param model: the ``cnxepub.CompositeDocument`` to publish
    :param parent_model: the binder that provides the publication context
    :param publisher: a single publisher id (a one-element sequence is
        also accepted)
    :param message: publication message stored with the metadata
    :raises ValueError: if ``model`` is not a ``CompositeDocument`` or
        more than one publisher is given
    """
    if not isinstance(model, CompositeDocument):
        raise ValueError("This function only publishes CompositeDocument "
                         "objects. '{}' was given.".format(type(model)))
    if issequence(publisher) and len(publisher) > 1:
        raise ValueError("Only one publisher is allowed. '{}' "
                         "were given: {}"
                         .format(len(publisher), publisher))
    module_ident, ident_hash = _insert_metadata(cursor, model,
                                                publisher, message)
    # Persist any resources referenced by the document before its content.
    for resource in model.resources:
        _insert_resource_file(cursor, module_ident, resource)
    html = str(cnxepub.DocumentContentFormatter(model))
    file_arg = {
        'module_ident': module_ident,
        'parent_ident_hash': parent_model.ident_hash,
        'media_type': 'text/html',
        'data': psycopg2.Binary(html.encode('utf-8')),
    }
    # Insert the rendered HTML and, in the same statement, associate it
    # with the parent binder (the collation context) via
    # collated_file_associations.
    cursor.execute("""\
WITH file_insertion AS (
  INSERT INTO files (file, media_type) VALUES (%(data)s, %(media_type)s)
  RETURNING fileid)
INSERT INTO collated_file_associations
  (context, item, fileid)
VALUES
  ((SELECT module_ident FROM modules
    WHERE uuid || '@' || concat_ws('.', major_version, minor_version)
    = %(parent_ident_hash)s),
   %(module_ident)s,
   (SELECT fileid FROM file_insertion))""", file_arg)

    # Stamp the newly assigned identity back onto the model so callers
    # see the published id/version.
    model.id, model.metadata['version'] = split_ident_hash(ident_hash)
    model.set_uri('cnx-archive', ident_hash)
    return ident_hash


def publish_collated_document(cursor, model, parent_model):
    """Persist the collated content of ``model`` in the context of
    ``parent_model``.  Note, the model's content is expected to already
    be the collated content; this only stores that content in the
    archive.

    :param cursor: database cursor used for the inserts
    :param model: a ``cnxepub.Document`` whose content has been collated
    :param parent_model: the binder providing the collation context
    """
    html = str(cnxepub.DocumentContentFormatter(model)).encode('utf-8')
    sha1 = hashlib.new('sha1', html).hexdigest()
    # Files are deduplicated by sha1; reuse an identical stored file
    # when one exists.  (Previously a missing row was detected by
    # catching the TypeError raised when subscripting None, which could
    # mask unrelated TypeErrors.)
    cursor.execute("SELECT fileid FROM files WHERE sha1 = %s", (sha1,))
    row = cursor.fetchone()
    if row is None:
        file_args = {
            'media_type': 'text/html',
            'data': psycopg2.Binary(html),
        }
        cursor.execute("""\
INSERT INTO files (file, media_type)
VALUES (%(data)s, %(media_type)s)
RETURNING fileid""", file_args)
        fileid = cursor.fetchone()[0]
    else:
        fileid = row[0]
    args = {
        'module_ident_hash': model.ident_hash,
        'parent_ident_hash': parent_model.ident_hash,
        'fileid': fileid,
    }
    # Associate the file with both the document (item) and the binder
    # it was collated within (context).
    stmt = """\
INSERT INTO collated_file_associations (context, item, fileid)
VALUES
  ((SELECT module_ident FROM modules
    WHERE uuid || '@' || concat_ws('.', major_version, minor_version)
    = %(parent_ident_hash)s),
   (SELECT module_ident FROM modules
    WHERE uuid || '@' || concat_ws('.', major_version, minor_version)
    = %(module_ident_hash)s),
   %(fileid)s)"""
    cursor.execute(stmt, args)


def publish_collated_tree(cursor, tree):
    """Persist the given collated ``tree`` (containing the newly added
    `CompositeDocument` nodes and numbering information) alongside the
    original, uncollated tree.
    """
    return _insert_tree(cursor, tree, is_collated=True)


def republish_binders(cursor, models):
"""Republish the Binders that share Documents in the publication context.
This needs to be given all the models in the publication context."""
Expand Down Expand Up @@ -494,7 +595,7 @@ def rebuild_collection_tree(cursor, ident_hash, history_map):
new document ids
"""
collection_tree_sql = """\
WITH RECURSIVE t(nodeid, parent_id, documentid, title, childorder, latest, \
WITH RECURSIVE t(nodeid, parent_id, documentid, title, childorder, latest,
ident_hash, path) AS (
SELECT
tr.nodeid, tr.parent_id, tr.documentid,
Expand All @@ -508,15 +609,16 @@ def rebuild_collection_tree(cursor, ident_hash, history_map):
SELECT module_ident
FROM modules
WHERE uuid||'@'||concat_ws('.', major_version, minor_version) = %s)
AND tr.is_collated = FALSE
UNION ALL
SELECT
c.*,
c.nodeid, c.parent_id, c.documentid, c.title, c.childorder, c.latest,
(SELECT uuid||'@'||concat_ws('.', major_version, minor_version)
FROM modules
WHERE module_ident = c.documentid) AS ident_hash,
path || ARRAY[c.nodeid]
FROM trees AS c JOIN t ON (c.parent_id = t.nodeid)
WHERE not c.nodeid = ANY(t.path)
WHERE not c.nodeid = ANY(t.path) AND c.is_collated = FALSE
)
SELECT row_to_json(row) FROM (SELECT * FROM t) AS row"""

Expand Down Expand Up @@ -568,6 +670,9 @@ def build_tree(nodeid, parent_id):
__all__ = (
'bump_version',
'get_previous_publication',
'publish_collated_document',
'publish_collated_tree',
'publish_composite_model',
'publish_model',
'rebuild_collection_tree',
'republish_binders',
Expand Down
Loading

0 comments on commit acd7554

Please sign in to comment.