def _inject_mathml_svgs(model):
    """Inject SVG renderings of MathML into the model's content.

    Every ``m:math`` element in ``model.content`` that does not already
    contain an ``m:annotation-xml`` element encoded as ``image/svg+xml``
    is posted to the service named by the ``mathml2svg.url`` setting.
    When the service answers with a 200, the returned SVG is appended,
    wrapped in an ``m:annotation-xml`` element, to the first child of
    the ``m:math`` element.  Blocks the service does not convert are left
    untouched.

    Nothing is done unless the ``mathml2svg.enabled?`` setting is truthy.

    :param model: object whose ``content`` attribute holds the XML markup;
                  ``content`` is replaced with the re-serialized document

    """
    settings = get_current_registry().settings

    # Bailout when svg generation is disabled.
    if not asbool(settings.get('mathml2svg.enabled?', False)):
        return
    url = settings.get('mathml2svg.url')

    svg_mimetype = 'image/svg+xml'
    mathml_namespace = "http://www.w3.org/1998/Math/MathML"
    annotation_tag = '{{{}}}annotation-xml'.format(mathml_namespace)

    xml = etree.fromstring(model.content)
    # The predicate must be relative to each math element (``.//``).
    # An absolute ``/m:annotation-xml`` only ever inspects the document
    # root, so already annotated blocks would be submitted again.
    mathml_blocks = xml.xpath(
        '//m:math[not(.//m:annotation-xml[@encoding="image/svg+xml"])]',
        namespaces={'m': mathml_namespace})

    for mathml_block in mathml_blocks:
        # The annotation is attached to the first child of the math
        # element (presumably ``m:semantics`` -- confirm against the
        # content pipeline).  An empty ``<math/>`` has no such child, so
        # skip it before spending a request on it.  ``len()`` is used
        # because lxml discourages truth-testing elements.
        if len(mathml_block) == 0:
            continue
        semantic_block = mathml_block[0]

        # Submit the MathML block to the SVG generation service.
        # ``with_tail=False`` keeps the text that follows ``</math>`` in
        # the document out of the payload.
        payload = {'MathML': etree.tostring(mathml_block, with_tail=False)}
        # NOTE(review): no timeout is given, so an unresponsive service
        # blocks the caller indefinitely -- consider a configured timeout.
        response = requests.post(url, data=payload)

        # Inject the SVG into the MathML as an annotation
        # only if the response was good, otherwise skip over it.
        if response.status_code != 200:
            continue

        # Keep only the media type ("image/svg+xml; charset=utf-8" becomes
        # "image/svg+xml") so the ``encoding`` attribute matches the
        # filter in the XPath expression above on a later run.
        content_type = response.headers.get('content-type') or svg_mimetype
        content_type = content_type.split(';', 1)[0].strip()

        # Parse before touching the tree, then insert the svg into the
        # content.
        svg = etree.fromstring(response.text)
        annotation = etree.SubElement(semantic_block, annotation_tag)
        annotation.set('encoding', content_type)
        annotation.append(svg)

    model.content = etree.tostring(xml)
+ cursor.execute("""\ +INSERT INTO abstracts (abstract) VALUES ('abstract') +RETURNING abstractid""") + cursor.execute("""\ +INSERT INTO modules +(module_ident, portal_type, name, + created, revised, abstractid, licenseid, + doctype, submitter, submitlog, stateid, parent, parentauthors, + language, authors, maintainers, licensors, + google_analytics, buylink) +VALUES +(1, 'Module', 'mathml module', + DEFAULT, DEFAULT, 1, 1, + 0, 'admin', 'log', NULL, NULL, NULL, + 'en', '{admin}', NULL, '{admin}', + DEFAULT, DEFAULT) RETURNING uuid || '@' || major_version""") + doc_ident_hash = cursor.fetchone()[0] + + # Create and add a document for the publication. + metadata = { + 'title': 'Document Title', + 'summary': 'Document Summary', + 'authors': [{u'id': u'able', u'type': u'cnx-id'}], + 'publishers': [{'id': 'able', 'type': 'cnx-id'}], + 'license_url': VALID_LICENSE_URL, + } + content = """\ +