def _inject_mathml_svgs(model):
    """Inject SVG renderings into the MathML found in the model's content.

    Every ``m:math`` element in ``model.content`` that does not already
    contain an ``annotation-xml`` element encoded as ``image/svg+xml``
    is serialized and POSTed to the service configured under the
    ``mathml2svg.url`` setting. When the service answers with a 200,
    the returned SVG is appended to the first child of the ``m:math``
    element, wrapped in a MathML ``annotation-xml`` element whose
    ``encoding`` attribute is the response's content type.

    The feature is controlled by the ``mathml2svg.enabled?`` setting;
    when that is falsy the model is left completely untouched.

    :param model: object with a ``content`` attribute holding
                  well-formed XML; the attribute is overwritten with
                  the re-serialized document.
    :returns: ``None`` -- the model is modified in place.

    Nothing is caught here: errors raised by ``requests.post``
    (including the timeout) or by parsing the service's response
    propagate to the caller, and ``model.content`` is then left
    unchanged because it is only assigned at the very end.

    """
    settings = get_current_registry().settings
    is_enabled = asbool(settings.get('mathml2svg.enabled?', False))

    # Bailout when svg generation is disabled.
    if not is_enabled:
        return

    url = settings.get('mathml2svg.url')
    # Seconds to wait on the service. Without a timeout ``requests``
    # waits forever, which would stall the whole publication.
    request_timeout = 30

    mathml_namespace = "http://www.w3.org/1998/Math/MathML"
    annotation_tag = '{{{}}}annotation-xml'.format(mathml_namespace)

    xml = etree.fromstring(model.content)
    # The predicate must be relative (``.//``) to each ``m:math``.
    # An absolute path (``/m:annotation-xml``) tests the document root
    # instead, so already annotated blocks would never be skipped.
    mathml_blocks = xml.xpath(
        '//m:math[not(.//m:annotation-xml[@encoding="image/svg+xml"])]',
        namespaces={'m': mathml_namespace})
    for mathml_block in mathml_blocks:
        # An empty ``m:math`` has nowhere to hold the annotation;
        # skip it before spending a request on it.
        if not len(mathml_block):
            continue
        # NOTE(review): presumably the first child is ``m:semantics``,
        # the only valid parent of an annotation -- confirm upstream.
        semantic_block = mathml_block[0]

        # Submit the MathML block to the SVG generation service.
        payload = {'MathML': etree.tostring(mathml_block)}
        response = requests.post(url, data=payload,
                                 timeout=request_timeout)
        # Inject the SVG into the MathML as an annotation
        # only if the response was good, otherwise skip over it.
        if response.status_code != 200:
            continue

        content_type = response.headers.get('content-type',
                                            'image/svg+xml')
        # Parse before touching the tree so that a malformed response
        # cannot leave an empty annotation element behind.
        svg = etree.fromstring(response.text)

        # Insert the svg into the content
        annotation = etree.SubElement(semantic_block, annotation_tag)
        annotation.set('encoding', content_type)
        annotation.append(svg)
    model.content = etree.tostring(xml)
+ cursor.execute("""\ +INSERT INTO abstracts (abstract) VALUES ('abstract') +RETURNING abstractid""") + cursor.execute("""\ +INSERT INTO modules +(module_ident, portal_type, name, + created, revised, abstractid, licenseid, + doctype, submitter, submitlog, stateid, parent, parentauthors, + language, authors, maintainers, licensors, + google_analytics, buylink) +VALUES +(1, 'Module', 'mathml module', + DEFAULT, DEFAULT, 1, 1, + 0, 'admin', 'log', NULL, NULL, NULL, + 'en', '{admin}', NULL, '{admin}', + DEFAULT, DEFAULT) RETURNING uuid || '@' || major_version""") + doc_ident_hash = cursor.fetchone()[0] + + # Create and add a document for the publication. + metadata = { + 'title': 'Document Title', + 'summary': 'Document Summary', + 'authors': [{u'id': u'able', u'type': u'cnx-id'}], + 'publishers': [{'id': 'able', 'type': 'cnx-id'}], + 'license_url': VALID_LICENSE_URL, + } + content = """\ +
+ x = b ± b 2 4 a c 2 a +
""" + document = self.make_document(content=content, metadata=metadata) + + # Here we are testing the function of add_pending_document. + from ..db import add_pending_model, add_pending_model_content + document_ident_hash = add_pending_model( + cursor, publication_id, document) + from pyramid.threadlocal import get_current_registry + + # Enable the mathml2svg service for this test. + get_current_registry().settings['mathml2svg.enabled?'] = 'on' + + with mock.patch('requests.post') as post: + post.return_value.status_code = 200 + post.return_value.headers = {'content-type': 'image/svg+xml'} + post.return_value.text = 'mocked' + add_pending_model_content(cursor, publication_id, document) + + # The communication to the mathml2svg service is mocked to return + # a stub svg element. + + # This doesn't seem like much, but we only need to check that + # the entry was added and the SVG annotation exists. + cursor.execute(""" +SELECT convert_from(content, 'utf8') +FROM pending_documents +WHERE publication_id = %s""", (publication_id,)) + persisted_content = cursor.fetchone()[0] + self.assertNotEqual(persisted_content, content) + + elms = etree.fromstring(persisted_content) + annotation = elms.xpath( + '/div/m:math//m:annotation-xml[@encoding="image/svg+xml"]', + namespaces={'m': "http://www.w3.org/1998/Math/MathML"})[0] + expected = """mocked""" + self.assertEqual(etree.tostring(annotation), expected) + @db_connect def test_add_pending_binder_w_document_pointers(self, cursor): """Add a pending binder with document pointers.""" diff --git a/development.ini b/development.ini index 085b469b..310ad946 100644 --- a/development.ini +++ b/development.ini @@ -21,6 +21,11 @@ db-connection-string = dbname=cnxarchive user=cnxarchive password=cnxarchive # size limit of file uploads in MB file-upload-limit = 50 +# mathml2svg is disabled by default. +# To enable it use ``mathml2svg.enabled? = yes`` and set the url. +mathml2svg.url = http://localhost:5689 +mathml2svg.enabled? 
= no + session_key = 'somkindaseekret' # Application API keys with authentication information. # This information is organized in the following form: diff --git a/setup.py b/setup.py index 4fd59127..b870a441 100644 --- a/setup.py +++ b/setup.py @@ -14,6 +14,7 @@ 'pyramid>=1.5', 'pyramid_jinja2', 'pyramid_multiauth', + 'requests', ) tests_require = [ 'webtest', diff --git a/testing.ini b/testing.ini index e659df67..04a0b36a 100644 --- a/testing.ini +++ b/testing.ini @@ -29,6 +29,9 @@ openstax_accounts.login_path = /login openstax_accounts.callback_path = /callback openstax_accounts.logout_path = /logout +mathml2svg.url = http://localhost:5689 +mathml2svg.enabled? = no + [server:main] use = egg:waitress#main host = 0.0.0.0