diff --git a/HISTORY.rst b/HISTORY.rst index c3e3c16..9d29091 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,13 +3,19 @@ History ------- +0.1.2 (2017-11-22) +~~~~~~~~~~~~~~~~~~ + +* Fixed: Not removing all spaces between html tags. + Sometimes spaces matter for formatting. + For example ``Hello World`` cannot be minified any further. + 0.1.1 (2016-09-26) ~~~~~~~~~~~~~~~~~~ * Fixed: Cache properties now allow to set cache value via ``foo = bar`` syntax when cache descriptor has ``as_property == True`` - 0.1.0 (2015-11-26) ~~~~~~~~~~~~~~~~~~ diff --git a/django_auxilium/__init__.py b/django_auxilium/__init__.py index b6574bc..dc371ce 100644 --- a/django_auxilium/__init__.py +++ b/django_auxilium/__init__.py @@ -1,2 +1,2 @@ -__version__ = '0.1.1' +__version__ = '0.1.2' __author__ = 'Miroslav Shubernetskiy' diff --git a/django_auxilium/utils/html.py b/django_auxilium/utils/html.py index a86415f..57041d8 100644 --- a/django_auxilium/utils/html.py +++ b/django_auxilium/utils/html.py @@ -2,7 +2,6 @@ import re import six -from django.utils.html import strip_spaces_between_tags from six.moves.html_entities import name2codepoint from six.moves.html_parser import HTMLParser @@ -11,6 +10,7 @@ EXCLUDE_TAGS = ('textarea', 'pre', 'code', 'script',) RE_WHITESPACE = re.compile(r'\s{2,}|\n') +RE_SPACE_BETWEEN_TAGS = re.compile(r'>(?:\s{2,}|\n)<') RE_EXCLUDE_TAGS = re.compile( """( # group for results to be included in re.split <(?:{0}) # match beginning of one of exclude tags @@ -27,10 +27,11 @@ def simple_minify(html): """ Minify HTML with very simple algorithm. - This function tries to minify HTML by stripping all spaces between all html tags - (e.g. ``
`` -> ``
``). This step is accomplished by using - Django's ``strip_spaces_between_tags`` method. In addition to that, this function - replaces all whitespace (more then two consecutive whitespace characters or new line) + This function tries to minify HTML by stripping most spaces between all html tags + (e.g. ``
`` -> ``
``). Note that not all spaces are removed + since sometimes that can adjust rendered HTML (e.g. ``Hello ``). + In addition to that, this function replaces all whitespace + (more than two consecutive whitespace characters or new line) with a space character except inside excluded tags such as ``pre`` or ``textarea``. **Though process**: @@ -54,8 +55,7 @@ def simple_minify(html): appended to final HTML since as explained above, they are guaranteed to be content of excluded tags hence do not require minification. #. All even indexed elements are minified by stripping whitespace between - tags by using Django's ``strip_spaces_between_tags`` and redundant - whitespace is stripped in general via simple regex. + tags and redundant whitespace is stripped in general via simple regex. You can notice that the process does not involve parsing HTML since that usually adds some overhead (e.g. using beautiful soup). By using 2 regex @@ -65,7 +65,8 @@ def simple_minify(html): html = '' for i, component in enumerate(components): if i % 2 == 0: - component = strip_spaces_between_tags(component.strip()) + component = component.strip() + component = RE_SPACE_BETWEEN_TAGS.sub('> <', component) component = RE_WHITESPACE.sub(' ', component) html += component else: diff --git a/requirements-dev.txt b/requirements-dev.txt index f2250af..95d80fa 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,9 +5,10 @@ django-formtools flake8 importanize mock -pytest>=2.9 +pdbpp pytest-cov pytest-django +pytest>=2.9 python-magic sphinx sphinx-autobuild diff --git a/tests/utils/test_html.py b/tests/utils/test_html.py index a25cb3a..8a45412 100644 --- a/tests/utils/test_html.py +++ b/tests/utils/test_html.py @@ -18,6 +18,8 @@ + Hello World + HelloMars
Content Here