# tensorboard/plugin_util_test.py

# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import textwrap


from tensorboard import context
from tensorboard import plugin_util
from tensorboard import test as tb_test
from tensorboard.backend import experiment_id


class MarkdownToSafeHTMLTest(tb_test.TestCase):
    """Tests for `plugin_util.markdown_to_safe_html`."""

    def _test(self, markdown_string, expected):
        """Assert that converting `markdown_string` yields exactly `expected`.

        Args:
          markdown_string: Markdown source handed to
            `plugin_util.markdown_to_safe_html`. The tests in this class
            pass both `str` and encoded `bytes` values.
          expected: The exact HTML string the conversion must return.
        """
        actual = plugin_util.markdown_to_safe_html(markdown_string)
        self.assertEqual(expected, actual)

    def test_empty_input(self):
        """An empty Markdown string converts to an empty HTML string."""
        self._test("", "")

    def test_basic_formatting(self):
        """Headings, emphasis, strong text, and links are rendered."""
        self._test(
            "# _Hello_, **world!**\n\n"
            "Check out [my website](http://example.com)!",
            "<h1><em>Hello</em>, <strong>world!</strong></h1>\n"
            '<p>Check out <a href="http://example.com">my website</a>!</p>',
        )

    def test_table_formatting(self):
        """Pipe-delimited tables are rendered as HTML `<table>` markup."""
        self._test(
            textwrap.dedent(
                """\
                Here is some data:

                TensorBoard usage | Happiness
                ------------------|----------
                              0.0 |       0.0
                              0.5 |       0.5
                              1.0 |       1.0

                Wouldn't you agree?
                """
            ),
            textwrap.dedent(
                """\
                <p>Here is some data:</p>
                <table>
                <thead>
                <tr>
                <th>TensorBoard usage</th>
                <th>Happiness</th>
                </tr>
                </thead>
                <tbody>
                <tr>
                <td>0.0</td>
                <td>0.0</td>
                </tr>
                <tr>
                <td>0.5</td>
                <td>0.5</td>
                </tr>
                <tr>
                <td>1.0</td>
                <td>1.0</td>
                </tr>
                </tbody>
                </table>
                <p>Wouldn't you agree?</p>
                """.rstrip()
            ),
        )

    def test_whitelisted_tags_and_attributes_allowed(self):
        """Inline `<a>` with `href` and `title` passes through unchanged."""
        s = (
            'Check out <a href="http://example.com" title="do it">'
            "my website</a>!"
        )
        self._test(s, "<p>%s</p>" % s)

    def test_arbitrary_tags_and_attributes_removed(self):
        """`<blink>` is escaped and the `name` attribute is dropped."""
        self._test(
            "We should bring back the <blink>blink tag</blink>; "
            '<a name="bookmark" href="http://please-dont.com">'
            "sign the petition!</a>",
            "<p>We should bring back the "
            "&lt;blink&gt;blink tag&lt;/blink&gt;; "
            '<a href="http://please-dont.com">sign the petition!</a></p>',
        )

    def test_javascript_hrefs_sanitized(self):
        """A `javascript:` href is removed, leaving a bare `<a>` element."""
        self._test(
            'A <a href="javascript:void0">sketchy link</a> for you',
            "<p>A <a>sketchy link</a> for you</p>",
        )

    def test_byte_strings_interpreted_as_utf8(self):
        """UTF-8 encoded `bytes` input produces the decoded text as HTML."""
        s = "> Look\u2014some UTF-8!".encode("utf-8")
        # Sanity-check the fixture with a unittest assertion rather than a
        # bare `assert`, which is stripped when Python runs with `-O`.
        self.assertIsInstance(s, bytes)
        self._test(
            s, "<blockquote>\n<p>Look\u2014some UTF-8!</p>\n</blockquote>"
        )

    def test_unicode_strings_passed_through(self):
        """`str` input yields the same HTML as its UTF-8 encoded form."""
        s = "> Look\u2014some UTF-8!"
        # Sanity-check the fixture with a unittest assertion rather than a
        # bare `assert`, which is stripped when Python runs with `-O`.
        self.assertNotIsInstance(s, bytes)
        self._test(
            s, "<blockquote>\n<p>Look\u2014some UTF-8!</p>\n</blockquote>"
        )

    def test_null_bytes_stripped_before_markdown_processing(self):
        """Null bytes are dropped before rendering and a warning is added."""
        # If this function is mistakenly called with UTF-16 or UTF-32
        # encoded text, there will probably be a bunch of null bytes. These
        # would be stripped by the sanitizer no matter what, but make sure
        # we remove them before markdown interpretation to avoid affecting
        # output (e.g. middle-word underscores would generate erroneous
        # <em> tags like "un<em>der</em>score") and add an HTML comment
        # with a warning.
        s = "un_der_score".encode("utf-32-le")
        # UTF-32 encoding of ASCII will have 3 null bytes per char.
        # 36 = 3 * 12.
        self._test(
            s,
            "<!-- WARNING: discarded 36 null bytes in markdown string "
            "after UTF-8 decoding -->\n"
            "<p>un_der_score</p>",
        )


class MarkdownsToSafeHTMLTest(tb_test.TestCase):
    """Tests for `plugin_util.markdowns_to_safe_html`.

    Single-string conversion details are covered by
    `MarkdownToSafeHTMLTest`; these cases focus on the `combine` callback.
    """

    def test_simple(self):
        """Each input is rendered, then the pieces are joined by `combine`."""
        markdown_pieces = ["0", "*1*", "**2**"]
        join_with_breaks = "<br>".join
        actual = plugin_util.markdowns_to_safe_html(
            markdown_pieces, join_with_breaks
        )
        expected = "<p>0</p><br><p><em>1</em></p><br><p><strong>2</strong></p>"
        self.assertEqual(actual, expected)

    def test_sanitizes_combination_result(self):
        """Unsafe markup introduced by `combine` itself is escaped."""

        def prepend_script(rendered):
            return "<script>alert('unsafe!')</script>%s" % rendered[0]

        actual = plugin_util.markdowns_to_safe_html(["safe"], prepend_script)
        expected = "&lt;script&gt;alert('unsafe!')&lt;/script&gt;<p>safe</p>"
        self.assertEqual(actual, expected)

    def test_sanitization_can_have_collateral_damage(self):
        """Fragments that merge into a single tag can swallow other content."""
        markdown_pieces = [
            '<table title="*chuckles* ',
            "I'm in danger",
            '<table>">',
        ]
        concatenate = "".join
        actual = plugin_util.markdowns_to_safe_html(
            markdown_pieces, concatenate
        )
        expected = "<table></table>"
        self.assertEqual(actual, expected)


class ContextTest(tb_test.TestCase):
    """Tests for `plugin_util.context`."""

    def test_context(self):
        """A context stored in the environ is returned by `plugin_util`."""
        ctx = context.RequestContext()
        environ = {}
        context.set_in_environ(environ, ctx)
        # Read the context back through `plugin_util`, the module under
        # test. Going through `context.from_environ` directly would leave
        # `plugin_util` entirely unexercised by this test case.
        self.assertEqual(plugin_util.context(environ), ctx)


class ExperimentIdTest(tb_test.TestCase):
    """Tests for `plugin_util.experiment_id`."""

    def test_default(self):
        """An environ without the experiment ID key yields an empty string."""
        # This shouldn't happen in practice: the `ExperimentIdMiddleware`
        # always sets an experiment ID. If something goes wrong anyway,
        # the lookup should degrade gracefully instead of raising.
        self.assertEqual(plugin_util.experiment_id({}), "")

    def test_present(self):
        """The value stored under the WSGI environ key is returned as-is."""
        environ_with_id = {experiment_id.WSGI_ENVIRON_KEY: "123"}
        self.assertEqual(plugin_util.experiment_id(environ_with_id), "123")


# Run the test cases above when this file is executed directly as a script.
if __name__ == "__main__":
    tb_test.main()