python-cmd2 · tleonhardt · Aug 23, 2017 · Aug 20, 2017 · Aug 20, 2017 · Aug 20, 2017
diff --git a/cmd2.py b/cmd2.py
@@ -2021,10 +2021,12 @@ def parsed(self, raw):
 class HistoryItem(str):
     """Class used to represent an item in the History list.
 
-    Thing wrapper around str class which adds a custom format for printing.  It also keeps track of its index in the
-    list as well as a lowercase representation of itself for convenience/efficiency.
+    Thin wrapper around str class which adds a custom format for printing. It
+    also keeps track of its index in the list as well as a lowercase
+    representation of itself for convenience/efficiency.
+
     """
-    listformat = '-------------------------[%d]\n%s\n'
+    listformat = '-------------------------[{}]\n{}\n'
 
     # noinspection PyUnusedLocal
     def __init__(self, instr):
@@ -2037,7 +2039,7 @@ def pr(self):
 
         :return: str - pretty print string version of a HistoryItem
         """
-        return self.listformat % (self.idx, str(self))
+        return self.listformat.format(self.idx, str(self).rstrip())
 
 
 class History(list):
@@ -2230,12 +2232,6 @@ class Cmd2TestCase(unittest.TestCase):
        that will execute the commands in a transcript file and expect the results shown.
        See example.py"""
     cmdapp = None
-    regexPattern = pyparsing.QuotedString(quoteChar=r'/', escChar='\\', multiline=True, unquoteResults=True)
-    regexPattern.ignore(pyparsing.cStyleComment)
-    notRegexPattern = pyparsing.Word(pyparsing.printables)
-    notRegexPattern.setParseAction(lambda t: re.escape(t[0]))
-    expectationParser = regexPattern | notRegexPattern
-    anyWhitespace = re.compile(r'\s', re.DOTALL | re.MULTILINE)
 
     def fetchTranscripts(self):
         self.transcripts = {}
@@ -2295,8 +2291,8 @@ def _test_transcript(self, fname, transcript):
             result = self.cmdapp.stdout.read()
             # Read the expected result from transcript
             if strip_ansi(line).startswith(self.cmdapp.visible_prompt):
-                message = '\nFile %s, line %d\nCommand was:\n%r\nExpected: (nothing)\nGot:\n%r\n' % \
-                          (fname, line_num, command, result)
+                message = '\nFile {}, line {}\nCommand was:\n{}\nExpected: (nothing)\nGot:\n{}\n'.format(
+                          fname, line_num, command, result)
                 self.assert_(not (result.strip()), message)
                 continue
             expected = []
@@ -2309,15 +2305,95 @@ def _test_transcript(self, fname, transcript):
                     break
                 line_num += 1
             expected = ''.join(expected)
-            # Compare actual result to expected
-            message = '\nFile %s, line %d\nCommand was:\n%s\nExpected:\n%s\nGot:\n%s\n' % \
-                      (fname, line_num, command, expected, result)
-            expected = self.expectationParser.transformString(expected)
-            # checking whitespace is a pain - let's skip it
-            expected = self.anyWhitespace.sub('', expected)
-            result = self.anyWhitespace.sub('', result)
+
+            # transform the expected text into a valid regular expression
+            expected = self._transform_transcript_expected(expected)
+            message = '\nFile {}, line {}\nCommand was:\n{}\nExpected:\n{}\nGot:\n{}\n'.format(
+                      fname, line_num, command, expected, result)
             self.assertTrue(re.match(expected, result, re.MULTILINE | re.DOTALL), message)
 
+    def _transform_transcript_expected(self, s):
+        """parse the string with slashed regexes into a valid regex"""
+        slash = '/'
+        backslash = '\\'
+        regex = ''
+        start = 0
+
+        while True:
+            (regex, first_slash_pos, start) = self._escaped_find(regex, s, start, False)
+            if first_slash_pos == -1:
+                # no more slashes, add the rest of the string and bail
+                regex += re.escape(s[start:])
+                break
+            else:
+                # there is a slash, add everything we have found so far
+                # add stuff before the first slash as plain text
+                regex += re.escape(s[start:first_slash_pos])
+                start = first_slash_pos+1
+                # and go find the next one
+                (regex, second_slash_pos, start) = self._escaped_find(regex, s, start, True)
+                if second_slash_pos > 0:
+                    # add everything between the slashes (but not the slashes)
+                    # as a regular expression
+                    regex += s[start:second_slash_pos]
+                    # and change where we start looking for slashed on the
+                    # turn through the loop
+                    start = second_slash_pos + 1
+                else:
+                    # No closing slash, we have to add the first slash,
+                    # and the rest of the text
+                    regex += re.escape(s[start-1:])
+                    break
+        return regex
+
+    def _escaped_find(self, regex, s, start, in_regex):
+        """
+        Find the next slash in {s} at or after {start} that is not preceded by a backslash.
+
+        Each escaped slash found on the way is appended to {regex} (without its
+        backslash) together with the text before it, and {start} moves past it.
+        The text is copied verbatim if {in_regex} is true and passed through
+        re.escape() otherwise. {start} therefore marks both where to search
+        next and how much of {s} has already been added to {regex}.
+
+        :return: tuple (regex, pos, start); pos is -1 if no unescaped slash was found
+        """
+
+        while True:
+            pos = s.find('/', start)
+            if pos == -1:
+                # no match, return to caller
+                break
+            elif pos == 0:
+                # slash at the beginning of the string, so it can't be
+                # escaped. We found it.
+                break
+            else:
+                # check if the slash is preceded by a backslash
+                if s[pos-1:pos] == '\\':
+                    # it is.
+                    if in_regex:
+                        # add everything up to the backslash as a
+                        # regular expression
+                        regex += s[start:pos-1]
+                        # skip the backslash, and add the slash
+                        regex += s[pos]
+                    else:
+                        # add everything up to the backslash as escaped
+                        # plain text
+                        regex += re.escape(s[start:pos-1])
+                        # and then add the slash as escaped
+                        # plain text
+                        regex += re.escape(s[pos])
+                    # update start to show we have handled everything
+                    # before it
+                    start = pos+1
+                    # and continue to look
+                else:
+                    # slash is not escaped, this is what we are looking for
+                    break
+        return (regex, pos, start)
+
     def tearDown(self):
         if self.cmdapp:
             # Restore stdout

diff --git a/docs/freefeatures.rst b/docs/freefeatures.rst
@@ -82,6 +82,8 @@ quotation marks if it is more than a one-word command.
 
 .. _Argparse: https://docs.python.org/3/library/argparse.html
 
+.. _output_redirection:
+
 Output redirection
 ==================
 
@@ -301,34 +303,20 @@ is equivalent to ``shell ls``.)
 Transcript-based testing
 ========================
 
-If the entire transcript (input and output) of a successful session of
-a ``cmd2``-based app is copied from the screen and pasted into a text
-file, ``transcript.txt``, then a transcript test can be run against it::
-
-  python app.py --test transcript.txt
+A transcript is both the input and output of a successful session of a
+``cmd2``-based app which is saved to a text file. The transcript can be played
+back into the app as a unit test.
 
-Any non-whitespace deviations between the output prescribed in ``transcript.txt`` and
-the actual output from a fresh run of the application will be reported
-as a unit test failure.  (Whitespace is ignored during the comparison.)
+.. code-block:: none
 
-Regular expressions can be embedded in the transcript inside paired ``/``
-slashes.  These regular expressions should not include any whitespace
-expressions.
-
-.. note::
+   $ python example.py --test transcript_regex.txt
+   .
+   ----------------------------------------------------------------------
+   Ran 1 test in 0.013s
 
-   If you have set ``allow_cli_args`` to False in order to disable parsing of command line arguments at invocation,
-   then the use of ``-t`` or ``--test`` to run transcript testing is automatically disabled.  In this case, you can
-   alternatively provide a value for the optional ``transcript_files`` when constructing the instance of your
-   ``cmd2.Cmd`` derived class in order to cause a transcript test to run::
-
-       from cmd2 import Cmd
-       class App(Cmd):
-         # customized attributes and methods here
+   OK
 
-       if __name__ == '__main__':
-           app = App(transcript_files=['exampleSession.txt'])
-           app.cmdloop()
+See :doc:`transcript` for more details.
 
 
 Tab-Completion

diff --git a/docs/index.rst b/docs/index.rst
@@ -66,6 +66,7 @@ Contents:
    freefeatures
    settingchanges
    unfreefeatures
+   transcript
    integrating
    hooks
    alternatives

diff --git a/docs/transcript.rst b/docs/transcript.rst
@@ -0,0 +1,161 @@
+========================
+Transcript-based testing
+========================
+
+A transcript is both the input and output of a successful session of a
+``cmd2``-based app which is saved to a text file. With no extra work on your
+part, your app can play back these transcripts as a unit test. Transcripts can
+contain regular expressions, which provide the flexibility to match responses
+from commands that produce dynamic or variable output.
+
+.. highlight:: none
+
+Creating a transcript
+=====================
+
+Here's a transcript created from ``python examples/example.py``::
+
+   (Cmd) say -r 3 Goodnight, Gracie
+   Goodnight, Gracie
+   Goodnight, Gracie
+   Goodnight, Gracie
+   (Cmd) mumble maybe we could go to lunch
+   like maybe we ... could go to hmmm lunch
+   (Cmd) mumble maybe we could go to lunch
+   well maybe we could like go to er lunch right?
+
+This transcript has three commands: they are on the lines that begin with the
+prompt. The first command looks like this::
+
+   (Cmd) say -r 3 Goodnight, Gracie
+
+Following each command is the output generated by that command.
+
+The transcript ignores all lines in the file until it reaches the first line
+that begins with the prompt. You can take advantage of this by using the first
+lines of the transcript as comments::
+
+   # Lines at the beginning of the transcript that do not
+   ; start with the prompt i.e. '(Cmd) ' are ignored.
+   /* You can use them for comments. */
+
+   All six of these lines before the first prompt are treated as comments.
+
+   (Cmd) say -r 3 Goodnight, Gracie
+   Goodnight, Gracie
+   Goodnight, Gracie
+   Goodnight, Gracie
+   (Cmd) mumble maybe we could go to lunch
+   like maybe we ... could go to hmmm lunch
+   (Cmd) mumble maybe we could go to lunch
+   maybe we could like go to er lunch right?
+
+In this example I've used several different commenting styles, and even bare
+text. It doesn't matter what you put on those beginning lines. Everything before::
+
+   (Cmd) say -r 3 Goodnight, Gracie
+
+will be ignored.
+
+
+Regular Expressions
+===================
+
+If we used the above transcript as-is, it would likely fail. As you can see,
+the ``mumble`` command doesn't always return the same thing: it inserts random
+words into the input.
+
+Regular expressions can be included in the response portion of a transcript,
+and are surrounded by slashes::
+
+   (Cmd) mumble maybe we could go to lunch
+   /.*\bmaybe\b.*\bcould\b.*\blunch\b.*/
+   (Cmd) mumble maybe we could go to lunch
+   /.*\bmaybe\b.*\bcould\b.*\blunch\b.*/
+
+Without creating a tutorial on regular expressions, this one matches anything
+that has the words ``maybe``, ``could``, and ``lunch`` in that order. It doesn't
+ensure that ``we`` or ``go`` or ``to`` appear in the output, but it does work if
+mumble happens to add words to the beginning or the end of the output.
+
+Since the output could be multiple lines long, ``cmd2`` uses multiline regular
+expression matching, and also uses the ``DOTALL`` flag. These two flags subtly
+change the behavior of commonly used special characters like ``.``, ``^`` and
+``$``, so you may want to double check the `Python regular expression
+documentation <https://docs.python.org/3/library/re.html>`_.
+
+If your output has slashes in it, you will need to escape those slashes so the
+stuff between them is not interpreted as a regular expression. In this transcript::
+
+   (Cmd) say cd /usr/local/lib/python3.6/site-packages
+   /usr/local/lib/python3.6/site-packages
+
+the output contains slashes. The text between the first slash and the second
+slash will be interpreted as a regular expression, and those two slashes will
+not be included in the comparison. When replayed, this transcript would
+therefore fail. To fix it, we could either write a regular expression to match
+the path instead of specifying it verbatim, or we can escape the slashes::
+
+   (Cmd) say cd /usr/local/lib/python3.6/site-packages
+   \/usr\/local\/lib\/python3.6\/site-packages
+
+.. warning::
+
+   Be aware of trailing spaces and newlines. Your commands might output
+   trailing spaces which are impossible to see. Instead of leaving them
+   invisible, you can add a regular expression to match them, so that you can
+   see where they are when you look at the transcript::
+
+      (Cmd) set prompt
+      prompt: (Cmd)/ /
+
+   Some terminal emulators strip trailing space when you copy text from them.
+   This could make the actual data generated by your app different than the
+   text you pasted into the transcript, and it might not be readily obvious why
+   the transcript is not passing. Consider using :ref:`output_redirection` to
+   the clipboard or to a file to ensure you accurately capture the output of
+   your command.
+
+   If you aren't using regular expressions, make sure the newlines at the end
+   of your transcript exactly match the output of your commands. A common cause
+   of a failing transcript is an extra or missing newline.
+
+   If you are using regular expressions, be aware that depending on how you
+   write your regex, the newlines after the regex may or may not matter.
+   ``\Z`` matches *after* the newline at the end of the string, whereas
+   ``$`` matches the end of the string *or* just before a newline.
+
+
+Running a transcript
+====================
+
+Once you have created a transcript, it's easy to have your application play it
+back and check the output. From within the ``examples/`` directory::
+
+   $ python example.py --test transcript_regex.txt
+   .
+   ----------------------------------------------------------------------
+   Ran 1 test in 0.013s
+
+   OK
+
+The output will look familiar if you use ``unittest``, because that's exactly
+what happens. Each command in the transcript is run, and we ``assert`` the
+output matches the expected result from the transcript.
+
+.. note::
+
+   If you have set ``allow_cli_args`` to False in order to disable parsing of
+   command line arguments at invocation, then the use of ``-t`` or ``--test``
+   to run transcript testing is automatically disabled. In this case, you can
+   alternatively provide a value for the optional ``transcript_files`` when
+   constructing the instance of your ``cmd2.Cmd`` derived class in order to
+   cause a transcript test to run::
+
+       from cmd2 import Cmd
+       class App(Cmd):
+         # customized attributes and methods here
+
+       if __name__ == '__main__':
+           app = App(transcript_files=['exampleSession.txt'])
+           app.cmdloop()