Fix Documentation.from_params(...).value.
1. Fix `value` when tokens don't have line numbers.
2. Fix `from_params` when there are empty lines.

Fixes #4670.
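
In practice, the fixed round-trip looks like this (a minimal sketch, assuming the `robot.parsing.model.statements` module path shown in the diff below):

from robot.parsing.model.statements import Documentation

# `from_params` now creates an (empty) ARGUMENT token for the empty line,
# and `value` reconstructs the text even when tokens have no line numbers.
doc = Documentation.from_params('First line.\n\nSecond paragraph.')
assert doc.value == 'First line.\n\nSecond paragraph.'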
pekkaklarck committed Mar 21, 2023
1 parent c3e1765 commit 0d90aba
Showing 4 changed files with 154 additions and 41 deletions.
61 changes: 30 additions & 31 deletions src/robot/parsing/model/statements.py
@@ -163,28 +163,37 @@ def __repr__(self):

 class DocumentationOrMetadata(Statement):
 
-    def _join_value(self, tokens):
-        lines = self._get_lines(tokens)
-        return ''.join(self._yield_lines_with_newlines(lines))
+    @property
+    def value(self):
+        return ''.join(self._get_lines_with_newlines()).rstrip()
+
+    def _get_lines_with_newlines(self):
+        for parts in self._get_line_parts():
+            line = ' '.join(parts)
+            yield line
+            if not self._escaped_or_has_newline(line):
+                yield '\n'
 
-    def _get_lines(self, tokens):
-        lines = []
-        line = None
+    def _get_line_parts(self):
+        line = []
         lineno = -1
-        for t in tokens:
-            if t.lineno != lineno:
-                line = []
-                lines.append(line)
-            line.append(t.value)
-            lineno = t.lineno
-        return [' '.join(line) for line in lines]
-
-    def _yield_lines_with_newlines(self, lines):
-        last_index = len(lines) - 1
-        for index, line in enumerate(lines):
-            yield line
-            if index < last_index and not self._escaped_or_has_newline(line):
-                yield '\n'
+        # There are no EOLs during execution or if data has been parsed with
+        # `data_only=True` otherwise, so we need to look at line numbers to
+        # know when lines change. If model is created programmatically using
+        # `from_params` or otherwise, line numbers may not be set, but there
+        # ought to be EOLs. If both EOLs and line numbers are missing,
+        # everything is considered to be on the same line.
+        for token in self.get_tokens(Token.ARGUMENT, Token.EOL):
+            eol = token.type == Token.EOL
+            if token.lineno != lineno or eol:
+                if line:
+                    yield line
+                line = []
+            if not eol:
+                line.append(token.value)
+                lineno = token.lineno
+        if line:
+            yield line
 
     def _escaped_or_has_newline(self, line):
         match = re.search(r'(\\+)n?$', line)
@@ -350,16 +359,11 @@ def from_params(cls, value, indent=FOUR_SPACES, separator=FOUR_SPACES,
                 tokens.append(Token(Token.SEPARATOR, indent))
                 tokens.append(Token(Token.CONTINUATION))
             if line:
-                tokens.extend([Token(Token.SEPARATOR, multiline_separator),
-                               Token(Token.ARGUMENT, line)])
-            tokens.append(Token(Token.EOL, eol))
+                tokens.append(Token(Token.SEPARATOR, multiline_separator))
+            tokens.extend([Token(Token.ARGUMENT, line),
+                           Token(Token.EOL, eol)])
         return cls(tokens)
 
-    @property
-    def value(self):
-        tokens = self.get_tokens(Token.ARGUMENT)
-        return self._join_value(tokens)
-
 
 @Statement.register
 class Metadata(DocumentationOrMetadata):
@@ -386,11 +390,6 @@ def from_params(cls, name, value, separator=FOUR_SPACES, eol=EOL):
     def name(self):
         return self.get_value(Token.NAME)
 
-    @property
-    def value(self):
-        tokens = self.get_tokens(Token.ARGUMENT)
-        return self._join_value(tokens)
-
 
 @Statement.register
 class ForceTags(MultiValue):
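The heart of the `value` fix is `_get_line_parts` above: EOL tokens delimit lines when they exist, line-number changes are the fallback, and with neither everything collapses onto one line. A standalone sketch of that rule, using hypothetical (type, value, lineno) tuples instead of real Token objects:

def group_line_parts(tokens):
    # Mirrors the grouping in `_get_line_parts`: a new line starts on an
    # EOL token or when the line number changes.
    line, lineno = [], -1
    for type_, value, token_lineno in tokens:
        eol = type_ == 'EOL'
        if token_lineno != lineno or eol:
            if line:
                yield line
            line = []
        if not eol:
            line.append(value)
            lineno = token_lineno
    if line:
        yield line

# Line numbers only (as with data_only=True): lineno changes split lines.
print(list(group_line_parts([('ARGUMENT', 'Hello', 2),
                             ('ARGUMENT', 'world', 3)])))
# -> [['Hello'], ['world']]

# Neither EOLs nor line numbers: everything stays on the same line.
print(list(group_line_parts([('ARGUMENT', 'Hello', -1),
                             ('ARGUMENT', 'world', -1)])))
# -> [['Hello', 'world']]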
108 changes: 108 additions & 0 deletions utest/parsing/test_model.py
@@ -1007,6 +1007,114 @@ def test_continue(self):
         get_and_assert_model(data, expected)
 
 
+class TestDocumentation(unittest.TestCase):
+
+    def test_empty(self):
+        data = '''\
+*** Settings ***
+Documentation
+'''
+        expected = Documentation(
+            tokens=[Token(Token.DOCUMENTATION, 'Documentation', 2, 0),
+                    Token(Token.EOL, '\n', 2, 13)]
+        )
+        self._verify_documentation(data, expected, '')
+
+    def test_one_line(self):
+        data = '''\
+*** Settings ***
+Documentation    Hello!
+'''
+        expected = Documentation(
+            tokens=[Token(Token.DOCUMENTATION, 'Documentation', 2, 0),
+                    Token(Token.SEPARATOR, '    ', 2, 13),
+                    Token(Token.ARGUMENT, 'Hello!', 2, 17),
+                    Token(Token.EOL, '\n', 2, 23)]
+        )
+        self._verify_documentation(data, expected, 'Hello!')
+
+    def test_multi_part(self):
+        data = '''\
+*** Settings ***
+Documentation    Hello    world
+'''
+        expected = Documentation(
+            tokens=[Token(Token.DOCUMENTATION, 'Documentation', 2, 0),
+                    Token(Token.SEPARATOR, '    ', 2, 13),
+                    Token(Token.ARGUMENT, 'Hello', 2, 17),
+                    Token(Token.SEPARATOR, '    ', 2, 22),
+                    Token(Token.ARGUMENT, 'world', 2, 26),
+                    Token(Token.EOL, '\n', 2, 31)]
+        )
+        self._verify_documentation(data, expected, 'Hello world')
+
+    def test_multi_line(self):
+        data = '''\
+*** Settings ***
+Documentation    Documentation
+...              in
+...              multiple lines    and parts
+'''
+        expected = Documentation(
+            tokens=[Token(Token.DOCUMENTATION, 'Documentation', 2, 0),
+                    Token(Token.SEPARATOR, '    ', 2, 13),
+                    Token(Token.ARGUMENT, 'Documentation', 2, 17),
+                    Token(Token.EOL, '\n', 2, 30),
+                    Token(Token.CONTINUATION, '...', 3, 0),
+                    Token(Token.SEPARATOR, '              ', 3, 3),
+                    Token(Token.ARGUMENT, 'in', 3, 17),
+                    Token(Token.EOL, '\n', 3, 19),
+                    Token(Token.CONTINUATION, '...', 4, 0),
+                    Token(Token.SEPARATOR, '              ', 4, 3),
+                    Token(Token.ARGUMENT, 'multiple lines', 4, 17),
+                    Token(Token.SEPARATOR, '    ', 4, 31),
+                    Token(Token.ARGUMENT, 'and parts', 4, 35),
+                    Token(Token.EOL, '\n', 4, 44)]
+        )
+        self._verify_documentation(data, expected,
+                                   'Documentation\nin\nmultiple lines and parts')
+
+    def test_multi_line_with_empty_lines(self):
+        data = '''\
+*** Settings ***
+Documentation    Documentation
+...
+...              with empty
+'''
+        expected = Documentation(
+            tokens=[Token(Token.DOCUMENTATION, 'Documentation', 2, 0),
+                    Token(Token.SEPARATOR, '    ', 2, 13),
+                    Token(Token.ARGUMENT, 'Documentation', 2, 17),
+                    Token(Token.EOL, '\n', 2, 30),
+                    Token(Token.CONTINUATION, '...', 3, 0),
+                    Token(Token.ARGUMENT, '', 3, 3),
+                    Token(Token.EOL, '\n', 3, 3),
+                    Token(Token.CONTINUATION, '...', 4, 0),
+                    Token(Token.SEPARATOR, '              ', 4, 3),
+                    Token(Token.ARGUMENT, 'with empty', 4, 17),
+                    Token(Token.EOL, '\n', 4, 27)]
+        )
+        self._verify_documentation(data, expected, 'Documentation\n\nwith empty')
+
+    def _verify_documentation(self, data, expected, value):
+        # Model has both EOLs and line numbers.
+        doc = get_model(data).sections[0].body[0]
+        assert_model(doc, expected)
+        assert_equal(doc.value, value)
+        # Model has only line numbers, no EOLs or other non-data tokens.
+        doc = get_model(data, data_only=True).sections[0].body[0]
+        expected.tokens = [token for token in expected.tokens
+                           if token.type not in Token.NON_DATA_TOKENS]
+        assert_model(doc, expected)
+        assert_equal(doc.value, value)
+        # Model has only EOLs, no line numbers.
+        doc = Documentation.from_params(value)
+        assert_equal(doc.value, value)
+        # Model has neither EOLs nor line numbers. Everything is one line.
+        doc.tokens = [token for token in doc.tokens if token.type != Token.EOL]
+        assert_equal(doc.value, ' '.join(value.splitlines()))
+
+
 class TestError(unittest.TestCase):
 
     def test_get_errors_from_tokens(self):
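The four scenarios that `_verify_documentation` covers can be reproduced via the public `robot.api` entry points the tests import; a small illustration of why line numbers matter once `data_only=True` strips EOL and separator tokens:

from robot.api import get_model

data = '''\
*** Settings ***
Documentation    First line
...              Second line
'''
# Full model: EOL tokens mark where lines change.
doc = get_model(data).sections[0].body[0]
print(repr(doc.value))    # 'First line\nSecond line'
# data_only=True drops EOLs, so `value` falls back to line numbers.
doc = get_model(data, data_only=True).sections[0].body[0]
print(repr(doc.value))    # 'First line\nSecond line'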
24 changes: 15 additions & 9 deletions utest/parsing/test_statements.py
@@ -7,24 +7,25 @@


 def assert_created_statement(tokens, base_class, **params):
-    new_statement = base_class.from_params(**params)
+    statement = base_class.from_params(**params)
     assert_statements(
-        new_statement,
+        statement,
         base_class(tokens)
     )
     assert_statements(
-        new_statement,
+        statement,
         base_class.from_tokens(tokens)
     )
     assert_statements(
-        new_statement,
+        statement,
         Statement.from_tokens(tokens)
     )
-    if len(set(id(t) for t in new_statement.tokens)) != len(tokens):
+    if len(set(id(t) for t in statement.tokens)) != len(tokens):
         lines = '\n'.join(f'{i:18}{t}' for i, t in
                           [('ID', 'TOKEN')] +
-                          [(str(id(t)), repr(t)) for t in new_statement.tokens])
+                          [(str(id(t)), repr(t)) for t in statement.tokens])
         raise AssertionError(f'Tokens should not be reused!\n\n{lines}')
+    return statement


def compare_statements(first, second):
Expand Down Expand Up @@ -407,11 +408,12 @@ def test_Documentation(self):
Token(Token.ARGUMENT, 'Example documentation'),
Token(Token.EOL, '\n')
]
assert_created_statement(
doc = assert_created_statement(
tokens,
Documentation,
value='Example documentation'
)
assert_equal(doc.value, 'Example documentation')

# Documentation First line.
# ... Second line aligned.
@@ -427,17 +429,19 @@
             Token(Token.ARGUMENT, 'Second line aligned.'),
             Token(Token.EOL),
             Token(Token.CONTINUATION),
+            Token(Token.ARGUMENT, ''),
             Token(Token.EOL),
             Token(Token.CONTINUATION),
             Token(Token.SEPARATOR, ' '),
             Token(Token.ARGUMENT, 'Second paragraph.'),
             Token(Token.EOL),
         ]
-        assert_created_statement(
+        doc = assert_created_statement(
             tokens,
             Documentation,
             value='First line.\nSecond line aligned.\n\nSecond paragraph.'
         )
+        assert_equal(doc.value, 'First line.\nSecond line aligned.\n\nSecond paragraph.')
 
         # Test/Keyword
         # [Documentation] First line
@@ -457,21 +461,23 @@
             Token(Token.EOL),
             Token(Token.SEPARATOR, ' '),
             Token(Token.CONTINUATION),
+            Token(Token.ARGUMENT, ''),
             Token(Token.EOL),
             Token(Token.SEPARATOR, ' '),
             Token(Token.CONTINUATION),
             Token(Token.SEPARATOR, ' '),
             Token(Token.ARGUMENT, 'Second paragraph.'),
             Token(Token.EOL),
         ]
-        assert_created_statement(
+        doc = assert_created_statement(
             tokens,
             Documentation,
             value='First line.\nSecond line aligned.\n\nSecond paragraph.\n',
             indent=' ',
             separator=' ',
             settings_section=False
         )
+        assert_equal(doc.value, 'First line.\nSecond line aligned.\n\nSecond paragraph.')
 
     def test_Metadata(self):
         tokens = [
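At the token level, the `from_params` fix means an empty documentation line still gets an (empty) ARGUMENT token followed by EOL, matching the updated expected tokens above. A quick check (a sketch, assuming the public `robot.api.Token` constants):

from robot.api import Token
from robot.parsing.model.statements import Documentation

doc = Documentation.from_params('First\n\nThird')
types = [t.type for t in doc.tokens if t.type in (Token.ARGUMENT, Token.EOL)]
print(types)
# -> ['ARGUMENT', 'EOL', 'ARGUMENT', 'EOL', 'ARGUMENT', 'EOL']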
2 changes: 1 addition & 1 deletion utest/parsing/test_tokenizer.py
@@ -58,7 +58,7 @@ def test_internal_spaces(self):
                       (DATA, 'S p a c e s', 1, 17),
                       (EOL, '', 1, 28)])
 
-    def test_single_tab_is_enough_as_sepator(self):
+    def test_single_tab_is_enough_as_separator(self):
         verify_split('\tT\ta\t\t\tb\t\t',
                      [(DATA, '', 1, 0),
                       (SEPA, '\t', 1, 0),
