Enhancements for per-file language config.

- Config tokens and statements in the model have a separate type (CONFIG) and node (Config), respectively.
- Config must start a row, and no other content, except explicit comments, is allowed on the same row.
- The File node has a `languages` attribute that contains the detected languages as a tuple of language codes.
robotframework · Aug 26, 2022 · 9057450 · 9057450
1 parent 2ca8371
commit 9057450
Show file tree

Hide file tree

Showing 11 changed files with 199 additions and 28 deletions.
diff --git a/atest/robot/parsing/translations.robot b/atest/robot/parsing/translations.robot
@@ -44,7 +44,7 @@ Per file configuration with multiple languages
     Should Be Equal    ${tc.doc}    приклад
 
 Invalid per file configuration
-    Error in file    0    parsing/translations/per_file_config/many.robot    6
+    Error in file    0    parsing/translations/per_file_config/many.robot    4
     ...    Invalid language configuration: No language with name 'invalid' found.
 
 Per file configuration bleeds to other files

diff --git a/atest/testdata/parsing/translations/per_file_config/many.robot b/atest/testdata/parsing/translations/per_file_config/many.robot
@@ -1,9 +1,11 @@
 Language: DE
 LANGUAGE: Brazilian Portuguese
 
-This is not language: config
+language: invalid
+language: bad again    but not recognized due to this text
 
-language: THAI    language: invalid    language: ukrainian
+language: THAI         # comment here is fine
+language:ukrainian
 
 *** Einstellungen ***
 Documentação          Exemplo

diff --git a/src/robot/api/parsing.py b/src/robot/api/parsing.py
@@ -239,6 +239,7 @@ class were exposed directly via the :mod:`robot.api` package, but other
 - :class:`~robot.parsing.model.statements.Break`
 - :class:`~robot.parsing.model.statements.Continue`
 - :class:`~robot.parsing.model.statements.Comment`
+- :class:`~robot.parsing.model.statements.Config` (new in 5.1)
 - :class:`~robot.parsing.model.statements.Error`
 - :class:`~robot.parsing.model.statements.EmptyLine`
 
@@ -257,7 +258,7 @@ class were exposed directly via the :mod:`robot.api` package, but other
     class TestNamePrinter(ModelVisitor):
 
         def visit_File(self, node):
-            print(f"File '{node.source}' has following tests:")
+            print(f"File '{node.source}' has the following tests:")
             # Call `generic_visit` to visit also child nodes.
             self.generic_visit(node)
 
@@ -272,7 +273,7 @@ def visit_TestCaseName(self, node):
 When the above code is run using the earlier :file:`example.robot`, the
 output is this::
 
-    File 'example.robot' has following tests:
+    File 'example.robot' has the following tests:
     - Example (on line 2)
     - Second example (on line 5)
 
@@ -544,6 +545,7 @@ def visit_File(self, node):
     Continue,
     Break,
     Comment,
+    Config,
     Error,
     EmptyLine
 )

diff --git a/src/robot/parsing/lexer/statementlexers.py b/src/robot/parsing/lexer/statementlexers.py
@@ -115,17 +115,17 @@ class CommentLexer(SingleType):
 
 
 class ImplicitCommentLexer(CommentLexer):
-    language = re.compile(r'language:(.+)', re.IGNORECASE)
 
     def input(self, statement):
         super().input(statement)
-        for token in statement:
-            match = self.language.match(token.value)
-            if match:
-                try:
-                    self.ctx.add_language(match.group(1).strip())
-                except ValueError as err:
-                    token.set_error(f'Invalid language configuration: {err}')
+        if len(statement) == 1 and statement[0].value.lower().startswith('language:'):
+            lang = statement[0].value.split(':', 1)[1].strip()
+            try:
+                self.ctx.add_language(lang)
+            except ValueError as err:
+                statement[0].set_error(f'Invalid language configuration: {err}')
+            else:
+                statement[0].type = Token.CONFIG
 
     def lex(self):
         for token in self.statement:

diff --git a/src/robot/parsing/lexer/tokens.py b/src/robot/parsing/lexer/tokens.py
@@ -99,6 +99,7 @@ class Token:
     SEPARATOR = 'SEPARATOR'
     COMMENT = 'COMMENT'
     CONTINUATION = 'CONTINUATION'
+    CONFIG = 'CONFIG'
     EOL = 'EOL'
     EOS = 'EOS'
 

diff --git a/src/robot/parsing/model/blocks.py b/src/robot/parsing/model/blocks.py
@@ -71,11 +71,12 @@ def __init__(self, header, body=None, errors=()):
 
 class File(Block):
     _fields = ('sections',)
-    _attributes = ('source',) + Block._attributes
+    _attributes = ('source', 'languages') + Block._attributes
 
-    def __init__(self, sections=None, source=None):
+    def __init__(self, sections=None, source=None, languages=()):
         self.sections = sections or []
         self.source = source
+        self.languages = languages
 
     def save(self, output=None):
         """Save model to the given ``output`` or to the original source file.

diff --git a/src/robot/parsing/model/statements.py b/src/robot/parsing/model/statements.py
@@ -16,6 +16,7 @@
 import ast
 import re
 
+from robot.conf import Language
 from robot.running.arguments import UserKeywordArgumentParser
 from robot.utils import is_list_like, normalize_whitespace, seq2str, split_from_equals
 from robot.variables import is_scalar_assign, is_dict_variable, search_variable
@@ -1057,6 +1058,23 @@ def from_params(cls, comment, indent=FOUR_SPACES, eol=EOL):
         ])
 
 
+@Statement.register
+class Config(Statement):
+    type = Token.CONFIG
+
+    @classmethod
+    def from_params(cls, config, eol=EOL):
+        return cls([
+            Token(Token.CONFIG, config),
+            Token(Token.EOL, eol)
+        ])
+
+    @property
+    def language(self):
+        value = self.get_value(Token.CONFIG)
+        return Language.from_name(value[len('language:'):]) if value else None
+
+
 @Statement.register
 class Error(Statement):
     type = Token.ERROR

diff --git a/src/robot/parsing/parser/fileparser.py b/src/robot/parsing/parser/fileparser.py
@@ -49,6 +49,7 @@ def parse(self, statement):
             Token.TASK_HEADER: TestCaseSectionParser,
             Token.KEYWORD_HEADER: KeywordSectionParser,
             Token.COMMENT_HEADER: CommentSectionParser,
+            Token.CONFIG: ImplicitCommentSectionParser,
             Token.COMMENT: ImplicitCommentSectionParser,
             Token.ERROR: ImplicitCommentSectionParser,
             Token.EOL: ImplicitCommentSectionParser

diff --git a/src/robot/parsing/parser/parser.py b/src/robot/parsing/parser/parser.py
@@ -14,7 +14,7 @@
 #  limitations under the License.
 
 from ..lexer import Token, get_tokens, get_resource_tokens, get_init_tokens
-from ..model import Statement
+from ..model import Statement, ModelVisitor
 
 from .fileparser import FileParser
 
@@ -100,4 +100,16 @@ def _statements_to_model(statements, source=None):
         parser = stack[-1].parse(statement)
         if parser:
             stack.append(parser)
+    # Implicit comment sections have no header.
+    if model.sections and model.sections[0].header is None:
+        SetLanguages(model).visit(model.sections[0])
     return model
+
+
+class SetLanguages(ModelVisitor):
+
+    def __init__(self, file):
+        self.file = file
+
+    def visit_Config(self, node):
+        self.file.languages += (node.language.code,)
diff --git a/utest/parsing/test_lexer.py b/utest/parsing/test_lexer.py
@@ -2197,28 +2197,28 @@ def _verify(self, data, expected, test=False):
 class TestLanguageConfig(unittest.TestCase):
 
     def test_lang_as_code(self):
-        self._test('fi')
-        self._test('F-I')
+        self._test_explicit_config('fi')
+        self._test_explicit_config('F-I')
 
     def test_lang_as_name(self):
-        self._test('Finnish')
-        self._test('FINNISH')
+        self._test_explicit_config('Finnish')
+        self._test_explicit_config('FINNISH')
 
     def test_lang_as_Language(self):
-        self._test(Language.from_name('fi'))
+        self._test_explicit_config(Language.from_name('fi'))
 
     def test_lang_as_list(self):
-        self._test(['fi', Language.from_name('de')])
-        self._test([Language.from_name('fi'), 'de'])
+        self._test_explicit_config(['fi', Language.from_name('de')])
+        self._test_explicit_config([Language.from_name('fi'), 'de'])
 
     def test_lang_as_tuple(self):
-        self._test(('f-i', Language.from_name('de')))
-        self._test((Language.from_name('fi'), 'de'))
+        self._test_explicit_config(('f-i', Language.from_name('de')))
+        self._test_explicit_config((Language.from_name('fi'), 'de'))
 
     def test_lang_as_Languages(self):
-        self._test(Languages('fi'))
+        self._test_explicit_config(Languages('fi'))
 
-    def _test(self, lang):
+    def _test_explicit_config(self, lang):
         data = '''\
 *** Asetukset ***
 Dokumentaatio    Documentation
@@ -2237,6 +2237,84 @@ def _test(self, lang):
         assert_tokens(data, expected, get_init_tokens, lang=lang)
         assert_tokens(data, expected, get_resource_tokens, lang=lang)
 
+    def test_per_file_config(self):
+        data = '''\
+language: pt    not recognized
+language: fi
+ignored    language: pt
+Language:German    # ok!
+*** Asetukset ***
+Dokumentaatio    Documentation
+'''
+        expected = [
+            (T.COMMENT, 'language: pt', 1, 0),
+            (T.SEPARATOR, '    ', 1, 12),
+            (T.COMMENT, 'not recognized', 1, 16),
+            (T.EOL, '\n', 1, 30),
+            (T.EOS, '', 1, 31),
+            (T.CONFIG, 'language: fi', 2, 0),
+            (T.EOL, '\n', 2, 12),
+            (T.EOS, '', 2, 13),
+            (T.COMMENT, 'ignored', 3, 0),
+            (T.SEPARATOR, '    ', 3, 7),
+            (T.COMMENT, 'language: pt', 3, 11),
+            (T.EOL, '\n', 3, 23),
+            (T.EOS, '', 3, 24),
+            (T.CONFIG, 'Language:German', 4, 0),
+            (T.SEPARATOR, '    ', 4, 15),
+            (T.COMMENT, '# ok!', 4, 19),
+            (T.EOL, '\n', 4, 24),
+            (T.EOS, '', 4, 25),
+            (T.SETTING_HEADER, '*** Asetukset ***', 5, 0),
+            (T.EOL, '\n', 5, 17),
+            (T.EOS, '', 5, 18),
+            (T.DOCUMENTATION, 'Dokumentaatio', 6, 0),
+            (T.SEPARATOR, '    ', 6, 13),
+            (T.ARGUMENT, 'Documentation', 6, 17),
+            (T.EOL, '\n', 6, 30),
+            (T.EOS, '', 6, 31),
+        ]
+        assert_tokens(data, expected, get_tokens)
+        lang = Languages()
+        assert_tokens(data, expected, get_init_tokens, lang=lang)
+        assert_equal(lang.languages,
+                     [Language.from_name(lang) for lang in ('en', 'fi', 'de')])
+
+    def test_invalid_per_file_config(self):
+        data = '''\
+language: in:va:lid
+language: bad again    but not recognized as config and ignored
+Language: Finnish
+*** Asetukset ***
+Dokumentaatio    Documentation
+'''
+        expected = [
+            (T.ERROR, 'language: in:va:lid', 1, 0,
+             "Invalid language configuration: No language with name 'in:va:lid' found."),
+            (T.EOL, '\n', 1, 19),
+            (T.EOS, '', 1, 20),
+            (T.COMMENT, 'language: bad again', 2, 0),
+            (T.SEPARATOR, '    ', 2, 19),
+            (T.COMMENT, 'but not recognized as config and ignored', 2, 23),
+            (T.EOL, '\n', 2, 63),
+            (T.EOS, '', 2, 64),
+            (T.CONFIG, 'Language: Finnish', 3, 0),
+            (T.EOL, '\n', 3, 17),
+            (T.EOS, '', 3, 18),
+            (T.SETTING_HEADER, '*** Asetukset ***', 4, 0),
+            (T.EOL, '\n', 4, 17),
+            (T.EOS, '', 4, 18),
+            (T.DOCUMENTATION, 'Dokumentaatio', 5, 0),
+            (T.SEPARATOR, '    ', 5, 13),
+            (T.ARGUMENT, 'Documentation', 5, 17),
+            (T.EOL, '\n', 5, 30),
+            (T.EOS, '', 5, 31),
+        ]
+        assert_tokens(data, expected, get_tokens)
+        lang = Languages()
+        assert_tokens(data, expected, get_init_tokens, lang=lang)
+        assert_equal(lang.languages,
+                     [Language.from_name(lang) for lang in ('en', 'fi')])
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/utest/parsing/test_model.py b/utest/parsing/test_model.py
@@ -10,7 +10,7 @@
     Keyword, KeywordSection, SettingSection, TestCase, TestCaseSection, VariableSection
 )
 from robot.parsing.model.statements import (
-    Arguments, Break, Comment, Continue, Documentation, ForHeader, End, ElseHeader,
+    Arguments, Break, Comment, Config, Continue, Documentation, ForHeader, End, ElseHeader,
     ElseIfHeader, EmptyLine, Error, IfHeader, InlineIfHeader, TryHeader, ExceptHeader,
     FinallyHeader, KeywordCall, KeywordName, ReturnStatement, SectionHeader,
     TestCaseName, Variable, WhileHeader
@@ -1266,5 +1266,61 @@ def visit_Block(self, node):
         assert_model(model, expected)
 
 
+class TestLanguageConfig(unittest.TestCase):
+
+    def test_valid(self):
+        model = get_model('''\
+language: fi
+language: bad
+language: bad    but ignored
+language: de     # ok
+*** Einstellungen ***
+Dokumentaatio    Header is de and setting is fi.
+''')
+        expected = File(
+            languages=('fi', 'de'),
+            sections=[
+                CommentSection(body=[
+                    Config([
+                        Token('CONFIG', 'language: fi', 1, 0),
+                        Token('EOL', '\n', 1, 12)
+                    ]),
+                    Error([
+                        Token('ERROR', 'language: bad', 2, 0,
+                              "Invalid language configuration: No language with name 'bad' found."),
+                        Token('EOL', '\n', 2, 13)
+                    ]),
+                    Comment([
+                        Token('COMMENT', 'language: bad', 3, 0),
+                        Token('SEPARATOR', '    ', 3, 13),
+                        Token('COMMENT', 'but ignored', 3, 17),
+                        Token('EOL', '\n', 3, 28)
+                    ]),
+                    Config([
+                        Token('CONFIG', 'language: de', 4, 0),
+                        Token('SEPARATOR', '     ', 4, 12),
+                        Token('COMMENT', '# ok', 4, 17),
+                        Token('EOL', '\n', 4, 21)
+                    ]),
+                ]),
+                SettingSection(
+                    header=SectionHeader([
+                        Token('SETTING HEADER', '*** Einstellungen ***', 5, 0),
+                        Token('EOL', '\n', 5, 21)
+                    ]),
+                    body=[
+                        Documentation([
+                            Token('DOCUMENTATION', 'Dokumentaatio', 6, 0),
+                            Token('SEPARATOR', '    ', 6, 13),
+                            Token('ARGUMENT', 'Header is de and setting is fi.', 6, 17),
+                            Token('EOL', '\n', 6, 48)
+                        ])
+                    ]
+                )
+            ]
+        )
+        assert_model(model, expected)
+
+
 if __name__ == '__main__':
     unittest.main()