From 06d86eb442f39c4eda068896d1ec86178effb98c Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Sun, 20 Aug 2023 16:49:07 +0200 Subject: [PATCH 01/15] Add more tests for pygettext --- Lib/test/test_tools/test_i18n/__init__.py | 6 ++ .../test_tools/test_i18n/data/docstrings.pot | 40 +++++++++++ .../test_tools/test_i18n/data/docstrings.py | 41 ++++++++++++ .../test_tools/test_i18n/data/fileloc.pot | 35 ++++++++++ Lib/test/test_tools/test_i18n/data/fileloc.py | 26 +++++++ .../test_tools/test_i18n/data/messages.pot | 67 +++++++++++++++++++ .../test_tools/test_i18n/data/messages.py | 64 ++++++++++++++++++ .../test_tools/{ => test_i18n}/test_i18n.py | 45 ++++++++++++- 8 files changed, 321 insertions(+), 3 deletions(-) create mode 100644 Lib/test/test_tools/test_i18n/__init__.py create mode 100644 Lib/test/test_tools/test_i18n/data/docstrings.pot create mode 100644 Lib/test/test_tools/test_i18n/data/docstrings.py create mode 100644 Lib/test/test_tools/test_i18n/data/fileloc.pot create mode 100644 Lib/test/test_tools/test_i18n/data/fileloc.py create mode 100644 Lib/test/test_tools/test_i18n/data/messages.pot create mode 100644 Lib/test/test_tools/test_i18n/data/messages.py rename Lib/test/test_tools/{ => test_i18n}/test_i18n.py (86%) diff --git a/Lib/test/test_tools/test_i18n/__init__.py b/Lib/test/test_tools/test_i18n/__init__.py new file mode 100644 index 00000000000000..2f1449063fe6bb --- /dev/null +++ b/Lib/test/test_tools/test_i18n/__init__.py @@ -0,0 +1,6 @@ +import os.path +from test import support + + +def load_tests(*args): + return support.load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/test_tools/test_i18n/data/docstrings.pot b/Lib/test/test_tools/test_i18n/data/docstrings.pot new file mode 100644 index 00000000000000..b9d6f4dc287594 --- /dev/null +++ b/Lib/test/test_tools/test_i18n/data/docstrings.pot @@ -0,0 +1,40 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR , YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2023-08-20 16:42+0200\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + + +#: docstrings.py:7 +#, docstring +msgid "" +msgstr "" + +#: docstrings.py:18 +#, docstring +msgid "" +"multiline\n" +" docstring\n" +" " +msgstr "" + +#: docstrings.py:25 +#, docstring +msgid "docstring1" +msgstr "" + +#: docstrings.py:30 +#, docstring +msgid "Hello, {}!" +msgstr "" + diff --git a/Lib/test/test_tools/test_i18n/data/docstrings.py b/Lib/test/test_tools/test_i18n/data/docstrings.py new file mode 100644 index 00000000000000..85d7f159d37775 --- /dev/null +++ b/Lib/test/test_tools/test_i18n/data/docstrings.py @@ -0,0 +1,41 @@ +# Test docstring extraction +from gettext import gettext as _ + + +# Empty docstring +def test(x): + """""" + + +# Leading empty line +def test2(x): + + """docstring""" # XXX This should be extracted but isn't. + + +# XXX Multiline docstrings should be cleaned with `inspect.cleandoc`. +def test3(x): + """multiline + docstring + """ + + +# Multiple docstrings - only the first should be extracted +def test4(x): + """docstring1""" + """docstring2""" + + +def test5(x): + """Hello, {}!""".format("world!") # XXX This should not be extracted. + + +# Nested docstrings +def test6(x): + def inner(y): + """nested docstring""" # XXX This should be extracted but isn't. + + +class Outer: + class Inner: + "nested class docstring" # XXX This should be extracted but isn't. diff --git a/Lib/test/test_tools/test_i18n/data/fileloc.pot b/Lib/test/test_tools/test_i18n/data/fileloc.pot new file mode 100644 index 00000000000000..09d7b27e0893a4 --- /dev/null +++ b/Lib/test/test_tools/test_i18n/data/fileloc.pot @@ -0,0 +1,35 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR , YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2023-08-20 16:15+0200\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + + +#: fileloc.py:5 fileloc.py:6 +msgid "foo" +msgstr "" + +#: fileloc.py:9 +msgid "bar" +msgstr "" + +#: fileloc.py:14 fileloc.py:18 +#, docstring +msgid "docstring" +msgstr "" + +#: fileloc.py:22 fileloc.py:26 +#, docstring +msgid "baz" +msgstr "" + diff --git a/Lib/test/test_tools/test_i18n/data/fileloc.py b/Lib/test/test_tools/test_i18n/data/fileloc.py new file mode 100644 index 00000000000000..c5d4d0595fea52 --- /dev/null +++ b/Lib/test/test_tools/test_i18n/data/fileloc.py @@ -0,0 +1,26 @@ +# Test file locations +from gettext import gettext as _ + +# Duplicate strings +_('foo') +_('foo') + +# Duplicate strings on the same line should only add one location to the output +_('bar'), _('bar') + + +# Duplicate docstrings +class A: + """docstring""" + + +def f(): + """docstring""" + + +# Duplicate message and docstring +_('baz') + + +def g(): + """baz""" diff --git a/Lib/test/test_tools/test_i18n/data/messages.pot b/Lib/test/test_tools/test_i18n/data/messages.pot new file mode 100644 index 00000000000000..fd3dbe11108924 --- /dev/null +++ b/Lib/test/test_tools/test_i18n/data/messages.pot @@ -0,0 +1,67 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR , YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2023-08-20 16:42+0200\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + + +#: messages.py:5 +msgid "" +msgstr "" + +#: messages.py:8 messages.py:9 +msgid "parentheses" +msgstr "" + +#: messages.py:12 +msgid "Hello, world!" +msgstr "" + +#: messages.py:15 +msgid "" +"Hello,\n" +" multiline!\n" +msgstr "" + +#: messages.py:29 +msgid "Hello, {}!" +msgstr "" + +#: messages.py:33 +msgid "1" +msgstr "" + +#: messages.py:33 +msgid "2" +msgstr "" + +#: messages.py:34 messages.py:35 +msgid "A" +msgstr "" + +#: messages.py:34 messages.py:35 +msgid "B" +msgstr "" + +#: messages.py:36 +msgid "set" +msgstr "" + +#: messages.py:42 +msgid "nested string" +msgstr "" + +#: messages.py:47 +msgid "baz" +msgstr "" + diff --git a/Lib/test/test_tools/test_i18n/data/messages.py b/Lib/test/test_tools/test_i18n/data/messages.py new file mode 100644 index 00000000000000..f220294b8d5c67 --- /dev/null +++ b/Lib/test/test_tools/test_i18n/data/messages.py @@ -0,0 +1,64 @@ +# Test message extraction +from gettext import gettext as _ + +# Empty string +_("") + +# Extra parentheses +(_("parentheses")) +((_("parentheses"))) + +# Multiline strings +_("Hello, " + "world!") + +_("""Hello, + multiline! +""") + +# Invalid arguments +_() +_(None) +_(1) +_(False) +_(x="kwargs are not allowed") +_("foo", "bar") +_("something", x="something else") + +# .format() +_("Hello, {}!").format("world") # valid +_("Hello, {}!".format("world")) # invalid + +# Nested structures +_("1"), _("2") +arr = [_("A"), _("B")] +obj = {'a': _("A"), 'b': _("B")} +{{{_('set')}}} + + +# Nested functions and classes +def test(): + _("nested string") # XXX This should be extracted but isn't. + [_("nested string")] + + +class Foo: + def bar(self): + return _("baz") + + +def bar(x=_('default value')): # XXX This should be extracted but isn't. + pass + + +def baz(x=[_('default value')]): # XXX This should be extracted but isn't. + pass + + +# Shadowing _() +def _(x): + pass + + +def _(x="don't extract me"): + pass diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n/test_i18n.py similarity index 86% rename from Lib/test/test_tools/test_i18n.py rename to Lib/test/test_tools/test_i18n/test_i18n.py index c083a04475e726..98f879f4b888d1 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n/test_i18n.py @@ -1,6 +1,8 @@ """Tests to cover the Tools/i18n package""" import os +from pathlib import Path +import re import sys import unittest from textwrap import dedent @@ -16,16 +18,16 @@ class Test_pygettext(unittest.TestCase): """Tests for the pygettext.py tool""" - script = os.path.join(toolsdir,'i18n', 'pygettext.py') + script = os.path.join(toolsdir, 'i18n', 'pygettext.py') def get_header(self, data): """ utility: return the header of a .po file as a dictionary """ headers = {} for line in data.split('\n'): - if not line or line.startswith(('#', 'msgid','msgstr')): + if not line or line.startswith(('#', 'msgid', 'msgstr')): continue line = line.strip('"') - key, val = line.split(':',1) + key, val = line.split(':', 1) headers[key] = val.strip() return headers @@ -53,6 +55,25 @@ def get_msgids(self, data): return msgids + def assert_POT_equal(self, expected, actual): + """Check if two POT files are equal""" + # Normalize the creation date + date_pattern = re.compile(r'"POT-Creation-Date: .+?\n"') + header = '"POT-Creation-Date: 2000-01-01 00:00+0000\\n"' + expected = re.sub(date_pattern, header, expected) + actual = re.sub(date_pattern, header, actual) + + # Normalize the file location path separators in case this test is + # running on a platform which does not use '/' as a default separator + fileloc_pattern = re.compile(r'#:.+') + + def replace(match): + return match[0].replace(os.sep, "/") + expected = re.sub(fileloc_pattern, replace, expected) + actual = re.sub(fileloc_pattern, replace, actual) + + self.assertEqual(expected, actual) + def extract_docstrings_from_str(self, module_content): """ utility: return all msgids extracted from module_content """ filename = 'test_docstrings.py' @@ -310,6 +331,24 @@ def test_calls_in_fstring_with_partially_wrong_expression(self): self.assertNotIn('foo', msgids) self.assertIn('bar', msgids) + def test_pygettext_output(self): + """Test that the pygettext output exactly matches a file.""" + data_dir = Path(__file__).parent / 'data' + filenames = (('messages.py', 'messages.pot'), + ('docstrings.py', 'docstrings.pot'), + ('fileloc.py', 'fileloc.pot')) + + for input_file, output_file in filenames: + with self.subTest(f'Input file: data/{input_file}'): + contents = (data_dir / input_file).read_text() + with temp_cwd(None): + Path(input_file).write_text(contents) + assert_python_ok(self.script, '-D', input_file) + output = Path('messages.pot').read_text() + + expected = (data_dir / output_file).read_text() + self.assert_POT_equal(expected, output) + def test_files_list(self): """Make sure the directories are inspected for source files bpo-31920 From b1b08923b0d4583f47c6dec365e56212788a676d Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Sun, 20 Aug 2023 19:28:04 +0200 Subject: [PATCH 02/15] Specify file encoding --- Lib/test/test_tools/test_i18n/test_i18n.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_tools/test_i18n/test_i18n.py b/Lib/test/test_tools/test_i18n/test_i18n.py index 98f879f4b888d1..dec7858da8d653 100644 --- a/Lib/test/test_tools/test_i18n/test_i18n.py +++ b/Lib/test/test_tools/test_i18n/test_i18n.py @@ -340,13 +340,13 @@ def test_pygettext_output(self): for input_file, output_file in filenames: with self.subTest(f'Input file: data/{input_file}'): - contents = (data_dir / input_file).read_text() + contents = (data_dir / input_file).read_text(encoding='utf-8') with temp_cwd(None): - Path(input_file).write_text(contents) + Path(input_file).write_text(contents, encoding='utf-8') assert_python_ok(self.script, '-D', input_file) - output = Path('messages.pot').read_text() + output = Path('messages.pot').read_text(encoding='utf-8') - expected = (data_dir / output_file).read_text() + expected = (data_dir / output_file).read_text(encoding='utf-8') self.assert_POT_equal(expected, output) def test_files_list(self): From eb7f4887550d5e64ff75013eedc75c9e59f6bd65 Mon Sep 17 00:00:00 2001 From: Tomas R Date: Sun, 20 Aug 2023 18:25:55 +0200 Subject: [PATCH 03/15] Normalize charset --- Lib/test/test_tools/test_i18n/test_i18n.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_tools/test_i18n/test_i18n.py b/Lib/test/test_tools/test_i18n/test_i18n.py index dec7858da8d653..1890ac789352d4 100644 --- a/Lib/test/test_tools/test_i18n/test_i18n.py +++ b/Lib/test/test_tools/test_i18n/test_i18n.py @@ -63,6 +63,12 @@ def assert_POT_equal(self, expected, actual): expected = re.sub(date_pattern, header, expected) actual = re.sub(date_pattern, header, actual) + # Normalize charset (currently there's no way to specify the output charset) + charset_pattern = re.compile(r'"Content-Type: text/plain; charset=.+?\n"') + charset = "Content-Type: text/plain; charset=UTF-8\\n" + expected = re.sub(charset_pattern, charset, expected) + actual = re.sub(charset_pattern, charset, actual) + # Normalize the file location path separators in case this test is # running on a platform which does not use '/' as a default separator fileloc_pattern = re.compile(r'#:.+') @@ -342,9 +348,9 @@ def test_pygettext_output(self): with self.subTest(f'Input file: data/{input_file}'): contents = (data_dir / input_file).read_text(encoding='utf-8') with temp_cwd(None): - Path(input_file).write_text(contents, encoding='utf-8') + Path(input_file).write_text(contents) assert_python_ok(self.script, '-D', input_file) - output = Path('messages.pot').read_text(encoding='utf-8') + output = Path('messages.pot').read_text() expected = (data_dir / output_file).read_text(encoding='utf-8') self.assert_POT_equal(expected, output) From 7428393802af6076526a46030bfdae623884a0e4 Mon Sep 17 00:00:00 2001 From: Tomas R Date: Mon, 21 Aug 2023 20:20:11 +0200 Subject: [PATCH 04/15] Apply suggestions from code review Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Lib/test/test_tools/test_i18n/test_i18n.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_tools/test_i18n/test_i18n.py b/Lib/test/test_tools/test_i18n/test_i18n.py index 1890ac789352d4..df37eeee794aef 100644 --- a/Lib/test/test_tools/test_i18n/test_i18n.py +++ b/Lib/test/test_tools/test_i18n/test_i18n.py @@ -63,14 +63,14 @@ def assert_POT_equal(self, expected, actual): expected = re.sub(date_pattern, header, expected) actual = re.sub(date_pattern, header, actual) - # Normalize charset (currently there's no way to specify the output charset) + # Normalize charset to UTF-8 (currently there's no way to specify the output charset) charset_pattern = re.compile(r'"Content-Type: text/plain; charset=.+?\n"') charset = "Content-Type: text/plain; charset=UTF-8\\n" expected = re.sub(charset_pattern, charset, expected) actual = re.sub(charset_pattern, charset, actual) # Normalize the file location path separators in case this test is - # running on a platform which does not use '/' as a default separator + # running on Windows (which uses '\') fileloc_pattern = re.compile(r'#:.+') def replace(match): @@ -349,7 +349,7 @@ def test_pygettext_output(self): contents = (data_dir / input_file).read_text(encoding='utf-8') with temp_cwd(None): Path(input_file).write_text(contents) - assert_python_ok(self.script, '-D', input_file) + assert_python_ok(self.script, '--docstrings', input_file) output = Path('messages.pot').read_text() expected = (data_dir / output_file).read_text(encoding='utf-8') From f06cbb56fac2f3bca204282b41126394395f18d2 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Mon, 21 Aug 2023 20:30:04 +0200 Subject: [PATCH 05/15] Apply suggestions from code review --- Lib/test/test_tools/test_i18n/test_i18n.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_tools/test_i18n/test_i18n.py b/Lib/test/test_tools/test_i18n/test_i18n.py index df37eeee794aef..0b5df86fbbb527 100644 --- a/Lib/test/test_tools/test_i18n/test_i18n.py +++ b/Lib/test/test_tools/test_i18n/test_i18n.py @@ -14,6 +14,8 @@ skip_if_missing() +DATA_DIR = Path(__file__).resolve().parent / 'data' + class Test_pygettext(unittest.TestCase): """Tests for the pygettext.py tool""" @@ -339,20 +341,19 @@ def test_calls_in_fstring_with_partially_wrong_expression(self): def test_pygettext_output(self): """Test that the pygettext output exactly matches a file.""" - data_dir = Path(__file__).parent / 'data' filenames = (('messages.py', 'messages.pot'), ('docstrings.py', 'docstrings.pot'), ('fileloc.py', 'fileloc.pot')) for input_file, output_file in filenames: - with self.subTest(f'Input file: data/{input_file}'): - contents = (data_dir / input_file).read_text(encoding='utf-8') + with self.subTest(input_file=f'data/{input_file}'): + contents = (DATA_DIR / input_file).read_text(encoding='utf-8') with temp_cwd(None): Path(input_file).write_text(contents) assert_python_ok(self.script, '--docstrings', input_file) output = Path('messages.pot').read_text() - expected = (data_dir / output_file).read_text(encoding='utf-8') + expected = (DATA_DIR / output_file).read_text(encoding='utf-8') self.assert_POT_equal(expected, output) def test_files_list(self): From cc4e6635bcf57b684496bd331b2cf11d37149162 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Mon, 28 Oct 2024 20:32:53 +0100 Subject: [PATCH 06/15] Do not create a new package --- .../test_tools/{test_i18n/data => i18n_data}/docstrings.pot | 0 .../test_tools/{test_i18n/data => i18n_data}/docstrings.py | 0 .../test_tools/{test_i18n/data => i18n_data}/fileloc.pot | 0 .../test_tools/{test_i18n/data => i18n_data}/fileloc.py | 0 .../test_tools/{test_i18n/data => i18n_data}/messages.pot | 0 .../test_tools/{test_i18n/data => i18n_data}/messages.py | 0 Lib/test/test_tools/{test_i18n => }/test_i18n.py | 0 Lib/test/test_tools/test_i18n/__init__.py | 6 ------ 8 files changed, 6 deletions(-) rename Lib/test/test_tools/{test_i18n/data => i18n_data}/docstrings.pot (100%) rename Lib/test/test_tools/{test_i18n/data => i18n_data}/docstrings.py (100%) rename Lib/test/test_tools/{test_i18n/data => i18n_data}/fileloc.pot (100%) rename Lib/test/test_tools/{test_i18n/data => i18n_data}/fileloc.py (100%) rename Lib/test/test_tools/{test_i18n/data => i18n_data}/messages.pot (100%) rename Lib/test/test_tools/{test_i18n/data => i18n_data}/messages.py (100%) rename Lib/test/test_tools/{test_i18n => }/test_i18n.py (100%) delete mode 100644 Lib/test/test_tools/test_i18n/__init__.py diff --git a/Lib/test/test_tools/test_i18n/data/docstrings.pot b/Lib/test/test_tools/i18n_data/docstrings.pot similarity index 100% rename from Lib/test/test_tools/test_i18n/data/docstrings.pot rename to Lib/test/test_tools/i18n_data/docstrings.pot diff --git a/Lib/test/test_tools/test_i18n/data/docstrings.py b/Lib/test/test_tools/i18n_data/docstrings.py similarity index 100% rename from Lib/test/test_tools/test_i18n/data/docstrings.py rename to Lib/test/test_tools/i18n_data/docstrings.py diff --git a/Lib/test/test_tools/test_i18n/data/fileloc.pot b/Lib/test/test_tools/i18n_data/fileloc.pot similarity index 100% rename from Lib/test/test_tools/test_i18n/data/fileloc.pot rename to Lib/test/test_tools/i18n_data/fileloc.pot diff --git a/Lib/test/test_tools/test_i18n/data/fileloc.py b/Lib/test/test_tools/i18n_data/fileloc.py similarity index 100% rename from Lib/test/test_tools/test_i18n/data/fileloc.py rename to Lib/test/test_tools/i18n_data/fileloc.py diff --git a/Lib/test/test_tools/test_i18n/data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot similarity index 100% rename from Lib/test/test_tools/test_i18n/data/messages.pot rename to Lib/test/test_tools/i18n_data/messages.pot diff --git a/Lib/test/test_tools/test_i18n/data/messages.py b/Lib/test/test_tools/i18n_data/messages.py similarity index 100% rename from Lib/test/test_tools/test_i18n/data/messages.py rename to Lib/test/test_tools/i18n_data/messages.py diff --git a/Lib/test/test_tools/test_i18n/test_i18n.py b/Lib/test/test_tools/test_i18n.py similarity index 100% rename from Lib/test/test_tools/test_i18n/test_i18n.py rename to Lib/test/test_tools/test_i18n.py diff --git a/Lib/test/test_tools/test_i18n/__init__.py b/Lib/test/test_tools/test_i18n/__init__.py deleted file mode 100644 index 2f1449063fe6bb..00000000000000 --- a/Lib/test/test_tools/test_i18n/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -import os.path -from test import support - - -def load_tests(*args): - return support.load_package_tests(os.path.dirname(__file__), *args) From 13647052e6b1cf5b7c18da31ee3c44353ef6ee54 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Mon, 28 Oct 2024 20:33:20 +0100 Subject: [PATCH 07/15] Use pathlib everywhere --- Lib/test/test_tools/test_i18n.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index 0b5df86fbbb527..38940450099171 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -1,11 +1,11 @@ """Tests to cover the Tools/i18n package""" import os -from pathlib import Path import re import sys import unittest from textwrap import dedent +from pathlib import Path from test.support.script_helper import assert_python_ok from test.test_tools import skip_if_missing, toolsdir @@ -14,13 +14,13 @@ skip_if_missing() -DATA_DIR = Path(__file__).resolve().parent / 'data' +DATA_DIR = Path(__file__).resolve().parent / 'i18n_data' class Test_pygettext(unittest.TestCase): """Tests for the pygettext.py tool""" - script = os.path.join(toolsdir, 'i18n', 'pygettext.py') + script = Path(toolsdir, 'i18n', 'pygettext.py') def get_header(self, data): """ utility: return the header of a .po file as a dictionary """ @@ -364,18 +364,18 @@ def test_files_list(self): text2 = 'Text to translate2' text3 = 'Text to ignore' with temp_cwd(None), temp_dir(None) as sdir: - os.mkdir(os.path.join(sdir, 'pypkg')) - with open(os.path.join(sdir, 'pypkg', 'pymod.py'), 'w', - encoding='utf-8') as sfile: - sfile.write(f'_({text1!r})') - os.mkdir(os.path.join(sdir, 'pkg.py')) - with open(os.path.join(sdir, 'pkg.py', 'pymod2.py'), 'w', - encoding='utf-8') as sfile: - sfile.write(f'_({text2!r})') - os.mkdir(os.path.join(sdir, 'CVS')) - with open(os.path.join(sdir, 'CVS', 'pymod3.py'), 'w', - encoding='utf-8') as sfile: - sfile.write(f'_({text3!r})') + pymod = Path(sdir, 'pypkg', 'pymod.py') + pymod.parent.mkdir() + pymod.write_text(f'_({text1!r})', encoding='utf-8') + + pymod2 = Path(sdir, 'pkg.py', 'pymod2.py') + pymod2.parent.mkdir() + pymod2.write_text(f'_({text2!r})', encoding='utf-8') + + pymod3 = Path(sdir, 'CVS', 'pymod3.py') + pymod3.parent.mkdir() + pymod3.write_text(f'_({text3!r})', encoding='utf-8') + assert_python_ok(self.script, sdir) with open('messages.pot', encoding='utf-8') as fp: data = fp.read() From a5501b8f02055abc11ee1ff7bd386f3c53762080 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Mon, 28 Oct 2024 20:52:23 +0100 Subject: [PATCH 08/15] Simplify test --- Lib/test/test_tools/test_i18n.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index 38940450099171..1c407638ce00c0 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -340,20 +340,18 @@ def test_calls_in_fstring_with_partially_wrong_expression(self): self.assertIn('bar', msgids) def test_pygettext_output(self): - """Test that the pygettext output exactly matches a file.""" - filenames = (('messages.py', 'messages.pot'), - ('docstrings.py', 'docstrings.pot'), - ('fileloc.py', 'fileloc.pot')) - - for input_file, output_file in filenames: - with self.subTest(input_file=f'data/{input_file}'): - contents = (DATA_DIR / input_file).read_text(encoding='utf-8') + """Test that the pygettext output exactly matches snapshots.""" + self.maxDiff = None + for input_file in DATA_DIR.glob('*.py'): + output_file = input_file.with_suffix('.pot') + with self.subTest(input_file=f'i18n_data/{input_file}'): + contents = input_file.read_text(encoding='utf-8') with temp_cwd(None): - Path(input_file).write_text(contents) - assert_python_ok(self.script, '--docstrings', input_file) + Path(input_file.name).write_text(contents) + assert_python_ok(self.script, '--docstrings', input_file.name) output = Path('messages.pot').read_text() - expected = (DATA_DIR / output_file).read_text(encoding='utf-8') + expected = output_file.read_text(encoding='utf-8') self.assert_POT_equal(expected, output) def test_files_list(self): From e6b8c8050bfdd4bd26b92ed0baa24e249e906f15 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Mon, 28 Oct 2024 21:10:34 +0100 Subject: [PATCH 09/15] Extract POT normalization into a function --- Lib/test/test_tools/test_i18n.py | 48 +++++++++++++++++--------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index 1c407638ce00c0..54131b72b266fa 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -17,6 +17,31 @@ DATA_DIR = Path(__file__).resolve().parent / 'i18n_data' +def normalize_POT_file(pot): + """Normalize the POT creation timestamp, charset and + file locations to make the POT file easier to compare. + + """ + # Normalize the creation date. + date_pattern = re.compile(r'"POT-Creation-Date: .+?\\n"') + header = r'"POT-Creation-Date: 2000-01-01 00:00+0000\\n"' + pot = re.sub(date_pattern, header, pot) + + # Normalize charset to UTF-8 (currently there's no way to specify the output charset). + charset_pattern = re.compile(r'"Content-Type: text/plain; charset=.+?\\n"') + charset = r'"Content-Type: text/plain; charset=UTF-8\\n"' + pot = re.sub(charset_pattern, charset, pot) + + # Normalize file location path separators in case this test is + # running on Windows (which uses '\'). + fileloc_pattern = re.compile(r'#:.+') + + def replace(match): + return match[0].replace(os.sep, "/") + pot = re.sub(fileloc_pattern, replace, pot) + return pot + + class Test_pygettext(unittest.TestCase): """Tests for the pygettext.py tool""" @@ -59,28 +84,7 @@ def get_msgids(self, data): def assert_POT_equal(self, expected, actual): """Check if two POT files are equal""" - # Normalize the creation date - date_pattern = re.compile(r'"POT-Creation-Date: .+?\n"') - header = '"POT-Creation-Date: 2000-01-01 00:00+0000\\n"' - expected = re.sub(date_pattern, header, expected) - actual = re.sub(date_pattern, header, actual) - - # Normalize charset to UTF-8 (currently there's no way to specify the output charset) - charset_pattern = re.compile(r'"Content-Type: text/plain; charset=.+?\n"') - charset = "Content-Type: text/plain; charset=UTF-8\\n" - expected = re.sub(charset_pattern, charset, expected) - actual = re.sub(charset_pattern, charset, actual) - - # Normalize the file location path separators in case this test is - # running on Windows (which uses '\') - fileloc_pattern = re.compile(r'#:.+') - - def replace(match): - return match[0].replace(os.sep, "/") - expected = re.sub(fileloc_pattern, replace, expected) - actual = re.sub(fileloc_pattern, replace, actual) - - self.assertEqual(expected, actual) + self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual)) def extract_docstrings_from_str(self, module_content): """ utility: return all msgids extracted from module_content """ From 5fba1bbabf5097bc9a8f74a77a8d8383d38c403b Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Mon, 28 Oct 2024 21:11:38 +0100 Subject: [PATCH 10/15] Add a CLI command to regenerate snapshots --- Lib/test/test_tools/test_i18n.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index 54131b72b266fa..e8cda42b6311b6 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -384,3 +384,24 @@ def test_files_list(self): self.assertIn(f'msgid "{text1}"', data) self.assertIn(f'msgid "{text2}"', data) self.assertNotIn(text3, data) + + +def update_POT_snapshots(): + for input_file in DATA_DIR.glob('*.py'): + output_file = input_file.with_suffix('.pot') + contents = input_file.read_text(encoding='utf-8') + with temp_cwd(None): + Path(input_file.name).write_text(contents) + assert_python_ok(Test_pygettext.script, '--docstrings', input_file.name) + output = Path('messages.pot').read_text() + + output = normalize_POT_file(output) + output_file.write_text(output, encoding='utf-8') + + +if __name__ == '__main__': + # To regenerate POT files + if len(sys.argv) > 1 and sys.argv[1] == '--snapshot-update': + update_POT_snapshots() + sys.exit(0) + unittest.main() From 9f388afe5d455620e8bc7de87a59450a67b0cf62 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Mon, 28 Oct 2024 21:12:33 +0100 Subject: [PATCH 11/15] Regenerate snapshots --- Lib/test/test_tools/i18n_data/docstrings.pot | 2 +- Lib/test/test_tools/i18n_data/fileloc.pot | 2 +- Lib/test/test_tools/i18n_data/messages.pot | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_tools/i18n_data/docstrings.pot b/Lib/test/test_tools/i18n_data/docstrings.pot index b9d6f4dc287594..5af1d41422ff62 100644 --- a/Lib/test/test_tools/i18n_data/docstrings.pot +++ b/Lib/test/test_tools/i18n_data/docstrings.pot @@ -5,7 +5,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" -"POT-Creation-Date: 2023-08-20 16:42+0200\n" +"POT-Creation-Date: 2000-01-01 00:00+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" diff --git a/Lib/test/test_tools/i18n_data/fileloc.pot b/Lib/test/test_tools/i18n_data/fileloc.pot index 09d7b27e0893a4..dbd28687a73556 100644 --- a/Lib/test/test_tools/i18n_data/fileloc.pot +++ b/Lib/test/test_tools/i18n_data/fileloc.pot @@ -5,7 +5,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" -"POT-Creation-Date: 2023-08-20 16:15+0200\n" +"POT-Creation-Date: 2000-01-01 00:00+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot index fd3dbe11108924..ddfbd18349ef4f 100644 --- a/Lib/test/test_tools/i18n_data/messages.pot +++ b/Lib/test/test_tools/i18n_data/messages.pot @@ -5,7 +5,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" -"POT-Creation-Date: 2023-08-20 16:42+0200\n" +"POT-Creation-Date: 2000-01-01 00:00+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" From 88f6350c0762f87c05e7e980fdba79dbf9f660a9 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Mon, 28 Oct 2024 21:13:31 +0100 Subject: [PATCH 12/15] Simplify code --- Lib/test/test_tools/test_i18n.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index e8cda42b6311b6..e8792e379de993 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -379,8 +379,7 @@ def test_files_list(self): pymod3.write_text(f'_({text3!r})', encoding='utf-8') assert_python_ok(self.script, sdir) - with open('messages.pot', encoding='utf-8') as fp: - data = fp.read() + data = Path('messages.pot').read_text(encoding='utf-8') self.assertIn(f'msgid "{text1}"', data) self.assertIn(f'msgid "{text2}"', data) self.assertNotIn(text3, data) From f4ed4e43b5d9f2aedaf24e4db675ac9f1c0b12f7 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Mon, 28 Oct 2024 21:15:47 +0100 Subject: [PATCH 13/15] Set maxDiff to None --- Lib/test/test_tools/test_i18n.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index e8792e379de993..40ed87c7acb97b 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -84,6 +84,7 @@ def get_msgids(self, data): def assert_POT_equal(self, expected, actual): """Check if two POT files are equal""" + self.maxDiff = None self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual)) def extract_docstrings_from_str(self, module_content): @@ -345,7 +346,6 @@ def test_calls_in_fstring_with_partially_wrong_expression(self): def test_pygettext_output(self): """Test that the pygettext output exactly matches snapshots.""" - self.maxDiff = None for input_file in DATA_DIR.glob('*.py'): output_file = input_file.with_suffix('.pot') with self.subTest(input_file=f'i18n_data/{input_file}'): From 63eef00b512b5a806124055f025be3dbfaad79d6 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Mon, 28 Oct 2024 21:29:54 +0100 Subject: [PATCH 14/15] Add test dir to Makefile --- Makefile.pre.in | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile.pre.in b/Makefile.pre.in index 751e7ab8b427f5..24b04b813f1925 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2532,6 +2532,7 @@ TESTSUBDIRS= idlelib/idle_test \ test/test_tomllib/data/valid/dates-and-times \ test/test_tomllib/data/valid/multiline-basic-str \ test/test_tools \ + test/test_tools/i18n_data \ test/test_ttk \ test/test_unittest \ test/test_unittest/namespace_test_pkg \ From c26d48865744b7a8040bd3325a557236718dd947 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Sun, 3 Nov 2024 14:36:13 +0100 Subject: [PATCH 15/15] Use '-Xutf8' --- Lib/test/test_tools/test_i18n.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index 40ed87c7acb97b..21dead8f943bb7 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -93,7 +93,7 @@ def extract_docstrings_from_str(self, module_content): with temp_cwd(None) as cwd: with open(filename, 'w', encoding='utf-8') as fp: fp.write(module_content) - assert_python_ok(self.script, '-D', filename) + assert_python_ok('-Xutf8', self.script, '-D', filename) with open('messages.pot', encoding='utf-8') as fp: data = fp.read() return self.get_msgids(data) @@ -103,7 +103,7 @@ def test_header(self): http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry """ with temp_cwd(None) as cwd: - assert_python_ok(self.script) + assert_python_ok('-Xutf8', self.script) with open('messages.pot', encoding='utf-8') as fp: data = fp.read() header = self.get_header(data) @@ -130,7 +130,7 @@ def test_POT_Creation_Date(self): """ Match the date format from xgettext for POT-Creation-Date """ from datetime import datetime with temp_cwd(None) as cwd: - assert_python_ok(self.script) + assert_python_ok('-Xutf8', self.script) with open('messages.pot', encoding='utf-8') as fp: data = fp.read() header = self.get_header(data) @@ -352,8 +352,8 @@ def test_pygettext_output(self): contents = input_file.read_text(encoding='utf-8') with temp_cwd(None): Path(input_file.name).write_text(contents) - assert_python_ok(self.script, '--docstrings', input_file.name) - output = Path('messages.pot').read_text() + assert_python_ok('-Xutf8', self.script, '--docstrings', input_file.name) + output = Path('messages.pot').read_text(encoding='utf-8') expected = output_file.read_text(encoding='utf-8') self.assert_POT_equal(expected, output) @@ -378,7 +378,7 @@ def test_files_list(self): pymod3.parent.mkdir() pymod3.write_text(f'_({text3!r})', encoding='utf-8') - assert_python_ok(self.script, sdir) + assert_python_ok('-Xutf8', self.script, sdir) data = Path('messages.pot').read_text(encoding='utf-8') self.assertIn(f'msgid "{text1}"', data) self.assertIn(f'msgid "{text2}"', data) @@ -388,11 +388,11 @@ def test_files_list(self): def update_POT_snapshots(): for input_file in DATA_DIR.glob('*.py'): output_file = input_file.with_suffix('.pot') - contents = input_file.read_text(encoding='utf-8') + contents = input_file.read_bytes() with temp_cwd(None): - Path(input_file.name).write_text(contents) - assert_python_ok(Test_pygettext.script, '--docstrings', input_file.name) - output = Path('messages.pot').read_text() + Path(input_file.name).write_bytes(contents) + assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings', input_file.name) + output = Path('messages.pot').read_text(encoding='utf-8') output = normalize_POT_file(output) output_file.write_text(output, encoding='utf-8')