Skip to content

Commit

Permalink
Rewrite .tr parser
Browse files Browse the repository at this point in the history
  • Loading branch information
rubenwardy committed Feb 25, 2024
1 parent 8014d7b commit d7065a4
Show file tree
Hide file tree
Showing 8 changed files with 144 additions and 78 deletions.
151 changes: 75 additions & 76 deletions app/tasks/minetestcheck/translation.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
# Adapted from: https://github.com/minetest/minetest/blob/master/util/mod_translation_updater.py
# ContentDB
# Copyright (C) 2024 rubenwardy
#
# Copyright (C) 2019 Joachim Stolberg, 2020 FaceDeer, 2020 Louis Royer, 2023 Wuzzy, 2024 rubenwardy
# License: LGPLv2.1 or later
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import os
import re
Expand All @@ -18,93 +29,81 @@ def __init__(self, language: str, textdomain: str, entries: dict):
self.entries = entries


# Matches one "source=translation" line of a *.tr file.
#   group 1 - the source string (left of the separating equals sign)
#   group 2 - the translated string (right of the separating equals sign)
# An equals sign is taken as the separator only if it is NOT directly
# preceded by "@", unless that "@" is itself preceded by another "@".
pattern_tr = re.compile(r'(.*)(?:(?<!(?<!@)@)=)(.*)')

# Markers wrapped around the comment lines that show a source file name,
# e.g. "##[ init.lua ]##".
symbol_source_prefix, symbol_source_suffix = "##[", "]##"

# The "not used anymore" marker comment line.
comment_unused = "##### not used anymore #####"

# Strings longer than this get extra space added between them in the
# translation files, so that their beginnings and endings are easier to
# tell apart at a glance.
doublespace_threshold = 80


def parse_tr(filepath: str) -> Translation:
dOut = {}
in_header = True
header_comments = None
textdomain = None

entries = {}
filename = os.path.basename(filepath)
filename_parts = filename.split(".")

assert len(filename_parts) >= 3
assert len(filename_parts) == 3
assert filename_parts[-1] == "tr"
language = filename_parts[-2]
textdomain = ".".join(filename_parts[0:-2])

with open(filepath, "r", encoding='utf-8') as existing_file:
# save the full text to allow for comparison
# of the old version with the new output
existing_file.seek(0)
# a running record of the current comment block
# we're inside, to allow preceeding multi-line comments
# to be retained for a translation line
latest_comment_block = None
for line in existing_file.readlines():
line = line.rstrip('\n')
# "##### not used anymore #####" comment
if line == comment_unused:
# Always delete the 'not used anymore' comment.
# It will be re-added to the file if neccessary.
latest_comment_block = None
if header_comments is not None:
in_header = False
continue
lines = existing_file.readlines()
line_index = 0
while line_index < len(lines):
line = lines[line_index].rstrip('\n')

if line == "":
pass

# Comment lines
elif line.startswith("#"):
# Source file comments: ##[ file.lua ]##
if line.startswith(symbol_source_prefix) and line.endswith(symbol_source_suffix):
continue

# Store first occurance of textdomain
# Store first occurrence of textdomain
# discard all subsequent textdomain lines
if line.startswith("# textdomain:"):
if textdomain is None:
textdomain = line[13:].strip()
continue
elif in_header:
# Save header comments (normal comments at top of file)
if not header_comments:
header_comments = line
else:
header_comments = header_comments + "\n" + line
else:
# Save normal comments
if line.startswith("# textdomain:") and textdomain is None:
textdomain = line
elif not latest_comment_block:
latest_comment_block = line
line_textdomain = line[13:].strip()
if line_textdomain != textdomain:
raise SyntaxError(
f"Line {line_index + 1}: The filename's textdomain ({textdomain}) should match the comment ({line_textdomain})")
else:
i = 0
had_equals = False
source = ""
current_part = ""
while i < len(line):
if line[i] == "@":
if i + 1 < len(line):
i += 1
code = line[i]
if code == "=":
current_part += "="
elif code == "@":
current_part += "@"
elif code == "n":
current_part += "\n"
elif code.isdigit():
current_part += "@" + code
else:
raise SyntaxError(f"Line {line_index + 1}: Unknown escape character: {code}")

else:
# @\n -> add new line
line_index += 1
if line_index >= len(lines):
raise SyntaxError(f"Line {line_index + 1}: Unexpected end of file")
line = lines[line_index]
current_part += "\n"
i = 0
continue
elif not had_equals and line[i] == "=":
had_equals = True
source = current_part
current_part = ""

else:
latest_comment_block = latest_comment_block + "\n" + line
current_part += line[i]

i += 1

translation = current_part
if not had_equals:
raise SyntaxError(f"Line {line_index + 1}: Missing = in line")

continue
entries[source.strip()] = translation.strip()

match = pattern_tr.match(line)
if match:
latest_comment_block = None
in_header = False
dOut[match.group(1).strip()] = match.group(2).strip()
line_index += 1

return Translation(language, textdomain, dOut)
return Translation(language, textdomain, entries)
7 changes: 6 additions & 1 deletion app/tasks/minetestcheck/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,5 +305,10 @@ def get_translations(self, textdomain: str) -> list[Translation]:
ret = []

for name in glob.glob(f"{self.baseDir}/**/locale/{textdomain}.*.tr", recursive=True):
ret.append(parse_tr(name))
try:
ret.append(parse_tr(name))
except SyntaxError as e:
relative_path = os.path.join(self.relative, os.path.relpath(name, self.baseDir))
raise MinetestCheckError(f"Syntax error whilst reading {relative_path}: {e}")

return ret
1 change: 1 addition & 0 deletions app/tests/unit/bad_escape.fr.tr
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Bad @x escape = Bad @x escape
4 changes: 4 additions & 0 deletions app/tests/unit/err_missing_eq.fr.tr
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# textdomain: err_missing_eq

Hello, World! = Bonjour, Monde!
Invalid line
8 changes: 8 additions & 0 deletions app/tests/unit/foobar.fr.tr
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# textdomain: foobar

Hello, World! = Bonjour, Monde!
Hello @1!=@1, salut!
Cats @= cool = Chats @= cool
A @n newline = Une @
nouvelle ligne
Maybe @@@n@@@=@@= Peut être @@@n@@@=@@
1 change: 1 addition & 0 deletions app/tests/unit/no_textdomain_comment.fr.tr
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Hello, World! = Bonjour, Monde!
50 changes: 49 additions & 1 deletion app/tests/unit/test_translation.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,61 @@
import os

import pytest

from app.tasks.minetestcheck.translation import parse_tr


def test_parses_tr():
    """A well-formed file parses, with every escape sequence decoded."""
    here = os.path.dirname(__file__)
    translation = parse_tr(os.path.join(here, "foobar.fr.tr"))

    assert translation.language == "fr"
    assert translation.textdomain == "foobar"
    # Covers plain text, @1 arguments, @= and @@ escapes, @n, and an entry
    # continued onto a second line with a trailing "@".
    assert translation.entries == {
        "Hello, World!": "Bonjour, Monde!",
        "Hello @1!": "@1, salut!",
        "Cats = cool": "Chats = cool",
        "A \n newline": "Une \nnouvelle ligne",
        "Maybe @\n@=@": "Peut être @\n@=@",
    }


def test_parses_tr_infers_textdomain():
    """Without a textdomain comment, the textdomain comes from the file name."""
    fixture = os.path.join(os.path.dirname(__file__), "no_textdomain_comment.fr.tr")
    translation = parse_tr(fixture)

    assert translation.language == "fr"
    assert translation.textdomain == "no_textdomain_comment"
    assert translation.entries == {"Hello, World!": "Bonjour, Monde!"}


def test_parses_tr_error_on_textdomain_mismatch():
    """A textdomain comment that disagrees with the file name is rejected."""
    fixture = os.path.join(os.path.dirname(__file__), "textdomain_mismatch.fr.tr")

    with pytest.raises(SyntaxError) as excinfo:
        parse_tr(fixture)

    expected = "Line 1: The filename's textdomain (textdomain_mismatch) should match the comment (foobar)"
    assert str(excinfo.value) == expected


def test_parses_tr_error_on_missing_eq():
    """An entry line without "=" is reported together with its line number."""
    fixture = os.path.join(os.path.dirname(__file__), "err_missing_eq.fr.tr")

    with pytest.raises(SyntaxError) as excinfo:
        parse_tr(fixture)

    assert str(excinfo.value) == "Line 4: Missing = in line"


def test_parses_tr_error_on_bad_escape():
    """An "@" followed by an unsupported character is a syntax error."""
    fixture = os.path.join(os.path.dirname(__file__), "bad_escape.fr.tr")

    with pytest.raises(SyntaxError) as excinfo:
        parse_tr(fixture)

    assert str(excinfo.value) == "Line 1: Unknown escape character: x"
File renamed without changes.

0 comments on commit d7065a4

Please sign in to comment.