From 374e79208e5d32b38c2dbfe7950daa76da1a007a Mon Sep 17 00:00:00 2001 From: Stanislav Pankevich Date: Sat, 25 Sep 2021 19:23:29 +0200 Subject: [PATCH] Treat inputs as UTF-8 (both stdin and check file) Also: Poetry: remove Python 3.5 --- filecheck/FileCheck.py | 9 +++++++-- poetry.lock | 4 ++-- pyproject.toml | 2 +- .../tests/general/01-utf8-characters/filecheck.check | 3 +++ .../tests/general/01-utf8-characters/filecheck.input | 6 ++++++ .../tests/general/01-utf8-characters/sample.itest | 1 + 6 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 tests/integration/tests/general/01-utf8-characters/filecheck.check create mode 100644 tests/integration/tests/general/01-utf8-characters/filecheck.input create mode 100644 tests/integration/tests/general/01-utf8-characters/sample.itest diff --git a/filecheck/FileCheck.py b/filecheck/FileCheck.py index 4dc44d7..cd7478c 100755 --- a/filecheck/FileCheck.py +++ b/filecheck/FileCheck.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import argparse +import io import os import re import sys @@ -275,7 +276,7 @@ def exit_handler(code): exit_handler(2) checks = [] - with open(check_file) as f: + with open(check_file, encoding="utf-8") as f: for line_idx, line in enumerate(f): line = line.rstrip() @@ -411,7 +412,11 @@ def exit_handler(code): # TODO: Performance implications? # "Getting exit code 141 when reading from stdin with a Python script with “set -o pipefail” set" # https://stackoverflow.com/questions/59436858/getting-exit-code-141-when-reading-from-stdin-with-a-python-script-with-set-o/59436997?noredirect=1#comment105058533_59436997 - input_lines = sys.stdin.readlines() + # Also: Forcing the stdin to be UTF-8 + # Python 3: How to specify stdin encoding + # https://stackoverflow.com/a/16549381/598057 + input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') + input_lines = input_stream.readlines() stdin_input_iter = enumerate(input_lines) try: diff --git a/poetry.lock b/poetry.lock index 7b3ccf3..da5fe8d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -24,8 +24,8 @@ python-versions = "*" [metadata] lock-version = "1.1" -python-versions = "^3.5" -content-hash = "e60fb041589baae05d0cf0b4e08f198a39ce47b06cdbf3156ad8b144a778e6b9" +python-versions = "^3.6" +content-hash = "8f55f024f83b8e499d9e819ac4b7da371c5bf987b20a1fde05bb6ce04074ad26" [metadata.files] bump2version = [ diff --git a/pyproject.toml b/pyproject.toml index 0c27e26..fa5bb6b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "Python port of LLVM's FileCheck, flexible pattern matching file v authors = ["Stanislav Pankevich "] [tool.poetry.dependencies] -python = "^3.5" +python = "^3.6" [tool.poetry.dev-dependencies] lit = "^0.9" diff --git a/tests/integration/tests/general/01-utf8-characters/filecheck.check b/tests/integration/tests/general/01-utf8-characters/filecheck.check new file mode 100644 index 0000000..5e9a293 --- /dev/null +++ b/tests/integration/tests/general/01-utf8-characters/filecheck.check @@ -0,0 +1,3 @@ +CHECK: # © A line with UTF-8 produces +CHECK: # UnicodeDecodeError: 'ascii' codec can't decode byte +CHECK: # if not properly handled diff --git a/tests/integration/tests/general/01-utf8-characters/filecheck.input b/tests/integration/tests/general/01-utf8-characters/filecheck.input new file mode 100644 index 0000000..a95ecbc --- /dev/null +++ b/tests/integration/tests/general/01-utf8-characters/filecheck.input @@ -0,0 +1,6 @@ +# © A line with UTF-8 produces +# UnicodeDecodeError: 'ascii' codec can't decode byte +# if not properly handled + +def hello_world(): + print("hello world") diff --git a/tests/integration/tests/general/01-utf8-characters/sample.itest b/tests/integration/tests/general/01-utf8-characters/sample.itest new file mode 100644 index 0000000..6b675bd --- /dev/null +++ b/tests/integration/tests/general/01-utf8-characters/sample.itest @@ -0,0 +1 @@ +RUN: %cat "%S/filecheck.input" | %expect_exit 0 --expect-no-content %FILECHECK_EXEC "%S/filecheck.check"