Skip to content

Commit

Permalink
[lit] Clean up internal diff's encoding handling
Browse files Browse the repository at this point in the history
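As suggested by rnk at D67643#1673043, instead of reading files
multiple times until an appropriate encoding is found, read them once
as binary, and then try to decode what was read: first with the
locale's preferred encoding, then (on a decode error) as UTF-8, and
finally fall back to a byte-wise comparison if that fails as well.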

For Python >= 3.5, don't fail when attempting to decode the
`diff_bytes` output in order to print it: bytes that cannot be decoded
are rendered with `errors="backslashreplace"` instead of raising.

Avoid failures for Python 2.7 used on some Windows bots by
transforming diff output with `lit.util.to_string` before writing it
to stdout.

Finally, add some tests for encoding handling.

Reviewed By: rnk

Differential Revision: https://reviews.llvm.org/D68664

llvm-svn: 375018
  • Loading branch information
jdenny-ornl committed Oct 16, 2019
1 parent f89cf21 commit f095b8c
Show file tree
Hide file tree
Showing 7 changed files with 88 additions and 35 deletions.
55 changes: 22 additions & 33 deletions llvm/utils/lit/lit/TestRunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import io
import itertools
import getopt
import locale
import os, signal, subprocess, sys
import re
import stat
Expand Down Expand Up @@ -415,60 +416,48 @@ def getDirTree(path, basedir=""):
return path, sorted(child_trees)

def compareTwoFiles(filepaths):
compare_bytes = False
encoding = None
filelines = []
for file in filepaths:
try:
with open(file, 'r') as f:
filelines.append(f.readlines())
except UnicodeDecodeError:
try:
with io.open(file, 'r', encoding="utf-8") as f:
filelines.append(f.readlines())
encoding = "utf-8"
except:
compare_bytes = True

if compare_bytes:
return compareTwoBinaryFiles(filepaths)
else:
return compareTwoTextFiles(filepaths, encoding)
with open(file, 'rb') as file_bin:
filelines.append(file_bin.readlines())

def compareTwoBinaryFiles(filepaths):
filelines = []
for file in filepaths:
with open(file, 'rb') as f:
filelines.append(f.readlines())
try:
return compareTwoTextFiles(filepaths, filelines,
locale.getpreferredencoding(False))
except UnicodeDecodeError:
try:
return compareTwoTextFiles(filepaths, filelines, "utf-8")
except:
return compareTwoBinaryFiles(filepaths, filelines)

def compareTwoBinaryFiles(filepaths, filelines):
exitCode = 0
if hasattr(difflib, 'diff_bytes'):
# python 3.5 or newer
diffs = difflib.diff_bytes(difflib.unified_diff, filelines[0],
filelines[1], filepaths[0].encode(),
filepaths[1].encode(),
n = num_context_lines)
diffs = [diff.decode() for diff in diffs]
diffs = [diff.decode(errors="backslashreplace") for diff in diffs]
else:
# python 2.7
func = difflib.unified_diff if unified_diff else difflib.context_diff
diffs = func(filelines[0], filelines[1], filepaths[0], filepaths[1],
n = num_context_lines)

for diff in diffs:
stdout.write(diff)
stdout.write(to_string(diff))
exitCode = 1
return exitCode

def compareTwoTextFiles(filepaths, encoding):
def compareTwoTextFiles(filepaths, filelines_bin, encoding):
filelines = []
for file in filepaths:
if encoding is None:
with open(file, 'r') as f:
filelines.append(f.readlines())
else:
with io.open(file, 'r', encoding=encoding) as f:
filelines.append(f.readlines())
for lines_bin in filelines_bin:
lines = []
for line_bin in lines_bin:
line = line_bin.decode(encoding=encoding)
lines.append(line)
filelines.append(lines)

exitCode = 0
def compose2(f, g):
Expand All @@ -488,7 +477,7 @@ def compose2(f, g):
func = difflib.unified_diff if unified_diff else difflib.context_diff
for diff in func(filelines[0], filelines[1], filepaths[0], filepaths[1],
n = num_context_lines):
stdout.write(diff)
stdout.write(to_string(diff))
exitCode = 1
return exitCode

Expand Down
9 changes: 9 additions & 0 deletions llvm/utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Check that diff falls back to binary mode if it cannot decode a file.

# RUN: diff -u diff-in.bin diff-in.bin
# RUN: diff -u diff-in.utf16 diff-in.bin && false || true
# RUN: diff -u diff-in.utf8 diff-in.bin && false || true
# RUN: diff -u diff-in.bin diff-in.utf8 && false || true

# Fail so lit will print output.
# RUN: false
Binary file not shown.
Binary file not shown.
3 changes: 3 additions & 0 deletions llvm/utils/lit/tests/Inputs/shtest-shell/diff-in.utf8
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
foo
bar
baz
2 changes: 1 addition & 1 deletion llvm/utils/lit/tests/max-failures.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#
# END.

# CHECK: Failing Tests (30)
# CHECK: Failing Tests (31)
# CHECK: Failing Tests (1)
# CHECK: Failing Tests (2)
# CHECK: error: argument --max-failures: requires positive integer, but found '0'
54 changes: 53 additions & 1 deletion llvm/utils/lit/tests/shtest-shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,58 @@
# CHECK: error: command failed with exit status: 127
# CHECK: ***


# CHECK: FAIL: shtest-shell :: diff-encodings.txt
# CHECK: *** TEST 'shtest-shell :: diff-encodings.txt' FAILED ***

# CHECK: $ "diff" "-u" "diff-in.bin" "diff-in.bin"
# CHECK-NOT: error

# CHECK: $ "diff" "-u" "diff-in.utf16" "diff-in.bin"
# CHECK: # command output:
# CHECK-NEXT: ---
# CHECK-NEXT: +++
# CHECK-NEXT: @@
# CHECK-NEXT: {{^ .f.o.o.$}}
# CHECK-NEXT: {{^-.b.a.r.$}}
# CHECK-NEXT: {{^\+.b.a.r..}}
# CHECK-NEXT: {{^ .b.a.z.$}}
# CHECK: error: command failed with exit status: 1
# CHECK: $ "true"

# CHECK: $ "diff" "-u" "diff-in.utf8" "diff-in.bin"
# CHECK: # command output:
# CHECK-NEXT: ---
# CHECK-NEXT: +++
# CHECK-NEXT: @@
# CHECK-NEXT: -foo
# CHECK-NEXT: -bar
# CHECK-NEXT: -baz
# CHECK-NEXT: {{^\+.f.o.o.$}}
# CHECK-NEXT: {{^\+.b.a.r..}}
# CHECK-NEXT: {{^\+.b.a.z.$}}
# CHECK: error: command failed with exit status: 1
# CHECK: $ "true"

# CHECK: $ "diff" "-u" "diff-in.bin" "diff-in.utf8"
# CHECK: # command output:
# CHECK-NEXT: ---
# CHECK-NEXT: +++
# CHECK-NEXT: @@
# CHECK-NEXT: {{^\-.f.o.o.$}}
# CHECK-NEXT: {{^\-.b.a.r..}}
# CHECK-NEXT: {{^\-.b.a.z.$}}
# CHECK-NEXT: +foo
# CHECK-NEXT: +bar
# CHECK-NEXT: +baz
# CHECK: error: command failed with exit status: 1
# CHECK: $ "true"

# CHECK: $ "false"

# CHECK: ***


# CHECK: FAIL: shtest-shell :: diff-error-0.txt
# CHECK: *** TEST 'shtest-shell :: diff-error-0.txt' FAILED ***
# CHECK: $ "diff" "diff-error-0.txt" "diff-error-0.txt"
Expand Down Expand Up @@ -308,4 +360,4 @@
# CHECK: PASS: shtest-shell :: sequencing-0.txt
# CHECK: XFAIL: shtest-shell :: sequencing-1.txt
# CHECK: PASS: shtest-shell :: valid-shell.txt
# CHECK: Failing Tests (30)
# CHECK: Failing Tests (31)

0 comments on commit f095b8c

Please sign in to comment.