From a2d376ec1202e86a2250657f9c5eeeb55ce1cfeb Mon Sep 17 00:00:00 2001 From: Stanislav Pankevich Date: Wed, 4 Dec 2019 21:24:22 +0100 Subject: [PATCH 1/2] Introduce main() function --- filecheck/FileCheck.py | 555 +++++++++++++++++++++-------------------- 1 file changed, 280 insertions(+), 275 deletions(-) diff --git a/filecheck/FileCheck.py b/filecheck/FileCheck.py index 73121e3..389e630 100755 --- a/filecheck/FileCheck.py +++ b/filecheck/FileCheck.py @@ -57,7 +57,7 @@ def escape_non_regex_or_skip(match_obj): return match_obj.group() -def escape_non_regex_parts(string): +def escape_non_regex_parts(check_expression): regex_line = re.sub(r"((?P[^\{{2}]+)|(?P\{\{(.*?)\}\}))", escape_non_regex_or_skip, check_expression) @@ -84,307 +84,234 @@ def dump_check(check): print("\tstart_index: {}".format(check.start_index)) -# FileCheck always prints its first argument. -filecheck_path = sys.argv[0] -if os.path.exists(filecheck_path): - filecheck_path = os.path.abspath(filecheck_path) +def main(): + # FileCheck always prints its first argument. + filecheck_path = sys.argv[0] + if os.path.exists(filecheck_path): + filecheck_path = os.path.abspath(filecheck_path) -print(filecheck_path) + print(filecheck_path) -if len(sys.argv) == 1: - print(" not specified") - exit(2) + if len(sys.argv) == 1: + print(" not specified") + exit(2) -for arg in sys.argv: - if arg == '--help': - print_help() - exit(0) + for arg in sys.argv: + if arg == '--help': + print_help() + exit(0) -check_file = sys.argv[1] -if not os.path.isfile(check_file): - sys.stdout.flush() - err = "Could not open check file '{}': No such file or directory".format(check_file) - print(err) - exit(2) + check_file = sys.argv[1] + if not os.path.isfile(check_file): + sys.stdout.flush() + err = "Could not open check file '{}': No such file or directory".format(check_file) + print(err) + exit(2) -if os.path.getsize(check_file) == 0: - sys.stdout.flush() - print("error: no check strings found with prefix 'CHECK:'", file=sys.stderr) - exit(2) + if os.path.getsize(check_file) == 0: + sys.stdout.flush() + print("error: no check strings found with prefix 'CHECK:'", file=sys.stderr) + exit(2) -parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser() -parser.add_argument('check_file_arg', type=str, help='TODO') -parser.add_argument('--strict-whitespace', action='store_true', help='TODO') -parser.add_argument('--match-full-lines', action='store_true', help='TODO') -parser.add_argument('--check-prefix', action='store', help='TODO') + parser.add_argument('check_file_arg', type=str, help='TODO') + parser.add_argument('--strict-whitespace', action='store_true', help='TODO') + parser.add_argument('--match-full-lines', action='store_true', help='TODO') + parser.add_argument('--check-prefix', action='store', help='TODO') -args = parser.parse_args() + args = parser.parse_args() -check_prefix = args.check_prefix if args.check_prefix else "CHECK" + check_prefix = args.check_prefix if args.check_prefix else "CHECK" -if not re.search('^[A-Za-z][A-Za-z0-9-_]+$', check_prefix): - sys.stdout.flush() - error_message = "Supplied check-prefix is invalid! Prefixes must be unique and start with a letter and contain only alphanumeric characters, hyphens and underscores" - print(error_message, file=sys.stderr) - exit(2) + if not re.search('^[A-Za-z][A-Za-z0-9-_]+$', check_prefix): + sys.stdout.flush() + error_message = "Supplied check-prefix is invalid! Prefixes must be unique and start with a letter and contain only alphanumeric characters, hyphens and underscores" + print(error_message, file=sys.stderr) + exit(2) -checks = [] -with open(check_file) as f: - for line_idx, line in enumerate(f): - line = line.rstrip() + checks = [] + with open(check_file) as f: + for line_idx, line in enumerate(f): + line = line.rstrip() - if not args.strict_whitespace: - line = canonicalize_whitespace(line) + if not args.strict_whitespace: + line = canonicalize_whitespace(line) - # CHECK and CHECK-NEXT - strict_whitespace_match = "" if args.strict_whitespace and args.match_full_lines else " ?" + # CHECK and CHECK-NEXT + strict_whitespace_match = "" if args.strict_whitespace and args.match_full_lines else " ?" - check_regex = "; {}:{}(.*)".format(check_prefix, strict_whitespace_match) - check_match = re.search(check_regex, line) - check_type = CheckType.CHECK - if not check_match: - check_regex = "; {}-NEXT:{}(.*)".format(check_prefix, strict_whitespace_match) + check_regex = "; {}:{}(.*)".format(check_prefix, strict_whitespace_match) check_match = re.search(check_regex, line) - check_type = CheckType.CHECK_NEXT - - if check_match: - check_expression = check_match.group(1) - - match_type = MatchType.SUBSTRING - - if re.search(r"\{\{.*\}\}", check_expression): - regex_line = escape_non_regex_parts(check_expression) - regex_line = re.sub(r"\{\{(.*?)\}\}", r"\1", regex_line) - match_type = MatchType.REGEX - check_expression = regex_line - - check = Check(check_type=check_type, - match_type=match_type, - expression=check_expression, - source_line=line, - check_line_idx=line_idx, - start_index=check_match.start(1)) - - checks.append(check) - continue - - check_not_regex = "; {}-NOT: (.*)".format(check_prefix) - check_match = re.search(check_not_regex, line) - if check_match: - match_type = MatchType.SUBSTRING - - check_expression = check_match.group(1) - - if re.search(r"\{\{.*\}\}", check_expression): - regex_line = escape_non_regex_parts(check_expression) - regex_line = re.sub(r"\{\{(.*?)\}\}", r"\1", regex_line) - match_type = MatchType.REGEX - check_expression = regex_line - - check = Check(check_type=CheckType.CHECK_NOT, - match_type=match_type, - expression=check_expression, - source_line=line, - check_line_idx=line_idx, - start_index=check_match.start(1)) - - checks.append(check) - continue - - check_empty_regex = "; {}-EMPTY:".format(check_prefix) - check_match = re.search(check_empty_regex, line) - if check_match: - check = Check(check_type=CheckType.CHECK_EMPTY, - match_type=MatchType.SUBSTRING, - expression=None, - source_line=line, - check_line_idx=line_idx, - start_index=-1) - - if len(checks) == 0: - print("{}:{}:{}: error: found 'CHECK-EMPTY' without previous 'CHECK: line".format(check_file, 1, 3)) - print(line) - print(" ^") - exit(2) - - checks.append(check) - continue - -check_iterator = iter(checks) - -current_check = None -try: - current_check = next(check_iterator) -except StopIteration: - error_message = "error: no check strings found with prefix '{}:'".format(check_prefix) - print(error_message, file=sys.stderr) - sys.stdout.flush() - exit(2) - -line_counter = 0 - -input_lines = [] - -current_scan_base = 0 - -stdin_input_iter = enumerate(sys.stdin) -for line_idx, line in stdin_input_iter: - line = line.rstrip() - if not args.strict_whitespace: - line = canonicalize_whitespace(line) - - input_lines.append(line) - - line_counter = line_counter + 1 - - if current_check.check_type == CheckType.CHECK_EMPTY: - if line != '': - assert 0, "Not implemented" - - elif current_check.check_type == CheckType.CHECK: - if current_check.match_type == MatchType.SUBSTRING: - if args.match_full_lines: - if current_check.expression != line: - continue - else: - if current_check.expression not in line: - continue - - elif current_check.match_type == MatchType.REGEX: - if not re.search(current_check.expression, line): + check_type = CheckType.CHECK + if not check_match: + check_regex = "; {}-NEXT:{}(.*)".format(check_prefix, strict_whitespace_match) + check_match = re.search(check_regex, line) + check_type = CheckType.CHECK_NEXT + + if check_match: + check_expression = check_match.group(1) + + match_type = MatchType.SUBSTRING + + if re.search(r"\{\{.*\}\}", check_expression): + regex_line = escape_non_regex_parts(check_expression) + regex_line = re.sub(r"\{\{(.*?)\}\}", r"\1", regex_line) + match_type = MatchType.REGEX + check_expression = regex_line + + check = Check(check_type=check_type, + match_type=match_type, + expression=check_expression, + source_line=line, + check_line_idx=line_idx, + start_index=check_match.start(1)) + + checks.append(check) continue - elif current_check.check_type == CheckType.CHECK_NEXT: - if current_check.match_type == MatchType.SUBSTRING: - if not args.strict_whitespace: - line = re.sub("\\s+", ' ', line).strip() + check_not_regex = "; {}-NOT: (.*)".format(check_prefix) + check_match = re.search(check_not_regex, line) + if check_match: + match_type = MatchType.SUBSTRING - if args.match_full_lines: - if current_check.expression != line: - break - else: - if current_check.expression not in line: - break + check_expression = check_match.group(1) - elif current_check.match_type == MatchType.REGEX: - if not re.search(current_check.expression, line): - break + if re.search(r"\{\{.*\}\}", check_expression): + regex_line = escape_non_regex_parts(check_expression) + regex_line = re.sub(r"\{\{(.*?)\}\}", r"\1", regex_line) + match_type = MatchType.REGEX + check_expression = regex_line - elif current_check.check_type == CheckType.CHECK_NOT: - if current_check.match_type == MatchType.SUBSTRING: - if not args.strict_whitespace: - line = re.sub("\\s+", ' ', line).strip() + check = Check(check_type=CheckType.CHECK_NOT, + match_type=match_type, + expression=check_expression, + source_line=line, + check_line_idx=line_idx, + start_index=check_match.start(1)) - if current_check.expression in line: - break + checks.append(check) + continue - elif current_check.match_type == MatchType.REGEX: - if re.search(current_check.expression, line): - break + check_empty_regex = "; {}-EMPTY:".format(check_prefix) + check_match = re.search(check_empty_regex, line) + if check_match: + check = Check(check_type=CheckType.CHECK_EMPTY, + match_type=MatchType.SUBSTRING, + expression=None, + source_line=line, + check_line_idx=line_idx, + start_index=-1) + + if len(checks) == 0: + print("{}:{}:{}: error: found 'CHECK-EMPTY' without previous 'CHECK: line".format(check_file, 1, 3)) + print(line) + print(" ^") + exit(2) + + checks.append(check) + continue + check_iterator = iter(checks) + + current_check = None try: current_check = next(check_iterator) - current_scan_base = line_idx + 1 except StopIteration: - exit(0) + error_message = "error: no check strings found with prefix '{}:'".format(check_prefix) + print(error_message, file=sys.stderr) + sys.stdout.flush() + exit(2) -if line_counter == 0: - print("CHECK: FileCheck error: '-' is empty.") - print("FileCheck command line: {}".format(check_file)) - exit(2) + line_counter = 0 -if current_check.check_type == CheckType.CHECK_EMPTY: - exit(0) - -if current_check.check_type == CheckType.CHECK: - if current_check.match_type == MatchType.SUBSTRING: - last_read_line = input_lines[current_scan_base] + input_lines = [] - candidate_line = None - current_best_ratio = 0 - for read_line in input_lines[current_scan_base:]: - similar_ratio = similar(last_read_line, current_check.expression) - if current_best_ratio < similar_ratio: - candidate_line = read_line - current_best_ratio = similar_ratio - assert candidate_line - - print("{}:{}:{}: error: CHECK: expected string not found in input" - .format(check_file, - current_check.check_line_idx + 1, - current_check.start_index + 1)) - - print(current_check.source_line.rstrip()) - print("^".rjust(current_check.start_index + 1)) - print(":{}:{}: note: scanning from here".format(current_scan_base + 1, 1)) - print(last_read_line) - print("^") - - caret_pos = len(candidate_line) // 2 + 1 - print(":{}:{}: note: possible intended match here".format(current_scan_base + 1, caret_pos)) - print(candidate_line) - print("^".rjust(caret_pos, ' ')) - exit(1) - - if current_check.match_type == MatchType.REGEX: - print("{}:{}:{}: error: CHECK: expected string not found in input" - .format(check_file, - current_check.check_line_idx + 1, - current_check.start_index + 1)) - - print(current_check.source_line.rstrip()) - print("^".rjust(current_check.start_index + 1)) - print(":{}:{}: note: scanning from here".format(current_scan_base + 1, 1)) - print(line) - print("^") - exit(1) - -if current_check.check_type == CheckType.CHECK_NOT: - if (current_check.match_type == MatchType.SUBSTRING or - current_check.match_type == MatchType.REGEX): - last_read_line = input_lines[-1] + current_scan_base = 0 + stdin_input_iter = enumerate(sys.stdin) + for line_idx, line in stdin_input_iter: + line = line.rstrip() if not args.strict_whitespace: - last_read_line = re.sub("\\s+", ' ', last_read_line).strip() + line = canonicalize_whitespace(line) - print("{}:{}:{}: error: CHECK-NOT: excluded string found in input" - .format(check_file, - current_check.check_line_idx + 1, - current_check.start_index + 1)) + input_lines.append(line) - print(current_check.source_line.rstrip()) - print("^".rjust(current_check.start_index + 1)) - print(":{}:{}: note: found here".format(current_scan_base + 1, 1)) - print(last_read_line) + line_counter = line_counter + 1 - if current_check.match_type == MatchType.SUBSTRING: - match_pos = last_read_line.find(current_check.expression) - assert match_pos != -1 + if current_check.check_type == CheckType.CHECK_EMPTY: + if line != '': + assert 0, "Not implemented" - highlight_line = "^".rjust(match_pos, ' ') - print("^".ljust(len(current_check.expression), '~')) - else: - print("^".ljust(len(last_read_line), '~')) + elif current_check.check_type == CheckType.CHECK: + if current_check.match_type == MatchType.SUBSTRING: + if args.match_full_lines: + if current_check.expression != line: + continue + else: + if current_check.expression not in line: + continue - exit(1) + elif current_check.match_type == MatchType.REGEX: + if not re.search(current_check.expression, line): + continue - assert 0, "Not implemented" + elif current_check.check_type == CheckType.CHECK_NEXT: + if current_check.match_type == MatchType.SUBSTRING: + if not args.strict_whitespace: + line = re.sub("\\s+", ' ', line).strip() -if current_check.check_type == CheckType.CHECK_NEXT: - last_read_line = input_lines[current_scan_base] + if args.match_full_lines: + if current_check.expression != line: + break + else: + if current_check.expression not in line: + break - if current_check.match_type == MatchType.SUBSTRING: - matching_line_idx = -1 - for line_idx, line in stdin_input_iter: - line = line.rstrip() - input_lines.append(line) + elif current_check.match_type == MatchType.REGEX: + if not re.search(current_check.expression, line): + break + + elif current_check.check_type == CheckType.CHECK_NOT: + if current_check.match_type == MatchType.SUBSTRING: + if not args.strict_whitespace: + line = re.sub("\\s+", ' ', line).strip() + + if current_check.expression in line: + break - if current_check.expression in line: - matching_line_idx = line_idx + elif current_check.match_type == MatchType.REGEX: + if re.search(current_check.expression, line): + break + + try: + current_check = next(check_iterator) + current_scan_base = line_idx + 1 + except StopIteration: + exit(0) + + if line_counter == 0: + print("CHECK: FileCheck error: '-' is empty.") + print("FileCheck command line: {}".format(check_file)) + exit(2) - if matching_line_idx == -1: - print("{}:{}:{}: error: CHECK-NEXT: expected string not found in input" + if current_check.check_type == CheckType.CHECK_EMPTY: + exit(0) + + if current_check.check_type == CheckType.CHECK: + if current_check.match_type == MatchType.SUBSTRING: + last_read_line = input_lines[current_scan_base] + + candidate_line = None + current_best_ratio = 0 + for read_line in input_lines[current_scan_base:]: + similar_ratio = similar(last_read_line, current_check.expression) + if current_best_ratio < similar_ratio: + candidate_line = read_line + current_best_ratio = similar_ratio + assert candidate_line + + print("{}:{}:{}: error: CHECK: expected string not found in input" .format(check_file, current_check.check_line_idx + 1, current_check.start_index + 1)) @@ -395,30 +322,108 @@ def dump_check(check): print(last_read_line) print("^") + caret_pos = len(candidate_line) // 2 + 1 + print(":{}:{}: note: possible intended match here".format(current_scan_base + 1, caret_pos)) + print(candidate_line) + print("^".rjust(caret_pos, ' ')) exit(1) - else: - assert current_scan_base > 0 - previous_matched_line = input_lines[current_scan_base - 1] - print("{}:{}:{}: error: CHECK-NEXT: is not on the line after the previous match" + if current_check.match_type == MatchType.REGEX: + print("{}:{}:{}: error: CHECK: expected string not found in input" .format(check_file, current_check.check_line_idx + 1, current_check.start_index + 1)) + print(current_check.source_line.rstrip()) print("^".rjust(current_check.start_index + 1)) - - matching_line = input_lines[matching_line_idx] - print(":{}:1: note: 'next' match was here".format(matching_line_idx + 1)) - print(matching_line) + print(":{}:{}: note: scanning from here".format(current_scan_base + 1, 1)) + print(line) print("^") + exit(1) + + if current_check.check_type == CheckType.CHECK_NOT: + if (current_check.match_type == MatchType.SUBSTRING or + current_check.match_type == MatchType.REGEX): + last_read_line = input_lines[-1] + + if not args.strict_whitespace: + last_read_line = re.sub("\\s+", ' ', last_read_line).strip() + + print("{}:{}:{}: error: CHECK-NOT: excluded string found in input" + .format(check_file, + current_check.check_line_idx + 1, + current_check.start_index + 1)) - print(":{}:{}: note: previous match ended here".format(current_scan_base, len(previous_matched_line) + 1)) - print(previous_matched_line) - print("^".rjust(len(previous_matched_line) + 1)) - print(":{}:{}: note: non-matching line after previous match is here".format(current_scan_base + 1, 1)) + print(current_check.source_line.rstrip()) + print("^".rjust(current_check.start_index + 1)) + print(":{}:{}: note: found here".format(current_scan_base + 1, 1)) print(last_read_line) - print("^") + + if current_check.match_type == MatchType.SUBSTRING: + match_pos = last_read_line.find(current_check.expression) + assert match_pos != -1 + + highlight_line = "^".rjust(match_pos, ' ') + print("^".ljust(len(current_check.expression), '~')) + else: + print("^".ljust(len(last_read_line), '~')) exit(1) - assert 0, "Not implemented" + assert 0, "Not implemented" + + if current_check.check_type == CheckType.CHECK_NEXT: + last_read_line = input_lines[current_scan_base] + + if current_check.match_type == MatchType.SUBSTRING: + matching_line_idx = -1 + for line_idx, line in stdin_input_iter: + line = line.rstrip() + input_lines.append(line) + + if current_check.expression in line: + matching_line_idx = line_idx + + if matching_line_idx == -1: + print("{}:{}:{}: error: CHECK-NEXT: expected string not found in input" + .format(check_file, + current_check.check_line_idx + 1, + current_check.start_index + 1)) + + print(current_check.source_line.rstrip()) + print("^".rjust(current_check.start_index + 1)) + print(":{}:{}: note: scanning from here".format(current_scan_base + 1, 1)) + print(last_read_line) + print("^") + + exit(1) + else: + assert current_scan_base > 0 + previous_matched_line = input_lines[current_scan_base - 1] + + print("{}:{}:{}: error: CHECK-NEXT: is not on the line after the previous match" + .format(check_file, + current_check.check_line_idx + 1, + current_check.start_index + 1)) + print(current_check.source_line.rstrip()) + print("^".rjust(current_check.start_index + 1)) + + matching_line = input_lines[matching_line_idx] + print(":{}:1: note: 'next' match was here".format(matching_line_idx + 1)) + print(matching_line) + print("^") + + print(":{}:{}: note: previous match ended here".format(current_scan_base, len(previous_matched_line) + 1)) + print(previous_matched_line) + print("^".rjust(len(previous_matched_line) + 1)) + print(":{}:{}: note: non-matching line after previous match is here".format(current_scan_base + 1, 1)) + print(last_read_line) + print("^") + + exit(1) + + assert 0, "Not implemented" + + +if __name__ == "__main__": + main() From e5fb40480c1aa72343bd346d413632739c5f1da5 Mon Sep 17 00:00:00 2001 From: Stanislav Pankevich Date: Wed, 4 Dec 2019 21:25:55 +0100 Subject: [PATCH 2/2] Bump version to 0.0.2 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 82422db..29586fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "filecheck" -version = "0.0.1" +version = "0.0.2" description = "Python port of LLVM's FileCheck, flexible pattern matching file verifier" authors = ["Stanislav Pankevich "]