Skip to content

Commit

Permalink
Warn if a file contains UTF-8 and Latin-1
Browse files Browse the repository at this point in the history
Add a new warning, non-ascii-utf8, which is displayed only if the
non-ascii attribute is specified and UTF-8 characters were ignored in
the copyright or authors lines of the header.
  • Loading branch information
dra27 committed Oct 25, 2017
1 parent 49723e5 commit bfff8f9
Showing 1 changed file with 19 additions and 2 deletions.
21 changes: 19 additions & 2 deletions tools/check-typo
Expand Up @@ -162,10 +162,13 @@ IGNORE_DIRS="
(cat "$f" | tr -d '\r'; echo) \
| awk -v rules="$rules" -v svnrules="$svnrules" -v file="$f" \
'
# Return true (1) when name does NOT occur as a comma- or space-delimited
# entry in the string formed by concatenating the rules and svnrules
# variables, and false (0) when it does.  err() calls this to decide
# whether a message is printed.
# name is spliced into a dynamic regular expression, so any regex
# metacharacters it contains are interpreted as such.
# NOTE(review): presumably rules/svnrules list the checks disabled for the
# current file -- confirm against the shell code that sets them.
# NOTE(review): rules and svnrules are joined with no separator between
# them; confirm that one of them always supplies its own delimiter.
function is_err(name) {
return (("," rules svnrules ",") !~ ("[, ]" name "[, ]"));
}
function err(name, msg) {
++ counts[name];
if (("," rules svnrules ",") !~ ("[, ]" name "[, ]") \
&& counts[name] <= 10){
if (is_err(name) && counts[name] <= 10){
printf ("%s:%d.%d:", file, NR, RSTART + RLENGTH);
printf (" [%s] %s\n", name, msg);
got_errors = 1;
Expand Down Expand Up @@ -207,6 +210,10 @@ IGNORE_DIRS="
# Flag any byte in the range 0200-0377 on a line that is outside the
# authors and copyright header states.
match($0, /[\200-\377]/) \
&& state != "authors" && state != "copyright" {
err("non-ascii", "non-ASCII character(s)");
# header_utf8 is set by the header rule once a UTF-8 sequence has been
# seen in the authors/copyright lines.  If the non-ascii check is not an
# error for this file (is_err is false), report that the file mixes
# such bytes with UTF-8.
if (header_utf8 && !is_err("non-ascii")) {
err("non-ascii-utf8", \
"non-ASCII character(s) AND UTF-8 encountered");
}
}
match($0, /[^\t\200-\377 -~]/) {
Expand Down Expand Up @@ -237,6 +244,16 @@ IGNORE_DIRS="
err("very-long-line", "line is over 132 columns");
}
# Record that the header contained UTF-8 sequences: a byte in 0300-0367
# followed by one or more bytes in 0200-0277, on a line in the authors or
# copyright state.
match($0, /[\300-\367][\200-\277]+/) \
&& (state == "authors" || state == "copyright") {
header_utf8 = 1;
# counts["non-ascii"] > 0 means the non-ascii check already fired on an
# earlier line of this file.
# NOTE(review): this guard tests is_err("non-ascii"), whereas the
# corresponding guard in the non-ascii rule tests !is_err("non-ascii");
# confirm which polarity is intended here.
if (counts["non-ascii"] > 0 && is_err("non-ascii")) {
err("non-ascii-utf8", \
"non-ASCII character(s) AND UTF-8 encountered");
}
}
# Header-recognition automaton. Read this from bottom to top.
# Valid UTF-8 chars are recognised in copyright and authors
# TODO: ensure all files are valid UTF-8 before awking them.
Expand Down

0 comments on commit bfff8f9

Please sign in to comment.