Skip to content

Commit

Permalink
Add iswpunct(3) breakage workaround for Android
Browse files Browse the repository at this point in the history
On Android 14.0 with Termux 0.118.0, one regression test failure
remains, in src/cmd/ksh93/tests/sh_match.sh: the '$' character does
not match the [[:punct:]] character class, which is incorrect.

This character class queries the system's iswpunct(3) function via
src/lib/libast/regex/regclass.c, even for plain single-byte ASCII
characters; it should be equivalent to ispunct(3) in that case.

Turns out iswpunct(3) is simply broken on Android. Nine of the
expected ASCII characters are not recognised, whereas ispunct(3) is
fine. A test program shows:

    ispunct('!')==16; iswpunct('!')==1
    ispunct('"')==16; iswpunct('"')==1
    ispunct('#')==16; iswpunct('#')==1
    ispunct('$')==16; iswpunct('$')==0
    ispunct('%')==16; iswpunct('%')==1
    ispunct('&')==16; iswpunct('&')==1
    ispunct(''')==16; iswpunct(''')==1
    ispunct('(')==16; iswpunct('(')==1
    ispunct(')')==16; iswpunct(')')==1
    ispunct('*')==16; iswpunct('*')==1
    ispunct('+')==16; iswpunct('+')==0
    ispunct(',')==16; iswpunct(',')==1
    ispunct('-')==16; iswpunct('-')==1
    ispunct('.')==16; iswpunct('.')==1
    ispunct('/')==16; iswpunct('/')==1
    ispunct(':')==16; iswpunct(':')==1
    ispunct(';')==16; iswpunct(';')==1
    ispunct('<')==16; iswpunct('<')==0
    ispunct('=')==16; iswpunct('=')==0
    ispunct('>')==16; iswpunct('>')==0
    ispunct('?')==16; iswpunct('?')==1
    ispunct('@')==16; iswpunct('@')==1
    ispunct('[')==16; iswpunct('[')==1
    ispunct('\')==16; iswpunct('\')==1
    ispunct(']')==16; iswpunct(']')==1
    ispunct('^')==16; iswpunct('^')==0
    ispunct('_')==16; iswpunct('_')==1
    ispunct('`')==16; iswpunct('`')==0
    ispunct('{')==16; iswpunct('{')==1
    ispunct('|')==16; iswpunct('|')==0
    ispunct('}')==16; iswpunct('}')==1
    ispunct('~')==16; iswpunct('~')==0

It's broken for multibyte UTF-8 characters as well; at least £ and
€ are not recognised as punctuation. But there's nothing we can
realistically do about that. But we can at least fix the ASCII
characters as those are important for portability.
  • Loading branch information
McDutchie committed Mar 22, 2024
1 parent 0af4dc9 commit abba180
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 3 deletions.
5 changes: 4 additions & 1 deletion src/cmd/ksh93/tests/sh_match.sh
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,10 @@
[[ $'\cg' =~ [[:cntrl:]] ]] || err_exit 'pattern [[:cntrl:]] broken'
[[ \$ =~ [[:graph:]] ]] || err_exit 'pattern [[:graph:]] broken'
[[ ' ' =~ [[:graph:]] ]] && err_exit 'pattern [[:graph:]] broken'
[[ \$ =~ [[:punct:]] ]] || err_exit 'pattern [[:punct:]] broken'
for c in '!' '"' '#' '$' '%' '&' \' '(' ')' '*' '+' ',' '-' '.' '/' ':' ';' \
'<' '=' '>' '?' '@' '[' '\\' ']' '^' '_' '`' '{' '|' '}' '~'
do [[ $c =~ [[:punct:]] ]] || err_exit "pattern [[:punct:]] broken for $c"
done
[[ / =~ [[:punct:]] ]] || err_exit 'pattern [[:punct:]] broken'
[[ ' ' =~ [[:punct:]] ]] && err_exit 'pattern [[:punct:]] broken'
[[ x =~ [[:punct:]] ]] && err_exit 'pattern [[:punct:]] broken'
Expand Down
16 changes: 16 additions & 0 deletions src/lib/libast/features/wchar
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,22 @@ if hdr - wctype wchar.h
endif
endif

tst iswpunct_broken note{ is iswpunct(3) broken }end execute{
/* on Android 14.0, it is: it does not recognise some of the ASCII characters. ispunct(3) is fine */
#include <ctype.h>
#include <wctype.h>
int main(void)
{
char c[] = { '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', ':', ';',
'<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~', '\0' };
int i;
for (i = 0; c[i]; i++)
if (ispunct(c[i]) && !iswpunct(c[i]))
return 0;
return 1;
}
}end

run{
cat <<!
#ifndef WEOF
Expand Down
11 changes: 9 additions & 2 deletions src/lib/libast/regex/regclass.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* *
* This software is part of the ast package *
* Copyright (c) 1985-2011 AT&T Intellectual Property *
* Copyright (c) 2020-2023 Contributors to ksh 93u+m *
* Copyright (c) 2020-2024 Contributors to ksh 93u+m *
* and is licensed under the *
* Eclipse Public License, Version 2.0 *
* *
Expand Down Expand Up @@ -71,7 +71,14 @@ static int Notdigit(int c) { return !iswdigit(c); }
static int Isgraph(int c) { return iswgraph(c); }
static int Islower(int c) { return iswlower(c); }
static int Isprint(int c) { return iswprint(c); }
static int Ispunct(int c) { return iswpunct(c); }
static int Ispunct(int c)
{
#if _iswpunct_broken && CC_NATIVE == CC_ASCII
if (c < 128)
return ispunct(c);
#endif
return iswpunct(c);
}
static int Isspace(int c) { return iswspace(c); }
static int Notspace(int c) { return !iswspace(c); }
static int Isupper(int c) { return iswupper(c); }
Expand Down

0 comments on commit abba180

Please sign in to comment.