Skip to content

Commit 40ff238

Browse files
committed
Add tests for Unicode literals
This adds two new test files, one for identifiers and one for numbers. The numbers one is fudged for rakudo for now, but the identifiers one already passes for rakudo :) .
1 parent 9df21c3 commit 40ff238

File tree

2 files changed

+113
-0
lines changed

2 files changed

+113
-0
lines changed

S15-literals/identifiers.t

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# S15-literals/identifiers.t --- tests Unicode (namely non-ASCII) identifiers
2+
3+
use Test;
4+
5+
plan 7;
6+
7+
# tests that the proper characters are supported
8+
eval-lives-ok('my $foo', "Handles ASCII identifier");
9+
eval-lives-ok('my $ｆｏｏ', "Handles non-ASCII identifier");  # FULLWIDTH LATIN SMALL LETTERS F, O, O (U+FF46 U+FF4F U+FF4F), not ASCII
10+
eval-dies-ok('my $১০kinds', "Doesn't allow non-ASCII digits at start of identifier");
11+
12+
eval-dies-ok('my $̈a;', "Combining marks not allowed as first character of identifier");
13+
14+
my $ẛ̣ = 42; # LATIN SMALL LETTER LONG S WITH DOT ABOVE + COMBINING DOT BELOW
15+
16+
# this reference is spelled in source as LATIN SMALL LETTER LONG S + COMBINING
17+
# DOT ABOVE + COMBINING DOT BELOW
18+
is $ẛ̣, 42, "Identifiers are normalized";
19+
20+
# XXX it would be nice to test for NFG normalization, but since .name returns a
21+
# Str (which is in NFG), there's no way you can test through that. Test left
22+
# here, commented out, in case a way to test this does come up in the future.
23+
#is $ẛ̣.VAR.name.chars, 2, "Identifiers are normalized to NFG";
24+
25+
# these two tests make sure normalization goes source -> NFD -> NFC -> NFG, that
26+
# is, no occurrence of NFKD or NFKC in the process (note that the -> NFD step is a
27+
# part of normalizing to NFC, it's spelled out here just for clarity)
28+
29+
my $ﬁ = True;    # U+FB01 LATIN SMALL LIGATURE FI (one character); must stay distinct from ASCII "fi"
30+
my $fi = False;  # ASCII "f" followed by ASCII "i"
31+
32+
is $fi, False, "Identifiers are not put through compatibility decomposition";
33+
is $ﬁ, True, "Identifiers are not put through compatibility decomposition";
34+
35+
done;

S15-literals/numbers.t

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# S15-literals/numbers.t --- test Unicode (namely non-ASCII) numerals
2+
3+
use Test;
4+
5+
plan 41;
6+
7+
# basic test of literals
8+
#?rakudo 2 skip "is not a valid number"
9+
is ໑໐, 10, "Can use non-ASCII numbers";
10+
is 10, ໑໐, "Can use non-ASCII numbers";
11+
12+
# expression tests
13+
#?rakudo 3 skip "is not a valid number"
14+
is ٢ * ٤٢, 84, "Non-ASCII numbers can be used in expressions";
15+
is 42 + ٤٢, 84, "Non-ASCII numbers can be mixed with ASCII numbers";
16+
is 42 * 2, ٨٤, "ASCII-only expression can be successfully compared to non-ASCII number";  # expected value is spelled with ARABIC-INDIC DIGITS EIGHT, FOUR
17+
18+
# mixed numbers
19+
#?rakudo 2 skip "is not a valid number"
20+
is ᱄2, 42, "Can mix scripts in one number";
21+
is 4᱂, 42, "Can mix scripts in one number";
22+
23+
# check that only Nd characters are allowed
24+
# XXX might want to check for a specific exception at some point?
25+
eval-dies-ok("say ↈ;", "Numerals in category 'Nl' not allowed as numeric literal");
26+
eval-dies-ok("say 𒐀", "Numerals in category 'Nl' not allowed as numeric literal, even if its value is an integer within the range 0..9");
27+
eval-dies-ok("say ፼", "Numerals in category 'No' not allowed as numeric literal");
28+
eval-dies-ok("say ⓿", "Numerals in category 'No' not allowed as numeric literal, even if its value is an integer within the range 0..9");
29+
30+
# other radices
31+
32+
# binary
33+
is 0b101010, 42, "ASCII binary literals work";
34+
#?rakudo 2 skip "Confused"
35+
is 0b༡༠༡༠༡༠, 42, "Non-ASCII binary literals work";
36+
is 0b༡༠༡010, 42, "Binary literals with a mixture of scripts work";
37+
eval-dies-ok("say 0b¹0", "Numerals in category 'No' can't be used in binary literals");
38+
eval-dies-ok("say 0b1〇", "Numerals in category 'Nl' can't be used in binary literals");
39+
40+
# octal
41+
is 0o755, 493, "ASCII octal literals work";
42+
#?rakudo 2 skip "Confused"
43+
is 0o᠗᠕᠕, 493, "Non-ASCII octal literals work";
44+
is 0o᠗5᠕, 493, "Octal literals with a mixture of scripts work";
45+
eval-dies-ok("say 0o7₅₅", "Numerals in category 'No' can't be used in octal literals");
46+
eval-dies-ok("say 0oⅦ55", "Numerals in category 'Nl' can't be used in octal literals");
47+
48+
# hexadecimal
49+
is 0x42, 66, "ASCII hexadecimal literals work";
50+
#?rakudo 2 skip "Confused"
51+
is 0x๔๒, 66, "Non-ASCII hexadecimal literals work";
52+
is 0x๔2, 66, "Hexadecimal literals with a mixture of scripts work";
53+
eval-dies-ok("say 0x④2", "Numerals in category 'No' can't be used in hexadecimal literals");
54+
eval-dies-ok("say 0x4〢", "Numerals in category 'Nl' can't be used in hexadecimal literals");
55+
56+
is 0xCAFE, 51966, "Uppercase ASCII letters work in hexadecimal literals";
57+
is 0xcafe, 51966, "Lowercase ASCII letters work in hexadecimal literals";
58+
#?rakudo 3 skip "Confused"
59+
is 0xＣＡＦＥ, 51966, "Uppercase fullwidth letters work in hexadecimal literals";  # FULLWIDTH LATIN CAPITAL LETTERS C, A, F, E (U+FF23 U+FF21 U+FF26 U+FF25)
60+
is 0xｃａｆｅ, 51966, "Lowercase fullwidth letters work in hexadecimal literals";  # FULLWIDTH LATIN SMALL LETTERS C, A, F, E (U+FF43 U+FF41 U+FF46 U+FF45)
61+
is 0xＣaＦe, 51966, "Valid Hex_Digit characters from different scripts can be mixed in hexadecimal literals";  # fullwidth C and F, ASCII a and e
62+
eval-dies-ok("say 0xΓαfe", "Can't use characters without true Hex_Digit properties in hexadecimal literals");
63+
eval-dies-ok("say 0xCAF⒕", "Numerals in category 'No' can't be used in hexadecimal literals");
64+
eval-dies-ok("say 0xC𐏓FE", "Numerals in category 'Nl' can't be used in hexadecimal literals");
65+
66+
# generic radices
67+
is :36<Unicodez>, 2402100600299, "ASCII letters work in general radix numbers";
68+
#?rakudo 2 skip "Malformed radix number"
69+
is :36<Ｕｎｉｃｏｄｅｚ>, 2402100600299, "Fullwidth letters work in general radix numbers";  # every letter is a FULLWIDTH LATIN letter (U+FF21..U+FF5A range)
70+
is :36<Ｕｎｉcoｄｅz>, 2402100600299, "Mixture of ASCII and fullwidth letters work in general radix numbers";  # ASCII c, o, z; the rest fullwidth
71+
eval-dies-ok("say :36<αω>", "Scripts without Hex_Digit characters not allowed in general radix numbers");
72+
73+
#?rakudo 1 skip "Malformed radix number"
74+
is :36<utf១៦>, 51760986, "Nd numerals can be used in general radix numbers";
75+
eval-dies-ok("say :36<utfⅧ>", "Nl numerals are not allowed in general radix numbers");
76+
eval-dies-ok("say :36<utf㉜>", "No numerals are not allowed in general radix numbers");
77+
78+
done;

0 commit comments

Comments
 (0)