Skip to content

Commit 2a806dd

Browse files
committed
Add new test generated from GraphemeBreakPropertyTest Unicode 9.0
Add the script used to generate it as well.
1 parent 671355e commit 2a806dd

File tree

2 files changed

+858
-0
lines changed

2 files changed

+858
-0
lines changed
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#!/usr/bin/env perl6
2+
# Generates grapheme-cluster tests from GraphemeBreakTest.txt (Unicode Character Database, auxiliary/ directory)
3+
use v6;
4+
# Entry point: turn a Unicode GraphemeBreakTest.txt file into a Raku test file.
#
# Parameter:
#   $GrahemeBreakTest-file - path to GraphemeBreakTest.txt (name kept as in
#                            the original script, typo included, so the
#                            generated USAGE text does not change).
#
# Side effect: writes "S15-nfg/GraphemeBreakPropertyTest.t" relative to the
# current working directory. Any I/O failure propagates as an exception.
sub MAIN ( Str $GrahemeBreakTest-file ) {
    my @tests;
    # .kv yields (0-based index, line) pairs; reported line numbers are 1-based.
    for $GrahemeBreakTest-file.IO.lines.kv -> $index, $line {
        my $test = grapheme-break-test($line, $index + 1);
        @tests.push($test) if $test.defined;
    }

    my $file = qq:to/END/;
        use v6;
        use Test;
        plan @tests.elems();

        END
    $file ~= @tests.map(* ~ "\n").join;
    spurt "S15-nfg/GraphemeBreakPropertyTest.t", $file;
}

# Build one `is ...;` test statement from a single line of the data file.
#
# A data line looks like:
#     ÷ 0020 × 0308 ÷ 0020 ÷   #  ÷ [0.2] SPACE (Other) × [9.0] ...
# where '÷' marks a grapheme break and '×' marks "no break here".
#
# Returns the test statement as a Str, or Nil when the line yields no test:
# comment-only or blank lines, lines with a token that is not valid hex, and
# lines containing a codepoint that cannot appear in a Uni/Str (surrogates or
# values above U+10FFFF).
sub grapheme-break-test ( Str $line, Int $line-no ) {
    # Everything from the first '#' onwards is commentary.
    my $rule = $line.split('#', 2).head.trim;
    return Nil unless $rule.chars;

    # Markers at the very start and end carry no information, since a string
    # always breaks there; strip them so only interior markers are counted.
    my $inner = $rule.subst(/ ^ <[÷×\s]>+ /, '').subst(/ <[÷×\s]>+ $ /, '');

    # Count how many codepoint pairs we are not supposed to break between.
    my $no-break = $inner.comb('×').elems;

    # What remains once the markers are gone is the list of hex codepoints.
    my @hex = $inner.subst(/ <[÷×]> /, ' ', :g).words;
    return Nil unless @hex;

    my @codepoints;
    for @hex -> $hex {
        # `try` turns the Failure from a malformed number into an undefined value.
        my $codepoint = try $hex.parse-base(16);
        return Nil unless $codepoint.defined;
        # Surrogates (the data file uses U+D800) are not valid in a string.
        return Nil if 0xD800 <= $codepoint <= 0xDFFF;
        return Nil if $codepoint > 0x10FFFF;
        @codepoints.push($codepoint);
    }

    my $uni-codes = @codepoints.map({ '0x' ~ .base(16) }).join(', ');
    # Count the codepoints as listed in the data file. Counting via Str.codes
    # would be wrong: a Str is NFC-normalized, so e.g. U+AC00 U+11A8 collapses
    # to one codepoint and the expected grapheme count would come out too low.
    my $codes     = @codepoints.elems;
    my $should-be = $codes - $no-break;
    return qq<is Uni.new($uni-codes).Str.chars, $should-be, "GraphemeBreakTest Line: $line-no Codes: $codes Non-break: $no-break";>;
}

0 commit comments

Comments
 (0)