|
| 1 | +#!/usr/bin/env perl6 |
| 2 | +# Generates tests from GraphemeBreakTest.txt from UNIDATA |
| 3 | +use v6; |
# Generates a grapheme-segmentation test file from the GraphemeBreakTest data
# file.
#
# Parameters:
#   $GrahemeBreakTest-file - path of the GraphemeBreakTest data file to read.
#
# Each data line is expected to look like
#     ÷ 0020 × 0308 ÷ 0020 ÷   # description
# i.e. whitespace-separated tokens where '÷' marks a grapheme boundary, '×'
# marks a position where no break is allowed, and everything else is a
# hexadecimal code point. For every usable line one `is` assertion is
# emitted, checking that the sequence has (code points - '×' marks) graphemes.
#
# Lines are skipped when they are whole-line comments, contain no code points,
# contain a token that is neither a mark nor a hex number, or contain a code
# point that cannot appear in a string (surrogates, values above U+10FFFF).
#
# Side effect: writes "S15-nfg/GraphemeBreakPropertyTest.t", relative to the
# current working directory.
sub MAIN ( Str $GrahemeBreakTest-file ) {
    my @tests;
    for $GrahemeBreakTest-file.IO.lines.kv -> $index, $line {
        # Line numbers in the generated descriptions are 1-based.
        my $line-no = $index + 1;

        # Whole-line comments carry no test data.
        next if $line ~~ / ^ \s* '#' /;

        # Everything from the first '#' onwards is a human-readable description.
        my $data = $line.subst(/ '#' .* $ /, '');

        # Keep only the span from the first to the last hex digit: the marks
        # at the very start and end are dropped, since there is always a
        # break at the start and end of a string.
        my $sequence = $data.match(/ <:AHex> [ .* <:AHex> ]? /);
        next unless $sequence;

        my @codes;
        my $no-break = 0;
        my $unusable = False;
        for $sequence.Str.words -> $token {
            if $token eq '×' {
                # Count the positions where we are not supposed to break.
                $no-break++;
            }
            elsif $token ~~ / ^ <:AHex>+ $ / {
                my $code = $token.parse-base(16);
                # Surrogates and out-of-range values cannot be turned into a
                # string, so a test built from them could never run.
                if $code > 0x10FFFF || 0xD800 <= $code <= 0xDFFF {
                    $unusable = True;
                    last;
                }
                @codes.push: $code;
            }
            elsif $token ne '÷' {
                # Neither a mark nor a code point: do not guess, skip the line.
                $unusable = True;
                last;
            }
        }
        next if $unusable;

        # Count the code points as listed in the data file. Counting them on
        # a Str instead would report the normalized form, which is shorter
        # whenever the sequence composes (e.g. 0061 0308).
        my $code-count = @codes.elems;
        my $should-be  = $code-count - $no-break;
        my $uni-codes  = @codes.map({ '0x' ~ .base(16) }).join(', ');
        @tests.push: qq<is Uni.new($uni-codes).Str.chars, $should-be, "GraphemeBreakTest Line: $line-no Codes: $code-count Non-break: $no-break";>;
    }

    my $header = qq:to/END/;
        use v6;
        use Test;
        plan { @tests.elems };
        END

    # A blank line separates the preamble from the generated assertions.
    spurt "S15-nfg/GraphemeBreakPropertyTest.t",
          $header ~ "\n" ~ @tests.map({ "$_\n" }).join;
}
0 commit comments