-
-
Notifications
You must be signed in to change notification settings - Fork 373
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
WIP on generating hashes for .succ/.pred
- Loading branch information
Showing
1 changed file
with
178 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,178 @@ | ||
# This script reads the Rakudo/Internals.pm file from STDIN, and generates | ||
# the necessary lookup hashes for making magic Str .succ / .pred work, and | ||
# writes it to STDOUT. | ||
|
||
use v6; | ||
use nqp; | ||
|
||
# general initializations | ||
my $generator = $*PROGRAM-NAME; | ||
my $generated = DateTime.now.gist.subst(/\.\d+/,''); | ||
my $start = '#- start of generated part of succ/pred hashes'; | ||
my $end = '#- end of generated part of succ/pred hashes'; | ||
|
||
# the ranges we consider magic wrt to .succ / .pred | ||
my @ranges = | ||
"0".ord .. "9".ord, # arabic digits | ||
"A".ord .. "Z".ord, # latin uppercase | ||
"a".ord .. "z".ord, # lating lowercase | ||
0x00391 .. 0x003A9, # greek uppercase | ||
0x003B1 .. 0x003C9, # greek lowercase | ||
0x005D0 .. 0x005EA, # hebrew | ||
0x00410 .. 0x0042F, # cyrillic uppercase | ||
0x00430 .. 0x0044F, # cyrillic lowercase | ||
0x00660 .. 0x00669, # arabic-indic digits | ||
0x00966 .. 0x0096F, # devanagari digits | ||
0x009E6 .. 0x009EF, # bengali digits | ||
0x00A66 .. 0x00A6F, # gurmukhi digits | ||
0x00AE6 .. 0x00AEF, # gujarati digits | ||
0x00B66 .. 0x00B6F, # oriya digits | ||
0x02070 .. 0x02079, # superscripts | ||
0x02080 .. 0x02089, # subscripts | ||
0x02160 .. 0x0216b, # clock roman uc | ||
0x02170 .. 0x0217b, # clock roman lc | ||
0x02460 .. 0x02473, # circled digits 1..20 | ||
0x02474 .. 0x02487, # parenthesized digits 1..20 | ||
0x0249C .. 0x024B5, # parenthesized latin lc | ||
0x02581 .. 0x02588, # lower blocks | ||
0x02680 .. 0x02685, # die faces | ||
0x02776 .. 0x0277F, # dingbat negative circled 1..10 | ||
0x0FF10 .. 0x0FF19, # fullwidth digits | ||
0x1F37A .. 0x1F37B, # beer mugs | ||
0x1F42A .. 0x1F42B, # camels | ||
; | ||
|
||
# ranges that start with these, carry (aka "9".succ -> "10" instead of "00") | ||
my str $carrydigits = nqp::unbox_s(( | ||
'0', # arabic | ||
"\x00660", # arabic-indic | ||
"\x00966", # devanagari | ||
"\x009E6", # bengali | ||
"\x00A66", # gurmukhi | ||
"\x00AE6", # gujarati | ||
"\x00B66", # oriya | ||
"\x02070", # superscripts XXX: should be treated as digit? | ||
"\x02080", # subscripts XXX: should be treated as digit? | ||
"\x0FF10", # fullwidth XXX: should be treated as digit? | ||
"\x1F37A", # beer mugs | ||
"\x1F42A", # camels | ||
).join); | ||
|
||
# for all the lines in the source that don't need special handling | ||
for $*IN.lines -> $line { | ||
|
||
# nothing to do yet | ||
unless $line.starts-with($start) { | ||
say $line; | ||
next; | ||
} | ||
|
||
# found header | ||
say $start ~ " --------------------------------"; | ||
say "#- Generated on $generated by $generator"; | ||
say "#- PLEASE DON'T CHANGE ANYTHING BELOW THIS LINE"; | ||
|
||
# skip the old version of the code | ||
for $*IN.lines -> $line { | ||
last if $line.starts-with($end); | ||
} | ||
|
||
# generate the .succ hash | ||
print Q:to/SOURCE/; | ||
method INITIALIZE-SUCC(--> Nil) { | ||
$succ := nqp::hash( | ||
SOURCE | ||
|
||
print Q:to/SOURCE/; | ||
); | ||
} | ||
SOURCE | ||
|
||
# generate the .pred hash | ||
print Q:to/SOURCE/; | ||
method INITIALIZE-PRED(--> Nil) { | ||
$pred := nqp::hash( | ||
SOURCE | ||
|
||
print Q:to/SOURCE/; | ||
); | ||
} | ||
SOURCE | ||
|
||
# we're done for this role | ||
say "#- PLEASE DON'T CHANGE ANYTHING ABOVE THIS LINE"; | ||
say $end ~ " ----------------------------------"; | ||
} | ||
|
||
=finish | ||
|
||
# calculate the beginning and ending positions of <!after '.'><rangechar+> | ||
sub RANGEPOS(str $str, \pos, \end) { # sadly, --> Nil doesn't work here | ||
my int $pos = nqp::chars($str); | ||
while $pos > 0 { | ||
$pos = $pos - 1; | ||
my str $ch = nqp::substr($str, $pos, 1); | ||
if nqp::isge_i(nqp::index($RANGECHAR, $ch, 0), 0) { | ||
my int $end = $pos; | ||
while $pos > 0 { | ||
$pos = $pos - 1; | ||
$ch = nqp::substr($str, $pos, 1); | ||
last if nqp::iseq_s($ch, '.'); | ||
unless nqp::isge_i(nqp::index($RANGECHAR, $ch, 0), 0) { | ||
pos = $pos + 1; | ||
end = $end; | ||
return; | ||
} | ||
} | ||
unless nqp::iseq_s($ch, '.') { | ||
pos = $pos; | ||
end = $end; | ||
return; | ||
} | ||
} | ||
} | ||
pos = 0; | ||
end = -1; | ||
return | ||
} | ||
|
||
method pred(Str:D:) { | ||
my str $str = self; | ||
RANGEPOS($str, my Int $Ir0, my Int $Ir1); | ||
my int $r0 = $Ir0; | ||
my int $r1 = $Ir1; | ||
while $r1 >= $r0 { | ||
my str $ch0 = nqp::substr($str, $r1, 1); | ||
my int $ipos = nqp::index($RANGECHAR, $ch0); | ||
$ipos = $RANGECHAR.index($ch0, $ipos+1) // $ipos; | ||
my str $ch1 = nqp::substr($RANGECHAR, $ipos-1, 1); | ||
$str = nqp::replace($str, $r1, 1, $ch1); | ||
# return if no carry | ||
return $str if $ch0 gt $ch1; | ||
# carry to previous position | ||
$r1 = $r1 - 1; | ||
} | ||
# cannot carry beyond first rangechar position | ||
fail('Decrement out of range'); | ||
} | ||
|
||
method succ(Str:D:) { | ||
my str $str = self; | ||
RANGEPOS($str, my Int $Ir0, my Int $Ir1); | ||
my int $r0 = $Ir0; | ||
my int $r1 = $Ir1; | ||
while $r1 >= $r0 { | ||
my str $ch0 = nqp::substr($str, $r1, 1); | ||
my int $ipos = nqp::index($RANGECHAR, $ch0); | ||
my str $ch1 = nqp::substr($RANGECHAR, $ipos+1, 1); | ||
$str = nqp::replace($str, $r1, 1, $ch1); | ||
return $str if $ch1 gt $ch0; | ||
# carry to previous position | ||
$r1 = $r1 - 1; | ||
# extend string if carried past first rangechar position | ||
$str = nqp::replace($str, $r0, 0, | ||
nqp::ifnull(nqp::atkey($carrydigit,$ch1),$ch1)) | ||
if $r1 < $r0; | ||
} | ||
$str; | ||
} |