WIP on generating hashes for .succ/.pred

rakudo · Mar 26, 2016 · 47d21b8 · 47d21b8
1 parent ccc38a0
commit 47d21b8
Showing 1 changed file with 178 additions and 0 deletions.
diff --git a/tools/build/makeMAGIC_INC_DEC.pl6 b/tools/build/makeMAGIC_INC_DEC.pl6
@@ -0,0 +1,178 @@
+# This script reads the Rakudo/Internals.pm file from STDIN, and generates
+# the necessary lookup hashes for making magic Str .succ / .pred work, and
+# writes it to STDOUT.
+
+use v6;
+use nqp;
+
+# general initializations
+my $generator = $*PROGRAM-NAME;
+my $generated = DateTime.now.gist.subst(/\.\d+/,'');
+my $start     = '#- start of generated part of succ/pred hashes';
+my $end       = '#- end of generated part of succ/pred hashes';
+
+# the ranges we consider magic wrt to .succ / .pred
+my @ranges =
+  "0".ord .. "9".ord,  # arabic digits
+  "A".ord .. "Z".ord,  # latin uppercase
+  "a".ord .. "z".ord,  # lating lowercase
+  0x00391 .. 0x003A9,  # greek uppercase
+  0x003B1 .. 0x003C9,  # greek lowercase
+  0x005D0 .. 0x005EA,  # hebrew
+  0x00410 .. 0x0042F,  # cyrillic uppercase
+  0x00430 .. 0x0044F,  # cyrillic lowercase
+  0x00660 .. 0x00669,  # arabic-indic digits
+  0x00966 .. 0x0096F,  # devanagari digits
+  0x009E6 .. 0x009EF,  # bengali digits
+  0x00A66 .. 0x00A6F,  # gurmukhi digits
+  0x00AE6 .. 0x00AEF,  # gujarati digits
+  0x00B66 .. 0x00B6F,  # oriya digits
+  0x02070 .. 0x02079,  # superscripts
+  0x02080 .. 0x02089,  # subscripts
+  0x02160 .. 0x0216b,  # clock roman uc
+  0x02170 .. 0x0217b,  # clock roman lc
+  0x02460 .. 0x02473,  # circled digits 1..20
+  0x02474 .. 0x02487,  # parenthesized digits 1..20
+  0x0249C .. 0x024B5,  # parenthesized latin lc
+  0x02581 .. 0x02588,  # lower blocks
+  0x02680 .. 0x02685,  # die faces
+  0x02776 .. 0x0277F,  # dingbat negative circled 1..10
+  0x0FF10 .. 0x0FF19,  # fullwidth digits
+  0x1F37A .. 0x1F37B,  # beer mugs
+  0x1F42A .. 0x1F42B,  # camels
+;
+
+# ranges that start with these, carry (aka "9".succ -> "10" instead of "00")
+my str $carrydigits = nqp::unbox_s((
+         '0',  # arabic
+   "\x00660",  # arabic-indic
+   "\x00966",  # devanagari
+   "\x009E6",  # bengali
+   "\x00A66",  # gurmukhi
+   "\x00AE6",  # gujarati
+   "\x00B66",  # oriya
+   "\x02070",  # superscripts XXX: should be treated as digit?
+   "\x02080",  # subscripts XXX: should be treated as digit?
+   "\x0FF10",  # fullwidth XXX: should be treated as digit?
+   "\x1F37A",  # beer mugs
+   "\x1F42A",  # camels
+).join);
+
+# for all the lines in the source that don't need special handling
+for $*IN.lines -> $line {
+
+    # nothing to do yet
+    unless $line.starts-with($start) {
+        say $line;
+        next;
+    }
+
+    # found header
+    say $start ~ " --------------------------------";
+    say "#- Generated on $generated by $generator";
+    say "#- PLEASE DON'T CHANGE ANYTHING BELOW THIS LINE";
+
+    # skip the old version of the code
+    for $*IN.lines -> $line {
+        last if $line.starts-with($end);
+    }
+
+    # generate the .succ hash
+    print Q:to/SOURCE/;
+    method INITIALIZE-SUCC(--> Nil) {
+        $succ := nqp::hash(
+SOURCE
+
+    print Q:to/SOURCE/;
+        );
+    }
+SOURCE
+
+    # generate the .pred hash
+    print Q:to/SOURCE/;
+    method INITIALIZE-PRED(--> Nil) {
+        $pred := nqp::hash(
+SOURCE
+
+    print Q:to/SOURCE/;
+        );
+    }
+SOURCE
+
+    # we're done for this role
+    say "#- PLEASE DON'T CHANGE ANYTHING ABOVE THIS LINE";
+    say $end ~ " ----------------------------------";
+}
+
+=finish
+
+# calculate the beginning and ending positions of <!after '.'><rangechar+>
+sub RANGEPOS(str $str, \pos, \end) {  # sadly, --> Nil doesn't work here
+    my int $pos = nqp::chars($str);
+    while $pos > 0 {
+        $pos = $pos - 1;
+        my str $ch = nqp::substr($str, $pos, 1);
+        if nqp::isge_i(nqp::index($RANGECHAR, $ch, 0), 0) {
+            my int $end = $pos;
+            while $pos > 0 {
+                $pos = $pos - 1;
+                $ch = nqp::substr($str, $pos, 1);
+                last if nqp::iseq_s($ch, '.');
+                unless nqp::isge_i(nqp::index($RANGECHAR, $ch, 0), 0) {
+                    pos = $pos + 1;
+                    end = $end;
+                    return;
+                }
+            }
+            unless nqp::iseq_s($ch, '.') {
+                pos = $pos;
+                end = $end;
+                return;
+            }
+        }
+    }
+    pos = 0;
+    end = -1;
+    return
+}
+
+method pred(Str:D:) {
+    my str $str = self;
+    RANGEPOS($str, my Int $Ir0, my Int $Ir1);
+    my int $r0 = $Ir0;
+    my int $r1 = $Ir1;
+    while $r1 >= $r0 {
+        my str $ch0  = nqp::substr($str, $r1, 1);
+        my int $ipos = nqp::index($RANGECHAR, $ch0);
+        $ipos = $RANGECHAR.index($ch0, $ipos+1) // $ipos;
+        my str $ch1 = nqp::substr($RANGECHAR, $ipos-1, 1);
+        $str = nqp::replace($str, $r1, 1, $ch1);
+        # return if no carry
+        return $str if $ch0 gt $ch1;
+        # carry to previous position
+        $r1 = $r1 - 1;
+    }
+    # cannot carry beyond first rangechar position
+    fail('Decrement out of range');
+}
+
+method succ(Str:D:) {
+    my str $str = self;
+    RANGEPOS($str, my Int $Ir0, my Int $Ir1);
+    my int $r0 = $Ir0;
+    my int $r1 = $Ir1;
+    while $r1 >= $r0 {
+        my str $ch0  = nqp::substr($str, $r1, 1);
+        my int $ipos = nqp::index($RANGECHAR, $ch0);
+        my str $ch1  = nqp::substr($RANGECHAR, $ipos+1, 1);
+        $str = nqp::replace($str, $r1, 1, $ch1);
+        return $str if $ch1 gt $ch0;
+        # carry to previous position
+        $r1 = $r1 - 1;
+        # extend string if carried past first rangechar position
+        $str = nqp::replace($str, $r0, 0,
+          nqp::ifnull(nqp::atkey($carrydigit,$ch1),$ch1))
+            if $r1 < $r0;
+    }
+    $str;
+}