Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Fix \w handling, update <<>> to use new \w, add a bunch of predefined rules like <punct>
  • Loading branch information...
commit b3fda07868e3b3b626d0f65ccecf7e2a28683bb0 1 parent cf4faee
Stefan O'Rear authored
3  docs/TODO.S05
View
@@ -42,8 +42,7 @@ LOL: feature requires slice context mappings.
(1744) No <*...> DISCUSS
(1804) No <~~> DISCUSS
(1843) No <|g> etc
-(1893) Missing predefined rules include upper, lower, digit, xdigit, print,
- graph, cntrl, punct, alnum, wb, ww, space, blank
+(1893) No <ww> or <wb>
(2246) regex :ratchet syntax doesn't work DISCUSS
(2342) ::> is defined in terms of brackets, not temporal alternation
(2380) No <commit> or <cut>
4 docs/TODO.UTS18
View
@@ -33,9 +33,9 @@ aliases; property names are also loose-matched. Script and General_Category
support use of their values as pseudo-Boolean properties. Script_Extensions
is supported with alias scx.
-Property values are NOT loose-matched, but this does not seem to be mandated.
+Compatibility properties are handled following Annex C.
-*** Compatibility properties are NOT handled in the recommended way ***
+Property values are NOT loose-matched, but this does not seem to be mandated.
*** not in any documentation I can find: isCased, isCasefolded, isLowercase,
isUppercase, isTitlecase, isNFC, isNFD, isNFKC, isNFKD, toLowercase,
21 lib/CORE.setting
View
@@ -4,7 +4,7 @@ use MONKEY_TYPING;
# Predeclarations of types {{{
my class Mu { ... }
-my class Cursor { ... }
+my grammar Cursor { ... }
my class Regex { ... }
my class Num { ... }
my class Str { ... }
@@ -1535,7 +1535,7 @@ sub reduceop($triangle, $list, $right, $chain, $func, *@items) {
}
# }}}
# Regular expression support {{{
-my class Cursor {
+my grammar Cursor is Any {
method at_key($k) { self.{$k} }
method at_pos($k) { self.[$k] }
method suppose($rx) {
@@ -1578,8 +1578,21 @@ my class Cursor {
(box Str (cursor_backing (cast cursor (@ {self})))) } }
method ws() { Q:CgOp { (cursor_dows (cast cursor (@ {self}))) } }
method CURSOR() { self }
- token alpha { <+INTERNAL::alpha> } #OK
- token ident { <+INTERNAL::alpha> \w* } #OK
+ # definitions from UTS18
+ token alpha { <:Alphabetic> }
+ token lower { <:Lowercase> }
+ token upper { <:Uppercase> }
+ token punct { <:Punctuation> }
+ token digit { \d }
+ token xdigit { <:Nd + :Hex_Digit> }
+ token alnum { <:Alphabetic + :Nd> }
+ token space { \s }
+ token blank { \h }
+ token cntrl { <:Control> }
+ token graph { <:ANY - :Whitespace - :Control - :Surrogate - :Unassigned> }
+ token print { <+graph + blank - cntrl> }
+ token word { \w }
+ token ident { <.alpha> \w* }
}
my class Match is Cool {
3  lib/Cursor.cs
View
@@ -1054,8 +1054,7 @@ public sealed class CC : IFreeze {
0x000A, MAll, 0x000E, 0, 0x0085, MAll, 0x0086, 0,
0x2028, MAll, 0x202A, 0 });
- [Immutable] public static readonly CC Word = new CC(new int[] { 0, MAlNum,
- '_', MAll, '_'+1, MAlNum });
+ [Immutable] public static readonly CC Word = new CC(new int[] { 0,263167,9398,1073741823,9451,263167 }); // same as \w
[Immutable] public static readonly CC All = new CC(MAll);
[Immutable] public static readonly CC None = new CC(0);
3  src/CClass.pm6
View
@@ -73,7 +73,8 @@ method negate() { _binop(-> $a, $ { 0x3FFF_FFFF +& +^$a }, self, $Empty) }
# the range here is the only part of Other_Alphabetic which is not already
# contained in M* in Unicode 6.0.0
-our $Word = CClass.catm(< Me Mn Ms Pc Nd Ll Lt Lu Lm Lo Nl >).plus(CClass.range(9398, 9450));
+our $Word = CClass.catm(< Me Mn Ms Pc Nd Ll Lt Lu Lm Lo Nl >)\
+ .plus(CClass.range(chr(9398), chr(9450)));
our $Digit = CClass.catm(< Nd >);
our $Space = CClass.enum( # Unicode :Whitespace property - TODO use db
"\x0009", "\x000A", "\x000B", "\x000C", "\x000D", "\x0020", "\x0085",
Please sign in to comment.
Something went wrong with that request. Please try again.