Skip to content

Commit

Permalink
Make .uniprop a proper multi method
Browse files Browse the repository at this point in the history
- make the sub version just pass on, like it should
- make the .uniprop General_Category lookup a constant
- move logic to Rakudo::Unicodey, only place for backend specific code
- makes the sub version 25% slower (as it now passes on to the method)
- makes the method version about 30x as fast
  • Loading branch information
lizmat committed Apr 5, 2021
1 parent 0b91b21 commit c9aceda
Showing 1 changed file with 162 additions and 119 deletions.
281 changes: 162 additions & 119 deletions src/core.c/unicodey.pm6
Expand Up @@ -19,6 +19,13 @@ my class Rakudo::Unicodey is implementation-detail {
@ords
}

# Stub for the General_Category lookup: accepts a native int codepoint
# (unnamed, unused) and always throws X::NYI for the feature "uniprop".
# Presumably replaced by a backend-specific implementation -- TODO confirm.
method uniprop-general(int) is hidden-from-backtrace {
    X::NYI.new(feature => 'uniprop').throw
}
# Stub for the named-property lookup: accepts a native int codepoint and a
# native str property name (both unnamed, unused) and always throws X::NYI
# for the feature "uniprop".
# Presumably replaced by a backend-specific implementation -- TODO confirm.
method uniprop(int, str) is hidden-from-backtrace {
    X::NYI.new(feature => 'uniprop').throw
}

# Stub for NFC normalization: accepts a native str (unnamed, unused) and
# always throws X::NYI for the feature "NFC".
method NFC(str) is hidden-from-backtrace {
    X::NYI.new(feature => 'NFC').throw
}
Expand Down Expand Up @@ -58,6 +65,112 @@ my class Rakudo::Unicodey is implementation-detail {
)
}

# Property code for "General_Category", resolved once when the constant is
# evaluated so that uniprop-general does not repeat the name lookup per call.
my constant $gcprop = nqp::unipropcode("General_Category");
# Return the General_Category property of codepoint $code, fetched as a
# string with nqp::getuniprop_str.
method uniprop-general(int $code) {
nqp::getuniprop_str($code,$gcprop)
}

## The code below was generated by tools/build/makeUNIPROP.raku
# Lookup table from Unicode property name (long names and short aliases) to
# a tag that tells uniprop() below how to fetch the value:
#   'B'   - fetched with nqp::getuniprop_bool
#   'S'   - fetched with nqp::getuniprop_str
#   'lc', 'tc', 'uc' - computed with nqp::lc / nqp::tc / nqp::uc
#   'na'  - fetched with nqp::getuniname
#   'nv'  - fetched with .unival
#   'bmg' - fetched as an int and converted to a character
# The tag 'I' (nqp::getuniprop_int) is not pre-seeded here; it is only stored
# at runtime by the fallback branch of uniprop().
my constant $prefs = nqp::hash(
'AHex','B','ASCII_Hex_Digit','B','Age','S','Alpha','B','Alphabetic','B',
'Bidi_C','B','Bidi_Class','S','Bidi_Control','B','Bidi_M','B',
'Bidi_Mirrored','B','Bidi_Mirroring_Glyph','bmg',
'Bidi_Paired_Bracket_Type','S','Block','S','CE','B','CI','B','CWCF','B',
'CWCM','B','CWKCF','B','CWL','B','CWT','B','CWU','B',
'Canonical_Combining_Class','S','Case_Folding','S','Case_Ignorable','B',
'Cased','B','Changes_When_Casefolded','B','Changes_When_Casemapped','B',
'Changes_When_Lowercased','B','Changes_When_NFKC_Casefolded','B',
'Changes_When_Titlecased','B','Changes_When_Uppercased','B','Comp_Ex','B',
'Composition_Exclusion','B','DI','B','Dash','B',
'Decomposition_Mapping','S','Decomposition_Type','S',
'Default_Ignorable_Code_Point','B','Dep','B','Deprecated','B','Dia','B',
'Diacritic','B','East_Asian_Width','S','Emoji','B','Emoji_Modifier','B',
'Emoji_Modifier_Base','B','Emoji_Presentation','B','Expands_On_NFC','B',
'Expands_On_NFD','B','Expands_On_NFKC','B','Expands_On_NFKD','B',
'Ext','B','Extender','B','FC_NFKC','S','FC_NFKC_Closure','S',
'Full_Composition_Exclusion','B','GCB','S','General_Category','S',
'Gr_Base','B','Gr_Ext','B','Gr_Link','B','Grapheme_Base','B',
'Grapheme_Cluster_Break','S','Grapheme_Extend','B','Grapheme_Link','B',
'Hangul_Syllable_Type','S','Hex','B','Hex_Digit','B','Hyphen','B',
'IDC','B','IDS','B','IDSB','B','IDST','B','IDS_Binary_Operator','B',
'IDS_Trinary_Operator','B','ID_Continue','B','ID_Start','B',
'ISO_Comment','S','Ideo','B','Ideographic','B','InPC','S','InSC','S',
'Indic_Positional_Category','S','Indic_Syllabic_Category','S',
'Join_C','B','Join_Control','B','Joining_Group','S','Joining_Type','S',
'LOE','B','Line_Break','S','Logical_Order_Exception','B','Lower','B',
'Lowercase','B','Lowercase_Mapping','lc','Math','B','NChar','B',
'NFC_QC','S','NFC_Quick_Check','S','NFD_QC','S','NFD_Quick_Check','S',
'NFKC_CF','S','NFKC_Casefold','S','NFKC_QC','S','NFKC_Quick_Check','S',
'NFKD_QC','S','NFKD_Quick_Check','S','Name','na',
'Noncharacter_Code_Point','B','Numeric_Type','S','Numeric_Value','nv',
'OAlpha','B','ODI','B','OGr_Ext','B','OIDC','B','OIDS','B','OLower','B',
'OMath','B','OUpper','B','Other_Alphabetic','B',
'Other_Default_Ignorable_Code_Point','B','Other_Grapheme_Extend','B',
'Other_ID_Continue','B','Other_ID_Start','B','Other_Lowercase','B',
'Other_Math','B','Other_Uppercase','B','PCM','B','Pat_Syn','B',
'Pat_WS','B','Pattern_Syntax','B','Pattern_White_Space','B',
'Prepended_Concatenation_Mark','B','QMark','B','Quotation_Mark','B',
'RI','B','Radical','B','Regional_Indicator','B','SB','S','SD','B',
'STerm','B','Script','S','Sentence_Break','S','Sentence_Terminal','B',
'Simple_Case_Folding','S','Simple_Lowercase_Mapping','S',
'Simple_Titlecase_Mapping','S','Simple_Uppercase_Mapping','S',
'Soft_Dotted','B','Term','B','Terminal_Punctuation','B',
'Titlecase_Mapping','tc','UIdeo','B','Unified_Ideograph','B','Upper','B',
'Uppercase','B','Uppercase_Mapping','uc','VS','B',
'Variation_Selector','B','Vertical_Orientation','S','WB','S','WSpace','B',
'White_Space','B','Word_Break','S','XIDC','B','XIDS','B',
'XID_Continue','B','XID_Start','B','XO_NFC','B','XO_NFD','B',
'XO_NFKC','B','XO_NFKD','B','age','S','bc','S','blk','S','bmg','bmg',
'bpt','S','ccc','S','cf','S','cjkCompatibilityVariant','S','dm','S',
'dt','S','ea','S','gc','S','hst','S','isc','S','jg','S','jt','S',
'kCompatibilityVariant','S','lb','S','lc','lc','na','na','nt','S',
'nv','nv','sc','S','scf','S','sfc','S','slc','S','space','B','stc','S',
'suc','S','tc','tc','uc','uc','vo','S',
);
## End generated code
# Look up the Unicode property $propname for codepoint $code.
#
# The property name is resolved to a property code with nqp::unipropcode and
# its tag is read from $prefs (empty string when the name has no entry). The
# tag selects how the value is fetched; see the table comment above $prefs.
#
# For a name without a tag, the string lookup is tried first. A true result
# is returned and the name is tagged 'S'; otherwise the name is tagged 'I'
# and the integer lookup is returned. Either way the tag is stored in $prefs,
# so later calls for the same name take the direct branch.
#
# NOTE(review): the fallback mutates $prefs at runtime via nqp::bindkey;
# whether that is safe with concurrent callers is not visible here -- confirm.
method uniprop(int $code, str $propname) {
my int $prop = nqp::unipropcode($propname);
my str $pref = nqp::ifnull(nqp::atkey($prefs, $propname),'');
nqp::if(
nqp::iseq_s($pref, 'S'),
nqp::getuniprop_str($code,$prop),
nqp::if(
nqp::iseq_s($pref, 'I'),
nqp::getuniprop_int($code,$prop),
nqp::if(
nqp::iseq_s($pref, 'B'),
nqp::hllbool(nqp::getuniprop_bool($code,$prop)),
nqp::if(
nqp::iseq_s($pref, 'lc'),
nqp::lc(nqp::chr(nqp::unbox_i($code))),
nqp::if(
nqp::iseq_s($pref, 'tc'),
nqp::tc(nqp::chr(nqp::unbox_i($code))),
nqp::if(
nqp::iseq_s($pref, 'uc'),
nqp::uc(nqp::chr(nqp::unbox_i($code))),
nqp::if(
nqp::iseq_s($pref, 'na'),
nqp::getuniname($code),
nqp::if(
nqp::iseq_s($pref, 'nv'),
$code.unival,
nqp::if(
nqp::iseq_s($pref, 'bmg'),
# A zero ordinal is reported as the empty string instead of chr(0).
nqp::stmts(
(my int $bmg-ord = nqp::getuniprop_int($code, $prop)),
$bmg-ord ?? nqp::chr($bmg-ord) !! ''),
# No tag yet: probe as string, then fall back to int, and remember which.
nqp::stmts(
(my $result := nqp::getuniprop_str($code,$prop)),
nqp::if(
nqp::istrue($result),
nqp::stmts(
nqp::bindkey($prefs, $propname, 'S'),
$result),
nqp::stmts(
nqp::bindkey($prefs, $propname, 'I'),
nqp::getuniprop_int($code,$prop)))))))))))))
}

# Convert $str to codes using NFC normalization, storing them in a freshly
# created NFC object, and return the result of nqp::strtocodes.
method NFC(str $str) {
    my $target := nqp::create(NFC);
    nqp::strtocodes($str, nqp::const::NORMALIZE_NFC, $target)
}
Expand Down Expand Up @@ -150,7 +263,14 @@ augment class Cool {
# Dispatcher for .univals: no positional arguments, any named arguments.
proto method univals(*%) is pure {*}
# A defined Cool delegates to the .univals of its string form.
multi method univals(Cool:D:) { self.Str.univals }

# Forward to the uniprop sub, passing the invocant first and then every
# argument captured in |c.
method uniprop(|c) { uniprop(self, |c) }
# Dispatcher for .uniprop: at most one positional argument plus named ones.
proto method uniprop($?, *%) is pure {*}

# No property name given: delegate to the string form of the invocant.
multi method uniprop(Cool:D:) { self.Str.uniprop }

# Property name given: delegate to the string form, passing the name along.
multi method uniprop(Cool:D: Str:D $propname) { self.Str.uniprop: $propname }

# Method forms of the typed property lookups: each forwards to the sub of
# the same name, passing the invocant first and then the captured arguments.
method uniprop-int(|c) { uniprop-int(self, |c) }
method uniprop-bool(|c) { uniprop-bool(self, |c) }
method uniprop-str(|c) { uniprop-str(self, |c) }
Expand Down Expand Up @@ -190,6 +310,21 @@ augment class Int {
!! nqp::getuniname(self)
}

# General_Category of the codepoint given by the invocant.
#   - negative value: handled by the private codepoint-out-of-bounds method
#   - big integer:    empty string
#   - otherwise:      Rakudo::Unicodey.uniprop-general
multi method uniprop(Int:D:) {
    nqp::if(
      nqp::islt_I(self,0),
      self!codepoint-out-of-bounds('uniprop'),
      nqp::if(
        nqp::isbig_I(self),
        "",
        Rakudo::Unicodey.uniprop-general(self)
      )
    )
}
# Property $propname of the codepoint given by the invocant.
#   - negative value: handled by the private codepoint-out-of-bounds method
#   - big integer:    empty string
#   - otherwise:      Rakudo::Unicodey.uniprop
multi method uniprop(Int:D: Str:D $propname) {
    nqp::if(
      nqp::islt_I(self,0),
      self!codepoint-out-of-bounds('uniprop'),
      nqp::if(
        nqp::isbig_I(self),
        "",
        Rakudo::Unicodey.uniprop(self, $propname)
      )
    )
}

multi method unival(Int:D:) {
nqp::isbig_I(self) || nqp::islt_I(self,0)
?? self!codepoint-out-of-bounds('unival')
Expand All @@ -216,16 +351,6 @@ augment class Str {
Seq.new(Rakudo::Unicodey.uninames(self))
}

# Numeric value for the ordinal nqp::ord reports for the string; Nil when
# nqp::ord reports -1.
multi method unival(Str:D:) {
    my int $ord = nqp::ord($!value);
    nqp::iseq_i($ord,-1)
      ?? Nil
      !! Rakudo::Unicodey.unival($ord)
}

# Wrap whatever Rakudo::Unicodey.univals produces for this string in a Seq.
multi method univals(Str:D:) {
    my $source := Rakudo::Unicodey.univals(self);
    Seq.new($source)
}

method uniparse(Str:D: --> Str:D) {
my $names := nqp::split(',', self);
my $parts := nqp::list_s;
Expand All @@ -244,6 +369,27 @@ augment class Str {
nqp::join("",$parts)
}

# General_Category for the ordinal nqp::ord reports for the string; Nil when
# nqp::ord reports -1.
multi method uniprop(Str:D:) {
    my int $ord = nqp::ord($!value);
    nqp::iseq_i($ord,-1)
      ?? Nil
      !! Rakudo::Unicodey.uniprop-general($ord)
}
# Property $propname for the ordinal nqp::ord reports for the string; Nil
# when nqp::ord reports -1.
multi method uniprop(Str:D: Str:D $propname) {
    my int $ord = nqp::ord($!value);
    nqp::iseq_i($ord,-1)
      ?? Nil
      !! Rakudo::Unicodey.uniprop($ord, $propname)
}

# Numeric value for the ordinal nqp::ord reports for the string; Nil when
# nqp::ord reports -1.
multi method unival(Str:D:) {
    my int $ord = nqp::ord($!value);
    nqp::iseq_i($ord,-1)
      ?? Nil
      !! Rakudo::Unicodey.unival($ord)
}

# Wrap whatever Rakudo::Unicodey.univals produces for this string in a Seq.
multi method univals(Str:D:) {
    my $source := Rakudo::Unicodey.univals(self);
    Seq.new($source)
}

# Normalization methods on Str: each passes the string's $!value attribute
# to the Rakudo::Unicodey method of the same name.
multi method NFC(Str:D:) { Rakudo::Unicodey.NFC($!value) }
multi method NFD(Str:D:) { Rakudo::Unicodey.NFD($!value) }
multi method NFKC(Str:D:) { Rakudo::Unicodey.NFKC($!value) }
Expand Down Expand Up @@ -309,7 +455,7 @@ proto sub ords($, *%) is pure {*}
# Dispatchers taking one positional argument plus named arguments.
proto sub uniname($, *%) is pure {*}
proto sub uninames($, *%) is pure {*}

# NOTE(review): two `proto sub uniprop` declarations follow with different
# signatures (`$, |` versus `$, $?, *%`) -- confirm which one is meant to stay.
proto sub uniprop($, |) is pure {*}
proto sub uniprop($, $?, *%) is pure {*}
proto sub uniprops($, $?, *%) is pure {*}

# Dispatcher taking exactly two positional arguments plus named arguments.
proto sub uniprop-bool($, $, *%) is pure {*}
Expand All @@ -332,11 +478,13 @@ multi sub ords($s) { $s.ords }
# Sub forms: each calls the method of the same name on its first argument.
multi sub uniname(\what) { what.uniname }
multi sub uninames(\what) { what.uninames }

# uniprop without and with an explicit property name.
multi sub uniprop(\what) { what.uniprop }
multi sub uniprop(\what, Str:D $propname) { what.uniprop($propname) }

multi sub unival(\what) { what.unival }
multi sub univals(\what) { what.univals }

#?if jvm
# Inside the `#?if jvm` section: these candidates accept any arguments and
# die with a "NYI on jvm backend" message.
multi sub uniprop(|) { die 'uniprop NYI on jvm backend' }
multi sub uniprop-int(|) { die 'uniprop-int NYI on jvm backend' }
multi sub uniprop-bool(|) { die 'uniprop-bool NYI on jvm backend' }
multi sub uniprop-str(|) { die 'uniprop-str NYI on jvm backend' }
Expand All @@ -345,7 +493,6 @@ multi sub unimatch(|) { die 'unimatch NYI on jvm backend' }
#?endif

#?if js
# Inside the `#?if js` section: these candidates accept any arguments and
# die with a "NYI on js backend" message.
multi sub uniprop(|) { die 'uniprop NYI on js backend' }
multi sub uniprop-int(|) { die 'uniprop-int NYI on js backend' }
multi sub uniprop-bool(|) { die 'uniprop-bool NYI on js backend' }
multi sub uniprop-str(Int:D $code, Stringy:D $propname) {
Expand All @@ -356,111 +503,7 @@ multi sub unimatch(|) { die 'unimatch NYI on js backend' }
#?endif

#?if moar
# String form: a string that is false in boolean context yields Nil;
# otherwise uniprop is called again with the string's .ord, forwarding any
# further arguments captured in |c.
multi sub uniprop(Str:D $str, |c) { $str ?? uniprop($str.ord, |c) !! Nil }
# Return the General_Category property of codepoint $code as a string.
multi sub uniprop(Int:D $code) {
    # Resolve the property code once, when the constant is evaluated,
    # instead of looking the name up again on every call.
    my constant $general-category = nqp::unipropcode('General_Category');
    nqp::getuniprop_str($code,$general-category)
}
# Look up the Unicode property $propname for codepoint $code.
#
# The property name is resolved to a property code with nqp::unipropcode and
# its tag is read from the $prefs table (empty string when the name has no
# entry). The tag selects how the value is fetched:
#   'S' nqp::getuniprop_str, 'I' nqp::getuniprop_int, 'B' nqp::getuniprop_bool,
#   'lc'/'tc'/'uc' case mapping of the character, 'na' nqp::getuniname,
#   'nv' .unival, 'bmg' integer lookup converted to a character.
# For a name without a tag, the string lookup is tried first. A true result
# is returned and the name is tagged 'S'; otherwise the name is tagged 'I'
# and the integer lookup is returned. The tag is stored in $prefs either way.
#
# NOTE(review): the fallback mutates $prefs at runtime via nqp::bindkey;
# whether that is safe with concurrent callers is not visible here -- confirm.
multi sub uniprop(Int:D $code, Stringy:D $propname) {
## The code below was generated by tools/build/makeUNIPROP.raku
my constant $prefs = nqp::hash(
'AHex','B','ASCII_Hex_Digit','B','Age','S','Alpha','B','Alphabetic','B',
'Bidi_C','B','Bidi_Class','S','Bidi_Control','B','Bidi_M','B',
'Bidi_Mirrored','B','Bidi_Mirroring_Glyph','bmg',
'Bidi_Paired_Bracket_Type','S','Block','S','CE','B','CI','B','CWCF','B',
'CWCM','B','CWKCF','B','CWL','B','CWT','B','CWU','B',
'Canonical_Combining_Class','S','Case_Folding','S','Case_Ignorable','B',
'Cased','B','Changes_When_Casefolded','B','Changes_When_Casemapped','B',
'Changes_When_Lowercased','B','Changes_When_NFKC_Casefolded','B',
'Changes_When_Titlecased','B','Changes_When_Uppercased','B','Comp_Ex','B',
'Composition_Exclusion','B','DI','B','Dash','B',
'Decomposition_Mapping','S','Decomposition_Type','S',
'Default_Ignorable_Code_Point','B','Dep','B','Deprecated','B','Dia','B',
'Diacritic','B','East_Asian_Width','S','Emoji','B','Emoji_Modifier','B',
'Emoji_Modifier_Base','B','Emoji_Presentation','B','Expands_On_NFC','B',
'Expands_On_NFD','B','Expands_On_NFKC','B','Expands_On_NFKD','B',
'Ext','B','Extender','B','FC_NFKC','S','FC_NFKC_Closure','S',
'Full_Composition_Exclusion','B','GCB','S','General_Category','S',
'Gr_Base','B','Gr_Ext','B','Gr_Link','B','Grapheme_Base','B',
'Grapheme_Cluster_Break','S','Grapheme_Extend','B','Grapheme_Link','B',
'Hangul_Syllable_Type','S','Hex','B','Hex_Digit','B','Hyphen','B',
'IDC','B','IDS','B','IDSB','B','IDST','B','IDS_Binary_Operator','B',
'IDS_Trinary_Operator','B','ID_Continue','B','ID_Start','B',
'ISO_Comment','S','Ideo','B','Ideographic','B','InPC','S','InSC','S',
'Indic_Positional_Category','S','Indic_Syllabic_Category','S',
'Join_C','B','Join_Control','B','Joining_Group','S','Joining_Type','S',
'LOE','B','Line_Break','S','Logical_Order_Exception','B','Lower','B',
'Lowercase','B','Lowercase_Mapping','lc','Math','B','NChar','B',
'NFC_QC','S','NFC_Quick_Check','S','NFD_QC','S','NFD_Quick_Check','S',
'NFKC_CF','S','NFKC_Casefold','S','NFKC_QC','S','NFKC_Quick_Check','S',
'NFKD_QC','S','NFKD_Quick_Check','S','Name','na',
'Noncharacter_Code_Point','B','Numeric_Type','S','Numeric_Value','nv',
'OAlpha','B','ODI','B','OGr_Ext','B','OIDC','B','OIDS','B','OLower','B',
'OMath','B','OUpper','B','Other_Alphabetic','B',
'Other_Default_Ignorable_Code_Point','B','Other_Grapheme_Extend','B',
'Other_ID_Continue','B','Other_ID_Start','B','Other_Lowercase','B',
'Other_Math','B','Other_Uppercase','B','PCM','B','Pat_Syn','B',
'Pat_WS','B','Pattern_Syntax','B','Pattern_White_Space','B',
'Prepended_Concatenation_Mark','B','QMark','B','Quotation_Mark','B',
'RI','B','Radical','B','Regional_Indicator','B','SB','S','SD','B',
'STerm','B','Script','S','Sentence_Break','S','Sentence_Terminal','B',
'Simple_Case_Folding','S','Simple_Lowercase_Mapping','S',
'Simple_Titlecase_Mapping','S','Simple_Uppercase_Mapping','S',
'Soft_Dotted','B','Term','B','Terminal_Punctuation','B',
'Titlecase_Mapping','tc','UIdeo','B','Unified_Ideograph','B','Upper','B',
'Uppercase','B','Uppercase_Mapping','uc','VS','B',
'Variation_Selector','B','Vertical_Orientation','S','WB','S','WSpace','B',
'White_Space','B','Word_Break','S','XIDC','B','XIDS','B',
'XID_Continue','B','XID_Start','B','XO_NFC','B','XO_NFD','B',
'XO_NFKC','B','XO_NFKD','B','age','S','bc','S','blk','S','bmg','bmg',
'bpt','S','ccc','S','cf','S','cjkCompatibilityVariant','S','dm','S',
'dt','S','ea','S','gc','S','hst','S','isc','S','jg','S','jt','S',
'kCompatibilityVariant','S','lb','S','lc','lc','na','na','nt','S',
'nv','nv','sc','S','scf','S','sfc','S','slc','S','space','B','stc','S',
'suc','S','tc','tc','uc','uc','vo','S',
);
## End generated code
my int $prop = nqp::unipropcode($propname);
my str $pref = nqp::ifnull(nqp::atkey($prefs, $propname),'');
nqp::if(
nqp::iseq_s($pref, 'S'),
nqp::getuniprop_str($code,$prop),
nqp::if(
nqp::iseq_s($pref, 'I'),
nqp::getuniprop_int($code,$prop),
nqp::if(
nqp::iseq_s($pref, 'B'),
nqp::hllbool(nqp::getuniprop_bool($code,$prop)),
nqp::if(
nqp::iseq_s($pref, 'lc'),
nqp::lc(nqp::chr(nqp::unbox_i($code))),
nqp::if(
nqp::iseq_s($pref, 'tc'),
nqp::tc(nqp::chr(nqp::unbox_i($code))),
nqp::if(
nqp::iseq_s($pref, 'uc'),
nqp::uc(nqp::chr(nqp::unbox_i($code))),
nqp::if(
nqp::iseq_s($pref, 'na'),
nqp::getuniname($code),
nqp::if(
nqp::iseq_s($pref, 'nv'),
$code.unival,
nqp::if(
nqp::iseq_s($pref, 'bmg'),
# A zero ordinal is reported as the empty string instead of chr(0).
nqp::stmts(
(my int $bmg-ord = nqp::getuniprop_int($code, $prop)),
$bmg-ord ?? nqp::chr($bmg-ord) !! ''),
# No tag yet: probe as string, then fall back to int, and remember which.
nqp::stmts(
(my $result := nqp::getuniprop_str($code,$prop)),
nqp::if(
nqp::istrue($result),
nqp::stmts(
nqp::bindkey($prefs, $propname, 'S'),
$result),
nqp::stmts(
nqp::bindkey($prefs, $propname, 'I'),
nqp::getuniprop_int($code,$prop)))))))))))))
}

# Unicode functions
# String form of uniprop-int: a string that is false in boolean context
# yields Nil; otherwise uniprop-int is called again with the string's .ord.
multi sub uniprop-int(Str:D $str, Stringy:D $propname) {
    $str
      ?? uniprop-int($str.ord, $propname)
      !! Nil
}
Expand Down

0 comments on commit c9aceda

Please sign in to comment.