Skip to content

Commit 3e87ffd

Browse files
committed
Merge remote branch 'origin/unirx'
Conflicts: src/ops/nqp.ops
2 parents bf55068 + 1f5883b commit 3e87ffd

File tree

4 files changed

+110
-2
lines changed

4 files changed

+110
-2
lines changed

src/QAST/Compiler.nqp

100644100755
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,16 @@ class QAST::Compiler is HLL::Compiler {
415415
$ops;
416416
}
417417

418+
# Compile a :rxtype<uniprop> node: test the character at the current
# position of the target against the Unicode property named by $node[0].
#
# $node.negate inverts the test.  Unless the node's subtype is 'zerowidth',
# a successful match consumes one character (the position is incremented).
#
# Returns the Ops node holding the generated PIR; its result is %*REG<cur>.
method uniprop($node) {
    my $ops      := self.post_new('Ops', :result(%*REG<cur>));
    # Branch to the fail label when the property test comes out the wrong way:
    # a plain match fails on 0, a negated match fails on non-zero.
    my $cmpop    := $node.negate ?? 'ne' !! 'eq';
    my $consumes := $node.subtype ne 'zerowidth';

    if $consumes {
        # A consuming match needs a character to consume.  is_uprop yields 0
        # at the end of the string, which a negated test would accept and then
        # step the position past the end, so fail explicitly here.  $I11 is
        # only borrowed: is_uprop overwrites it immediately below.
        $ops.push_pirop('length', '$I11', %*REG<tgt>);
        $ops.push_pirop('ge', %*REG<pos>, '$I11', %*REG<fail>);
    }

    $ops.push_pirop('assign', '$S10', '"' ~ $node[0] ~ '"');
    $ops.push_pirop('is_uprop', '$I11', '$S10', %*REG<tgt>, %*REG<pos>);
    $ops.push_pirop($cmpop, '$I11', 0, %*REG<fail>);
    $ops.push_pirop('inc', %*REG<pos>) if $consumes;
    $ops;
}
427+
418428
# a :rxtype<ws> node is a normal subrule call
419429
method ws($node) { self.subrule($node) }
420430

src/QRegex/P6Regex/Actions.nqp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,12 @@ class QRegex::P6Regex::Actions is HLL::Actions {
409409
$qast := QAST::Regex.new( PAST::Node.new($name), :rxtype<subrule>, :subtype<method>,
410410
:negate( $<sign> eq '-' ), :node($/) );
411411
}
412+
elsif $<uniprop> {
413+
my $uniprop := ~$<uniprop>;
414+
$qast := QAST::Regex.new( $uniprop, :rxtype<uniprop>,
415+
:negate( $<sign> eq '-' && $<invert> ne '!' # $<sign> ^^ $<invert>
416+
|| $<sign> ne '-' && $<invert> eq '!' ), :node($/) );
417+
}
412418
else {
413419
my @alts;
414420
for $<charspec> {

src/QRegex/P6Regex/Grammar.nqp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ grammar QRegex::P6Regex::Grammar is HLL::Grammar {
193193
]?
194194
}
195195

196-
token assertion:sym<[> { <?before '['|'+'|'-'> <cclass_elem>+ }
196+
# Character-class style assertion: one or more <cclass_elem>s.  The lookahead
# admits ':' in addition to '[', '+' and '-' so that the first element may be
# a ':'-introduced Unicode property term.
token assertion:sym<[> { <?before '['|'+'|'-'|':'> <cclass_elem>+ }
197197

198198
token cclass_elem {
199199
$<sign>=['+'|'-'|<?>]
@@ -209,6 +209,7 @@ grammar QRegex::P6Regex::Grammar is HLL::Grammar {
209209
)*
210210
\s* ']'
211211
| $<name>=[\w+]
212+
| ':' $<invert>=['!'|<?>] $<uniprop>=[\w+]
212213
]
213214
<.normspace>?
214215
}

src/ops/nqp.ops

100644100755
Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ BEGIN_OPS_PREAMBLE
1414
#include "../6model/serialization_context.h"
1515
#include "pmc_sub.h"
1616

17+
#if PARROT_HAS_ICU
18+
# include <unicode/uchar.h>
19+
#endif
20+
1721
/* Did we do the dynop setup yet?
1822
* XXX Relies on this happening once in a single thread at startup. */
1923
static INTVAL initialized = 0;
@@ -1792,7 +1796,6 @@ inline op nqp_radix(out PMC, in INT, in STR, in INT, in INT) :base_core {
17921796
$1 = out;
17931797
}
17941798

1795-
17961799
/*
17971800

17981801
=item nqp_get_runcore()
@@ -1816,5 +1819,93 @@ inline op nqp_set_runcore(in STR) :flow {
18161819
Parrot_runcore_switch(interp, $1);
18171820
restart NEXT();
18181821
}
1822+
1823+
/*
1824+
1825+
=item inline op is_uprop(out INT, in STR, in STR, in INT)
1826+
1827+
Sets $1 to a true value if the character at index $4 of string $3 has the Unicode property
1828+
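named $2; otherwise $1 is set to a false value. An exception is thrown if $2 is not a recognized property name.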
1829+
1830+
=cut
1831+
1832+
*/
1833+
inline op is_uprop(out INT, in STR, in STR, in INT) :base_core {
    /*
     * $1 (out): 1 if the character at index $4 of $3 has the Unicode
     *           property named by $2, otherwise 0.
     * $2 (in):  property name.  Tried, in order, as: a block name prefixed
     *           with "In", a bidi class prefixed with "Bidi", a general
     *           category value, a binary property name, a script name.
     * $3 (in):  string to inspect.
     * $4 (in):  character index into $3.
     *
     * Throws EXCEPTION_ICU_ERROR when $2 matches none of the above, or
     * unconditionally when built without ICU.
     */
#if PARROT_HAS_ICU
    char      *cstr;
    INTVAL     ord;
    int32_t    strwhich, ordwhich;
    UProperty  strprop;
    opcode_t  *handler;

    /* There is no character at or beyond the end of the string (this
     * includes index 0 of an empty string), so nothing can match; report
     * "no match" rather than handing an out-of-range index to
     * Parrot_str_indexed.  Negative indexes are passed through untouched. */
    if ($4 >= 0 && (UINTVAL)$4 >= $3->strlen) {
        $1 = 0;
        goto NEXT();
    }

    ord  = Parrot_str_indexed(interp, $3, $4);
    cstr = Parrot_str_to_cstring(interp, $2);

    /* try block tests */
    if (strncmp(cstr, "In", 2) == 0) {
        strwhich = u_getPropertyValueEnum(UCHAR_BLOCK, cstr+2);
        /* An unknown block name falls through to the remaining lookups,
         * so names that merely start with "In" still get a chance. */
        if (strwhich != UCHAR_INVALID_CODE) {
            ordwhich = u_getIntPropertyValue(ord, UCHAR_BLOCK);
            $1 = (strwhich == ordwhich);
            Parrot_str_free_cstring(cstr);
            goto NEXT();
        }
    }

    /* try bidi tests */
    if (strncmp(cstr, "Bidi", 4) == 0) {
        strwhich = u_getPropertyValueEnum(UCHAR_BIDI_CLASS, cstr+4);
        if (strwhich != UCHAR_INVALID_CODE) {
            ordwhich = u_getIntPropertyValue(ord, UCHAR_BIDI_CLASS);
            $1 = (strwhich == ordwhich);
            Parrot_str_free_cstring(cstr);
            goto NEXT();
        }
    }

    /* try property value aliases */
    strwhich = u_getPropertyValueEnum(UCHAR_GENERAL_CATEGORY_MASK, cstr);
    if (strwhich != UCHAR_INVALID_CODE) {
        ordwhich = u_getIntPropertyValue(ord, UCHAR_GENERAL_CATEGORY_MASK);
        /* Both values are bit masks here, hence the AND instead of ==. */
        $1 = ((strwhich & ordwhich) != 0);
        Parrot_str_free_cstring(cstr);
        goto NEXT();
    }

    /* try property */
    strprop = u_getPropertyEnum(cstr);
    if (strprop != UCHAR_INVALID_CODE) {
        $1 = (u_hasBinaryProperty(ord, strprop) != 0);
        Parrot_str_free_cstring(cstr);
        goto NEXT();
    }

    /* try script aliases */
    strwhich = u_getPropertyValueEnum(UCHAR_SCRIPT, cstr);
    if (strwhich != UCHAR_INVALID_CODE) {
        ordwhich = u_getIntPropertyValue(ord, UCHAR_SCRIPT);
        $1 = (strwhich == ordwhich);
        Parrot_str_free_cstring(cstr);
        goto NEXT();
    }

    /* unrecognized property name */
    Parrot_str_free_cstring(cstr);
    handler = Parrot_ex_throw_from_op_args(interp, NULL,
            EXCEPTION_ICU_ERROR,
            "Unicode property '%Ss' not found", $2);
    goto ADDRESS(handler);
#else
    opcode_t * const handler = Parrot_ex_throw_from_op_args(interp, NULL,
            EXCEPTION_ICU_ERROR,
            "ICU not loaded", $2);
    goto ADDRESS(handler);
#endif
}
18191910

18201911

0 commit comments

Comments
 (0)