Skip to content

Commit

Permalink
RakuAST: more =table tweaks
Browse files Browse the repository at this point in the history
- bring back original <ws> semantics, instead of just space (0x20)
- re-implement legacy podify sanitation into a single pass nqp
  codepoint check, while adding support for removing backslashes
  • Loading branch information
lizmat committed May 15, 2023
1 parent e92e6f2 commit bfb1fe3
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 26 deletions.
41 changes: 22 additions & 19 deletions src/core.c/RakuAST/Fixups.pm6
Expand Up @@ -550,9 +550,10 @@ augment class RakuAST::Doc::Block {
# Lookup table indexed by codepoint: holds 1 for every character that may
# appear in a row divider line, so a single positional lookup tells whether
# a codepoint is a row divider character.
my int @row-dividers;
@row-dividers[.ord] = 1 for ' ', '_', '-', '+', '|', '=';

# Codepoints that get compared against while scanning a table line.
my int32 $space = 32; # " "
my int32 $plus = 43; # "+"
my int32 $pipe = 124; # "|"
my int32 $space = 32; # " "
my int32 $plus = 43; # "+"
my int32 $pipe = 124; # "|"
# Property code for "General_Category", handed to nqp::getuniprop_str
# when checking whether a codepoint is in category "Zs".
my int $gcprop = nqp::unipropcode("General_Category");

method interpret-as-table(RakuAST::Doc::Block:D: $spaces, @matched --> Nil) {

Expand Down Expand Up @@ -736,6 +737,12 @@ in line '$line'",
# Parse the given line and find out offsets of columns and dividers
my sub columnify($line) {

# Return a true value if the given codepoint is either a plain space
# (0x20) or has the Unicode General_Category "Zs", false otherwise.
my sub is-ws(int $codepoint) {
    nqp::if(
      nqp::iseq_i($codepoint,$space),
      1,                                 # plain space, no lookup needed
      nqp::iseq_s(                       # otherwise check the category
        nqp::getuniprop_str($codepoint,$gcprop),
        'Zs'
      )
    )
}

nqp::strtocodes($line,nqp::const::NORMALIZE_NFC,my int32 @codes);

my int $elems = nqp::elems(@codes);
Expand All @@ -744,36 +751,32 @@ in line '$line'",
my int @offsets; # offsets where columns start (except first)

# Check the current line for column dividers. Sets the @dividers
# and @offsets arrays, returns whether this line should be considered
# a row (any char that is not a row|column divider).
# and @offsets arrays, returns whether this line should be
# considered a row (any char that is not a row|column divider).
my sub inspect-real-dividers() {
my int $prev = $space; # fake space at start for leading |
my int $curr;
my int $is-row;
my int $i = -1;
my int32 $prev = $space; # fake space at start for leading |
my int32 $curr;
my int $is-row;
my int $i = -1;
nqp::while(
nqp::islt_i(++$i,$elems),
nqp::if( # for all chars
nqp::if( # for all chars
nqp::iseq_i(($curr = nqp::atpos_i(@codes,$i)),$pipe)
|| nqp::iseq_i($curr,$plus),
nqp::stmts( # | or +
nqp::stmts( # | or +
nqp::push_s(@dividers,nqp::chr($curr)),
nqp::if(
nqp::iseq_i($prev,$space)
&& nqp::iseq_i(
nqp::atpos_i(@codes,nqp::add_i($i,1)),
$space
),
nqp::stmts( # real column divider
is-ws($prev) && is-ws(nqp::atpos_i(@codes,$i + 1)),
nqp::stmts( # real column divider
nqp::push_i(@offsets,nqp::add_i(++$i,1)),
($prev = 0),
)
)
),
nqp::stmts( # NOT | or +
nqp::stmts( # NOT | or +
nqp::unless(
nqp::atpos_i(@row-dividers,$curr),
($is-row = 1), # not a row divider
($is-row = 1), # not a row divider
),
($prev = $curr)
)
Expand Down
48 changes: 41 additions & 7 deletions src/core.c/RakuAST/LegacyPodify.pm6
Expand Up @@ -3,14 +3,48 @@

class RakuAST::LegacyPodify {

# Codepoints that sanitize() compares against, plus the property code
# needed to look up the Unicode General_Category of a codepoint.
my int32 $nl = 10; # "\n"
my int32 $space = 32; # " "
my int32 $bslash = 92; # "\\"
my int $gcprop = nqp::unipropcode("General_Category");

# basically mangle text to just single spaces
my sub sanitize(Str:D $string --> Str:D) {
$string eq "\n"
?? ' '
!! $string
.subst(/ \n+ $/)
.subst("\n", ' ', :global)
.subst(/\s+/, ' ', :global)
# Collapse the given text in a single pass over its codepoints:
#  - a string that is exactly one newline becomes a single space
#  - trailing newlines are removed
#  - each run of newlines, spaces and "Zs" codepoints becomes one space
#  - backslashes are dropped
# Takes a native string, returns the mangled text as a Str (the empty
# string if nothing is left).
my sub sanitize(str $string --> Str:D) {
    return ' ' if $string eq "\n";

    nqp::strtocodes($string,nqp::const::NORMALIZE_NFC,my int32 @input);
    my int $end = nqp::elems(@input);
    return '' unless $end;

    # remove any trailing newlines; when the loop stops, $end is the
    # index of the last codepoint that is kept
    nqp::while(
      $end && nqp::iseq_i(nqp::atpos_i(@input,--$end),$nl),
      nqp::pop_i(@input)
    );

    # The input consisted of newlines only, so everything was popped.
    # $end is 0 at this point, which would still let the loop below run
    # once and read index 0 of the now empty array.
    return '' unless nqp::elems(@input);

    my int32 @output;
    my int32 $curr;
    my int32 $prev;      # last codepoint pushed to @output
    my int $i = -1;

    nqp::while(
      nqp::isle_i(++$i,$end),            # $end is an index, hence "<="
      nqp::if(                                       # for all codes
        nqp::iseq_i(($curr = nqp::atpos_i(@input,$i)),$nl)
          || nqp::iseq_i($curr,$space)
          || nqp::iseq_s(nqp::getuniprop_str($curr,$gcprop),'Zs'),
        nqp::if(                                     # \n or \h
          nqp::isne_i($prev,$space),
          nqp::push_i(@output,$prev = $space),       # first space
        ),
        nqp::if(                                     # not \n nor \h
          nqp::isne_i($curr,$bslash),
          # a skipped backslash leaves $prev alone, so whitespace on
          # both sides of it still collapses into a single space
          nqp::push_i(@output,$prev = $curr)         # not a \\
        )
      )
    );

    nqp::strfromcodes(@output)
}

# sanitize the given string, including any handling of Z<markup>
Expand Down

0 comments on commit bfb1fe3

Please sign in to comment.