Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Allow Str.split to take more than one needle
This should make most of the cases that Text::CSV handles a *lot* faster.
  • Loading branch information
lizmat committed Nov 5, 2015
1 parent 1a60da9 commit 85033f7
Showing 1 changed file with 88 additions and 0 deletions.
88 changes: 88 additions & 0 deletions src/core/Str.pm
Expand Up @@ -1017,6 +1017,94 @@ my class Str does Stringy { # declared in BOOTSTRAP
}.new(self,$limit));
}
}
multi method split(Str:D \string, @needles;; :$all, :$keep-indices) {
return self.split(string,rx/ @needles /,:$all)
if Rakudo::Internals.NOT_ALL_DEFINED_TYPE(@needles,Cool);

my str $str = nqp::unbox_s(string);
my $positions := nqp::list;
my $needles := nqp::list;
my $needle-chars := nqp::list;
my $sorted := nqp::list;
my int $found = -1;
my $fired;
for @needles.kv -> $needle-index, $needle {
my str $need = nqp::unbox_s($needle.Str);
my int $chars = nqp::chars($need);
nqp::push($needles,$need);
nqp::push($needle-chars,$chars);

my int $pos;
my int $i;
my int $seen = nqp::elems($positions);
while nqp::isge_i($i = nqp::index($str, $need, $pos),0) {
nqp::push($positions,Pair.new($i,nqp::unbox_i($needle-index)));
nqp::push($sorted,nqp::unbox_i($found = $found + 1));
$pos = $i + $chars;
}
$fired = $fired + 1 if nqp::elems($positions) > $seen;
}

nqp::p6sort($sorted, -> int $a, int $b {
# $a <=> $b || $b.chars <=> $a.chars, aka pos asc, length desc
nqp::getattr(nqp::atpos($positions,$a),Pair,'$!key')
<=> nqp::getattr(nqp::atpos($positions,$b),Pair,'$!key')
|| nqp::atpos($needle-chars,
nqp::getattr(nqp::atpos($positions,$b),Pair,'$!value'))
<=> nqp::atpos($needle-chars,
nqp::getattr(nqp::atpos($positions,$a),Pair,'$!value'))
}) if $fired > 1;

my $pair;
my int $from;
my int $needle-index;
my int $pos;
my $result := nqp::list;
if $keep-indices {
while nqp::elems($sorted) {
$pair := nqp::atpos($positions,nqp::shift($sorted));
$from = nqp::getattr($pair,Pair,'$!key');
if nqp::isge_i($from,$pos) { # not hidden by other needle
$needle-index = nqp::getattr($pair,Pair,'$!value');
nqp::push($result,nqp::substr($str,$pos,$from - $pos));
nqp::push($result,$needle-index);
$pos = $from + nqp::atpos($needle-chars,$needle-index);
}
}
}
elsif $all {
while nqp::elems($sorted) {
$pair := nqp::atpos($positions,nqp::shift($sorted));
$from = nqp::getattr($pair,Pair,'$!key');
if nqp::isge_i($from,$pos) { # not hidden by other needle
$needle-index = nqp::getattr($pair,Pair,'$!value');
nqp::push($result,nqp::substr($str,$pos,$from - $pos));
nqp::push($result,nqp::atpos($needles,$needle-index));
$pos = $from + nqp::atpos($needle-chars,$needle-index);
}
}
}
else {
while nqp::elems($sorted) {
$pair := nqp::atpos($positions,nqp::shift($sorted));
$from = nqp::getattr($pair,Pair,'$!key');
if nqp::isge_i($from,$pos) { # not hidden by other needle
nqp::push($result,nqp::substr($str,$pos,$from - $pos));
$pos = $from + nqp::atpos(
$needle-chars,nqp::getattr($pair,Pair,'$!value'));
}
}
}
nqp::push($result,nqp::substr($str,$pos));

$result
}
multi method split(Str:D \string,@needles,$parts;; :$all, :$keep-indices) {
my @result = self.split(string,@needles,:$all,:$keep-indices);
nqp::istype($parts,Whatever) || $parts === Inf
?? @result
!! @result.head($all || $keep-indices ?? $parts + $parts !! $parts)
}

method samecase(Str:D: Str $pattern) {
my str $str = nqp::unbox_s(self);
Expand Down

0 comments on commit 85033f7

Please sign in to comment.