Skip to content

Commit

Permalink
Grep improved
Browse files Browse the repository at this point in the history
  • Loading branch information
shinobi committed Mar 3, 2018
1 parent 1271cef commit 9a757fa
Show file tree
Hide file tree
Showing 6 changed files with 172 additions and 90 deletions.
10 changes: 0 additions & 10 deletions TODO

This file was deleted.

130 changes: 118 additions & 12 deletions lib/Data/StaticTable.pm6
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,16 @@ class StaticTable::Query {
return $q;
}

method grep(Str $heading, Mu $matcher where { -> Regex {}($_); True }) {
method grep(Mu $matcher where { -> Regex {}($_); True }, Str $heading,
Bool :$n = False, # Row numbers
Bool :$r = False, # Array of array
Bool :$h = False, # Array of hashes (Default)
Bool :$nr = False, # Row numbers => row data (array)
Bool :$nh = False, # Row numbers => row data (hash)
) {
my $default = $h;
$default = True if all($n, $r, $h, $nr, $nh) == False;
X::Data::StaticTable.new("Method grep only accepts one adverb at a time").throw unless one($n, $r, $default, $nr, $nh) == True;
my Data::StaticTable::Position @rownums;
if (%!indexes{$heading}:exists) { #-- Search in the index if it is available. Should be faster.
my @keysearch = grep {.defined and $matcher}, %!indexes{$heading}.keys;
Expand All @@ -315,9 +324,26 @@ class StaticTable::Query {
} else {;
@rownums = 1 <<+>> ( grep {.defined and $matcher}, :k, $!T.column($heading) );
}
return @rownums.sort.list;
if ($n) { # Returning rowlist
return @rownums.sort.list #-- :n
} elsif ($r || $default) { # Returning an array of arrays or array of hashes
my @rows;
for @rownums.sort -> $row-num {
if ($r) { push @rows, $!T.row($row-num) } #-- :r
else { push @rows, $!T[$row-num] } #-- :h
}
return @rows;
} else { # A hash of row-num => data in the row or row-num => a row hash
my %hash;
for @rownums.sort -> $row-num {
if ($nh) { %hash{$row-num} = $!T[$row-num] } #-- :nh
else { %hash{$row-num} = $!T.row($row-num) } #-- :nr
}
return %hash;
}
}

method where() { ... };

#==== Index ====
method add-index(Str $heading) {
Expand Down Expand Up @@ -674,9 +700,10 @@ You can use hash-like keys, to get a specific index for a column
$Q1<Column1>
$Q1{'Column1'}
Both can get you the index (sames as generated by C<generate-index> in a
Both can get you the index (the same you could get by using C<generate-index> in a
C<StaticTable>).
=head2 C<method new(Data::StaticTable $T, *@to-index)>
You need to specify an existing C<StaticTable> to create this object. Optionally
Expand Down Expand Up @@ -710,21 +737,100 @@ Returns the values indexed.
Returns the hash of the same indexes in the C<Query> object.
=head2 C<method grep(Str $heading, Mu $matcher where { -> Regex {}($_); True })>
=head2 C<method grep(Mu $matcher where { -E<gt> Regex {}($_); True }, Str $heading, Bool :$h = True, Bool :$n = False, Bool :$r = False, Bool :$nr = False, Bool :$nh = False)>
Allows to use grep over a column. It returns a list of row numbers where a
regular expression matches.
Allows you to use grep over a column. Depending on the flags used, it returns the
resulting row information for all the rows where there are matches. You can use
not only a regexp, but also a C<Junction> of C<Regex> elements.
You can not only use a regxep, but a C<Junction> of C<Regex> elements.
Examples:
Examples of Regexp and Junctions:
# Get the rownumbers where the column 'A' contains '9'
my Data::StaticTable::Position @rs1 = $q.grep("A", rx/9/);
my Data::StaticTable::Position @rs1 = $q.grep(rx/9/, "A"):n;
# Get the rownumbers where the column 'A' contains 'n' and 'e'
my Data::StaticTable::Position @rs2 = $q.grep("A", all(rx/n/, rx/e/));
my Data::StaticTable::Position @rs2 = $q.grep(all(rx/n/, rx/e/), "A"):n;
When you use the flag C<:n>, you can use these results later with the method C<take>
=head3 Flags
Similar to the default grep method, this method accepts flags that allow you to
receive the information in various ways.
Consider this StaticTable and its Query:
my $t = Data::StaticTable.new(
<Countries Import Tons>,
(
'US PE CL', 'Copper', 100, # Row 1
'US RU', 'Alcohol', 50, # Row 2
'IL UK', 'Processor', 12, # Row 3
'UK', 'Tuxedo', 1, # Row 4
'JP CN', 'Tuna', 10, # Row 5
'US RU CN', 'Uranium', 0.01 # Row 6
)
);
my $q = Data::StaticTable::Query.new($t)
=over 4
You can use these results later with the method C<take>
=item C<:n>
Returns only the row numbers.
This is very useful to combine with the C<take> method.
my @a = $q.grep(all(rx/US/, rx/RU/), 'Countries'):n;
# Result: The array (2, 6)
=item C<:r>
Returns the rows, just data, no headers
my @a = $q.grep(all(rx/US/, rx/RU/), 'Countries'):r;
# Result: The array
# [
# ("US RU", "Alcohol", 50),
# ("US RU CN", "Uranium", 0.01)
# ]
=item C<:h>
Returns the rows as an array of hashes, each one keyed by the column headings.
This is the default mode. You don't need to use the C<:h> flag to get this result.
my @a1 = $q.grep(all(rx/US/, rx/RU/), 'Countries'):h; # :h is the default
my @a2 = $q.grep(all(rx/US/, rx/RU/), 'Countries'); # @a1 and @a2 are identical
# Result: The array
# [
# {:Countries("US RU"), :Import("Alcohol"), :Tons(50)},
# {:Countries("US RU CN"), :Import("Uranium"), :Tons(0.01)}
# ]
=item C<:nr>
Like C<:r> but in a hash, with the row number as the key
my %h = $q.grep(all(rx/US/, rx/RU/), 'Countries'):nr;
# Result: The hash
# {
# "2" => $("US RU", "Alcohol", 50),
# "6" => $("US RU CN", "Uranium", 0.01)
# }
=item C<:nh>
Like C<:h> but in a hash, with the row number as the key
my %h = $q.grep(all(rx/US/, rx/RU/), 'Countries'):nh;
# Result: The hash
# {
# "2" => ${:Countries("US RU"), :Import("Alcohol"), :Tons(50)},
# "6" => ${:Countries("US RU CN"), :Import("Uranium"), :Tons(0.01)}
# }
=back
=head2 C<method add-index($column-heading)>
Expand Down
2 changes: 0 additions & 2 deletions pod2htmd.tmp

This file was deleted.

44 changes: 15 additions & 29 deletions t/StaticTable-examples.t
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use Data::StaticTable;

ok(1==1, "Example code");

# == Basic examples of use ==
my $t1 = Data::StaticTable.new(
<Col1 Col2 Col3>,
(
Expand Down Expand Up @@ -33,6 +34,7 @@ say $t1.row(1); # Prints (1 2 3)
my Data::StaticTable::Position @rowlist = (1,3);
my $t2 = $t1.take( @rowlist ); # $t2 is $t1 but only containing rows 1 and 3

# == Query object use and grep ==
my $t3 = Data::StaticTable.new(
5,
(
Expand All @@ -43,43 +45,27 @@ my $t3 = Data::StaticTable.new(
"Turkey" , "Uganda" , "Uruguay" , "Vatican" , "Zambia"
)
);
# Query object
my $q3 = Data::StaticTable::Query.new($t3);
my $q3 = Data::StaticTable::Query.new($t3); # Query object
say $t3.header; # Prints [A B C D E]
$q3.add-index('A'); # Searches (grep) on column A will be faster now

# Rows with a column A that has 'e' AND 'n', at the same time
my Data::StaticTable::Position @r1 = $q3.grep("A", all(rx/n/, rx/e/)); # Rows 1 and 2
# == Rows with a column A that has 'e' AND 'n', at the same time ==
my Data::StaticTable::Position @r1 = $q3.grep(all(rx/n/, rx/e/), 'A'):n; # Rows 1 and 2

# Rows with a column C that has 'y'
my Data::StaticTable::Position @r2 = $q3.grep('C', rx/y/); # Rows 3 and 5
# == Rows with a column C that has 'y' ==
my Data::StaticTable::Position @r2 = $q3.grep(rx/y/, 'C'):n; # Rows 3 and 5
my $t4 = $t3.take(@r2); # Table $t4 is $t3 with rows 3 and 5 only
say $t4.display; # Display contents of $t4

# Simple rowset constructor, each array is a full row
my $t5 = Data::StaticTable.new(
(1000, 2000, 3000, 4000),
(1, 2, 3, 4),
(0.1, 0.2, 0.3, 0.4, 0.5)
);
# == grep modes ==
say $q3.grep(rx/Peru/, 'B'); # Default grep mode, :h
# Displays [B => Peru A => Paraguay E => Singapore D => Rwanda C => Quatar]

say $q3.grep(rx/Peru/, 'B'):n; # :n returns only the numbers of the matching rows
# Displays (4)

#This will create a StaticTable with 5 columns. Each column will be named
#automatically as A, B, C... etc.
my $t6 = Data::StaticTable.new(
(1000, 2000, 3000, 4000),
(1, 2, 3, 4),
(0.1, 0.2, 0.3, 0.4, 0.5)
):data-has-headers;
#This will create a StaticTable with 4 columns, because the first
#row will be taken as the header. The value 0.5 is discarded
say $q3.grep(rx/Peru/, 'B'):r; # :r returns the array of the data, no headers
# Displays [Paraguay Peru Quatar Rwanda Singapore]

my @personal-data =
{ name => 'John', age => 50, car => 'sedan' },
{ name => 'Mary', age => 70, car => 'truck' },
{ name => 'Diego', age => 15, console => 'xbox' };
my $t7 = Data::StaticTable.new(@personal-data):set-of-hashes;
diag $t7.display;
#This will create a table with 3 rows, and the columns
#'name', 'age', 'car', 'console' and fill them appropiately

done-testing;
12 changes: 6 additions & 6 deletions t/StaticTable-perf.t
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,12 @@ $TIME = now - $TIME;
diag "== Index creation 'county' took : $TIME secs. ==";

$TIME = now;
$q1.grep("county", /CLAY/);
$q1.grep(/CLAY/, "county"):n;
$TIME = now - $TIME;
diag "== Search with index (scored $score-county) took : $TIME secs. ==";

$TIME = now;
$q2.grep("county", /CLAY/);
$q2.grep(/CLAY/, "county"):n;
$TIME = now - $TIME;
diag "== Search without index took : $TIME secs. ==";

Expand All @@ -88,18 +88,18 @@ $TIME = now;
$TIME = now - $TIME;
diag "== Index creation 'policyID' took : $TIME secs. ==";
$TIME = now;
$q1.grep("policyID", /167630/);
$q1.grep(/167630/, "policyID"):n;
$TIME = now - $TIME;
diag "== Search with index (scored $score-policyID) took : $TIME secs. ==";
$TIME = now;
$q2.grep("policyID", /167630/);
$q2.grep(/167630/, "policyID"):n;
$TIME = now - $TIME;
diag "== Search without index took : $TIME secs. ==";

ok(
$q1.grep("policyID", /167630/)
($q1.grep(/167630/, "policyID"):n)
~~
$q2.grep("policyID", /167630/),
($q2.grep(/167630/, "policyID"):n),
"Grep with index and without are equivalent"
);

Expand Down
64 changes: 33 additions & 31 deletions t/StaticTable-query.t
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@ my $t1 = Data::StaticTable.new(
diag $t1.display;

diag "== Check indexes ==";
my $q1 = Data::StaticTable::Query.new($t1);
for ($t1.header) -> $h { $q1.add-index($h) }; #--- Generate all indexes
my $q1 = Data::StaticTable::Query.new($t1, $t1.header);

ok($q1<Dim4>.elems == 4, "Index of Dim4 has 4 elements");
ok($q1<Dim1>:exists == True, "We can check if a column index has been generated");
Expand All @@ -32,35 +31,52 @@ ok($q1<Dim3><6> ~~ (2, 4), "We can check that the value 6 appears in column Dim3

diag "== Searching without index ==";
my $q2 = Data::StaticTable::Query.new($t1);
ok($q2.grep("Dim3", rx/6/) ~~ (2, 4), "Grep test returns rows 2,4");
ok($q2.grep("Dim3", any(rx/9/, rx/6/)) ~~ (2, 3, 4), "Grep test returns rows 2,3,4" );
ok($q2.grep("Dim2", one(rx/1/, rx/5/)) ~~ (1, 4), "Grep test returns rows 1,4");
ok($q2.grep("Dim2", any(rx/1/, rx/5/)) ~~ (1, 2, 4), "Grep test returns rows 1,2,4");
ok($q2.grep("Dim2", all(rx/1/, rx/5/)) ~~ (2,), "Grep test returns row 2");
ok($q2.grep("Dim2", none(rx/1/, rx/5/)) ~~ (3, 5), "Grep test returns rows 3,5");
ok($q2.grep("Dim1", any(rx/ALPHA/, rx/0/)) ~~ (4, 5, 6), "Grep test returns rows 4,5,6");
ok($q2.grep(rx/6/, "Dim3"):n ~~ (2, 4), "Grep test returns rows 2,4");
ok($q2.grep(any(rx/9/, rx/6/), "Dim3"):n ~~ (2, 3, 4), "Grep test returns rows 2,3,4" );
ok($q2.grep(one(rx/1/, rx/5/), "Dim2"):n ~~ (1, 4), "Grep test returns rows 1,4");
ok($q2.grep(any(rx/1/, rx/5/), "Dim2"):n ~~ (1, 2, 4), "Grep test returns rows 1,2,4");
ok($q2.grep(all(rx/1/, rx/5/), "Dim2"):n ~~ (2,), "Grep test returns row 2");
ok($q2.grep(none(rx/1/, rx/5/), "Dim2"):n ~~ (3, 5), "Grep test returns rows 3,5");
ok($q2.grep(any(rx/ALPHA/, rx/0/), "Dim1"):n ~~ (4, 5, 6), "Grep test returns rows 4,5,6");

ok(
$q1.grep("Dim2", any(rx/1/, rx/5/)) ~~ $q2.grep("Dim2", any(rx/1/, rx/5/)),
($q1.grep(any(rx/1/, rx/5/), "Dim2"):n) ~~ ($q2.grep(any(rx/1/, rx/5/), "Dim2"):n),
"Grep with index and without are equivalent (#1)"
);

ok(
$q1.grep("Dim2", all(rx/1/, rx/5/)) ~~ $q2.grep("Dim2", all(rx/1/, rx/5/)),
($q1.grep(all(rx/1/, rx/5/), "Dim2"):n) ~~ ($q2.grep(all(rx/1/, rx/5/), "Dim2"):n),
"Grep with index and without are equivalent (#2)"
);

ok(
$q1.grep("Dim2", none(rx/1/, rx/5/)) ~~ $q2.grep("Dim2", none(rx/1/, rx/5/)),
($q1.grep(none(rx/1/, rx/5/), "Dim2"):n) ~~ ($q2.grep(none(rx/1/, rx/5/), "Dim2"):n),
"Grep with index and without are equivalent (#3)"
);

diag "== Create a new table from grep results ==";
my Data::StaticTable::Position @rows;
@rows.append($q2.grep("Dim2", one(rx/1/, rx/5/)));
@rows.append($q2.grep("Dim2", all(rx/1/, rx/5/)));
diag "== Check different grep modes ==";
my $rx = any(rx/ALPHA/, rx/0/);
diag $q1.grep($rx, "Dim1"):n.perl;
ok (($q1.grep($rx, "Dim1"):n) ~~ (4,5,6), "Expected rows");

my $t2 = $t1.take(@rows);
diag $q1.grep($rx, "Dim1"):r.perl;
ok($q1.grep($rx, "Dim1"):r.elems == 3, "Expected 3 rows");

diag $q1.grep($rx, "Dim1"):h.perl;
ok($q1.grep($rx, "Dim1"):h.elems == 3, "Expected 3 rows");

diag $q1.grep($rx, "Dim1"):nr.perl;
ok (($q1.grep($rx, "Dim1"):nr.keys) ~~ (4,5,6), "Expected rows indexes");

diag $q1.grep($rx, "Dim1"):nh.perl;
ok (($q1.grep($rx, "Dim1"):nh.keys) ~~ (4,5,6), "Expected rows indexes");

diag "== Create a new table from grep results of row numbers ==";
my Data::StaticTable::Position @rownums;
@rownums.append( $q2.grep(one(rx/1/, rx/5/), "Dim2"):n );
@rownums.append( $q2.grep(all(rx/1/, rx/5/), "Dim2"):n );

my $t2 = $t1.take(@rownums);
#-- This should generate a StaticTable with rows 1, 4 and 2. IN THAT ORDER.
diag $t2.display;

Expand All @@ -84,18 +100,4 @@ my $q33 = EVAL $q31.perl;
diag $q33.perl;
ok($q31.perl eq $q33.perl, "Can be serialized using .perl method");

diag "== More grep testing ==";
my $t4 = Data::StaticTable.new(
<Countries Import Tons>,
(
'US PE CL', 'Copper', 100,
'US RU', 'Alcohol', 50,
'IL UK', 'Processor', 12,
'UK', 'Tuxedo', 1,
'JP CN', 'Tuna', 10
)
);
my $q4 = Data::StaticTable::Query.new($t4, $t4.header);


done-testing;

0 comments on commit 9a757fa

Please sign in to comment.