Skip to content

Commit

Permalink
* refactor our filters parser and update Makefile with requirements
Browse files Browse the repository at this point in the history
  • Loading branch information
ruz committed Jan 22, 2009
1 parent ea40bd0 commit 6a68fe4
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 74 deletions.
3 changes: 3 additions & 0 deletions Makefile.PL
Expand Up @@ -23,6 +23,9 @@ requires 'Class::Date';
# converts unicode into readable ASCII
requires 'Text::Unidecode';

requires 'Regexp::Common';
requires 'Parse::BooleanLogic' => '0.06';

requires 'Config::General'; # This should reflect the config file format you've chosen
# See Catalyst::Plugin::ConfigLoader for supported formats
catalyst;
Expand Down
154 changes: 80 additions & 74 deletions lib/Plagger/Plugin/Globus/GrepTags.pm
Expand Up @@ -5,6 +5,9 @@ use utf8;
package Plagger::Plugin::Globus::GrepTags;
use base qw( Plagger::Plugin );

use Parse::BooleanLogic '0.06';
use Regexp::Common qw(delimited);

sub register {
my($self, $context) = @_;
$context->register_hook(
Expand All @@ -13,12 +16,6 @@ sub register {
);
}

# TODO:
# add support for advanced filtering, for example:
# (catalyst AND perl) OR
# (catalyst AND !adobe AND !ati AND !cisco AND !text:"adobe flash catalyst" AND !text:"flash catalyst")
# All this should be stored in a plugin's config

sub feed {
my($self, $context, $args) = @_;

Expand All @@ -30,20 +27,44 @@ sub feed {
}
}

use Parse::BooleanLogic;

{ my $cache;
sub boolean_parser {
sub filter_entries {
my $self = shift;
return $cache ||= Parse::BooleanLogic->new( operators => ['', 'OR'] );
my $feed = shift;

my $filter = $feed->meta->{'globus'}{'config'}{'filter'};
return unless $filter;

my $parser = $self->parser;
my $tree = $self->parse_filter( $filter );
my $solver = $self->filter_solver;

foreach my $entry ( $feed->entries ) {
next if $parser->solve( $tree, $solver, $entry );

$self->log(
debug => "Deleting " . $entry->permalink
." since it doesn't match filter '$filter'"
);

$feed->delete_entry($entry);
}
}

{ my %cache;
sub parse_filter {
my $self = shift;
my $string = shift;

return $cache{$string} ||= $self->parser->as_array(
$string, operand_cb => $self->condition_parser
);
} }

use Regexp::Common qw(delimited);
my $re_delim = qr{$RE{delimited}{-delim=>qq{\'\"}}};
my $re_value = qr{$re_delim|[^"']+};
my $re_cs_values = qr{$re_value(?:,$re_value)*};


sub dq($) {
my $s = $_[0];
return $s unless $s =~ /^$re_delim$/o;
Expand All @@ -53,71 +74,56 @@ sub dq($) {
return $s;
}

our $operation_parser = sub {
my $op = shift;
if ( $op =~ /^(!?)(tag|author|content|title):($re_cs_values)$/o ) {
my ($negative, $field, $value) = ($1, $2, $3);
my @values = map dq $_, grep defined && length, split /,(?=$re_value|\z)/, $value;
return { negative => $1, op => $field, values => \@values };
} elsif ( $op =~ /^(has)(_no)?:(tag)$/ ) {
return { negative => $2, op => $1, values => [$3] };
} else {
return { op => 'content', values => [$op] }
}
};

our $boolean_solver = sub {
my ($cond, $entry) = @_;

use Data::Dumper;
print Dumper( $cond );

if ( $cond->{'op'} eq 'tag' ) {
my $tags = $entry->tags;
foreach my $et ( @{ $entry->tags } ) {
return !$cond->{'negative'} if grep lc($_) eq lc($et), @{ $cond->{'values'} };
}
return $cond->{'negative'};
} elsif ( $cond->{'has'} && $cond->{'values'}[0] eq 'tag' ) {
return !$cond->{'negative'} if @{ $entry->tags };
return $cond->{'negative'};
} elsif ( $cond->{'op'} eq 'author' ) {
die "TODO: not implemented";
} elsif ( $cond->{'op'} eq 'title' || $cond->{'op'} eq 'content' ) {
my $text = lc $entry->title_text;
$text .= "\n". $entry->body_text
if $cond->{'op'} eq 'content';

foreach my $v ( @{ $cond->{'values'} } ) {
return !$cond->{'negative'} if index($text, lc($v)) > 0;
}
return $cond->{'negative'};
} else {
warn "Don't know how to deal with '$cond', yet";
return 1;
}
};

sub filter_entries {
{ my $cache;
sub parser {
my $self = shift;
my $feed = shift;

my $filter = $feed->meta->{'globus'}{'config'}{'filter'};
return unless $filter;

my $parser = $self->boolean_parser;
my $tree = $parser->as_array( $filter, operand_cb => $operation_parser );

foreach my $entry ( $feed->entries ) {
next if $parser->solve( $tree, $boolean_solver, $entry );
return $cache ||= Parse::BooleanLogic->new( operators => ['', 'OR'] );
} }

$self->log(
debug => "Deleting " . $entry->permalink
." since it doesn't match filter '$filter'"
);
sub filter_solver {
return sub {
my ($cond, $entry) = @_;

if ( $cond->{'op'} eq 'tag' ) {
my $tags = $entry->tags;
foreach my $et ( @{ $entry->tags } ) {
return !$cond->{'negative'} if grep lc($_) eq lc($et), @{ $cond->{'values'} };
}
return $cond->{'negative'};
} elsif ( $cond->{'has'} && $cond->{'values'}[0] eq 'tag' ) {
return !$cond->{'negative'} if @{ $entry->tags };
return $cond->{'negative'};
} elsif ( $cond->{'op'} eq 'author' ) {
die "TODO: not implemented";
} elsif ( $cond->{'op'} eq 'title' || $cond->{'op'} eq 'content' ) {
my $text = lc $entry->title_text;
$text .= "\n". $entry->body_text
if $cond->{'op'} eq 'content';

foreach my $v ( @{ $cond->{'values'} } ) {
return !$cond->{'negative'} if index($text, lc($v)) > 0;
}
return $cond->{'negative'};
} else {
warn "Don't know how to deal with '$cond', yet";
return 1;
}
};
}

$feed->delete_entry($entry);
}
sub condition_parser {
return sub {
my $op = shift;
if ( $op =~ /^(!?)(tag|author|content|title):($re_cs_values)$/o ) {
my ($negative, $field, $value) = ($1, $2, $3);
my @values = map dq $_, grep defined && length, split /,(?=$re_value|\z)/, $value;
return { negative => $1, op => $field, values => \@values };
} elsif ( $op =~ /^(has)(_no)?:(tag)$/ ) {
return { negative => $2, op => $1, values => [$3] };
} else {
return { op => 'content', values => [$op] }
}
};
}

1;

0 comments on commit 6a68fe4

Please sign in to comment.