Permalink
Browse files

clean up the API, add files

  • Loading branch information...
1 parent e72c289 commit a7f5b2ed7913878f11cedabc177c405e56f3d765 @kazuho committed Sep 23, 2010
Showing with 250 additions and 34 deletions.
  1. +4 −0 Changes
  2. +13 −0 Makefile.PL
  3. +85 −0 README
  4. +126 −20 lib/String/Filter.pm
  5. +22 −14 t/00base.t
View
@@ -0,0 +1,4 @@
+Revision history for Perl extension String::Filter
+
+0.01
+ - initial release
View
@@ -0,0 +1,13 @@
+use strict;
+use inc::Module::Install;
+
+name 'String-Filter';
+all_from 'lib/String/Filter.pm';
+readme_from 'lib/String/Filter.pm';
+
+requires 'Regexp::Assemble';
+test_requires 'HTML::Entities';
+test_requires 'Test::More' => 0.88; # done_testing()
+test_requires 'URI::Escape';
+
+WriteAll;
View
@@ -0,0 +1,85 @@
+NAME
+ String::Filter - a regexp-based string filter
+
+SYNOPSIS
+ # define the rules that convert tweets to HTML
+ # (handles url, @user, #hash)
+ my $sf = String::Filter->new(
+ rules => [
+ 'http://[A-Za-z0-9_\-\~\.\%\?\#\@/]+' => sub {
+ my $url = shift;
+ sprintf(
+ '<a href="%s">%s</a>',
+ encode_entities($url),
+ encode_entities($url),
+ );
+ },
+ '(?:^|\s)\@[A-Za-z0-9_]+' => sub {
+ $_[0] =~ /^(.*?\@)(.*)$/;
+ my ($prefix, $user) = ($1, $2);
+ sprintf(
+ '%s<a href="http://twitter.com/%s">%s</a>',
+ encode_entities($prefix),
+ encode_entities($user),
+ encode_entities($user),
+ );
+ },
+ '(?:^|\s)#[A-Za-z0-9_]+' => sub {
+ $_[0] =~ /^(.?)(#.*)$/;
+ my ($prefix, $hashtag) = ($1, $2);
+ sprintf(
+ '%s<a href="http://twitter.com/search?q=%s">%s</a>',
+ encode_entities($prefix),
+ encode_entities(uri_escape($hashtag)),
+ $hashtag,
+ );
+ },
+ ],
+ default_rule => sub {
+ my $text = shift;
+ encode_entities($text);
+ },
+ );
+
+ # convert a tweet to HTML
+ my $html = $sf->filter($tweet);
+
+DESCRIPTION
+ The module is a regexp-based string filter, that can merge multiple
+ conversion rules to convert strings. The primary target is to convert
+ inline markups (such as tweets of Twitter) to HTML.
+
+FUNCTIONS
+ new
+ instantiates the filter object. Takes a hash as an argument recognizing
+ the attributes below.
+
+ rules
+ arrayref of more than zero "regexp => subref"s. For more information see
+ add_rule.
+
+ default_rule
+ default filter function. See the default_rule accessor for more
+ information.
+
+ filter($input)
+ Converts the input string using the given rules and returns it.
+
+ add_rule($regexp => $subref)
+ adds a conversion rule. For each substring matching the regular
+ expression the subref will be invoked with the substring as the only
+ argument. The subref should return the filtered output of the substring.
+
+ default_rule([$subref])
+ setter / getter for the default conversion function. The subref should
+ accept a string and return the filtered output of the input.
+
+COPYRIGHT
+ Copyright (C) 2010 Cybozu Labs, Inc. Written by Kazuho Oku.
+
+LICENSE
+ This program is free software; you can redistribute it and/or modify it
+ under the same terms as Perl itself.
+
+ See <http://www.perl.com/perl/misc/Artistic.html>
+
View
@@ -7,44 +7,150 @@ use Regexp::Assemble;
sub new {
my $klass = shift;
- die "# of arguments is not even"
- unless @_ % 2 == 0;
+ my $args = @_ == 1 ? $_[0] : +{ @_ };
my $self = bless {
- handlers => [],
- default_handler => sub { $_[0] },
+ rules => [],
+ default_rule => $args->{default_rule} || sub { $_[0] },
+ _ra => Regexp::Assemble->new(),
+ _re => undef,
};
- my $ra = Regexp::Assemble->new();
+ $self->add_rule(@{$args->{rules}})
+ if $args->{rules};
+ return $self;
+}
+
+sub add_rule {
+ my $self = shift;
+ die "# of arguments is not even"
+ unless @_ % 2 == 0;
while (@_) {
my $pattern = shift;
my $subref = shift;
- if ($pattern ne '') {
- $ra->add($pattern);
- push @{$self->{handlers}}, [ qr/$pattern/, $subref ];
- } else {
- $self->{default_handler} = $subref;
- }
+ $self->{_ra}->add($pattern);
+ push @{$self->{rules}}, [ qr/^$pattern/, $subref ];
+ $self->{_re} = undef;
}
- my $assembled = $ra->re;
- $self->{match} = qr/($assembled)/;
- return $self;
}
-sub transform {
+sub default_rule {
+ my $self = shift;
+ $self->{default_rule} = shift
+ if @_;
+ return $self->{default_rule};
+}
+
+sub filter {
my ($self, $text) = @_;
+ $self->{_re} ||= do {
+ my $assembled = $self->{_ra}->re;
+ qr/($assembled)/;
+ };
my @ret;
- for my $token (split /$self->{match}/, $text) {
+ for my $token (split /$self->{_re}/, $text) {
+ next if $token eq '';
# FIXME do we have to do this O(n) every time?
- for my $handler (@{$self->{handlers}}) {
- if ($token =~ /$handler->[0]/) {
- push @ret, $handler->[1]->($token);
+ for my $rule (@{$self->{rules}}) {
+ if ($token =~ /$rule->[0]/) {
+ push @ret, $rule->[1]->($token);
goto NEXT_TOKEN;
}
}
- push @ret, $self->{default_handler}->($token);
+ push @ret, $self->{default_rule}->($token);
NEXT_TOKEN:
;
}
return join '', @ret;
}
1;
+__END__
+
+=head1 NAME
+
+String::Filter - a regexp-based string filter
+
+=head1 SYNOPSIS
+
+ # define the rules that convert tweets to HTML
+ # (handles url, @user, #hash)
+ my $sf = String::Filter->new(
+ rules => [
+ 'http://[A-Za-z0-9_\-\~\.\%\?\#\@/]+' => sub {
+ my $url = shift;
+ sprintf(
+ '<a href="%s">%s</a>',
+ encode_entities($url),
+ encode_entities($url),
+ );
+ },
+ '(?:^|\s)\@[A-Za-z0-9_]+' => sub {
+ $_[0] =~ /^(.*?\@)(.*)$/;
+ my ($prefix, $user) = ($1, $2);
+ sprintf(
+ '%s<a href="http://twitter.com/%s">%s</a>',
+ encode_entities($prefix),
+ encode_entities($user),
+ encode_entities($user),
+ );
+ },
+ '(?:^|\s)#[A-Za-z0-9_]+' => sub {
+ $_[0] =~ /^(.?)(#.*)$/;
+ my ($prefix, $hashtag) = ($1, $2);
+ sprintf(
+ '%s<a href="http://twitter.com/search?q=%s">%s</a>',
+ encode_entities($prefix),
+ encode_entities(uri_escape($hashtag)),
+ $hashtag,
+ );
+ },
+ ],
+ default_rule => sub {
+ my $text = shift;
+ encode_entities($text);
+ },
+ );
+
+ # convert a tweet to HTML
+ my $html = $sf->filter($tweet);
+
+=head1 DESCRIPTION
+
+The module is a regexp-based string filter, that can merge multiple conversion rules for converting strings. The primary target of the module is to convert inline markups (such as the tweets of Twitter) to HTML.
+
+=head1 FUNCTIONS
+
+=head2 new
+
+instantiates the filter object. Takes a hash as an argument recognizing the attributes below.
+
+=head3 rules
+
+arrayref of more than zero "regexp => subref"s. For more information see L<add_rule>.
+
+=head3 default_rule
+
+default filter function. See the L<default_rule> accessor for more information.
+
+=head2 filter($input)
+
+Converts the input string using the given rules and returns it.
+
+=head2 add_rule($regexp => $subref)
+
+adds a conversion rule. For each substring matching the regular expression the subref will be invoked with the substring as the only argument. The subref should return the filtered output of the substring.
+
+=head3 default_rule([$subref])
+
+setter / getter for the default conversion function. The subref should accept a string and return the filtered output of the input.
+
+=head1 COPYRIGHT
+
+Copyright (C) 2010 Cybozu Labs, Inc. Written by Kazuho Oku.
+
+=head1 LICENSE
+
+This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
+
+See L<http://www.perl.com/perl/misc/Artistic.html>
+
+=cut
View
@@ -1,29 +1,37 @@
use strict;
use warnings;
-use HTML::Entities;
+use HTML::Entities qw(encode_entities);
use Test::More;
+use URI::Escape qw(uri_escape);
use_ok('String::Filter');
-my $tf = String::Filter->new(
- 'http://[A-Za-z0-9_\.\%\?\#\@/]+' => sub {
- my $url = shift;
- qq{<a href="@{[encode_entities($url)]}">@{[encode_entities($url)]}</a>};
- },
- '\@[A-Za-z0-9_]+' => sub {
- my $name = shift;
- $name =~ s/^\@//;
- qq{<a href="http://twitter.com/@{[encode_entities($name)]}">\@$name</a>};
- },
- '' => sub {
+my $sf = String::Filter->new(
+ rules => [
+ 'http://[A-Za-z0-9_\-\~\.\%\?\#\@/]+' => sub {
+ my $url = shift;
+ qq{<a href="@{[encode_entities($url)]}">@{[encode_entities($url)]}</a>};
+ },
+ '(?:^|\s)\@[A-Za-z0-9_]+' => sub {
+ $_[0] =~ /^(.*?\@)(.*)$/;
+ my ($prefix, $user) = ($1, $2);
+ qq{$prefix<a href="http://twitter.com/@{[encode_entities($user)]}">$user</a>};
+ },
+ '(?:^|\s)#[A-Za-z0-9_]+' => sub {
+ $_[0] =~ /^(.?)(#.*)$/;
+ my ($prefix, $hashtag) = ($1, $2);
+ qq{$prefix<a href="http://twitter.com/search?q=@{[encode_entities(uri_escape($hashtag))]}"><b>@{[encode_entities($hashtag)]}</b></a>};
+ },
+ ],
+ default_rule => sub {
my $text = shift;
encode_entities($text);
},
);
is(
- $tf->transform('@kazuho http://hello.com/ yesyes <b>'),
- '<a href="http://twitter.com/kazuho">@kazuho</a> <a href="http://hello.com/">http://hello.com/</a> yesyes &lt;b&gt;',
+ $sf->filter('@kazuho @kazuho foo@bar http://hello.com/ yesyes <b> #hash'),
+ '@<a href="http://twitter.com/kazuho">kazuho</a> @<a href="http://twitter.com/kazuho">kazuho</a> foo@bar <a href="http://hello.com/">http://hello.com/</a> yesyes &lt;b&gt; <a href="http://twitter.com/search?q=%23hash"><b>#hash</b></a>',
);
done_testing;

0 comments on commit a7f5b2e

Please sign in to comment.