Permalink
Browse files

before 0.76, yes-stem and no-stem were equal. now, yes-stem is 15% slower, but accurate
  • Loading branch information...
1 parent ace4289 commit a05bf5605e1850fe5974c59ee4479d3bd608c24f @karpet committed Aug 9, 2012
Showing with 44 additions and 0 deletions.
  1. +44 −0 example/stemmer.pl
View
@@ -0,0 +1,44 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use Search::Tools;
+use Search::Tools::Snipper;
+use Search::Tools::XML;
+use Benchmark qw(:all);
+use Lingua::Stem::Snowball;
+
+# Benchmark Search::Tools::Snipper->snip() on the same text with two queries:
+# one parsed normally, and one parsed with a Snowball stemmer callback.
+
+# Input text: read the HTML fixture, then strip its markup.
+my $html = Search::Tools->slurp('t/docs/big-C-Child-abuse.html');
+my $buf  = Search::Tools::XML->strip_markup($html);
+
+# Baseline query, parsed without any stemmer.
+my $query = Search::Tools->parser->parse('child abuse');
+
+# Stemmed query: the parser is given a callback that hands its second
+# argument to the Snowball stemmer (the first argument is ignored).
+my $stemmer         = Lingua::Stem::Snowball->new( lang => 'en' );
+my $stemming_parser = Search::Tools->parser(
+    stemmer => sub {
+        my ( undef, $term ) = @_;
+        return $stemmer->stem($term);
+    },
+);
+my $stemmed_query = $stemming_parser->parse('child abuse');
+
+# Both snippers share these settings so that the query is the only difference.
+my %snip_opts = (
+    occur     => 1,
+    context   => 25,
+    max_chars => 190,
+);
+
+my $snipper = Search::Tools::Snipper->new( query => $query, %snip_opts );
+my $stemming_snipper
+    = Search::Tools::Snipper->new( query => $stemmed_query, %snip_opts );
+
+# Each contender snips the same buffer; only the snipper (and its query) varies.
+my %contenders = (
+    'no-stem' => sub {
+        my $excerpt = $snipper->snip($buf);
+        return $excerpt;
+    },
+    'yes-stem' => sub {
+        my $excerpt = $stemming_snipper->snip($buf);
+        return $excerpt;
+    },
+);
+
+# Run each contender 100 times and print the comparison chart.
+cmpthese( 100, \%contenders );

0 comments on commit a05bf56

Please sign in to comment.