Permalink
Browse files

Implement search ranking

  • Loading branch information...
kappa committed Apr 2, 2010
1 parent 8f3a334 commit 1bd15d93adde618a64c62190a3c8c13a89622f1d
Showing with 36 additions and 10 deletions.
  1. +36 −10 lib/CpanHub.pm
View
@@ -1,14 +1,13 @@
package CpanHub;
-use strict;
-use warnings;
+use Modern::Perl;
use parent 'Exporter';
use URI;
use URI::Escape;
use AnyEvent::HTTP;
use XML::Simple qw/:strict/;
-use List::MoreUtils qw/first_index/;
+use List::MoreUtils qw/first_index part/;
use DateTime::Format::RFC3339;
use signatures;
@@ -42,7 +41,8 @@ sub cpan_search_req($q, $cb) {
author_link => $_->{author}->[0]->{link}->[0],
date => do { (my $d = $_->{released}->[0]) =~ s/(\d+)[a-z]{2}/$1/; $d },
author => ($_->{author}->[0]->{link}->[0] =~ /~([^\/]+)/),
- } } grep { $_->{name}
+ } } grep {
+ $_->{name}
&& $_->{link}
&& $_->{description}
&& $_->{author}
@@ -54,7 +54,10 @@ sub cpan_search_req($q, $cb) {
}
sub github_search_req($q, $cb) {
- my $query = uri_escape("($q OR description:$q) AND language:Perl AND fork:false");
+ (my $gh_q = $q) =~ s/::|-/ /g;
+ # XXX looks like description: search is broken on github right now
+ #my $query = uri_escape("($gh_q OR description:$q) AND language:Perl AND fork:false");
+ my $query = uri_escape("$gh_q AND language:Perl AND fork:false");
my $dtf = DateTime::Format::RFC3339->new;
_call('http://github.com/api/v2/xml/repos/search/' . $query, { }, sub {
@@ -63,12 +66,13 @@ sub github_search_req($q, $cb) {
my $rv = [ map { {
name => $_->{name}->[0],
'link' => "http://github.com/$_->{username}->[0]/$_->{name}->[0]",
- desc => $_->{description}->[0],
+ desc => ref $_->{description}->[0] ? '' : $_->{description}->[0],
author_link => "http://github.com/$_->{username}->[0]",
date => $dtf->parse_datetime($_->{pushed}->[0])->strftime('%e %B %Y'),
author => $_->{username}->[0],
ghscore => $_->{score}->[0]->{content},
- } } grep { $_->{name}
+ } } grep {
+ $_->{name}
&& $_->{description}
&& $_->{pushed}
&& $_->{username}
@@ -78,13 +82,20 @@ sub github_search_req($q, $cb) {
});
}
+sub _gh_query($q) { # Turn $q (a module name or search query) into a regex fragment for matching GitHub repo names.
+ my $gh_q = lc $q; # work on a lower-cased copy of the argument
+ $gh_q =~ s/::/--?/g; # each "::" becomes the pattern "--?", i.e. one or two hyphens
+ $gh_q =~ s/\./\\./g; # escape "." so it matches a literal dot rather than any character
+ # NOTE(review): only "." is escaped; other regex metacharacters in $q (e.g. "+", "(") pass through unescaped -- confirm callers never pass them.
+
+ return $gh_q; # a pattern string, not a compiled regex; callers in this file interpolate it into a match
+}
+
sub merge_cpan_and_github($cpan, $gh, $q) {
my @res;
foreach my $cp (@$cpan) {
my $ghi = first_index {
- (my $name = lc $cp->{name}) =~ s/::/--?/g;
- $name =~ s/\./\\./g;
+ my $name = _gh_query($cp->{name});
$_->{name} =~ /^ $name $/ixs;
} @$gh;
@@ -93,7 +104,22 @@ sub merge_cpan_and_github($cpan, $gh, $q) {
$cp->{gh} = splice @$gh, $ghi, 1;
}
- return [@$cpan, @$gh];
+ my $gh_q = _gh_query($q);
+
+ # ranking algorithm:
+ # ---
+ # we reorder all results into 5 groups:
+ # 0. github with query exactly matched in name
+ # 1. CPAN with query matched in name
+ # 2. github with query matched in name
+ # 3. rest of CPAN results
+ # 4. rest of github results # this group is usually empty because the current gh search uses only names
+ # the original order inside the groups is retained
+
+ my ($cpan1, $cpan2) = part { $_->{name} =~ /$q/i ? 0 : 1 } @$cpan;
+ my ($gh0, $gh1, $gh2) = part { $_->{name} =~ /^$gh_q$/i ? 0 : $_->{name} =~ /$gh_q/i ? 1 : 2 } @$gh;
+
+ return [ map { @$_ } grep defined, $gh0, $cpan1, $gh1, $cpan2, $gh2 ];
}
1;

0 comments on commit 1bd15d9

Please sign in to comment.