Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate to MetaCPAN V1 API #520

Merged
merged 1 commit into from
May 12, 2017
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
190 changes: 21 additions & 169 deletions lib/Menlo/Index/MetaCPAN.pm
Expand Up @@ -11,15 +11,13 @@ use parent 'CPAN::Common::Index';
use Class::Tiny qw/uri include_dev/;

use Carp;
use CPAN::Meta::Requirements;
use HTTP::Tiny;
use HTTP::Tinyish;
use JSON::PP ();
use Time::Local ();
use version;

=attr uri

A URI for the endpoint of MetaCPAN. The default is L<http://api.metacpan.org/v0/>.
A URI for the endpoint of MetaCPAN. The default is L<https://fastapi.metacpan.org/v1/download_url/>.

=cut

Expand All @@ -32,7 +30,7 @@ Whether an index should include dev releases on PAUSE. Defaults to false.
sub BUILD {
my $self = shift;
my $uri = $self->uri;
$uri = "http://api.metacpan.org/v0/"
$uri = "https://fastapi.metacpan.org/v1/download_url/"
unless defined $uri;
# ensure URI ends in '/'
$uri =~ s{/?$}{/};
Expand All @@ -51,81 +49,29 @@ sub search_packages {
} elsif ( $args->{version_range} ) {
$range = $args->{version_range};
}

my @filter = $self->_maturity_filter($args->{package}, $range);

my $query = { filtered => {
(@filter ? (filter => { and => \@filter }) : ()),
query => { nested => {
score_mode => 'max',
path => 'module',
query => { custom_score => {
metacpan_script => "score_version_numified",
query => { constant_score => {
filter => { and => [
{ term => { 'module.authorized' => JSON::PP::true() } },
{ term => { 'module.indexed' => JSON::PP::true() } },
{ term => { 'module.name' => $args->{package} } },
$self->_version_to_query($args->{package}, $range),
] }
} },
} },
} },
} };

my $module_uri = $self->uri . "file/_search?source=";
$module_uri .= $self->_encode_json({
query => $query,
fields => [ 'date', 'release', 'author', 'module', 'status' ],
});

my($release, $author, $module_version);

my $res = HTTP::Tiny->new->get($module_uri);
return unless $res->{success};

my $module_meta = eval { JSON::PP::decode_json($res->{content}) };

my $file = $self->_find_best_match($module_meta);
if ($file) {
$release = $file->{release};
$author = $file->{author};
my $module_matched = (grep { $_->{name} eq $args->{package} } @{$file->{module}})[0];
$module_version = $module_matched->{version};
}

return unless $release;

my $dist_uri = $self->uri . "release/_search?source=";
$dist_uri .= $self->_encode_json({
filter => { and => [
{ term => { 'release.name' => $release } },
{ term => { 'release.author' => $author } },
]},
fields => [ 'download_url' ],
});

$res = HTTP::Tiny->new->get($dist_uri);
my %query = (
($self->include_dev ? (dev => 1) : ()),
($range ? (version => $range) : ()),
);
my $query = join "&", map { "$_=" . $self->_uri_escape($query{$_}) } sort keys %query;

my $uri = $self->uri . $args->{package} . ($query ? "?$query" : "");
my $res = HTTP::Tinyish->new->get($uri);
return unless $res->{success};

my $dist_meta = eval { JSON::PP::decode_json($res->{content}) };

if ($dist_meta) {
$dist_meta = $dist_meta->{hits}{hits}[0]{fields};
}

if ($dist_meta && $dist_meta->{download_url}) {
(my $distfile = $dist_meta->{download_url}) =~ s!.+/authors/id/\w/\w\w/!!;

my $res = {
package => $args->{package},
version => $module_version,
version => $dist_meta->{version},
uri => "cpan:///distfile/$distfile",
};

if ($file->{status} eq 'backpan') {
if ($dist_meta->{status} eq 'backpan') {
$res->{download_uri} = $self->_download_uri("http://backpan.perl.org", $distfile);
} elsif ($self->_parse_date($file->{date}) > time() - 24 * 60 * 60) {
} elsif ($self->_parse_date($dist_meta->{date}) > time() - 24 * 60 * 60) {
$res->{download_uri} = $self->_download_uri("http://cpan.metacpan.org", $distfile);
}

Expand All @@ -137,115 +83,21 @@ sub search_packages {

sub _parse_date {
my($self, $date) = @_;
my @date = $date =~ /^(\d{4})-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)(?:\.\d+)?Z$/;
my @date = $date =~ /^(\d{4})-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)$/;
Time::Local::timegm($date[5], $date[4], $date[3], $date[2], $date[1] - 1, $date[0] - 1900);
}

sub _uri_escape {
my($self, $string) = @_;
$string =~ s/([^a-zA-Z0-9_\-.])/uc sprintf("%%%02x",ord($1))/eg;
$string;
}

sub _download_uri {
my($self, $base, $distfile) = @_;
join "/", $base, "authors/id", substr($distfile, 0, 1), substr($distfile, 0, 2), $distfile;
}

sub _encode_json {
my($self, $data) = @_;
my $json = JSON::PP::encode_json($data);
$json =~ s/([^a-zA-Z0-9_\-.])/uc sprintf("%%%02x",ord($1))/eg;
$json;
}

sub _version_to_query {
my($self, $module, $version) = @_;

return () unless $version;

my $requirements = CPAN::Meta::Requirements->new;
$requirements->add_string_requirement($module, $version || '0');

my $req = $requirements->requirements_for_module($module);

if ($req =~ s/^==\s*//) {
return {
term => { 'module.version' => $req },
};
} elsif ($req !~ /\s/) {
return {
range => { 'module.version_numified' => { 'gte' => $self->_numify($req) } },
};
} else {
my %ops = qw(< lt <= lte > gt >= gte);
my(%range, @exclusion);
my @requirements = split /,\s*/, $req;
for my $r (@requirements) {
if ($r =~ s/^([<>]=?)\s*//) {
$range{$ops{$1}} = $self->_numify($r);
} elsif ($r =~ s/\!=\s*//) {
push @exclusion, $self->_numify($r);
}
}

my @filters= (
{ range => { 'module.version_numified' => \%range } },
);

if (@exclusion) {
push @filters, {
not => { or => [ map { +{ term => { 'module.version_numified' => $self->_numify($_) } } } @exclusion ] },
};
}

return @filters;
}
}

# Apparently MetaCPAN numifies devel releases by stripping _ first
sub _numify {
my($self, $ver) = @_;
$ver =~ s/_//g;
version->new($ver)->numify;
}

sub _maturity_filter {
my($self, $module, $version) = @_;

my @filters;

if ($self->include_dev) {
# backpan'ed dev release are considered "cancelled"
push @filters, { not => { term => { status => 'backpan' } } };
}

my $explicit_version = $version && $version =~ /==/;

unless ($self->include_dev or $explicit_version) {
push @filters, { term => { maturity => 'released' } };
}

return @filters;
}

sub by_version {
# version: higher version that satisfies the query
$b->{fields}{module}[0]{"version_numified"} <=> $a->{fields}{module}[0]{"version_numified"};
}

sub by_status {
# prefer non-backpan dist
my %s = (latest => 3, cpan => 2, backpan => 1);
$s{ $b->{fields}{status} } <=> $s{ $a->{fields}{status} };
}

sub by_date {
# prefer new uploads
$b->{fields}{date} cmp $a->{fields}{date};
}

sub _find_best_match {
my($self, $match, $version) = @_;
return unless $match && @{$match->{hits}{hits} || []};
my @hits = sort { by_version || by_status || by_date } @{$match->{hits}{hits}};
$hits[0]->{fields};
}

sub index_age { return time } # pretend always current

sub search_authors { return } # not supported
Expand Down