Skip to content

Commit

Permalink
moved authorized logic into the indexer
Browse files Browse the repository at this point in the history
  • Loading branch information
monken committed Feb 26, 2012
1 parent 1ebd90a commit 1d58f5f
Show file tree
Hide file tree
Showing 7 changed files with 122 additions and 185 deletions.
46 changes: 44 additions & 2 deletions lib/MetaCPAN/Document/File.pm
Expand Up @@ -195,7 +195,7 @@ has abstract =>
has description =>
( is => 'ro', required => 1, lazy_build => 1, index => 'analyzed' );
has status => ( is => 'ro', required => 1, default => 'cpan' );
has authorized => ( required => 1, is => 'ro', isa => 'Bool', default => 1 );
has authorized => ( required => 1, is => 'rw', isa => 'Bool', default => 1 );
has maturity => ( is => 'ro', required => 1, default => 'released' );
has directory => ( is => 'ro', required => 1, isa => 'Bool', default => 0 );
has level => ( is => 'ro', required => 1, isa => 'Int', lazy_build => 1 );
Expand Down Expand Up @@ -430,6 +430,48 @@ sub _build_pod {
return \$text;
}

=head2 set_authorized
Expects a C<$perms> parameter which is a HashRef. The key is the module name
and the value an ArrayRef of author names who are allowed to release
that module.
The method returns a list of unauthorized, but indexed modules.
Unauthorized modules are modules that were uploaded in the name of a
different author than stated in the C<06perms.txt.gz> file. One problem
with this file is, that it doesn't record historical data. It may very
well be that an author was authorized to upload a module at the time.
But then his co-maintainer rights might have been revoked, making consecutive
uploads of that release unauthorized. However, since this script runs
with the latest version of C<06perms.txt.gz>, the former upload will
be flagged as unauthorized as well. Same holds the other way round,
a previously unauthorized release would be flagged authorized if the
co-maintainership was added later on.
If a release contains unauthorized modules, the whole release is marked
as unauthorized as well.
=cut

sub set_authorized {
my ( $self, $perms ) = @_;
# only authorized perl distributions make it into the CPAN
return () if ( $self->distribution eq 'perl' );
foreach my $module ( @{ $self->module } ) {
$module->authorized(0)
if ( $perms->{ $module->name } && !grep { $_ eq $self->author }
@{ $perms->{ $module->name } } );
}
$self->authorized(0)
if ( $self->authorized
&& $self->documentation
&& $perms->{ $self->documentation }
&& !grep { $_ eq $self->author }
@{ $perms->{ $self->documentation } } );
return grep { !$_->authorized && $_->indexed } @{ $self->module };
}

__PACKAGE__->meta->make_immutable;

package MetaCPAN::Document::File::Set;
Expand Down Expand Up @@ -510,7 +552,7 @@ sub find_pod {
($file) = grep {
grep { $_->indexed && $_->authorized && $_->name eq $module }
@{ $_->module || [] }
} @files unless($file);
} @files unless ($file);
return $file ? $file : shift @files;
}

Expand Down
2 changes: 1 addition & 1 deletion lib/MetaCPAN/Document/Module.pm
Expand Up @@ -67,7 +67,7 @@ has version => ( is => 'ro' );
has version_numified =>
( is => 'ro', isa => 'Num', lazy_build => 1, required => 1 );
has indexed => ( is => 'rw', required => 1, isa => 'Bool', default => 0 );
has authorized => ( is => 'ro', required => 1, isa => 'Bool', default => 1 );
has authorized => ( is => 'rw', required => 1, isa => 'Bool', default => 1 );

# REINDEX: make 'ro' once a full reindex has been done
has associated_pod => ( required => 0, is => 'rw' );
Expand Down
2 changes: 1 addition & 1 deletion lib/MetaCPAN/Document/Release.pm
Expand Up @@ -120,7 +120,7 @@ has status => ( is => 'rw', required => 1, default => 'cpan' );
has maturity => ( is => 'ro', required => 1, default => 'released' );
has stat => ( is => 'ro', isa => Stat, dynamic => 1 );
has tests => ( is => 'ro', isa => Tests, dynamic => 1 );
has authorized => ( is => 'ro', required => 1, isa => 'Bool', default => 1 );
has authorized => ( is => 'rw', required => 1, isa => 'Bool', default => 1 );
has first => (
is => 'ro',
required => 1,
Expand Down
176 changes: 0 additions & 176 deletions lib/MetaCPAN/Script/Authorized.pm

This file was deleted.

56 changes: 51 additions & 5 deletions lib/MetaCPAN/Script/Release.pm
Expand Up @@ -20,7 +20,7 @@ use File::stat ('stat');
use CPAN::DistnameInfo ();
use File::Spec::Functions ( 'tmpdir', 'catdir' );
use File::Find ();
use File::stat ();
use File::stat ();
use MetaCPAN::Script::Latest;
use DateTime::Format::Epoch::Unix;
use File::Find::Rule;
Expand Down Expand Up @@ -65,6 +65,13 @@ has detect_backpan => (
);
has backpan_index => ( is => 'ro', lazy_build => 1 );

has perms => (
is => 'ro',
isa => 'HashRef',
lazy_build => 1,
traits => ['NoGetopt']
);

sub run {
my $self = shift;
my ( undef, @args ) = @{ $self->extra_argv };
Expand All @@ -76,7 +83,12 @@ sub run {
qr/\.(tgz|tbz|tar[\._-]gz|tar\.bz2|tar\.Z|zip|7z)$/);
$find = $find->mtime( ">" . ( time - $self->age * 3600 ) )
if ( $self->age );
push( @files, map { $_->{file}} sort { $a->{mtime} <=> $b->{mtime} } map { +{ file => $_, mtime => File::stat::stat($_)->mtime } } $find->in($_) );
push(
@files,
map { $_->{file} } sort { $a->{mtime} <=> $b->{mtime} } map {
+{ file => $_, mtime => File::stat::stat($_)->mtime }
} $find->in($_)
);
}
elsif ( -f $_ ) {
push( @files, $_ );
Expand Down Expand Up @@ -166,7 +178,7 @@ sub import_tarball {
# load Archive::Any in the child due to bugs in MMagic and MIME::Types
require Archive::Any;
my $at = Archive::Any->new($tarball);
my $tmpdir = dir( File::Temp::tempdir( CLEANUP => 0 ) );
my $tmpdir = dir( File::Temp::tempdir( CLEANUP => 0 ) );

# TODO: add release to the index with status => 'broken' and move along
log_error {"$tarball is being naughty"}
Expand Down Expand Up @@ -278,7 +290,8 @@ sub import_tarball {
foreach my $file (@files) {
my $obj = $file_set->new_document($file);
$bulk->put($obj);
$file->{$_} = $obj->$_ for (qw(abstract id pod sloc pod_lines documentation));
$file->{$_} = $obj->$_
for (qw(abstract id pod sloc pod_lines documentation));
$file->{module} = [];
}
$bulk->commit;
Expand Down Expand Up @@ -337,7 +350,9 @@ sub import_tarball {
}
log_debug { "Indexing ", scalar @modules, " modules" };
$i = 1;
my $perms = $self->perms;
my $mod_set = $cpan->type('module');
my @release_unauthorized;
foreach my $file (@modules) {
$file = MetaCPAN::Document::File->new( %$file, index => $cpan );
foreach my $mod ( @{ $file->module } ) {
Expand All @@ -363,11 +378,22 @@ sub import_tarball {
!!grep { $file->documentation eq $_->name } @{ $file->module }
) if ( $file->documentation );
log_trace {"reindexing file $file->{path}"};
push(@release_unauthorized, $file->set_authorized($perms)) if(keys %$perms);
$file->clear_module if ( $file->is_pod_file );
$bulk->put($file);

}
$bulk->commit;
if (@release_unauthorized) {
log_info {
"release "
. $release->name
. " contains unauthorized modules: "
. join( ",", map { $_->name } @release_unauthorized );
};
$release->authorized(0);
$release->put;
}


$tmpdir->rmtree;

Expand All @@ -390,6 +416,7 @@ sub load_meta_file {
my $file;
for (qw{*/META.json */META.yml */META.yaml META.json META.yml META.yaml})
{

# scalar context globbing (without exhausting results) produces
# confusing results (which caused existsing */META.json files to
# get skipped). using list context seems more reliable.
Expand Down Expand Up @@ -472,6 +499,25 @@ sub detect_status {
}
}

sub _build_perms {
my $self = shift;
my $file = $self->cpan->file(qw(modules 06perms.txt));
unless(-e $file) {
log_warn {"$file could not be found. All modules are assumed authorized."};
return {};
}
log_info { "parsing ", $file };
my $fh = $file->openr;
my %authors;
while ( my $line = <$fh> ) {
my ( $module, $author, $type ) = split( /,/, $line );
next unless ($type);
$authors{$module} ||= [];
push( @{ $authors{$module} }, $author );
}
return \%authors;
}

1;

__END__
Expand Down

0 comments on commit 1d58f5f

Please sign in to comment.