Skip to content

Commit

Permalink
Adds files for data import
Browse files Browse the repository at this point in the history
  • Loading branch information
oalders committed May 18, 2011
1 parent d13d881 commit 249773d
Show file tree
Hide file tree
Showing 6 changed files with 227 additions and 0 deletions.
18 changes: 18 additions & 0 deletions perl/bin/update_db.pl
@@ -0,0 +1,18 @@
#!/usr/bin/env perl

use Data::Dump qw( dump );
use ElasticSearch;
use Modern::Perl;
use Scalar::Util qw( reftype );
use iCPAN;

# Point the importer at the local SQLite file and refresh the module table.
my $icpan = iCPAN->new( db_file => 'iCPAN.sqlite' );

# Fetching the schema here forces the lazy database connection to be built
# before any import runs.
my $schema = $icpan->schema;

#$icpan->insert_authors;

$icpan->insert_modules;


#say dump $schema;
1 change: 1 addition & 0 deletions perl/iCPAN.sqlite
132 changes: 132 additions & 0 deletions perl/lib/iCPAN.pm
@@ -0,0 +1,132 @@
package iCPAN;

use Data::Dump qw( dump );
use ElasticSearch;
use Modern::Perl;
use Moose;
use Scalar::Util qw( reftype );

# Compose the roles that supply the database attributes (db_file, dsn, schema)
# and the shared debug flag.
with 'iCPAN::Role::DB';
with 'iCPAN::Role::Common';

use iCPAN::Schema;

# ElasticSearch client handle. lazy_build defers construction to the first
# access, at which point _build_es below is called.
has 'es' => ( is => 'rw', isa => 'ElasticSearch', lazy_build => 1 );

# Builder for the lazy 'es' attribute: constructs the ElasticSearch client
# with the fixed connection settings used by the importer.
sub _build_es {

    # Kept as an ordered list so the constructor sees the same argument
    # sequence it always has.
    my @client_args = (
        servers      => 'localhost:9201',
        transport    => 'httplite',
        max_requests => 0,                # default 10_000
        trace_calls  => 'log_file',
        no_refresh   => 1,
    );

    return ElasticSearch->new( @client_args );
}

# Drain a scrolled search and collect the '_source' of every hit.
#
# Arguments:
#   $result - the response of the initial search; a hash ref expected to
#             carry the hits under {hits}{hits} and a {_scroll_id}.
#
# Returns an array ref of the '_source' documents, in the order received.
# Prints a progress line for every non-empty page.
sub scroll {

    my $self   = shift;
    my $result = shift;
    my @hits   = ();

    while ( 1 ) {

        # A response without a hits list is treated like an empty page
        # instead of dying on an undefined array dereference.
        my $hits = $result->{hits}{hits} // [];

        last unless @{$hits};    # if no hits, we're finished

        say "found " . scalar @{$hits} . " hits";

        push @hits, map { $_->{'_source'} } @{$hits};

        # Ask for the next page using the scroll id of the page just read.
        $result = $self->es->scroll(
            scroll_id => $result->{_scroll_id},
            scroll    => '5m'
        );

        #last if scalar @hits > 100;

    }

    return \@hits;
}

# Rebuild the Zauthor table from the 'author' documents of the 'cpan' index.
#
# All existing Zauthor rows are deleted first, then every author document is
# fetched via a scrolled match_all search and inserted in one populate call.
#
# Returns whatever the resultset's populate() returns.
sub insert_authors {

    my $self      = shift;
    my $author_rs = $self->schema->resultset( 'Zauthor' );
    $author_rs->delete;

    my $result = $self->es->search(
        index => 'cpan',
        type  => 'author',
        query => {
            #term => { pauseid => 'OALDERS' },
            match_all => {},
        },
        scroll => '5m',
        size   => 500,
    );

    my $hits    = $self->scroll( $result );
    my @authors = ();

    say "found " . scalar @{$hits} . " hits";

    foreach my $src ( @{$hits} ) {

        # Full record dumps are only useful while debugging.
        say dump( $src ) if $self->debug;

        # email may be a list of addresses or a single plain value; take the
        # first address without modifying the source document.
        my $email = $src->{email};
        my $first_email
            = ( reftype( $email ) // '' ) eq 'ARRAY' ? $email->[0] : $email;

        push @authors,
            {
            zpauseid => $src->{pauseid},

            # A name that arrives as a reference is stored as NULL.
            zname  => reftype( $src->{name} ) ? undef : $src->{name},
            zemail => $first_email,
            };
    }

    return $author_rs->populate( \@authors );

}


# Work-in-progress import of module data into the Zmodule table.
#
# Deletes every Zmodule row, then runs a scrolled search over 'release'
# documents in the 'cpan' index whose status is 'latest'.
#
# NOTE(review): as written, the loop dumps the first hit and returns
# immediately, so when the search yields any hits nothing is inserted after
# the table has been emptied. populate() is only reached when there are no
# hits at all, and is then called with an empty row list.
sub insert_modules {

my $self = shift;
my $rs = $self->schema->resultset( 'Zmodule' );
# Wipes the table before anything is fetched.
$rs->delete;

my $result = $self->es->search(
index => 'cpan',
type => ['release'],
query => {
term => { status => 'latest' },
#match_all => {},
},
scroll => '5m',
size => 10,
explain => 0,
);

# Collects the '_source' of every hit across all scroll pages.
my $hits = $self->scroll( $result );
my @rows = ();

say "found " . scalar @{$hits} . " hits";

foreach my $src ( @{$hits} ) {
say dump $src;
# Unconditional early exit: everything below in this loop is unreachable.
return;
# NOTE(review): these keys are the same ones insert_authors uses for Zauthor;
# confirm the intended Zmodule columns before enabling this code.
push @rows,
{
zpauseid => $src->{pauseid},
zname => (!reftype $src->{name}) ? $src->{name} : undef,
zemail => shift @{ $src->{email} },
};
}

return $rs->populate( \@rows );

}

1;
17 changes: 17 additions & 0 deletions perl/lib/iCPAN/Role/Common.pm
@@ -0,0 +1,17 @@
package iCPAN::Role::Common;

use Moose::Role;

# Read/write debug flag. With lazy_build the value is computed on first
# access by _build_debug unless it was set explicitly.
has debug => ( is => 'rw', lazy_build => 1 );

# Builder for 'debug': the value of the DEBUG environment variable when it
# is set to something true, otherwise 0.
sub _build_debug {

    my ( $self ) = @_;

    my $from_env = $ENV{'DEBUG'};
    return $from_env ? $from_env : 0;

}

1;
47 changes: 47 additions & 0 deletions perl/lib/iCPAN/Role/DB.pm
@@ -0,0 +1,47 @@
package iCPAN::Role::DB;

use Modern::Perl;
use Moose::Role;
use DBI;
use Find::Lib;

# Path of the SQLite database file.
# NOTE(review): lazy_build expects a _build_db_file builder, and none is
# visible in this role, so the value presumably has to be set explicitly
# by the caller -- confirm.
has db_file => (
    is         => 'rw',
    isa        => 'Str',
    lazy_build => 1,
);

# DBI data source name; derived from db_file by _build_dsn on first access.
has dsn => (
    is         => 'rw',
    isa        => 'Str',
    lazy_build => 1,
);

# Connected schema object; created by _build_schema on first access.
has schema => (
    is         => 'ro',
    lazy_build => 1,
);

# Name of the schema class that _build_schema connects through.
has schema_class => (
    is      => 'rw',
    default => 'iCPAN::Schema',
);

# Builder for 'dsn': the SQLite DBI data source string for the configured
# database file.
sub _build_dsn {

    my ( $self ) = @_;

    my $db_path = $self->db_file;
    return "dbi:SQLite:dbname=$db_path";

}

# Builder for 'schema': connects the configured schema class to the SQLite
# database named by 'dsn' and returns the connected schema object.
sub _build_schema {

    my $self = shift;

    # These are DBI/driver attributes, so they are passed in the fourth
    # connect() argument (the DBI attribute hash) rather than after an
    # empty placeholder in that position.
    my %dbi_attributes = (
        sqlite_use_immediate_transaction => 1,
        AutoCommit                       => 1,
    );

    # SQLite needs no user name or password.
    my $schema = $self->schema_class->connect( $self->dsn, '', '',
        \%dbi_attributes );

    #$schema->storage->dbh->sqlite_busy_timeout(0);
    return $schema;
}

1;
12 changes: 12 additions & 0 deletions perl/lib/iCPAN/Schema.pm
@@ -0,0 +1,12 @@
package iCPAN::Schema;

# Schema class for the iCPAN SQLite database. It inherits from
# DBIx::Class::Schema::Loader, so no result classes are declared here.

use strict;
use warnings;

use base qw/DBIx::Class::Schema::Loader/;

__PACKAGE__->loader_options(
    # constraint => '^foo.*',
    debug => 0,
);

# Loader settings: naming scheme 'current', namespaces enabled.
__PACKAGE__->naming('current');
__PACKAGE__->use_namespaces(1);

1;

0 comments on commit 249773d

Please sign in to comment.