Skip to content
Browse files

zap tricky deps

  • Loading branch information...
1 parent 07f5c88 commit b3c575554b608c4078a2255cd0bccd7726ed0ae3 @nothingmuch committed Oct 11, 2008
Showing with 8 additions and 334 deletions.
  1. +0 −2 Makefile.PL
  2. +0 −212 lib/Search/GIN/Driver/BerkeleyDB.pm
  3. +1 −0 lib/Search/GIN/Driver/Hash.pm
  4. +0 −84 t/bdb_driver.t
  5. +7 −36 t/core.t
View
2 Makefile.PL
@@ -16,8 +16,6 @@ WriteMakefile(
'MooseX::Types::Set::Object' => 0,
'namespace::clean' => '0.08',
'Data::Stream::Bulk' => "0.03",
- 'BerkeleyDB::Manager' => 0,
- 'BerkeleyDB' => 0.35, # multiple keys
'Scope::Guard' => 0,
'Set::Object' => 0,
'Test::use::ok' => 0,
View
212 lib/Search/GIN/Driver/BerkeleyDB.pm
@@ -1,212 +0,0 @@
-#!/usr/bin/perl
-
-package Search::GIN::Driver::BerkeleyDB;
-use Moose::Role;
-
-
-use Scalar::Util qw(weaken);
-
-use MooseX::Types::Path::Class;
-
-use BerkeleyDB 0.35; # DBT_MULTIPLE, see http://rt.cpan.org/Ticket/Display.html?id=38896
-use BerkeleyDB::Manager;
-
-use constant USE_PARTIAL => 1; # not sure it's a good thing yet
-
-use namespace::clean -except => [qw(meta)];
-
-with qw(
- Search::GIN::Driver::TXN
- Search::GIN::Driver::Pack::Values
-);
-
-has primary_file => (
- isa => "Path::Class::File",
- is => "ro",
- coerce => 1,
- default => sub { Path::Class::File->new("primary.db") },
-);
-
-has secondary_file => (
- isa => "Path::Class::File",
- is => "ro",
- coerce => 1,
- default => sub { Path::Class::File->new("secondary.db") },
-);
-
-has manager => (
- isa => "BerkeleyDB::Manager",
- is => "ro",
- coerce => 1,
- lazy_build => 1,
- # handles => "Search::GIN::Driver::TXN",
-);
-
-sub _build_manager {
- my $self = shift;
- BerkeleyDB::Manager->new()
-}
-
-sub txn_begin { shift->manager->txn_begin(@_) }
-sub txn_commit { shift->manager->txn_commit(@_) }
-sub txn_rollback { shift->manager->txn_rollback(@_) }
-sub txn_do { shift->manager->txn_do(@_) }
-
-has [qw(primary_db secondary_db)] => (
- isa => "Object",
- is => "ro",
- lazy_build => 1,
-);
-
-has block_size => (
- isa => "Int",
- is => "rw",
- default => 500,
-);
-
-sub _build_primary_db {
- my $self = shift;
-
- my $m = $self->manager;
-
- my $primary = $m->open_db( name => "primary", file => $self->primary_file );
-
- my $secondary = $self->secondary_db;
-
- my $weak_self = $self;
- weaken($weak_self); # don't leak (circular ref)
-
- $m->associate(
- primary => $primary,
- secondary => $secondary,
- callback => sub {
- my ( $id, $vals ) = @_;
- return [ $weak_self->unpack_values($vals) ];
- },
- );
-
- return $primary;
-}
-
-# this is the secondary index, it maps from secondary keys (the inverted values) back to IDS
-# BDB maintains (as in updates/deletes etc) entries from this index based on
-# modifications to primary_db
-sub _build_secondary_db {
- my $self = shift;
-
- $self->manager->open_db( name => "secondary", file => $self->secondary_file, dup => 1, dupsort => 1 );
-}
-
-# Search::GIN::Driver methods
-sub fetch_entry {
- my ( $self, $key ) = @_;
- $self->get_ids($key);
-}
-
-sub remove_ids {
- my ( $self, @ids ) = @_;
-
- my $pri = $self->primary_db;
-
- # BDB will delete all dependent keys from the secondary index
- foreach my $id ( @ids ) {
- $pri->db_del($id);
- }
-}
-
-sub insert_entry {
- my ( $self, $id, @keys ) = @_;
-
- my $pri = $self->primary_db;
-
- # BDB will update the secondary index using the callback we gave it in
- # ->associate
- $pri->db_put($id, $self->pack_values(@keys));
-}
-
-# this method is just for completeness
-sub get_values {
- my ( $self, $id ) = @_;
-
- my $v;
-
- if ( $self->primary_db->db_get( $id, $v ) == 0 ) {
- return $self->unpack_values($v);
- } else {
- return;
- }
-}
-
-# OW MY EYES! BDB is so nastty it's not even funny.
-
-# to avoid reading key data unnecessarily (we'll never actually need it) we set
-# the partial value range to
-sub _key_only_guard ($) {
- my $db = shift;
-
- my ( $pon, $off, $len ) = $db->partial_set(0,0);
-
- return Scope::Guard->new(sub {
- if ( $pon ) {
- $db->partial_set($off, $len);
- } else {
- $db->partial_clear;
- }
- });
-}
-
-# this data set is potentially large (all IDs for a given secondary key)
-# we iterate the duplicates, and if we wind up with more than $block_size then
-# we create an iterator for the remainder
-sub get_ids {
- my ( $self, $key ) = @_;
-
- my $db = $self->secondary_db;
-
- my ( $pk, $v );
-
- $self->manager->dup_cursor_stream(
- db => $db,
- init => USE_PARTIAL && sub { _key_only_guard($db) },
- callback_first => sub {
- my ( $cursor, $ret ) = @_;
-
- if ( $cursor->c_pget( $key, $pk, $v, DB_SET ) == 0 ) {
- push @$ret, $pk;
- return 1;
- } else {
- return;
- }
- },
- callback => sub {
- my ( $cursor, $ret ) = @_;
-
- if ( $cursor->c_pget( $key, $pk, $v, DB_NEXT_DUP ) == 0 ) {
- push @$ret, $pk;
- return 1;
- } else {
- return;
- }
- }
- );
-}
-
-__PACKAGE__
-
-__END__
-
-=pod
-
-=head1 NAME
-
-Search::GIN::Driver::BerkeleyDB -
-
-=head1 SYNOPSIS
-
- use Search::GIN::Driver::BerkeleyDB;
-
-=head1 DESCRIPTION
-
-=cut
-
-
View
1 lib/Search/GIN/Driver/Hash.pm
@@ -42,6 +42,7 @@ sub remove_ids {
my @key_sets = grep { defined } delete @{$objects}{map { ref() ? refaddr($_) : $_ } @ids};
return unless @key_sets;
+ warn "key sets: @key_sets";
my $keys = (shift @key_sets)->union(@key_sets);
foreach my $key ( $keys->members ) {
View
84 t/bdb_driver.t
@@ -1,84 +0,0 @@
-#!/usr/bin/perl
-
-use strict;
-use warnings;
-
-use Test::More 'no_plan';
-use Test::TempDir;
-
-use Path::Class;
-
-use ok 'Search::GIN::Driver::BerkeleyDB';
-
-{
- package Drv;
- use Moose;
-
- with (
- 'Search::GIN::Driver::BerkeleyDB',
- 'Search::GIN::Driver::Pack::Length' => {
- alias => {
- pack_length => "pack_values",
- unpack_length => "unpack_values",
- },
- },
- );
-}
-
-my $d = Drv->new( manager => { home => temp_root, create => 1 } );
-
-my $id = "a" x 16;
-my @ids = map { $id++ } 1 .. 10;
-
-my @foo = @ids[3,4,6];
-my @bar = @ids[4,8];
-
-$d->insert_entry( $_ => "foo" ) for @ids[3,6];
-$d->insert_entry( $ids[8] => "bar" );
-$d->insert_entry( $ids[4] => qw(foo bar) );
-
-is_deeply( [ sort $d->fetch_entry('foo')->all ], [ sort @foo ], "foo entry" );
-is_deeply( [ sort $d->fetch_entry('bar')->all ], [ sort @bar ], "bar entry" );
-
-$d->insert_entry($ids[1] => qw(foo));
-$d->insert_entry($ids[2] => qw(foo));
-
-is_deeply( [ sort $d->fetch_entry('foo')->all ], [ sort @foo, @ids[1,2] ], "merged" );
-
-my $txn = $d->txn_begin;
-
-$d->insert_entry($ids[5] => qw(quxx));
-
-is_deeply( [ $d->fetch_entry('quxx')->all ], [ $ids[5] ], "mid txn" );
-
-$d->txn_commit($txn);
-
-is_deeply( [ $d->fetch_entry('quxx')->all ], [ $ids[5] ], "txn succeeded" );
-
-eval {
- $d->txn_do(sub {
- $d->insert_entry( $ids[0] => qw(gorch) );
-
- is_deeply( [ $d->fetch_entry("gorch")->all ], [ $ids[0] ], "mid txn" );
-
- die "user error";
- });
-};
-
-like( $@, qr/user error/, "got error" );
-
-{
- is_deeply( [ $d->fetch_entry("gorch")->all ], [ ], "transaction aborted" );
-}
-
-$d->txn_do(sub {
- $d->insert_entry( $ids[5] => qw(zot) );
-});
-
-is_deeply( [ $d->fetch_entry("zot")->all ], [ $ids[5] ], "transaction succeeded" );
-
-$d->remove_ids(@ids[2,4]);
-
-is_deeply( [ sort $d->fetch_entry('foo')->all ], [ sort @ids[1, 3, 6] ], "removed" );
-
-
View
43 t/core.t
@@ -22,42 +22,13 @@ use Set::Object;
# on disk index:
with (
qw(
- Search::GIN::DelegateToIndexed
- Search::GIN::Driver::BerkeleyDB
- ),
- 'Search::GIN::Driver::Pack::Length' => {
- alias => {
- pack_length => "pack_values",
- unpack_length => "unpack_values",
- }
- },
- );
-
- # DelegateToIndexed means we delegate everything to Query and Indexable
- # there's also Callbacks, and presumably custom impls
-
- # PackUUID is because BerkeleyDB is ondisk
- # it's an implementation of pack_ids and unpack_ids that uses unpack/pack
- # on constant width strings
-
- # the only required method left after all these roles were added
- # we fake it here, but it should go to the storage backend
- sub ids_to_objects {
- my ( $self, @ids ) = @_;
- @{ $self->objects }{@ids};
- }
-
- around objects_to_ids => sub {
- my ( $next, $self, @objs ) = @_;
- my @ids = $self->$next(@objs);
- @{ $self->objects }{@ids} = @objs;
- return @ids;
- };
-
- has objects => (
- isa => "HashRef",
- is => "rw",
- default => sub { {} },
+ Search::GIN::Core
+ Search::GIN::Driver::Hash
+ Search::GIN::SelfIDs
+ ),
+ 'Search::GIN::DelegateToIndexed' => {
+ excludes => "objects_to_ids", # SelfIDs
+ },
);
# you create the query objects, the GIN implementation uses them

0 comments on commit b3c5755

Please sign in to comment.
Something went wrong with that request. Please try again.