Permalink
Browse files

experiment: use PazPar2 to group related works

The approach is to use PazPar2 to search just one
target, the biblio Zebra database.  The results
of each set are merged by PazPar2 to generate a
hitlist that combines related bibs together; as an
example, if a library has the first Harry Potter
book in three languages and an audiobook format,
the hitlist should ideally return one result
for the work that includes links to the individual
bibs.

The new module C4::Search::PazPar2 implements a
simple client for PazPar2's XML-over-HTTP API.  It is
designed to be generic, and thus may end up getting
moved out of Koha to become a stand-alone CPAN module.

Signed-off-by: Chris Cormack <crc@liblime.com>
Signed-off-by: Joshua Ferraro <jmf@liblime.com>
  • Loading branch information...
Galen Charlton Joshua Ferraro
Galen Charlton authored and Joshua Ferraro committed Feb 7, 2008
1 parent 374ee42 commit d92eb0373e5cb96a20dd7efa170102738e22b986
View
@@ -611,6 +611,70 @@ sub getRecords {
return ( undef, $results_hashref, \@facets_loop );
}
+use C4::Search::PazPar2;
+use XML::Simple;
+use Data::Dumper;
+# pazGetRecords - run a search through PazPar2 and collect the merged results.
+#
+# Parameters (positional):
+#   $koha_query, $sort_by_ref, $servers_ref, $expanded_facet, $branches,
+#   $query_type, $scan  - accepted but not used by this implementation
+#   $simple_query       - query string handed to PazPar2's search command
+#   $results_per_page   - passed to show() as the number of hits to return
+#   $offset             - passed to show() as the starting position
+#
+# Returns a three-element list:
+#   undef,
+#   a hashref whose 'biblioserver' entry holds 'hits' (from the stat
+#   response) and 'RECORDS' (one entry per record fetched), and
+#   an arrayref of facet groups ({ type_label, facets }).
+sub pazGetRecords {
+    my (
+        $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
+        $results_per_page, $offset,       $expanded_facet, $branches,
+        $query_type,       $scan
+    ) = @_;
+
+    my $paz = C4::Search::PazPar2->new('http://localhost:10006/search.pz2');
+    $paz->init();
+    $paz->search($simple_query);
+
+    # NOTE(review): presumably this pause gives PazPar2 time to gather hits
+    # before stat is queried - confirm, and replace with polling if so.
+    sleep 1;
+
+    # do results
+    # The client methods return an empty list on HTTP failure, so capture each
+    # response in a scalar before parsing; passing the call straight to XMLin
+    # would drop or shift its arguments.
+    my $results_hashref = {};
+    my $stat_xml        = $paz->stat;
+    my $stats           = defined $stat_xml ? XMLin($stat_xml) : {};
+    $results_hashref->{'biblioserver'}->{'hits'} = $stats->{'hits'};
+
+    my $show_xml = $paz->show($offset, $results_per_page);
+    my $results =
+      defined $show_xml ? XMLin($show_xml, forcearray => 1) : {};
+
+    HIT: foreach my $hit (@{ $results->{'hit'} || [] }) {
+        my $recid = $hit->{recid}->[0];
+
+        # NOTE(review): stops processing at the first record ID containing
+        # "sodot"; looks like a stand-in for a non-ASCII record ID check
+        # (/[\200-\377]/) - confirm before relying on it.
+        if ($recid =~ /sodot/) {
+            last HIT;
+        }
+
+        # A merged hit may stand for several records; 'count' says how many.
+        my $count = exists $hit->{count} ? $hit->{count}->[0] : 1;
+
+        for my $i (0 .. $count - 1) {
+            # Fetch each record exactly once (it used to be requested twice).
+            my $rec = $paz->record($recid, $i);
+            next unless defined $rec;
+            push @{ $results_hashref->{'biblioserver'}->{'RECORDS'} }, $rec;
+        }
+    }
+
+    # pass through facets
+    my $termlist_xml = $paz->termlist('author,subject');
+    my $terms =
+      defined $termlist_xml ? XMLin($termlist_xml, forcearray => 1) : {};
+    my @facets_loop = ();
+    foreach my $list (sort keys %{ $terms->{'list'} || {} }) {
+        # Sort on the label text; sorting the term hashrefs themselves would
+        # order them by memory address.
+        my @labels =
+          map { $_->{'name'}->[0] } @{ $terms->{'list'}->{$list}->{'term'} || [] };
+        my @facets = ();
+        foreach my $label (sort @labels) {
+            push @facets, { facet_label_value => $label };
+        }
+        push @facets_loop, {
+            type_label => $list,
+            facets     => \@facets,
+        };
+    }
+
+    return ( undef, $results_hashref, \@facets_loop );
+}
+
# STOPWORDS
sub _remove_stopwords {
my ( $operand, $index ) = @_;
View
@@ -0,0 +1,168 @@
+package C4::Search::PazPar2;
+
+# Copyright (C) 2007 LibLime
+#
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+# Suite 330, Boston, MA 02111-1307 USA
+
+use strict;
+
+use LWP::UserAgent;
+use URI;
+use URI::QueryParam;
+use XML::Simple;
+
+=head1 NAME
+
+C4::Search::PazPar2 - implement client for PazPar2
+
+[Note: this module may be renamed to Net::PazPar2 or similar if we decide to
+ release it on CPAN separately from Koha]
+
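+=head1 SYNOPSIS
+
+    my $paz = C4::Search::PazPar2->new('http://localhost:10006/search.pz2');
+    $paz->init();
+    $paz->search($query);
+    my $stat_xml = $paz->stat();
+    my $show_xml = $paz->show($start, $count);
+
+=head1 DESCRIPTION
+
+A simple client for PazPar2's XML-over-HTTP API.  The C<stat>, C<show>,
+C<record> and C<termlist> methods return the raw response content and leave
+parsing to the caller.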
+
+=cut
+
+# new - constructor.
+#
+# Takes the URL of the PazPar2 endpoint.  The object starts with an empty
+# session ID and its own LWP::UserAgent.
+sub new {
+    my ($class, $endpoint) = @_;
+
+    my %state = (
+        'endpoint' => $endpoint,
+        'session'  => '',
+        'ua'       => LWP::UserAgent->new,
+    );
+
+    return bless \%state, $class;
+}
+
+# init - open a PazPar2 session.
+#
+# Sends command=init to the endpoint.  When the reply reports status 'OK',
+# the session ID from the reply is stored in $self->{'session'} for use by
+# the other commands.
+#
+# Returns 1 when a session was established.  On an HTTP failure or a reply
+# whose status is not 'OK', emits a warning and returns nothing.
+sub init {
+    my ($self) = @_;
+
+    my $uri = URI->new($self->{'endpoint'});
+    $uri->query_param(command => 'init');
+
+    my $response = $self->{'ua'}->get($uri);
+    unless ($response->is_success) {
+        warn $response->status_line;
+        return;
+    }
+
+    my $message = XMLin($response->content);
+
+    # XMLin may hand back a plain scalar for a text-only document; only look
+    # for the status when a hash was returned.
+    my $status = '';
+    if (ref $message eq 'HASH' && defined $message->{'status'}) {
+        $status = $message->{'status'};
+    }
+
+    # A non-OK reply used to be ignored silently, leaving the session empty
+    # with no indication of why later commands fail.
+    unless ($status eq 'OK') {
+        warn "PazPar2 init failed: status '$status'";
+        return;
+    }
+
+    $self->{'session'} = $message->{'session'};
+    return 1;
+}
+
+# search - submit a query to the current session.
+#
+# Sends command=search with the stored session ID and the given query string.
+# The response body is not used; a failed request only produces a warning
+# carrying the HTTP status line.
+sub search {
+    my ($self, $query) = @_;
+
+    my $uri = URI->new($self->{'endpoint'});
+    $uri->query_param(command => 'search');
+    $uri->query_param(session => $self->{'session'});
+    $uri->query_param(query   => $query);
+
+    my $response = $self->{'ua'}->get($uri);
+    warn $response->status_line unless $response->is_success;
+}
+
+# stat - request statistics for the current session.
+#
+# Sends command=stat with the stored session ID.  Returns the response
+# content on success; on failure warns with the HTTP status line and returns
+# nothing.
+sub stat {
+    my ($self) = @_;
+
+    my $uri = URI->new($self->{'endpoint'});
+    $uri->query_param(command => 'stat');
+    $uri->query_param(session => $self->{'session'});
+
+    my $response = $self->{'ua'}->get($uri);
+    unless ($response->is_success) {
+        warn $response->status_line;
+        return;
+    }
+
+    return $response->content;
+}
+
+# show - retrieve a slice of the hit list for the current session.
+#
+# $start and $count are sent as the 'start' and 'num' parameters of
+# command=show, together with block=1 and the stored session ID.  Returns the
+# response content on success; on failure warns with the HTTP status line and
+# returns nothing.
+sub show {
+    my ($self, $start, $count) = @_;
+
+    my $uri = URI->new($self->{'endpoint'});
+    $uri->query_param(command => 'show');
+    $uri->query_param(start   => $start);
+    $uri->query_param(num     => $count);
+    $uri->query_param(block   => 1);
+    $uri->query_param(session => $self->{'session'});
+
+    my $response = $self->{'ua'}->get($uri);
+    return $response->content if $response->is_success;
+
+    warn $response->status_line;
+    return;
+}
+
+# record - fetch one record from the current session.
+#
+# $id and $offset are sent as the 'id' and 'offset' parameters of
+# command=record, together with binary=1 and the stored session ID.  Returns
+# the response content on success; on failure warns with the HTTP status line
+# and returns nothing.
+sub record {
+    my ($self, $id, $offset) = @_;
+
+    my $uri = URI->new($self->{'endpoint'});
+    $uri->query_param(command => 'record');
+    $uri->query_param(id      => $id);
+    $uri->query_param(offset  => $offset);
+    $uri->query_param(binary  => 1);
+    $uri->query_param(session => $self->{'session'});
+
+    my $response = $self->{'ua'}->get($uri);
+    return $response->content if $response->is_success;
+
+    warn $response->status_line;
+    return;
+}
+
+# termlist - request term lists for the current session.
+#
+# $name is sent as the 'name' parameter of command=termlist along with the
+# stored session ID.  Returns the response content on success; on failure
+# warns with the HTTP status line and returns nothing.
+sub termlist {
+    my ($self, $name) = @_;
+
+    my $uri = URI->new($self->{'endpoint'});
+    $uri->query_param(command => 'termlist');
+    $uri->query_param(name    => $name);
+    $uri->query_param(session => $self->{'session'});
+
+    my $response = $self->{'ua'}->get($uri);
+    unless ($response->is_success) {
+        warn $response->status_line;
+        return;
+    }
+
+    return $response->content;
+}
+
+1;
+
+=head1 AUTHOR
+
+Koha Development Team <info@koha.org>
+
+Galen Charlton <galen.charlton@liblime.com>
+
+=cut
@@ -0,0 +1,30 @@
+<settings target="localhost:10005/biblios">
+
+ <!-- Name given to this target -->
+ <set name="pz:name" value="Koha bib database"/>
+
+ <!-- This file introduces default settings for pazpar2 -->
+ <!-- $Id: loc.xml,v 1.2 2007-07-10 13:43:07 adam Exp $ -->
+ <!-- NOTE(review): the Id line above names loc.xml; presumably this file
+      was adapted from it - confirm -->
+
+ <!-- mapping for unqualified search -->
+ <set name="pz:cclmap:term" value="u=1016 t=l,r s=al"/>
+
+ <!-- field-specific mappings -->
+ <!-- NOTE(review): the u= values look like Bib-1 use attributes for the
+      au, ti, su, isbn, issn and date qualifiers - verify against the
+      target's index configuration -->
+
+ <set name="pz:cclmap:au" value="u=1004 s=al"/>
+ <set name="pz:cclmap:ti" value="u=4 s=al"/>
+ <set name="pz:cclmap:su" value="u=21 s=al"/>
+ <set name="pz:cclmap:isbn" value="u=7"/>
+ <set name="pz:cclmap:issn" value="u=8"/>
+ <set name="pz:cclmap:date" value="u=30 r=r"/>
+
+ <!-- Retrieval settings -->
+ <!-- Request syntax is marc21 with element set F -->
+
+ <set name="pz:requestsyntax" value="marc21"/>
+ <set name="pz:elements" value="F"/>
+
+ <!-- Result normalization settings -->
+ <!-- Native syntax is iso2709; the stylesheet applied is marc21.xsl -->
+
+ <set name="pz:nativesyntax" value="iso2709"/>
+ <set name="pz:xslt" value="marc21.xsl"/>
+
+</settings>
Oops, something went wrong.

0 comments on commit d92eb03

Please sign in to comment.