Skip to content
Fetching contributors…
Cannot retrieve contributors at this time
executable file 115 lines (107 sloc) 4.3 KB
#!/usr/bin/perl -w
#-----------------------------------
# Script Name: build_marc_word.pl
# Script Version: 0.1.0
# Date: 2004/06/05
# Author: Joshua Ferraro [jmf at kados dot org]
# Description: This script builds a new marc_word
# table with a reduced number of tags (only those
# tags that should be searched) allowing for
# faster and more accurate searching when used
# with the SearchMarc routines. Make sure that
# the MARCaddword routine in Biblio.pm will index
# words of 1 or more characters; otherwise, searches
# like "O'brian, Patrick" will fail because the search
# routines separate that query into "o", "brian",
# and "patrick". (If "o" is not in the database
# the search will fail.)
# Usage: build_marc_word.pl
# Revision History:
# 0.1.0 2004/06/11: first working version.
# Thanks to Chris Cormack
# for helping with the $data object
# and Stephen Hedges for providing
# the list of MARC tags.
# FixMe:
# *Should add a few parameters like 'delete from
# marc_word' or make script ask user whether to
# perform that task ...
# *Add a 'status' report as the data is loaded ...
#-----------------------------------
use lib '/usr/local/koha/intranet/modules/';
use strict;
use C4::Context;
use C4::Biblio;
# Database handle supplied by C4::Context; used for every query below.
my $dbh = C4::Context->dbh();
# List of tag/subfield pairs to index. Each entry is the numeric MARC tag
# immediately followed by the subfield code (e.g. '245a' = tag 245,
# subfield a). The defaults below should suit MARC21, but you may need to
# add holdings tags specific to your library (e.g., the holding branch for
# Nelsonville is 942k, which may not be the case for your library).
#
# Tag documentation: http://lcweb.loc.gov/marc/bibliographic/ecbdhome.html
my @tags = (
    # --- Standard numbers ---
    '020a',    # International Standard Book Number
    '022a',    # International Standard Serial Number

    # --- Main entries ---
    '100a',    # Main entry: personal name
    '110a',    # Main entry: corporate name
    '110b',    #   subordinate unit
    '110c',    #   location of meeting
    '111a',    # Main entry: meeting name
    '111c',    #   location of meeting
    '130a',    # Main entry: uniform title

    # --- Titles and imprint ---
    '240a',    # Uniform title
    '245a',    # Title statement
    '245b',    #   remainder of title
    '245c',    #   statement of responsibility, etc.
    '245p',    #   name of part/section of a work
    '246a',    # Varying form of title
    '246b',    #   remainder of title
    '260b',    # Publication, distribution, etc. (imprint)

    # --- Series statement ---
    '440a',    # Series statement/added entry: title
    '440p',    #   name of part/section of a work

    # --- Notes ---
    '500a',    # General note
    '505t',    # Formatted contents note (t is title)
    '511a',    # Participant or performer note
    '520a',    # Summary, etc.
    '534a',    # Original version note
    '534k',    #   key title of original
    '534t',    #   title statement of original
    '586a',    # Awards note

    # --- Subject added entries ---
    '600a',    # Subject: personal name
    '610a',    # Subject: corporate name
    '611a',    # Subject: meeting name
    '630a',    # Subject: uniform title
    '650a',    # Subject: topical term
    '651a',    # Subject: geographic name

    # --- Added entries ---
    '700a',    # Added entry: personal name
    '710a',    # Added entry: corporate name
    '711a',    # Added entry: meeting name
    '720a',    # Added entry: uncontrolled name
    '730a',    # Added entry: uniform title
    '740a',    # Added entry: uncontrolled related/analytical title
    '752a',    # Added entry: hierarchical place name

    # --- Series added entries ---
    '800a',    # Series added entry: personal name
    '810a',    # Series added entry: corporate name
    '811a',    # Series added entry: meeting name
    '830a',    # Series added entry: uniform title

    # --- Local holdings ---
    '942k',    # Holdings branch ?? Unique to NPL??
);
#note that subfieldcode in marc_subfield_table is subfieldid in marc_word ... even
#though there is another subfieldid in marc_subfield_table--very confusing naming conventions!
#For each tag we run a search to find the necessary data for building the marc_word table
# Populate marc_word: for every tag/subfield pair in @tags, select the
# matching rows from marc_subfield_table and hand each row to MARCaddword
# (from Biblio.pm), which performs the actual import into marc_word.

# The SELECT is the same for every tag, so prepare it once rather than once
# per loop iteration.
my $select_sth = $dbh->prepare(
    'SELECT bibid,tag,tagorder,subfieldcode,subfieldorder,subfieldvalue '
  . 'FROM marc_subfield_table WHERE tag=? AND subfieldcode=?'
) or die "Unable to prepare marc_subfield_table query: " . $dbh->errstr . "\n";

TAG:
foreach my $this_tagid (@tags) {
    # Split e.g. "245a" into tag "245" and subfield code "a". A capturing
    # match is used (not s///) so the entry in @tags is left unmodified.
    my ($tag, $subfieldid) = $this_tagid =~ /\A(\d+)(\D+)\z/;

    # A malformed entry would otherwise bind undef placeholders and quietly
    # match nothing; report it so the configuration mistake is visible.
    unless (defined $tag && defined $subfieldid) {
        warn "Skipping malformed tag specification '$this_tagid'\n";
        next TAG;
    }

    # Keep going with the remaining tags if one query fails.
    unless ($select_sth->execute($tag, $subfieldid)) {
        warn "Query failed for tag '$this_tagid': " . $select_sth->errstr . "\n";
        next TAG;
    }

    # Pass every matching subfield row on to MARCaddword.
    while (my $data = $select_sth->fetchrow_hashref()) {
        MARCaddword(
            $dbh,
            $data->{'bibid'},
            $data->{'tag'},
            $data->{'tagorder'},
            $data->{'subfieldcode'},
            $data->{'subfieldorder'},
            $data->{'subfieldvalue'},
        );
    }
}

$dbh->disconnect();
Jump to Line
Something went wrong with that request. Please try again.