/
stage_biblios_file.pl
executable file
·160 lines (138 loc) · 5.23 KB
/
stage_biblios_file.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/env perl
use strict;
BEGIN {
# find Koha's Perl modules
# test carefully before changing this
use FindBin;
eval { require "$FindBin::Bin/kohalib.pl" };
}
use Koha;
use C4::Context;
use C4::ImportBatch;
use C4::Matcher;
use Getopt::Long;
# Main script: parse the command line, validate the input file, then stage
# the file inside a single manually-committed database session.

# Turn on autoflush for the currently selected output handle so progress
# lines show up as they are printed.
$| = 1;

# command-line parameters
my $match_bibs    = 0;
my $add_items     = 0;
my $input_file    = "";
my $batch_comment = "";
my $want_help     = 0;
my $no_replace;    # read directly by process_batch (not passed as an argument)

my $result = GetOptions(
    'file:s'       => \$input_file,
    'match-bibs:s' => \$match_bibs,
    'add-items'    => \$add_items,
    'no-replace'   => \$no_replace,
    'comment:s'    => \$batch_comment,
    'h|help'       => \$want_help
);

# An explicit request for help is a successful run.
if ($want_help) {
    print_usage();
    exit 0;
}

# Unparseable options or a missing --file are usage errors: still show the
# usage text, but exit non-zero so calling scripts can detect the failure.
if (not $result or $input_file eq "") {
    print_usage();
    exit 1;
}

unless (-r $input_file) {
    die "$0: cannot open input file $input_file: $!\n";
}

# $dbh is file-scoped because print_progress_and_commit commits through it.
my $dbh = C4::Context->dbh;
$dbh->{AutoCommit} = 0;
process_batch($input_file, $match_bibs, $add_items, $batch_comment);
$dbh->commit();

exit 0;
# Read every MARC record from $input_file, stage the records as an import
# batch, optionally search for matches against existing bibs, and print a
# summary report to the selected output handle.
#
# Parameters:
#   $input_file    - path of the MARC file to read
#   $match_bibs    - matcher id handed to C4::Matcher->fetch; a false value
#                    skips the matching step entirely
#   $add_items     - passed through to BatchStageMarcRecords; also selects
#                    which item line the report prints
#   $batch_comment - passed through to BatchStageMarcRecords
#
# Also reads the file-level $no_replace flag to choose the overlay action.
# Dies if the input file cannot be opened.
sub process_batch {
    my ($input_file, $match_bibs, $add_items, $batch_comment) = @_;

    # Three-argument open with a lexical handle: the file name is taken
    # literally, so mode characters or surrounding whitespace in it are never
    # interpreted by open.
    open my $in_fh, '<', $input_file
        or die "$0: cannot open input file $input_file: $!\n";

    # Records are read one at a time using "\035" as the input record
    # separator. The change is localized so the caller's separator is restored
    # when this sub returns; it stays in effect for the helper calls below,
    # exactly as it did before.
    local $/ = "\035";

    my $marc_records      = "";
    my $num_input_records = 0;
    while (my $record = <$in_fh>) {
        $record =~ s/^\s+//;
        $record =~ s/\s+$//;
        next unless $record;    # skip if record has only whitespace, as might occur
                                # if file includes newlines between each MARC record
        $marc_records .= $record;    # FIXME - this sort of string concatenation
                                     # is probably rather inefficient
        $num_input_records++;
    }
    close $in_fh;

    my $marc_flavor = C4::Context->preference('marcflavour');

    print "... staging MARC records -- please wait\n";
    # NOTE(review): the meaning of the positional arguments ('', 0, [], [], 100)
    # is defined by C4::ImportBatch::BatchStageMarcRecords - confirm there.
    my ($batch_id, $num_valid, $num_items, @import_errors) =
        BatchStageMarcRecords($marc_flavor, $marc_records, $input_file, $batch_comment, '', $add_items, 0,
                              [], [], 100, \&print_progress_and_commit);
    print "... finished staging MARC records\n";

    my $num_with_matches = 0;
    if ($match_bibs) {
        my $matcher = C4::Matcher->fetch($match_bibs);
        if (! defined $matcher) {
            # No stored matcher for that id: build an in-memory default rule
            # (matchpoint on 020$a, required check on 245$a).
            $matcher = C4::Matcher->new('biblio');
            $matcher->add_simple_matchpoint('isbn', 1000, '020', 'a', -1, 0, '');
            $matcher->add_simple_required_check('245', 'a', -1, 0, '',
                                                '245', 'a', -1, 0, '');
        }
        else {
            SetImportBatchMatcher($batch_id, $match_bibs);
        }
        # set default record overlay behavior
        SetImportBatchOverlayAction($batch_id, ($no_replace) ? 'ignore' : 'replace');
        SetImportBatchNoMatchAction($batch_id, 'create_new');
        SetImportBatchItemAction($batch_id, 'always_add');
        print "... looking for matches with records already in database\n";
        $num_with_matches = BatchFindBibDuplicates($batch_id, $matcher, 10, 100, \&print_progress_and_commit);
        print "... finished looking for matches\n";
    }

    # Every entry returned after the first three values counts as one invalid bib.
    my $num_invalid_bibs = scalar(@import_errors);
    print <<"_SUMMARY_";
MARC record staging report
------------------------------------
Input file: $input_file
Number of input bibs: $num_input_records
Number of valid bibs: $num_valid
Number of invalid bibs: $num_invalid_bibs
_SUMMARY_
    if ($match_bibs) {
        print "Number of bibs matched: $num_with_matches\n";
    }
    else {
        print "Incoming bibs not matched against existing bibs (--match-bibs option not supplied)\n";
    }
    if ($add_items) {
        print "Number of items parsed: $num_items\n";
    }
    else {
        print "No items parsed (--add-items option not supplied)\n";
    }
    print "\n";
    print "Batch number assigned: $batch_id\n";
    print "\n";
}
# Progress callback: commit the pending work on the file-level database
# handle, then report how many records have been processed so far.
#
# Parameters:
#   first argument - the record count to show in the progress line
sub print_progress_and_commit {
    my ($processed_so_far) = @_;

    $dbh->commit();
    print '... processed ' . $processed_so_far . " records\n";
}
# Print the command-line help text, prefixed with the program name ($0),
# to the currently selected output handle.
sub print_usage {
    my $usage_text = <<"_USAGE_";
$0: stage MARC bib file into reservoir.
Use this batch job to load a file of MARC bibliographic records
(with optional item information) into the Koha reservoir.
After running this program to stage your file, you can use
either the batch job commit_biblios_file.pl or the Koha
Tools option "Manage Staged MARC Records" to load the
records into the main Koha database.
Parameters:
--file <file_name> name of input MARC bib file
--match-bibs <match_id> use this option to match bibs
in the file with bibs already in
the database for future overlay.
If <match_id> isn't defined, a default
MARC21 ISBN & title match rule will be applied.
--add-items use this option to specify that
item data is embedded in the MARC
bibs and should be parsed.
--no-replace overlay action for bib record: default is to
replace extant bib with the imported record.
--comment <comment> optional comment to describe
the record batch; if the comment
has spaces in it, surround the
comment with quotation marks.
--help or -h show this message.
_USAGE_
    print $usage_text;
}