Permalink
Browse files

Improve handling of overly long leaders as sometimes happens with MARCXML

Rather than generating MARC records with correspondingly overly long
leaders, which greatly confuses many other MARC parsing tools, truncate
the leader at 24 chars and move on. The leader is mangled and there's not
much more we can do about it, but at least there's a chance the directory
can be salvaged.


git-svn-id: https://svn.php.net/repository/pear/packages/File_MARC/trunk@308146 c90b9560-bf6c-de11-be94-00142212c4b1
  • Loading branch information...
1 parent 007c247 commit ceeee5ad57eee544d2253cb57bd0b6357447be3c Dan Scott committed Feb 8, 2011
Showing with 98 additions and 11 deletions.
  1. +13 −7 File/MARC/Record.php
  2. +5 −4 package.xml
  3. +64 −0 tests/bad_leader.xml
  4. +16 −0 tests/marc_xml_009.phpt
View
@@ -104,7 +104,7 @@ class File_MARC_Record
function __construct($marc = null)
{
$this->fields = new File_MARC_List();
- $this->leader = str_repeat(' ', 24);
+ $this->setLeader(str_repeat(' ', 24));
if (!$marc) {
$marc = new File_MARC(null, File_MARC::SOURCE_STRING); // oh the hack
}
@@ -320,10 +320,16 @@ function setLeaderLengths($record_length, $base_address)
}
// Set record length
- $this->leader = substr_replace($this->leader, sprintf("%05d", $record_length), 0, 5);
- $this->leader = substr_replace($this->leader, sprintf("%05d", $base_address), 12, 5);
- $this->leader = substr_replace($this->leader, '22', 10, 2);
- $this->leader = substr_replace($this->leader, '4500', 20, 4);
+ $this->setLeader(substr_replace($this->getLeader(), sprintf("%05d", $record_length), 0, 5));
+ $this->setLeader(substr_replace($this->getLeader(), sprintf("%05d", $base_address), File_MARC::DIRECTORY_ENTRY_LEN, 5));
+ $this->setLeader(substr_replace($this->getLeader(), '22', 10, 2));
+ $this->setLeader(substr_replace($this->getLeader(), '4500', 20, 4));
+
+ if (strlen($this->getLeader()) > File_MARC::LEADER_LEN) {
+ // Avoid incoming leaders that are mangled to be overly long
+ $this->setLeader(substr($this->getLeader(), 0, File_MARC::LEADER_LEN));
+ $this->addWarning("Input leader was too long; truncated to " . File_MARC::LEADER_LEN . " characters");
+ }
return true;
}
// }}}
@@ -473,7 +479,7 @@ function toRaw()
/**
* Glue together all parts
*/
- return $this->leader.implode("", $directory).File_MARC::END_OF_FIELD.implode("", $fields).File_MARC::END_OF_RECORD;
+ return $this->getLeader().implode("", $directory).File_MARC::END_OF_FIELD.implode("", $fields).File_MARC::END_OF_RECORD;
}
// }}}
@@ -492,7 +498,7 @@ function toRaw()
function __toString()
{
// Begin output
- $formatted = "LDR " . $this->leader . "\n";
+ $formatted = "LDR " . $this->getLeader() . "\n";
foreach ($this->fields as $field) {
if (!$field->isEmpty()) {
$formatted .= $field->__toString() . "\n";
View
@@ -19,9 +19,9 @@ This package is based on the PHP MARC package, originally called "php-marc", tha
<email>dbs@php.net</email>
<active>yes</active>
</lead>
- <date>2010-08-15</date>
+ <date>2011-02-08</date>
<version>
- <release>0.6.1</release>
+ <release>0.6.2</release>
<api>0.6.0</api>
</version>
<stability>
@@ -30,8 +30,8 @@ This package is based on the PHP MARC package, originally called "php-marc", tha
</stability>
<license uri="http://www.gnu.org/copyleft/lesser.html">GNU Lesser General Public License</license>
<notes>
-0.6.1-beta
- * Correct layout per bug #17704
+0.6.2-beta
+ * Improve handling of bad leader data, including declared length and overly long leaders in MARCXML
</notes>
<contents>
<dir name="/" baseinstalldir="File">
@@ -95,6 +95,7 @@ This package is based on the PHP MARC package, originally called "php-marc", tha
<file name="marc_xml_006.phpt" role="test" />
<file name="marc_xml_007.phpt" role="test" />
<file name="marc_xml_008.phpt" role="test" />
+ <file name="marc_xml_009.phpt" role="test" />
<file name="marc_xml_16642.phpt" role="test" />
<file name="marc_xml_rsinger.phpt" role="test" />
<file name="skipif.inc" role="test" />
View
@@ -0,0 +1,64 @@
+<record xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.loc.gov/MARC21/slim" xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">
+ <leader>00711cam a2200229 45004500</leader>
+ <controlfield tag="001">LIBN539044247</controlfield>
+ <controlfield tag="003">OCoLC</controlfield>
+ <controlfield tag="005">20081030150430.0</controlfield>
+ <controlfield tag="008">070630||||| ||| 000 0 eng d</controlfield>
+ <datafield tag="020" ind1=" " ind2=" ">
+ <subfield code="a">9781856075442</subfield>
+ </datafield>
+ <datafield tag="020" ind1=" " ind2=" ">
+ <subfield code="a">1856075443</subfield>
+ </datafield>
+ <datafield tag="035" ind1=" " ind2=" ">
+ <subfield code="a">(OCoLC)156822300</subfield>
+ </datafield>
+ <datafield tag="040" ind1=" " ind2=" ">
+ <subfield code="a">BTCTA</subfield>
+ <subfield code="c">BTCTA</subfield>
+ <subfield code="d">YDXCP</subfield>
+ <subfield code="d">BAKER</subfield>
+ <subfield code="d">EMT</subfield>
+ </datafield>
+ <datafield tag="050" ind1=" " ind2="4">
+ <subfield code="a">BL2747.2</subfield>
+ <subfield code="b">.W45 2006</subfield>
+ </datafield>
+ <datafield tag="100" ind1="1" ind2=" ">
+ <subfield code="a">White, Stephen Ross.</subfield>
+ </datafield>
+ <datafield tag="245" ind1="1" ind2="0">
+ <subfield code="a">Space for unknowing :</subfield>
+ <subfield code="b">the place of agnosis in faith /</subfield>
+ <subfield code="c">Stephen R. White.</subfield>
+ </datafield>
+ <datafield tag="260" ind1=" " ind2=" ">
+ <subfield code="a">Dublin :</subfield>
+ <subfield code="b">Columba Press,</subfield>
+ <subfield code="c">c2006.</subfield>
+ </datafield>
+ <datafield tag="300" ind1=" " ind2=" ">
+ <subfield code="a">160 p. ;</subfield>
+ <subfield code="c">22 cm.</subfield>
+ </datafield>
+ <datafield tag="504" ind1=" " ind2=" ">
+ <subfield code="a">Includes bibliographical references.</subfield>
+ </datafield>
+ <datafield tag="650" ind1=" " ind2="0">
+ <subfield code="a">Agnosticism.</subfield>
+ </datafield>
+ <datafield tag="650" ind1=" " ind2="0">
+ <subfield code="a">Belief and doubt.</subfield>
+ </datafield>
+ <datafield tag="852" ind1=" " ind2=" ">
+ <subfield code="a">1</subfield>
+ <subfield code="h">230 WHI</subfield>
+ <subfield code="p">11111027105040</subfield>
+ <subfield code="t">6511254</subfield>
+ <subfield code="9">p26.95</subfield>
+ </datafield>
+ <datafield tag="901" ind1=" " ind2=" ">
+ <subfield code="a">LIBN539044247</subfield>
+ <subfield code="b">System</subfield>
+ </datafield>
+</record>
View
@@ -0,0 +1,16 @@
+--TEST--
+marc_xml_009: convert a MARCXML record with an overly long leader to MARC
+--SKIPIF--
+<?php include('skipif.inc'); ?>
+--FILE--
+<?php
+$dir = dirname(__FILE__);
+require 'File/MARCXML.php';
+$marc_file = new File_MARCXML($dir . '/' . 'bad_leader.xml');
+
+while ($marc_record = $marc_file->next()) {
+ print $marc_record->toRaw();
+}
+?>
+--EXPECT--
+00749cam a2200241 454500001001400000003000600014005001700020008004100037020001800078020001500096035002100111040003600132050002400168100002500192245007800217260003700295300002100332504004100353650001700394650002200411852004800433901002600481LIBN539044247OCoLC20081030150430.0070630||||| ||| 000 0 eng d a9781856075442 a1856075443 a(OCoLC)156822300 aBTCTAcBTCTAdYDXCPdBAKERdEMT 4aBL2747.2b.W45 20061 aWhite, Stephen Ross.10aSpace for unknowing :bthe place of agnosis in faith /cStephen R. White. aDublin :bColumba Press,cc2006. a160 p. ;c22 cm. aIncludes bibliographical references. 0aAgnosticism. 0aBelief and doubt. a1h230 WHIp11111027105040t65112549p26.95 aLIBN539044247bSystem

0 comments on commit ceeee5a

Please sign in to comment.