Skip to content

Commit

Permalink
Improve handling of overly long leaders as sometimes happens with MAR…
Browse files Browse the repository at this point in the history
…CXML

Rather than generating MARC records with correspondingly overly long
leaders, which greatly confuses many other MARC parsing tools, truncate
the leader at 24 characters and move on. The leader is already mangled and
there is not much more we can do about it, but at least there's a chance
the directory can be salvaged.


git-svn-id: https://svn.php.net/repository/pear/packages/File_MARC/trunk@308146 c90b9560-bf6c-de11-be94-00142212c4b1
  • Loading branch information
Dan Scott committed Feb 8, 2011
1 parent 007c247 commit ceeee5a
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 11 deletions.
20 changes: 13 additions & 7 deletions File/MARC/Record.php
Expand Up @@ -104,7 +104,7 @@ class File_MARC_Record
function __construct($marc = null) function __construct($marc = null)
{ {
$this->fields = new File_MARC_List(); $this->fields = new File_MARC_List();
$this->leader = str_repeat(' ', 24); $this->setLeader(str_repeat(' ', 24));
if (!$marc) { if (!$marc) {
$marc = new File_MARC(null, File_MARC::SOURCE_STRING); // oh the hack $marc = new File_MARC(null, File_MARC::SOURCE_STRING); // oh the hack
} }
Expand Down Expand Up @@ -320,10 +320,16 @@ function setLeaderLengths($record_length, $base_address)
} }


// Set record length // Set record length
$this->leader = substr_replace($this->leader, sprintf("%05d", $record_length), 0, 5); $this->setLeader(substr_replace($this->getLeader(), sprintf("%05d", $record_length), 0, 5));
$this->leader = substr_replace($this->leader, sprintf("%05d", $base_address), 12, 5); $this->setLeader(substr_replace($this->getLeader(), sprintf("%05d", $base_address), File_MARC::DIRECTORY_ENTRY_LEN, 5));
$this->leader = substr_replace($this->leader, '22', 10, 2); $this->setLeader(substr_replace($this->getLeader(), '22', 10, 2));
$this->leader = substr_replace($this->leader, '4500', 20, 4); $this->setLeader(substr_replace($this->getLeader(), '4500', 20, 4));

if (strlen($this->getLeader()) > File_MARC::LEADER_LEN) {
// Avoid incoming leaders that are mangled to be overly long
$this->setLeader(substr($this->getLeader(), 0, File_MARC::LEADER_LEN));
$this->addWarning("Input leader was too long; truncated to " . File_MARC::LEADER_LEN . " characters");
}
return true; return true;
} }
// }}} // }}}
Expand Down Expand Up @@ -473,7 +479,7 @@ function toRaw()
/** /**
* Glue together all parts * Glue together all parts
*/ */
return $this->leader.implode("", $directory).File_MARC::END_OF_FIELD.implode("", $fields).File_MARC::END_OF_RECORD; return $this->getLeader().implode("", $directory).File_MARC::END_OF_FIELD.implode("", $fields).File_MARC::END_OF_RECORD;
} }
// }}} // }}}


Expand All @@ -492,7 +498,7 @@ function toRaw()
function __toString() function __toString()
{ {
// Begin output // Begin output
$formatted = "LDR " . $this->leader . "\n"; $formatted = "LDR " . $this->getLeader() . "\n";
foreach ($this->fields as $field) { foreach ($this->fields as $field) {
if (!$field->isEmpty()) { if (!$field->isEmpty()) {
$formatted .= $field->__toString() . "\n"; $formatted .= $field->__toString() . "\n";
Expand Down
9 changes: 5 additions & 4 deletions package.xml
Expand Up @@ -19,9 +19,9 @@ This package is based on the PHP MARC package, originally called "php-marc", tha
<email>dbs@php.net</email> <email>dbs@php.net</email>
<active>yes</active> <active>yes</active>
</lead> </lead>
<date>2010-08-15</date> <date>2011-02-08</date>
<version> <version>
<release>0.6.1</release> <release>0.6.2</release>
<api>0.6.0</api> <api>0.6.0</api>
</version> </version>
<stability> <stability>
Expand All @@ -30,8 +30,8 @@ This package is based on the PHP MARC package, originally called "php-marc", tha
</stability> </stability>
<license uri="http://www.gnu.org/copyleft/lesser.html">GNU Lesser General Public License</license> <license uri="http://www.gnu.org/copyleft/lesser.html">GNU Lesser General Public License</license>
<notes> <notes>
0.6.1-beta 0.6.2-beta
* Correct layout per bug #17704 * Improve handling of bad leader data, including declared length and overly long leaders in MARCXML
</notes> </notes>
<contents> <contents>
<dir name="/" baseinstalldir="File"> <dir name="/" baseinstalldir="File">
Expand Down Expand Up @@ -95,6 +95,7 @@ This package is based on the PHP MARC package, originally called "php-marc", tha
<file name="marc_xml_006.phpt" role="test" /> <file name="marc_xml_006.phpt" role="test" />
<file name="marc_xml_007.phpt" role="test" /> <file name="marc_xml_007.phpt" role="test" />
<file name="marc_xml_008.phpt" role="test" /> <file name="marc_xml_008.phpt" role="test" />
<file name="marc_xml_009.phpt" role="test" />
<file name="marc_xml_16642.phpt" role="test" /> <file name="marc_xml_16642.phpt" role="test" />
<file name="marc_xml_rsinger.phpt" role="test" /> <file name="marc_xml_rsinger.phpt" role="test" />
<file name="skipif.inc" role="test" /> <file name="skipif.inc" role="test" />
Expand Down
64 changes: 64 additions & 0 deletions tests/bad_leader.xml
@@ -0,0 +1,64 @@
<record xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.loc.gov/MARC21/slim" xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">
<leader>00711cam a2200229 45004500</leader>
<controlfield tag="001">LIBN539044247</controlfield>
<controlfield tag="003">OCoLC</controlfield>
<controlfield tag="005">20081030150430.0</controlfield>
<controlfield tag="008">070630||||| ||| 000 0 eng d</controlfield>
<datafield tag="020" ind1=" " ind2=" ">
<subfield code="a">9781856075442</subfield>
</datafield>
<datafield tag="020" ind1=" " ind2=" ">
<subfield code="a">1856075443</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">(OCoLC)156822300</subfield>
</datafield>
<datafield tag="040" ind1=" " ind2=" ">
<subfield code="a">BTCTA</subfield>
<subfield code="c">BTCTA</subfield>
<subfield code="d">YDXCP</subfield>
<subfield code="d">BAKER</subfield>
<subfield code="d">EMT</subfield>
</datafield>
<datafield tag="050" ind1=" " ind2="4">
<subfield code="a">BL2747.2</subfield>
<subfield code="b">.W45 2006</subfield>
</datafield>
<datafield tag="100" ind1="1" ind2=" ">
<subfield code="a">White, Stephen Ross.</subfield>
</datafield>
<datafield tag="245" ind1="1" ind2="0">
<subfield code="a">Space for unknowing :</subfield>
<subfield code="b">the place of agnosis in faith /</subfield>
<subfield code="c">Stephen R. White.</subfield>
</datafield>
<datafield tag="260" ind1=" " ind2=" ">
<subfield code="a">Dublin :</subfield>
<subfield code="b">Columba Press,</subfield>
<subfield code="c">c2006.</subfield>
</datafield>
<datafield tag="300" ind1=" " ind2=" ">
<subfield code="a">160 p. ;</subfield>
<subfield code="c">22 cm.</subfield>
</datafield>
<datafield tag="504" ind1=" " ind2=" ">
<subfield code="a">Includes bibliographical references.</subfield>
</datafield>
<datafield tag="650" ind1=" " ind2="0">
<subfield code="a">Agnosticism.</subfield>
</datafield>
<datafield tag="650" ind1=" " ind2="0">
<subfield code="a">Belief and doubt.</subfield>
</datafield>
<datafield tag="852" ind1=" " ind2=" ">
<subfield code="a">1</subfield>
<subfield code="h">230 WHI</subfield>
<subfield code="p">11111027105040</subfield>
<subfield code="t">6511254</subfield>
<subfield code="9">p26.95</subfield>
</datafield>
<datafield tag="901" ind1=" " ind2=" ">
<subfield code="a">LIBN539044247</subfield>
<subfield code="b">System</subfield>
</datafield>
</record>
16 changes: 16 additions & 0 deletions tests/marc_xml_009.phpt
@@ -0,0 +1,16 @@
--TEST--
marc_xml_009: convert a MARCXML record with an overly long leader to MARC
--SKIPIF--
<?php include('skipif.inc'); ?>
--FILE--
<?php
// Read the MARCXML fixture that sits next to this test and print every
// record it yields in raw MARC form, so the output can be compared with
// the EXPECT section below.
$fixture_dir = dirname(__FILE__);
require 'File/MARCXML.php';
$reader = new File_MARCXML($fixture_dir . '/' . 'bad_leader.xml');

// next() is called until it returns a falsy value, which ends the loop.
for ($record = $reader->next(); $record; $record = $reader->next()) {
    echo $record->toRaw();
}
?>
--EXPECT--
00749cam a2200241 454500001001400000003000600014005001700020008004100037020001800078020001500096035002100111040003600132050002400168100002500192245007800217260003700295300002100332504004100353650001700394650002200411852004800433901002600481LIBN539044247OCoLC20081030150430.0070630||||| ||| 000 0 eng d a9781856075442 a1856075443 a(OCoLC)156822300 aBTCTAcBTCTAdYDXCPdBAKERdEMT 4aBL2747.2b.W45 20061 aWhite, Stephen Ross.10aSpace for unknowing :bthe place of agnosis in faith /cStephen R. White. aDublin :bColumba Press,cc2006. a160 p. ;c22 cm. aIncludes bibliographical references. 0aAgnosticism. 0aBelief and doubt. a1h230 WHIp11111027105040t65112549p26.95 aLIBN539044247bSystem

0 comments on commit ceeee5a

Please sign in to comment.