Skip to content

Commit

Permalink
fix CPAN RT bug 71711
Browse files Browse the repository at this point in the history
  • Loading branch information
tla committed Oct 16, 2011
1 parent a950fda commit 75732dd
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 8 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
*~
*.bbprojectd
catalyst/lib/Text
26 changes: 18 additions & 8 deletions cpan/Text-TEI-Collate/lib/Text/TEI/Collate/Manuscript.pm
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package Text::TEI::Collate::Manuscript;

use vars qw( $VERSION %assigned_sigla );
use vars qw( $VERSION %assigned_sigla %tags );
use Moose;
use Moose::Util::TypeConstraints;
use Text::TEI::Collate::Word;
Expand Down Expand Up @@ -116,17 +116,26 @@ sub _init_from_xmldesc {
warn "Manuscript initialization needs a TEI document!";
return;
}
# Get the identifier

# Set up the tags we need, with or without namespaces.
map { $tags{$_} = "//$_" } qw/ msDesc settlement repository idno p lg /;
# Set up our XPath object
my $xpc = XML::LibXML::XPathContext->new( $xmlobj );
$xpc->registerNs( 'tei', $xmlobj->namespaceURI );
# Use namespace-aware tags if we have to
if( $xmlobj->namespaceURI ) {
$xpc->registerNs( 'tei', $xmlobj->namespaceURI );
map { $tags{$_} = "//tei:$_" } keys %tags;
}
$self->_set_xpc( $xpc );
if( my $desc = $xpc->find( '//tei:msDesc' ) ) {

# Get the identifier
if( my $desc = $xpc->find( $tags{msDesc} ) ) {
my $descnode = $desc->get_node(1);
$self->_save_msdesc( $descnode );
my( $setNode, $reposNode, $idNode ) =
( $xpc->find( '//tei:settlement' )->get_node(1),
$xpc->find( '//tei:repository' )->get_node(1),
$xpc->find( '//tei:idno' )->get_node(1) );
( $xpc->find( $tags{settlement} )->get_node(1),
$xpc->find( $tags{repository} )->get_node(1),
$xpc->find( $tags{idno} )->get_node(1) );
$self->settlement( $setNode ? $setNode->textContent : '' );
$self->repository( $reposNode ? $reposNode->textContent : '' );
$self->idno( $idNode ? $idNode->textContent : '' );
Expand Down Expand Up @@ -187,7 +196,8 @@ sub _read_paragraphs_or_lines {

my @words;
my $xpc = $self->_xpc;
my @pgraphs = $xpc->findnodes( './/tei:p | .//tei:lg', $element );
my $xpexpr = '.' . $tags{p} . '|.' . $tags{lg};
my @pgraphs = $xpc->findnodes( $xpexpr, $element );
return () unless @pgraphs;
foreach my $pg( @pgraphs ) {
# If this paragraph is the descendant of a note element,
Expand Down
14 changes: 14 additions & 0 deletions cpan/Text-TEI-Collate/t/02ms_open.t
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,17 @@ foreach my $idx ( 0 .. $#wordcount ) {
is( join( ' ', map { $_->{t} } @{$json[0]->{witnesses}->[$idx]->{tokens}} ), $t, "Got correct words in plaintext JSON tokens" );
}

# Test opening a TEI file with no namespace.  This section was added in the
# same commit as the fix for CPAN RT bug 71711.
my $xmlparser = XML::LibXML->new();
my $doc;
# Trap any exception thrown while parsing, so that an unreadable or malformed
# fixture shows up as a failed ok() below rather than aborting the script.
eval { $doc = $xmlparser->parse_file( "$dirname/data/tei_no_ns.xml" )->documentElement(); };
ok( defined $doc, "parsed the XML file tei_no_ns.xml" );
# Build the manuscript straight from the parsed document element, using the
# 'xmldesc' source type.
my $ms_obj = Text::TEI::Collate::Manuscript->new(
'sourcetype' => 'xmldesc',
'source' => $doc,
);
# Total number of word objects, and the subset whose 'placeholders' accessor
# returns a true value.
my $words = scalar @{$ms_obj->words};
my @placeholders = grep { $_->placeholders } @{$ms_obj->words};
# Expected counts for the tei_no_ns.xml fixture.
is( $words, 30, "Got correct number of total words" );
is( scalar @placeholders, 2, "Got correct number of placeholder words" );

34 changes: 34 additions & 0 deletions cpan/Text-TEI-Collate/t/data/tei_no_ns.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/css" href="format.css" ?>
<!DOCTYPE TEI PUBLIC "-//TEI//DTD TEI P5//EN" "tei.dtd" >
<TEI>
<teiHeader>
<fileDesc>
<titleStmt>
<title>JK test_A.xml</title>
<author>JoelK</author>
</titleStmt>
<publicationStmt>
<p> </p>
</publicationStmt>
<notesStmt>
<note>Created by the <ref target="http://www.oeaw.ac.at/kvk/cte/">Classical Text
Editor</ref></note>
</notesStmt>
</fileDesc>
<encodingDesc>
<variantEncoding method="double-end-point" location="internal"/>
<tagsDecl>
<rendition xml:id="rd-Text" scheme="css"
>margin-left:0mm;margin-right:0mm;text-indent:8.5mm;line-height:14pt;margin-top:0pt;margin-bottom:0pt;text-align:justify;font-family:Times
New
Roman;font-size:11.5pt;color:#000000;font-style:normal;font-weight:normal;text-decoration:none;text-underline-position:0pt;text-decoration:none;text-transform:none;font-variant:normal;display:block;vertical-align:baseline;letter-spacing:0pt;direction:ltr;position:relative;top:0pt;</rendition>
<rendition xml:id="rd-N1" scheme="css"
>text-indent:0mm;line-height:12pt;font-size:9.5pt;</rendition>
<rendition xml:id="rd-A1" scheme="css"
>text-indent:0mm;line-height:12pt;font-size:9.5pt;</rendition>
</tagsDecl>
</encodingDesc>
</teiHeader>
<text xml:space="preserve" rendition="#rd-Text"><body><p rendition="#rd-Text">First paragraph</p><p rendition="#rd-Text">This is a test to try to figure out how the program can be constructed so as to generate an XML file (TEI-compliant) that respects the manuscript tradition.</p></body></text>
</TEI>

0 comments on commit 75732dd

Please sign in to comment.