Skip to content

Commit

Permalink
import XML::AutoWriter 0.33 from CPAN
Browse files Browse the repository at this point in the history
git-cpan-module: XML::AutoWriter
git-cpan-version: 0.33
  • Loading branch information
Barrie Slaymaker authored and Chris Prather committed Jun 11, 2009
1 parent f1be932 commit 75cb42f
Show file tree
Hide file tree
Showing 7 changed files with 274 additions and 74 deletions.
12 changes: 10 additions & 2 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
CHANGES file for XML::AutoWriter
CHANGES file for XML::ValidWriter and XML::AutoWriter

- Fixed a bug that kicked PCDATA escape mode in to CDATA too easily.
v0.33 Fri Dec 15 07:02:36 EST 2000
- XML::ValidWriter and XML::AutoWriter now croak if any control codes
other than TAB, CR, or newline (9, 10, 13) are passed.
- Fixed a bug that kicked out of PCDATA escape mode in to CDATA too easily.

v0.32 Fri Dec 15 07:02:36 EST 2000
- Reduced number of times '>' is escaped in PCDATA. It still gets
turned into '>' gratuitously occasionally, but that's a really
minor nit, IMHO.

v0.3 Wed Aug 9 11:53:59 EDT 2000
- Fixed a bug in escaping of CDATA end tags that are split across
Expand Down
1 change: 1 addition & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ lib/XML/ValidWriter.pm
t/auto.t
t/cm_re.t
t/dtd.t
t/escape.t
t/foo.t
t/valid.t
CHANGES
Expand Down
2 changes: 2 additions & 0 deletions MANIFEST.SKIP
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
\.sw[a-z]$
\.tar\.gz$
^Makefile$
^blib/
^pm_to_blib$
2 changes: 1 addition & 1 deletion Makefile.PL
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use ExtUtils::MakeMaker;
use strict ;
use vars qw( $VERSION ) ;

$VERSION = 0.32 ;
$VERSION = 0.33 ;

my @programs = qw( ) ;

Expand Down
127 changes: 88 additions & 39 deletions lib/XML/ValidWriter.pm
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,24 @@ XML::ValidWriter - DOCTYPE driven valid XML output
=head1 SYNOPSIS
## As a normal perl object:
$writer = XML::ValidWriter->new(
DOCTYPE => $xml_doc_type,
OUTPUT => \*FH
) ;
$writer->startTag( 'b1' ) ;
$writer->startTag( 'c2' ) ;
$writer->end ;
## Writing to a scalar:
$writer = XML::ValidWriter->new(
DOCTYPE => $xml_doc_type,
OUTPUT => \$buf
) ;
## Or, in scripting mode:
use XML::Doctype NAME => a, SYSTEM_ID => 'a.dtd' ;
use XML::ValidWriter qw( :all :dtd_tags ) ;
#
# a.dtd contains:
#
# <!ELEMENT a ( b1, b2?, b3* ) >
# <!ATTLIST a aa1 CDATA #REQUIRED >
# <!ELEMENT b1 ( c1 ) >
# <!ELEMENT b2 ( c2 ) >
#
b1 ; # Emits <a><b1>
c2( attr=>"val" ) ; # Emits </b1><b2><c2 attr="val">
endAllTags ; # Emits </c2></b2></a>
Expand All @@ -27,11 +35,14 @@ XML::ValidWriter - DOCTYPE driven valid XML output
use FooML::Doctype::v1_0001 ;
use XML::ValidWriter qw( :dtd_tags ) ;
## Or as a normal perl object:
$writer = XML::ValidWriter->new( ... ) ;
$writer->startTag( 'b1' ) ;
$writer->startTag( 'c2' ) ;
$writer->end ;
#
# This all assumes that the DTD contains:
#
# <!ELEMENT a ( b1, b2?, b3* ) >
# <!ATTLIST a aa1 CDATA #REQUIRED >
# <!ELEMENT b1 ( c1 ) >
# <!ELEMENT b2 ( c2 ) >
#
=head1 STATUS
Expand All @@ -42,25 +53,32 @@ Many methods supplied by XML::Writer are not yet supplied here.
=head1 DESCRIPTION
This module uses the DTD contained in an XML::Doctype to enable compile-
and run-time checks of XML output validity. It also provides methods
named after the start and end tags contained in the DTD, so an element
found in the DTD named TABLE will result in any XML::ValidWriter instances
using that DTD providing the methods
and run-time checks of XML output validity. It also provides methods and
functions named after the elements mentioned in the DTD. If an
XML::ValidWriter uses a DTD that mentions the element type TABLE, that
instance will provide the methods
$writer->TABLE( ...attrs... ) ;
$writer->end_TABLE( ...attrs... ) ;
$writer->TABLE( $content, ...attrs... ) ;
$writer->start_TABLE( ...attrs... ) ;
$writer->end_TABLE() ;
$writer->empty_TABLE( ...attrs... ) ;
. These are created for undeclared elements--those elements not explicitly
declared using <!ELEMENT ..> tags--as well.
declared with an <!ELEMENT ..> declaration--as well. If an element
type name conflicts with a method, it will not override the internal method.
When an XML::Doctype is parsed, the name of the doctype defines the root
node of the document. This name can be changed, though, see L<XML::Doctype>
for details.
In addition to the object-oriented API, a function API is also provided.
This allows you to import most of the methods of XML::ValidWriter as functions
using standard import specifications:
use XML::ValidWriter qw( :all ) ; ## Could list function names instead
You can also import functions named after an XML::Doctype's elements
if you specify an XML::Doctype:
C<:all> does not import the functions named after elements mentioned in
the DTD, you need to import those tags using C<:dtd_tags>:
use XML::Doctype NAME => 'foo', SYSTEM_ID => 'fooml.dtd' ;
use XML::ValidWriter qw( :all :dtd_tags ) ;
Expand All @@ -73,9 +91,6 @@ or
use XML::ValidWriter DOCTYPE => $doctype, qw( :all :dtd_tags ) ;
This only works (at present) if you specify a DOCTYPE or ':dtd_tags'
argument. It will be enhanced later.
=head2 XML::Writer API compatibility
Much of the interface is patterned
Expand Down Expand Up @@ -273,7 +288,7 @@ my @EXPORT_OK = qw(
xmlDecl
) ;

$VERSION = 0.24 ;
$VERSION = 0.25 ;

##
## This module can maintain a set of XML::ValidWriter instances,
Expand Down Expand Up @@ -526,40 +541,70 @@ sub import {


my %escapees ;
$escapees{'&'} = '&amp;' ;
$escapees{'<'} = '&lt;' ;
$escapees{'>'} = '&gt;' ;
$escapees{'"'} = '&quot;';
$escapees{'&'} = '&amp;' ;
$escapees{'<'} = '&lt;' ;
$escapees{'>'} = '&gt;' ;
$escapees{']>'} = ']&gt;' ;
$escapees{']]>'} = ']]&gt;' ;
$escapees{'"'} = '&quot;' ;
$escapees{"'"} = '&apos;' ;

# Takes a list, returns a list: don't use in scalar context.
# This only escapes '<' and '&'.
sub _esc {
croak "_esc used in scalar context" unless wantarray ;
my $text ;
if ( $text =~ /([\x00-\x08\x0B\x0C\x0E-\x1F])/ ) {
croak sprintf(
"Invalid character 0x%02d (^%s) sent",
ord $1,
chr( ord( "A" ) + ord( $1 ) - 1 )
)
}
return map {
$text = $_ ;
$text =~ s{([&<])}{$escapees{$1}}eg ;
$text =~ s{([&<]|^>|^\]>|\]\]>)}{$escapees{$1}}eg ;
$text ;
} @_ ;
}


sub _esc1 {
my $text = shift ;
$text =~ s{([&<])}{$escapees{$1}}eg ;
if ( $text =~ /([\x00-\x08\x0B\x0C\x0E-\x1F])/ ) {
croak sprintf(
"Invalid character 0x%02d (^%s) sent",
ord $1,
chr( ord( "A" ) + ord( $1 ) - 1 )
)
}
$text =~ s{([&<]|^>|^\]>|\]\]>)}{$escapees{$1}}eg ;
return $text ;
}

sub _attr_esc1 {
my $text = shift ;
$text =~ s{([&<"])}{$escapees{$1}}eg ;
if ( $text =~ /([\x00-\x08\x0B\x0C\x0E-\x1F])/ ) {
croak sprintf(
"Invalid character 0x%02d (^%s) sent",
ord $1,
chr( ord( "A" ) + ord( $1 ) - 1 )
)
}
$text =~ s{([&<"'])}{$escapees{$1}}eg ;
return $text ;
}


sub _esc_cdata_ends {
## This could be very memory hungry, but alas...
my $text = join( '', @_ ) ;
if ( $text =~ /([\x00-\x08\x0B\x0C\x0E-\x1F])/ ) {
croak sprintf(
"Invalid character 0x%02d (^%s) sent",
ord $1,
chr( ord( "A" ) + ord( $1 ) - 1 )
)
}
$text =~ s{\]\]>}{]]]]><![CDATA[>}g ;
return $text ;
}
Expand Down Expand Up @@ -601,23 +646,27 @@ sub characters {
my $in_cdata_mode ;

if ( $decide_cdata ) {
my $esc_count = 0 ;
my $escs = 0 ;
my $cdata_ends = 0 ;
my $cdata_escs = 0 ;
my $pos ;

## I assume that splitting CDATA ends between chunks is very
## rare. If an app does that a lot, then this could guess 'wrong'
## and use CDATA escapes in a situation where they result in more
## bytes out than <& escaping would.
for ( @_ ) {
$esc_count += tr/<&// ;
$escs += tr/<&// ;
$pos = 0 ;
++$cdata_ends while ( $pos = index $_, ']]>', $pos + 3 ) >= 0 ;
$cdata_escs += tr/\x00-\x08\x0b\x0c\x0e-\x1f// ;
$length += length $_ ;
}
## Each &lt; or &amp; is 4 or 5 chars, ]]]]><![CDATA[< is 15. We
## add 12 since <![CDATA[]]> is 12 chars.
$in_cdata_mode = 4.5 * $esc_count > 15 * $cdata_ends + 12 ;
## Each &lt; or &amp; is 4 or 5 chars.
## Each ]]]]><![CDATA[< is 15.
## Each ]]>&#xN;<![CDATA[ is 17 or 18.
## We ## add 12 since <![CDATA[]]> is 12 chars.
$in_cdata_mode = 4.5*$escs > 15*$cdata_ends + 17.75*$cdata_escs + 12 ;
}
else {
$in_cdata_mode = $self->{STRAGGLERS} eq ']]>' ;
Expand Down
Loading

0 comments on commit 75cb42f

Please sign in to comment.