Permalink
Browse files

New NestedCDATA remedy

Fixes documents which contain a <![CDATA[...]]> section that's had another
XML CDATA section blindly pasted into it.
  • Loading branch information...
1 parent 617af93 commit ff0d3209c4d5b3ddd30867332e25a1ca883dccc8 @arc arc committed Jun 11, 2011
Showing with 60 additions and 0 deletions.
  1. +3 −0 MANIFEST
  2. +23 −0 lib/XML/Liberal/Remedy/NestedCDATA.pm
  3. +17 −0 t/bad/BAD-nested-cdata.xml
  4. +17 −0 t/good/GOOD-nested-cdata.xml
View
@@ -27,6 +27,7 @@ lib/XML/Liberal/Remedy/EntityRef.pm
lib/XML/Liberal/Remedy/HTMLEntity.pm
lib/XML/Liberal/Remedy/InvalidEncoding.pm
lib/XML/Liberal/Remedy/LowAsciiChars.pm
+lib/XML/Liberal/Remedy/NestedCDATA.pm
lib/XML/Liberal/Remedy/NotUTF8.pm
lib/XML/Liberal/Remedy/StandaloneAttribute.pm
lib/XML/Liberal/Remedy/TrailingDoctype.pm
@@ -85,6 +86,7 @@ t/bad/BAD-html.xml
t/bad/BAD-html2.xml
t/bad/BAD-html3.xml
t/bad/BAD-lowascii.xml
+t/bad/BAD-nested-cdata.xml
t/bad/BAD-trailing-both.xml
t/bad/BAD-trailing-doctype.xml
t/bad/BAD-trailing-elements.xml
@@ -139,6 +141,7 @@ t/good/GOOD-html.xml
t/good/GOOD-html2.xml
t/good/GOOD-html3.xml
t/good/GOOD-lowascii.xml
+t/good/GOOD-nested-cdata.xml
t/good/GOOD-trailing-both.xml
t/good/GOOD-trailing-doctype.xml
t/good/GOOD-trailing-elements.xml
@@ -0,0 +1,23 @@
+package XML::Liberal::Remedy::NestedCDATA;
+use strict;
+
+use HTML::Entities qw( encode_entities );
+
+# Repair the first nested CDATA section found in the document.
+#
+# Invoked as a class method with ($driver, $error, $xml_ref):
+#   $driver  - accepted but not used in this routine
+#   $error   - object whose ->message text is checked against the
+#              "Opening and ending tag mismatch" parser error
+#   $xml_ref - reference to the XML string; rewritten in place on success
+#
+# Returns 1 after rewriting one section; returns 0 when the error message
+# does not match or no CDATA section contains a second "<![CDATA[" opener.
+sub apply {
+ my $class = shift;
+ my($driver, $error, $xml_ref) = @_;
+
+ # Only act on tag-mismatch parser errors.
+ return 0 if $error->message !~ /^parser error : Opening and ending tag mismatch:/;
+
+ # Each match is the text that follows a "<![CDATA[" opener, up to and
+ # including the nearest "]]>" (the opener itself is only a lookbehind).
+ while ($$xml_ref =~ /(?<= <!\[CDATA\[ ) (.*? \]\]> )/xmsg) {
+ # Copy the capture and its offsets right away: a later successful
+ # match would reset $1, @- and @+.
+ my ($cdata, $start, $end) = ($1, $-[1], $+[1]);
+ # No second opener inside this span, so it is not nested; keep scanning.
+ next if $cdata !~ /<!\[CDATA\[/;
+ # Escape <, > and & in the captured text, trailing "]]>" included.
+ my $escaped = encode_entities($cdata, '<>&');
+ # Close the outer section at once (leaving it empty), emit the escaped
+ # text as ordinary character data, then reopen a CDATA section for
+ # whatever followed the captured span.
+ substr($$xml_ref, $start, $end - $start) = "]]>$escaped<![CDATA[";
+ # One repair per call.
+ # NOTE(review): presumably the driver re-parses and calls again for
+ # any further nested sections - confirm against the caller.
+ return 1;
+ }
+
+ return 0;
+}
+
+1;
View
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="utf-8"?>
+<rss version="2.0">
+ <channel>
+ <title>Foo</title>
+ </channel>
+ <item>
+ <title>Blah blah blah</title>
+ <description><![CDATA[
+ <script type="text/javascript">
+ // <![CDATA[
+ (function() { alert('<hoho!>') })();
+ //]]>
+ </script>
+ <p>Article text goes here</p>
+ ]]></description>
+ </item>
+</rss>
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="utf-8"?>
+<rss version="2.0">
+ <channel>
+ <title>Foo</title>
+ </channel>
+ <item>
+ <title>Blah blah blah</title>
+ <description><![CDATA[]]>
+ &lt;script type="text/javascript"&gt;
+ // &lt;![CDATA[
+ (function() { alert('&lt;hoho!&gt;') })();
+ //]]&gt;<![CDATA[
+ </script>
+ <p>Article text goes here</p>
+ ]]></description>
+ </item>
+</rss>

0 comments on commit ff0d320

Please sign in to comment.