Permalink
Browse files

RSS Profile 1.06: &#x26; to signal a 'real' ampersand.

git-svn-id: http://feedvalidator.googlecode.com/svn/trunk@660 34a10a33-d82d-0410-ba40-81c901463ecc
  • Loading branch information...
1 parent e58c2c9 commit 62fc40c9ebaa63df64fda04960bcb391687dcb75 @rubys committed Jul 27, 2006
View
@@ -164,3 +164,7 @@ address {
background: transparent;
color: #333;
}
+
+code {
+ font-size: larger;
+}
@@ -18,6 +18,13 @@ For example:</p>
<blockquote><code>&amp;amp;copy;</code></blockquote>
<p>becomes:</p>
<blockquote><code>&amp;#169;</code></blockquote>
+<p>Encoding of the characters <code>"&amp;"</code> and <code>"&lt;"</code>
is especially problematic in places like RSS 2.0 titles.&#160; For the widest
+interop, the
+<a href="http://www.rssboard.org/rss-profile#data-types-characterdata">RSS Profile</a>
+recommends the use of the hexadecimal character reference
+"<code>&amp;#x26;</code>" to represent "<code>&amp;</code>" and
+"<code>&amp;#x3C;</code>" to represent "<code>&lt;</code>".</p>
</div>
</div>
</fvdoc>
@@ -41,6 +41,13 @@
<blockquote><code>&amp;amp;copy;</code></blockquote>
<p>becomes:</p>
<blockquote><code>&amp;#169;</code></blockquote>
+<p>Encoding of the characters <code>"&amp;"</code> and <code>"&lt;"</code>
is especially problematic in places like RSS 2.0 titles.&#160; For the widest
+interop, the
+<a href="http://www.rssboard.org/rss-profile#data-types-characterdata">RSS Profile</a>
+recommends the use of the hexadecimal character reference
+"<code>&amp;#x26;</code>" to represent "<code>&amp;</code>" and
+"<code>&amp;#x3C;</code>" to represent "<code>&lt;</code>".</p>
</div>
<h2>Not clear? Disagree?</h2>
<div class="docbody">
@@ -43,6 +43,9 @@ def _validate(aString, firstOccurrenceOnly, loggedEvents, base, encoding, selfUR
validator.loggedEvents += loggedEvents
+ # experimental RSS-Profile draft 1.06 support
+ validator.setLiterals(re.findall('&#x26;(\w+);',aString))
+
xmlver = re.match("^<\?\s*xml\s+version\s*=\s*['\"]([-a-zA-Z0-9_.:]*)['\"]",aString)
if xmlver and xmlver.group(1)<>'1.0':
validator.log(logging.BadXmlVersion({"version":xmlver.group(1)}))
@@ -101,8 +101,15 @@ def __init__(self, base, selfURIs, encoding):
self.selfURIs = selfURIs
self.encoding = encoding
self.handler_stack=[[root(self, base)]]
+ self.literal_entities=[]
validatorBase.defaultNamespaces = []
+ # experimental RSS-Profile draft 1.06 support
+ def setLiterals(self, literals):
+ for literal in literals:
+ if literal not in self.literal_entities:
+ self.literal_entities.append(literal)
+
def setDocumentLocator(self, locator):
self.locator = locator
ContentHandler.setDocumentLocator(self, self.locator)
@@ -568,15 +568,19 @@ def validate(self):
#
class nonhtml(text,safeHtmlMixin):#,absUrlMixin):
htmlEndTag_re = re.compile("</(\w+)>")
- htmlEntity_re = re.compile("&#?\w+;")
+ htmlEntity_re = re.compile("&(#?\w+);")
def prevalidate(self):
self.children.append(True) # force warnings about "mixed" content
def validate(self, message=ContainsHTML):
tags = [t for t in self.htmlEndTag_re.findall(self.value) if t.lower() in HTMLValidator.htmltags]
if tags:
self.log(message({"parent":self.parent.name, "element":self.name, "value":tags[0]}))
elif self.htmlEntity_re.search(self.value):
- self.log(message({"parent":self.parent.name, "element":self.name, "value":self.htmlEntity_re.search(self.value).group(0)}))
+ for value in self.htmlEntity_re.findall(self.value):
+ from htmlentitydefs import name2codepoint
+ if (value in name2codepoint or not value.isalpha()) and \
+ value not in self.dispatcher.literal_entities:
+ self.log(message({"parent":self.parent.name, "element":self.name, "value":'&'+value+';'}))
#
# valid e-mail addresses
@@ -0,0 +1,17 @@
+<?xml version='1.0' encoding='iso-8859-1'?>
+
+<!--
+ Description: http://www.rssboard.org/files/test-double-escaped.xml
+ Expect: !ContainsHTML{parent:item,element:title}
+-->
+
+<rss version="2.0">
+<channel>
+<title>Validity test</title>
+<link>http://www.rssboard.org/rss-profile#data-types-characterdata</link>
+<description>Recommendations</description>
+<item>
+<title>AT&#x26;T</title>
+</item>
+</channel>
+</rss>
@@ -0,0 +1,17 @@
+<?xml version='1.0' encoding='iso-8859-1'?>
+
+<!--
+ Description: http://www.rssboard.org/files/test-double-escaped.xml
+ Expect: !ContainsHTML{parent:item,element:title}
+-->
+
+<rss version="2.0">
+<channel>
+<title>Validity test</title>
+<link>http://www.rssboard.org/rss-profile#data-types-characterdata</link>
+<description>Recommendations</description>
+<item>
+<title>Bill &#x26; Ted's Excellent Adventure</title>
+</item>
+</channel>
+</rss>
@@ -0,0 +1,17 @@
+<?xml version='1.0' encoding='iso-8859-1'?>
+
+<!--
+ Description: http://www.rssboard.org/files/test-double-escaped.xml
+ Expect: !ContainsHTML{parent:item,element:title}
+-->
+
+<rss version="2.0">
+<channel>
+<title>Validity test</title>
+<link>http://www.rssboard.org/rss-profile#data-types-characterdata</link>
+<description>Recommendations</description>
+<item>
+<title>The &#x26;amp; entity</title>
+</item>
+</channel>
+</rss>
@@ -0,0 +1,17 @@
+<?xml version='1.0' encoding='iso-8859-1'?>
+
+<!--
+ Description: http://www.rssboard.org/files/test-double-escaped.xml
+ Expect: !ContainsHTML{parent:item,element:title}
+-->
+
+<rss version="2.0">
+<channel>
+<title>Validity test</title>
+<link>http://www.rssboard.org/rss-profile#data-types-characterdata</link>
+<description>Recommendations</description>
+<item>
+<title>I &#x3C;3 Phil Ringnalda</title>
+</item>
+</channel>
+</rss>
@@ -0,0 +1,17 @@
+<?xml version='1.0' encoding='iso-8859-1'?>
+
+<!--
+ Description: http://www.rssboard.org/files/test-double-escaped.xml
+ Expect: !ContainsHTML{parent:item,element:title}
+-->
+
+<rss version="2.0">
+<channel>
+<title>Validity test</title>
+<link>http://www.rssboard.org/rss-profile#data-types-characterdata</link>
+<description>Recommendations</description>
+<item>
+<title>A &#x3C; B</title>
+</item>
+</channel>
+</rss>
@@ -0,0 +1,17 @@
+<?xml version='1.0' encoding='iso-8859-1'?>
+
+<!--
+ Description: http://www.rssboard.org/files/test-double-escaped.xml
+ Expect: !ContainsHTML{parent:item,element:title}
+-->
+
+<rss version="2.0">
+<channel>
+<title>Validity test</title>
+<link>http://www.rssboard.org/rss-profile#data-types-characterdata</link>
+<description>Recommendations</description>
+<item>
+<title>A&#x3C;B</title>
+</item>
+</channel>
+</rss>
@@ -0,0 +1,17 @@
+<?xml version='1.0' encoding='iso-8859-1'?>
+
+<!--
+ Description: http://www.rssboard.org/files/test-double-escaped.xml
+ Expect: !ContainsHTML{parent:item,element:title}
+-->
+
+<rss version="2.0">
+<channel>
+<title>Validity test</title>
+<link>http://www.rssboard.org/rss-profile#data-types-characterdata</link>
+<description>Recommendations</description>
+<item>
+<title>Nice &#x3C;gorilla&#x3E; what's he weigh?</title>
+</item>
+</channel>
+</rss>
@@ -35,7 +35,7 @@
<li>3.1 <a href="data-types-datetime">Dates and Times</a></li>
<li>3.2 <a href="data-types-url">URLs</a></li>
- <li>3.3 <a title="data-types-characterdata">Character Data</a></li>
+ <li>3.3 <a href="data-types-characterdata">Character Data</a></li>
</ul>
</li>
<li>4 <a title="elements">Elements</a>

0 comments on commit 62fc40c

Please sign in to comment.