Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge pull request #3 from josephw/master

Documentation and robustness fixes
  • Loading branch information...
commit 1300fdd3fe635fa082b12d8e8bba9b78fe4159c1 2 parents 1ca215b + a514ece
Sam Ruby authored
Showing with 444 additions and 359 deletions.
  1. +1 −1  docs-xml/build-docs-index.py
  2. +1 −1  docs-xml/build-html-docs.py
  3. +4 −2 docs-xml/error/InvalidISO8601Date.xml
  4. +3 −3 docs-xml/mkmsgs.py
  5. +1 −1  docs-xml/warning/UnknownNamespace.xml
  6. +26 −0 docs-xml/warning/UnsupportedNamespace.xml
  7. +4 −2 docs/error/InvalidISO8601Date.html
  8. +1 −0  docs/index.html
  9. +1 −1  docs/warning/UnknownNamespace.html
  10. +83 −0 docs/warning/UnsupportedNamespace.html
  11. +13 −13 fcgi.py
  12. +17 −17 feedfinder.py
  13. +1 −0  runtest.py
  14. +0 −2  src/demo.py
  15. +28 −27 src/feedvalidator/__init__.py
  16. +3 −5 src/feedvalidator/author.py
  17. +22 −14 src/feedvalidator/base.py
  18. +0 −2  src/feedvalidator/category.py
  19. +12 −14 src/feedvalidator/channel.py
  20. +0 −2  src/feedvalidator/compatibility.py
  21. +1 −3 src/feedvalidator/content.py
  22. +2 −4 src/feedvalidator/entry.py
  23. +13 −15 src/feedvalidator/extension.py
  24. +5 −7 src/feedvalidator/feed.py
  25. +0 −2  src/feedvalidator/formatter/__init__.py
  26. +2 −4 src/feedvalidator/formatter/application_test.py
  27. +5 −7 src/feedvalidator/formatter/base.py
  28. +4 −6 src/feedvalidator/formatter/text_html.py
  29. +0 −2  src/feedvalidator/formatter/text_plain.py
  30. +1 −3 src/feedvalidator/formatter/text_xml.py
  31. +0 −2  src/feedvalidator/generator.py
  32. +0 −2  src/feedvalidator/i18n/__init__.py
  33. +1 −2  src/feedvalidator/i18n/en.py
  34. +3 −5 src/feedvalidator/image.py
  35. +1 −3 src/feedvalidator/iso639codes.py
  36. +5 −7 src/feedvalidator/item.py
  37. +11 −13 src/feedvalidator/itunes.py
  38. +0 −2  src/feedvalidator/kml.py
  39. +3 −5 src/feedvalidator/link.py
  40. +10 −11 src/feedvalidator/logging.py
  41. +2 −3 src/feedvalidator/mediaTypes.py
  42. +5 −7 src/feedvalidator/opml.py
  43. +3 −5 src/feedvalidator/rdf.py
  44. +2 −4 src/feedvalidator/root.py
  45. +2 −4 src/feedvalidator/rss.py
  46. +1 −3 src/feedvalidator/skipDays.py
  47. +1 −3 src/feedvalidator/skipHours.py
  48. +3 −3 src/feedvalidator/sse.py
  49. +0 −2  src/feedvalidator/textInput.py
  50. +16 −18 src/feedvalidator/timeoutsocket.py
  51. +3 −4 src/feedvalidator/uri.py
  52. +14 −16 src/feedvalidator/validators.py
  53. +1 −2  src/feedvalidator/xmlEncoding.py
  54. +3 −3 src/index.py
  55. +6 −3 src/missingWebPages.py
  56. +1 −1  src/rdflib/BNode.py
  57. +6 −6 src/rdflib/Literal.py
  58. +1 −1  src/rdflib/URIRef.py
  59. +1 −1  src/rdflib/constants.py
  60. +2 −2 src/rdflib/exceptions.py
  61. +33 −33 src/rdflib/syntax/parsers/RDFXMLHandler.py
  62. +2 −2 src/rdflib/syntax/xml_names.py
  63. +3 −0  src/runtests.sh
  64. +3 −1 src/tests/testFeedvalidator.py
  65. +1 −2  src/tests/testMediaTypes.py
  66. +2 −3 src/tests/testUri.py
  67. +8 −8 src/tests/testXmlEncoding.py
  68. +0 −1  src/tests/testXmlEncodingDecode.py
  69. +2 −4 src/validtest.py
  70. +2 −2 src/ws-demo.py
  71. +27 −0 testcases/atom/6.1/unsupported-namespace.xml
2  docs-xml/build-docs-index.py
View
@@ -58,7 +58,7 @@ def printLine(hr, msg):
for (f, msg) in allMsgs:
printLine(type + '/' + f + '.html', msg)
of.write("</ul>\n")
-
+
f = open('docs-index-footer.html')
of.write(f.read())
f.close()
2  docs-xml/build-html-docs.py
View
@@ -64,7 +64,7 @@ def writeDoc(x, h):
title = trimWS(title)
doc = doc.replace('<title></title>', '<title>' + title + '</title>')
-
+
for (sec, txt) in secRe.findall(t):
r = re.compile('<h2>' + sec + '</h2>\s*<div class="docbody">\s*()</div>', re.IGNORECASE)
idx = r.search(doc).start(1)
6 docs-xml/error/InvalidISO8601Date.xml
View
@@ -8,9 +8,11 @@
</div>
<div id='solution'>
<p>The value specified must adhere to the
+W3C's
+<a href="http://www.w3.org/TR/NOTE-datetime">Date and Time Formats</a> profile,
+a subset of the
<a href="http://www.iso.ch/iso/en/prods-services/popstds/datesandtime.html">ISO 8601</a>
-Date format. </p>
-
+Date format.</p>
</div>
</div>
</fvdoc>
6 docs-xml/mkmsgs.py
View
@@ -32,9 +32,9 @@ def missing():
dir = 'warning'
else:
continue
-
+
xml = path.join(basename, 'docs-xml', dir, key.__name__+'.xml')
-
+
if not path.exists(xml):
result.append((dir, key.__name__, value, xml))
@@ -55,7 +55,7 @@ def buildTestSuite():
if __name__ == '__main__':
import re
for dir, id, msg, xml in missing():
- msg = re.sub("%\(\w+\)\w?", "<code>foo</code>", msg)
+ msg = re.sub("%\(\w+\)\w?", "<code>foo</code>", msg)
if not path.exists(xml):
open(xml,'w').write(template.lstrip() % msg)
print xml
2  docs-xml/warning/UnknownNamespace.xml
View
@@ -14,7 +14,7 @@ feed validator does not (yet) support.</p>
the namespace URI, this information may not be recognized by feed
consumers.</p>
<p>If the namespace is spelled correctly, be aware that the feed validator
-is unable to verify the correct usage this namespace. Also be aware that
+is unable to verify the correct usage of this namespace. Also be aware that
this namespace is not likely to be widely supported.</p>
<p>If you know of a namespace that you feel that the feed validator should
support, <a href="http://groups.google.com/group/feedvalidator-users">let us
26 docs-xml/warning/UnsupportedNamespace.xml
View
@@ -0,0 +1,26 @@
+<fvdoc>
+<div xmlns='http://www.w3.org/1999/xhtml'>
+<div id='message'>
+<p>Unable to validate namespace: <code>http://namespace.uri/</code>. See the <i>foo</i> specification at <code>http://namespace.uri/specification</code></p>
+</div>
+<div id='explanation'>
+<p>Even though the feed validator has built-in support for quite a
+<a href="../howto/declare_namespaces.html">large number</a>
+of popular namespaces, you have somehow managed to find one that the
+feed validator does not (yet) support.</p>
+</div>
+<div id='solution'>
+<p>The feed validator is unable to verify the correct usage of this namespace.
+Either it is not well-specified, there are no test cases to demonstrate
+correct use or it just hasn't been implemented.</p>
+<p>Please consult the specification for correct use of this namespace.</p>
+<p>If you know of a namespace that you feel that the feed validator should
+support, <a href="http://groups.google.com/group/feedvalidator-users">let us
+know</a>. Please include in your request a pointer to the documentation for
+the namespace. Such documentation should describe the XML elements and
+attributes defined for this namespace. More information and the status of
+current requests can be found on the <a
+href="http://code.google.com/p/feedvalidator/wiki/ExtensionNamespaces">wiki</a>.</p>
+</div>
+</div>
+</fvdoc>
6 docs/error/InvalidISO8601Date.html
View
@@ -31,9 +31,11 @@
<h2>Solution</h2>
<div class="docbody">
<p>The value specified must adhere to the
+W3C's
+<a href="http://www.w3.org/TR/NOTE-datetime">Date and Time Formats</a> profile,
+a subset of the
<a href="http://www.iso.ch/iso/en/prods-services/popstds/datesandtime.html">ISO 8601</a>
-Date format. </p>
-
+Date format.</p>
</div>
<h2>Not clear? Disagree?</h2>
<div class="docbody">
1  docs/index.html
View
@@ -295,6 +295,7 @@
<li><a href="warning/UnknownNamespace.html">Use of unknown namespace: <code>http://namespace.uri/</code></a></li>
<li><a href="warning/UnregisteredAtomLinkRel.html">Unregistered <code>link</code> relationship: <code>foo</code></a></li>
<li><a href="warning/UnsupportedItunesFormat.html">Format <code>foo</code> is not supported by iTunes</a></li>
+<li><a href="warning/UnsupportedNamespace.html">Unable to validate namespace: <code>http://namespace.uri/</code>. See the <i>foo</i> specification at <code>http://namespace.uri/specification</code></a></li>
<li><a href="warning/UseOfExtensionAttr.html">Use of extension attribute on RSS 2.0 core element: <code>(namespace,name)</code></a></li>
<li><a href="warning/UseZeroForMidnight.html">Use zero for midnight</a></li>
</ul>
2  docs/warning/UnknownNamespace.html
View
@@ -37,7 +37,7 @@
the namespace URI, this information may not be recognized by feed
consumers.</p>
<p>If the namespace is spelled correctly, be aware that the feed validator
-is unable to verify the correct usage this namespace. Also be aware that
+is unable to verify the correct usage of this namespace. Also be aware that
this namespace is not likely to be widely supported.</p>
<p>If you know of a namespace that you feel that the feed validator should
support, <a href="http://groups.google.com/group/feedvalidator-users">let us
83 docs/warning/UnsupportedNamespace.html
View
@@ -0,0 +1,83 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
+<head>
+<title>Unable to validate namespace: http://namespace.uri/. See the foo specification at http://namespace.uri/specification</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+<link rel="icon" href="http://www.feedvalidator.org/favicon.ico" />
+<link rel="shortcut icon" href="http://www.feedvalidator.org/favicon.ico" />
+<style type="text/css" media="screen">@import "../../css/common.css";
+@import "../../css/documentation.css";</style>
+<script type="text/javascript"><!-- --></script>
+<link rel="start" href="http://feedvalidator.org/docs/" title="Home" />
+</head>
+<body>
+<div id="logo">
+<h1><a href="../../"><span id="feed"><span id="f">F</span><span id="e1">E</span><span id="e2">E</span></span><span id="d">D</span> Validator</a></h1>
+<p>Documentation</p>
+<a class="skip" href="#startnavigation">Jump to navigation</a>
+</div> <!--logo-->
+
+<div id="main">
+<h2>Message</h2>
+<div class="docbody">
+<p>Unable to validate namespace: <code>http://namespace.uri/</code>. See the <i>foo</i> specification at <code>http://namespace.uri/specification</code></p>
+</div>
+<h2>Explanation</h2>
+
+<div class="docbody">
+<p>Even though the feed validator has built-in support for quite a
+<a href="../howto/declare_namespaces.html">large number</a>
+of popular namespaces, you have somehow managed to find one that the
+feed validator does not (yet) support.</p>
+</div>
+<h2>Solution</h2>
+<div class="docbody">
+<p>The feed validator is unable to verify the correct usage of this namespace.
+Either it is not well-specified, there are no test cases to demonstrate
+correct use or it just hasn't been implemented.</p>
+<p>Please consult the specification for correct use of this namespace.</p>
+<p>If you know of a namespace that you feel that the feed validator should
+support, <a href="http://groups.google.com/group/feedvalidator-users">let us
+know</a>. Please include in your request a pointer to the documentation for
+the namespace. Such documentation should describe the XML elements and
+attributes defined for this namespace. More information and the status of
+current requests can be found on the <a
+href="http://code.google.com/p/feedvalidator/wiki/ExtensionNamespaces">wiki</a>.</p>
+</div>
+<h2>Not clear? Disagree?</h2>
+<div class="docbody">
+<p>You might be able to find help in one of <a href="../howto/resources.html">these fine resources</a>.</p>
+</div>
+
+</div><!--main-->
+<div class="centered">
+<a name="startnavigation" id="startnavigation"></a>
+<div class="navbarWrapper">
+ <div class="navbarContent">
+ <img class="borderTL" src="../../images/borderTL.gif" alt="" width="14" height="14" />
+ <img class="borderTR" src="../../images/borderTR.gif" alt="" width="14" height="14" />
+
+<p>
+<a href="../../">Home</a> &middot;
+<a href="../../about.html">About</a> &middot;
+<a href="../../news/">News</a> &middot;
+<a href="../../docs/">Docs</a> &middot;
+<a href="../../terms.html">Terms</a>
+</p>
+
+ <div class="roundedCornerSpacer">&nbsp;</div>
+ </div><!-- .content -->
+ <div class="bottomCorners">
+ <img class="borderBL" src="../../images/borderBL.gif" alt="" width="14" height="14" />
+ <img class="borderBR" src="../../images/borderBR.gif" alt="" width="14" height="14" />
+ </div><!-- .bottomCorners -->
+</div><!-- .contentWrapper -->
+</div><!-- .centered -->
+
+<div class="centered">
+<address>Copyright &copy; 2002-4 <a href="http://diveintomark.org/">Mark Pilgrim</a> and <a href="http://www.intertwingly.net/blog/">Sam Ruby</a></address>
+</div>
+
+</body>
+</html>
26 fcgi.py
View
@@ -445,7 +445,7 @@ def encode_pair(name, value):
s += struct.pack('!L', valueLength | 0x80000000L)
return s + name + value
-
+
class Record(object):
"""
A FastCGI Record.
@@ -494,7 +494,7 @@ def read(self, sock):
if length < FCGI_HEADER_LEN:
raise EOFError
-
+
self.version, self.type, self.requestId, self.contentLength, \
self.paddingLength = struct.unpack(FCGI_Header, header)
@@ -502,7 +502,7 @@ def read(self, sock):
'contentLength = %d' %
(sock.fileno(), self.type, self.requestId,
self.contentLength))
-
+
if self.contentLength:
try:
self.contentData, length = self._recvall(sock,
@@ -556,7 +556,7 @@ def write(self, sock):
self._sendall(sock, self.contentData)
if self.paddingLength:
self._sendall(sock, '\x00'*self.paddingLength)
-
+
class Request(object):
"""
Represents a single FastCGI request.
@@ -596,7 +596,7 @@ def run(self):
def _end(self, appStatus=0L, protocolStatus=FCGI_REQUEST_COMPLETE):
self._conn.end_request(self, appStatus, protocolStatus)
-
+
def _flush(self):
self.stdout.close()
self.stderr.close()
@@ -609,14 +609,14 @@ def __init__(self, server):
self.role = FCGI_RESPONDER
self.flags = 0
self.aborted = False
-
+
self.server = server
self.params = dict(os.environ)
self.stdin = sys.stdin
self.stdout = StdoutWrapper(sys.stdout) # Oh, the humanity!
self.stderr = sys.stderr
self.data = StringIO.StringIO()
-
+
def _end(self, appStatus=0L, protocolStatus=FCGI_REQUEST_COMPLETE):
sys.exit(appStatus)
@@ -657,7 +657,7 @@ def _cleanupSocket(self):
except:
pass
self._sock.close()
-
+
def run(self):
"""Begin processing data from the socket."""
self._keepGoing = True
@@ -818,7 +818,7 @@ def _do_unknown_type(self, inrec):
outrec.contentData = struct.pack(FCGI_UnknownTypeBody, inrec.type)
outrec.contentLength = FCGI_UnknownTypeBody_LEN
self.writeRecord(rec)
-
+
class MultiplexedConnection(Connection):
"""
A version of Connection capable of handling multiple requests
@@ -845,7 +845,7 @@ def _cleanupSocket(self):
self._lock.release()
super(MultiplexedConnection, self)._cleanupSocket()
-
+
def writeRecord(self, rec):
# Must use locking to prevent intermingling of Records from different
# threads.
@@ -904,7 +904,7 @@ def _do_data(self, inrec):
super(MultiplexedConnection, self)._do_data(inrec)
finally:
self._lock.release()
-
+
class Server(object):
"""
The FastCGI server.
@@ -1043,7 +1043,7 @@ def _installSignalHandlers(self):
def _restoreSignalHandlers(self):
for signum,handler in self._oldSIGs:
signal.signal(signum, handler)
-
+
def _hupHandler(self, signum, frame):
self._hupReceived = True
self._keepGoing = False
@@ -1290,7 +1290,7 @@ def _sanitizeEnv(self, environ):
'required by WSGI!\n' %
(self.__class__.__name__, name))
environ[name] = default
-
+
if __name__ == '__main__':
def test_app(environ, start_response):
"""Probably not the most efficient example."""
34 feedfinder.py
View
@@ -10,8 +10,8 @@
'http://scripting.com/rss.xml'
>>>
>>> feedfinder.feeds('scripting.com')
- ['http://delong.typepad.com/sdj/atom.xml',
- 'http://delong.typepad.com/sdj/index.rdf',
+ ['http://delong.typepad.com/sdj/atom.xml',
+ 'http://delong.typepad.com/sdj/index.rdf',
'http://delong.typepad.com/sdj/rss.xml']
>>>
@@ -25,10 +25,10 @@
1. If the URI points to a feed, it is simply returned; otherwise
the page is downloaded and the real fun begins.
2. Feeds pointed to by LINK tags in the header of the page (autodiscovery)
- 3. <A> links to feeds on the same server ending in ".rss", ".rdf", ".xml", or
+ 3. <A> links to feeds on the same server ending in ".rss", ".rdf", ".xml", or
".atom"
4. <A> links to feeds on the same server containing "rss", "rdf", "xml", or "atom"
- 5. <A> links to feeds on external servers ending in ".rss", ".rdf", ".xml", or
+ 5. <A> links to feeds on external servers ending in ".rss", ".rdf", ".xml", or
".atom"
6. <A> links to feeds on external servers containing "rss", "rdf", "xml", or "atom"
7. Try some guesses about common places for feeds (index.xml, atom.xml, etc.).
@@ -62,13 +62,13 @@ def __init__(self):
threading.Thread.__init__(self)
self.result = None
self.error = None
-
+
def run(self):
try:
self.result = function(*args, **kw)
except:
self.error = sys.exc_info()
-
+
c = Calculator()
c.setDaemon(True) # don't hold up exiting
c.start()
@@ -80,7 +80,7 @@ def run(self):
return c.result
return internal2
return internal
-
+
# XML-RPC support allows feedfinder to query Syndic8 for possible matches.
# Python 2.3 now comes with this module by default, otherwise you can download it
try:
@@ -94,10 +94,10 @@ def dict(aList):
for k, v in aList:
rc[k] = v
return rc
-
+
def _debuglog(message):
if _debug: print message
-
+
class URLGatekeeper:
"""a class to track robots.txt rules across multiple servers"""
def __init__(self):
@@ -108,7 +108,7 @@ def __init__(self):
self.urlopener.addheaders = [('User-agent', self.urlopener.version)]
robotparser.URLopener.version = self.urlopener.version
robotparser.URLopener.addheaders = self.urlopener.addheaders
-
+
def _getrp(self, url):
protocol, domain = urlparse.urlparse(url)[:2]
if self.rpcache.has_key(domain):
@@ -123,7 +123,7 @@ def _getrp(self, url):
pass
self.rpcache[domain] = rp
return rp
-
+
def can_fetch(self, url):
rp = self._getrp(url)
allow = rp.can_fetch(self.urlopener.version, url)
@@ -145,7 +145,7 @@ def __init__(self, baseuri):
sgmllib.SGMLParser.__init__(self)
self.links = []
self.baseuri = baseuri
-
+
def normalize_attrs(self, attrs):
def cleanattr(v):
v = sgmllib.charref.sub(lambda m: unichr(int(m.groups()[0])), v)
@@ -155,14 +155,14 @@ def cleanattr(v):
attrs = [(k.lower(), cleanattr(v)) for k, v in attrs]
attrs = [(k, k in ('rel','type') and v.lower() or v) for k, v in attrs]
return attrs
-
+
def do_base(self, attrs):
attrsD = dict(self.normalize_attrs(attrs))
if not attrsD.has_key('href'): return
self.baseuri = attrsD['href']
-
+
def error(self, *a, **kw): pass # we're not picky
-
+
class LinkParser(BaseParser):
FEED_TYPES = ('application/rss+xml',
'text/xml',
@@ -247,7 +247,7 @@ def getFeedsFromSyndic8(uri):
except:
pass
return feeds
-
+
def feeds(uri, all=False, querySyndic8=False):
fulluri = makeFullURI(uri)
try:
@@ -346,7 +346,7 @@ def test():
uri = urlparse.urljoin(uri, data.split('<link rel="next" href="').pop().split('"')[0])
print
print count, 'tests executed,', len(failed), 'failed'
-
+
if __name__ == '__main__':
args = sys.argv[1:]
if args and args[0] == '--debug':
1  runtest.py
View
@@ -1,4 +1,5 @@
modules = [
+ 'testFeedvalidator',
'testUri',
'testXmlEncoding',
'testXmlEncodingDecode',
2  src/demo.py
View
@@ -1,7 +1,5 @@
#!/usr/bin/python
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
55 src/feedvalidator/__init__.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -135,7 +133,7 @@ def __init__(self, dispatcher):
self.dispatcher=dispatcher
def error(self, message):
self.dispatcher.log(InvalidRDF({"message": message}))
-
+
source.getByteStream().reset()
parser.reset()
parser.setContentHandler(Handler(parser.getContentHandler()))
@@ -194,21 +192,24 @@ def validateURL(url, firstOccurrenceOnly=1, wantRawData=0):
rawdata = usock.read(MAXDATALENGTH)
if usock.read(1):
raise ValidationFailure(logging.ValidatorLimit({'limit': 'feed length > ' + str(MAXDATALENGTH) + ' bytes'}))
-
+
# check for temporary redirects
if usock.geturl()<>request.get_full_url():
- from httplib import HTTPConnection
- spliturl=url.split('/',3)
- if spliturl[0]=="http:":
- conn=HTTPConnection(spliturl[2])
- conn.request("GET",'/'+spliturl[3].split("#",1)[0])
+ from urlparse import urlsplit
+ (scheme, netloc, path, query, fragment) = urlsplit(url)
+ if scheme == 'http':
+ from httplib import HTTPConnection
+ requestUri = (path or '/') + (query and '?' + query)
+
+ conn=HTTPConnection(netloc)
+ conn.request("GET", requestUri)
resp=conn.getresponse()
if resp.status<>301:
loggedEvents.append(TempRedirect({}))
-
+
except BadStatusLine, status:
raise ValidationFailure(logging.HttpError({'status': status.__class__}))
-
+
except urllib2.HTTPError, status:
rawdata = status.read()
if len(rawdata) < 512 or 'content-encoding' in status.headers:
@@ -230,10 +231,10 @@ def validateURL(url, firstOccurrenceOnly=1, wantRawData=0):
except Exception, x:
raise ValidationFailure(logging.IOError({"message": x.__class__.__name__,
"exception":x}))
-
+
if usock.headers.get('content-encoding', None) == None:
loggedEvents.append(Uncompressed({}))
-
+
if usock.headers.get('content-encoding', None) == 'gzip':
import gzip, StringIO
try:
@@ -243,7 +244,7 @@ def validateURL(url, firstOccurrenceOnly=1, wantRawData=0):
exctype, value = sys.exc_info()[:2]
event=logging.IOError({"message": 'Server response declares Content-Encoding: gzip', "exception":value})
raise ValidationFailure(event)
-
+
if usock.headers.get('content-encoding', None) == 'deflate':
import zlib
try:
@@ -253,7 +254,7 @@ def validateURL(url, firstOccurrenceOnly=1, wantRawData=0):
exctype, value = sys.exc_info()[:2]
event=logging.IOError({"message": 'Server response declares Content-Encoding: deflate', "exception":value})
raise ValidationFailure(event)
-
+
if usock.headers.get('content-type', None) == 'application/vnd.google-earth.kmz':
import tempfile, zipfile, os
try:
@@ -275,21 +276,21 @@ def validateURL(url, firstOccurrenceOnly=1, wantRawData=0):
mediaType = None
charset = None
-
+
# Is the Content-Type correct?
contentType = usock.headers.get('content-type', None)
if contentType:
(mediaType, charset) = mediaTypes.checkValid(contentType, loggedEvents)
-
+
# Check for malformed HTTP headers
for (h, v) in usock.headers.items():
if (h.find(' ') >= 0):
loggedEvents.append(HttpProtocolError({'header': h}))
-
+
selfURIs = [request.get_full_url()]
baseURI = usock.geturl()
if not baseURI in selfURIs: selfURIs.append(baseURI)
-
+
# Get baseURI from content-location and/or redirect information
if usock.headers.get('content-location', None):
from urlparse import urljoin
@@ -297,25 +298,25 @@ def validateURL(url, firstOccurrenceOnly=1, wantRawData=0):
elif usock.headers.get('location', None):
from urlparse import urljoin
baseURI=urljoin(baseURI,usock.headers.get('location', ""))
-
+
if not baseURI in selfURIs: selfURIs.append(baseURI)
usock.close()
usock = None
-
+
mediaTypes.contentSniffing(mediaType, rawdata, loggedEvents)
-
+
encoding, rawdata = xmlEncoding.decode(mediaType, charset, rawdata, loggedEvents, fallback='utf-8')
-
+
if rawdata is None:
return {'loggedEvents': loggedEvents}
-
+
rawdata = rawdata.replace('\r\n', '\n').replace('\r', '\n') # normalize EOL
validator = _validate(rawdata, firstOccurrenceOnly, loggedEvents, baseURI, encoding, selfURIs, mediaType=mediaType)
-
+
# Warn about mismatches between media type and feed version
if mediaType and validator.feedType:
mediaTypes.checkAgainstFeedType(mediaType, validator.feedType, validator.loggedEvents)
-
+
params = {"feedType":validator.feedType, "loggedEvents":validator.loggedEvents}
if wantRawData:
params['rawdata'] = rawdata
@@ -326,7 +327,7 @@ def validateURL(url, firstOccurrenceOnly=1, wantRawData=0):
if usock: usock.close()
except:
pass
-
+
__all__ = ['base',
'channel',
'compatibility',
8 src/feedvalidator/author.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -35,13 +33,13 @@ def do_foaf_homepage(self):
def do_foaf_weblog(self):
return rdfResourceURI()
-
+
def do_foaf_plan(self):
return text()
-
+
def do_foaf_firstName(self):
return text()
-
+
def do_xhtml_div(self):
from content import diveater
return diveater()
36 src/feedvalidator/base.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -86,6 +84,11 @@
"xri://$xrds": "xrds",
}
+unsupported_namespaces = {
+ "http://schemas.google.com/g/2005": ("Google Data", "https://developers.google.com/gdata/docs/1.0/elements"),
+ "http://www.rawvoice.com/rawvoiceRssModule/": ("RawVoice", "http://www.rawvoice.com/services/tools-and-resources/rawvoice-rss-2-0-module-xmlns-namespace-rss2/")
+}
+
def near_miss(ns):
try:
return re.match(".*\w", ns).group().lower()
@@ -94,7 +97,7 @@ def near_miss(ns):
nearly_namespaces = dict([(near_miss(u),p) for u,p in namespaces.items()])
-stdattrs = [(u'http://www.w3.org/XML/1998/namespace', u'base'),
+stdattrs = [(u'http://www.w3.org/XML/1998/namespace', u'base'),
(u'http://www.w3.org/XML/1998/namespace', u'id'),
(u'http://www.w3.org/XML/1998/namespace', u'lang'),
(u'http://www.w3.org/XML/1998/namespace', u'space')]
@@ -138,7 +141,7 @@ def setFirstOccurrenceOnly(self, firstOccurrenceOnly=1):
def startPrefixMapping(self, prefix, uri):
for handler in iter(self.handler_stack[-1]):
handler.namespace[prefix] = uri
- if uri and len(uri.split())>1:
+ if uri and len(uri.split())>1:
from xml.sax import SAXException
self.error(SAXException('Invalid Namespace: %s' % uri))
if prefix in namespaces.values():
@@ -174,12 +177,17 @@ def startPrefixMapping(self, prefix, uri):
rule.setElement('xmlns:'+str(prefix), {}, self.handler_stack[-1][0])
rule.value=uri
if not uri or rule.validate():
- from logging import UnknownNamespace
- self.log(UnknownNamespace({'namespace':uri}))
+ if uri in unsupported_namespaces:
+ from logging import UnsupportedNamespace
+ (name, specification) = unsupported_namespaces[uri]
+ self.log(UnsupportedNamespace({'namespace': uri, 'name': name, 'specification': specification}))
+ else:
+ from logging import UnknownNamespace
+ self.log(UnknownNamespace({'namespace':uri}))
def namespaceFor(self, prefix):
return None
-
+
def startElementNS(self, name, qname, attrs):
self.lastKnownLine = self.locator.getLineNumber()
self.lastKnownColumn = self.locator.getColumnNumber()
@@ -283,7 +291,7 @@ def findDuplicate(self, event):
if not k in dup.params or dup.params[k] != v: break
else:
return dup
-
+
if event.params.has_key('element') and event.params['element']:
if not isinstance(event.params['element'],tuple):
event.params['element']=':'.join(event.params['element'].split('_', 1))
@@ -341,7 +349,7 @@ def setFeedType(self, feedType):
from logging import TYPE_RSS2
class validatorBase(ContentHandler):
-
+
def __init__(self):
ContentHandler.__init__(self)
self.value = ""
@@ -455,7 +463,7 @@ def startElementNS(self, name, qname, attrs):
else:
try:
self.child=name
- if name.startswith('dc_'):
+ if name.startswith('dc_'):
# handle "Qualified" Dublin Core
handler = getattr(self, "do_" + name.replace("-","_").split('.')[0])()
else:
@@ -506,7 +514,7 @@ def normalizeWhitespace(self):
def endElementNS(self, name, qname):
self.normalizeWhitespace()
self.validate()
- if self.isValid and self.name:
+ if self.isValid and self.name:
from validators import ValidElement
self.log(ValidElement({"parent":self.parent.name, "element":name}))
@@ -551,10 +559,10 @@ def log(self, event, offset=(0,0)):
def setFeedType(self, feedType):
self.dispatcher.setFeedType(feedType)
-
+
def getFeedType(self):
return self.dispatcher.getFeedType()
-
+
def push(self, handler, name, value):
self.dispatcher.push(handler, name, value, self)
@@ -564,6 +572,6 @@ def leaf(self):
def prevalidate(self):
pass
-
+
def validate(self):
pass
2  src/feedvalidator/category.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
26 src/feedvalidator/channel.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -18,7 +16,7 @@ def getExpectedAttrNames(self):
return [(u'urn:atom-extension:indexing', u'index')]
def prevalidate(self):
self.validate_optional_attribute((u'urn:atom-extension:indexing', u'index'), yesno)
-
+
def __init__(self):
self.link=None
self.docs=''
@@ -84,7 +82,7 @@ def do_textinput(self):
# less than helpful on RSS 1.0 feeds.
self.log(UndefinedElement({"parent":self.name, "element":"textinput"}))
return eater(), noduplicates()
-
+
def do_link(self):
self.metadata()
return link(), noduplicates()
@@ -134,11 +132,11 @@ def do_atom_logo(self):
def do_atom_title(self):
from content import textConstruct
return textConstruct(), noduplicates()
-
+
def do_atom_subtitle(self):
from content import textConstruct
return textConstruct(), noduplicates()
-
+
def do_atom_rights(self):
from content import textConstruct
return textConstruct(), noduplicates()
@@ -200,17 +198,17 @@ def do_category(self):
def do_cloud(self):
self.metadata()
return cloud(), noduplicates()
-
+
do_rating = validatorBase.leaf # TODO test cases?!?
def do_ttl(self):
self.metadata()
return positiveInteger(), nonblank(), noduplicates()
-
+
def do_docs(self):
self.metadata()
return docs(), noduplicates()
-
+
def do_generator(self):
self.metadata()
if "admin_generatorAgent" in self.children:
@@ -267,7 +265,7 @@ class rss10Channel(channel):
def getExpectedAttrNames(self):
return [(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'about'),
(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'about')]
-
+
def prevalidate(self):
if self.attrs.has_key((rdfNS,"about")):
if not "abouts" in self.dispatcher.__dict__:
@@ -291,21 +289,21 @@ class link(rfc2396_full):
def validate(self):
self.parent.link = self.value
rfc2396_full.validate(self)
-
+
class title(nonhtml):
def validate(self):
self.parent.title = self.value
nonhtml.validate(self)
-
+
class docs(rfc2396_full):
def validate(self):
self.parent.docs = self.value
rfc2396_full.validate(self)
-
+
class blink(text):
def validate(self):
self.log(NoBlink({}))
-
+
class category(nonhtml):
def getExpectedAttrNames(self):
return [(None, u'domain')]
2  src/feedvalidator/compatibility.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
4 src/feedvalidator/content.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -55,7 +53,7 @@ def prevalidate(self):
self.log(InvalidMIMEType({"parent":self.parent.name, "element":self.name, "attr":"type", "value":self.type}))
else:
self.log(ValidMIMEAttribute({"parent":self.parent.name, "element":self.name, "attr":"type", "value":self.type}))
-
+
if not self.xmlLang:
self.log(MissingDCLanguage({"parent":self.name, "element":"xml:lang"}))
6 src/feedvalidator/entry.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -102,10 +100,10 @@ def do_summary(self):
def do_title(self):
from content import textConstruct
return textConstruct(), noduplicates()
-
+
def do_updated(self):
return rfc3339(), nows(), noduplicates(), unique('updated',self.parent,DuplicateUpdated)
-
+
def do_app_edited(self):
return rfc3339(), nows(), noduplicates()
28 src/feedvalidator/extension.py
View
@@ -1,9 +1,7 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net>, Mark Pilgrim <http://diveintomark.org/> and Phil Ringnalda <http://philringnalda.com>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby, Mark Pilgrim and Phil Ringnalda"
-
+
from validators import *
from logging import *
@@ -28,7 +26,7 @@ def do_dc_contributor(self):
def do_dc_type(self):
return text(), noduplicates()
-
+
def do_dc_format(self):
return text(), noduplicates()
@@ -67,10 +65,10 @@ def do_dcterms_tableOfContents(self):
def do_dcterms_created(self):
return w3cdtf(), noduplicates()
-
+
def do_dcterms_valid(self):
return eater()
-
+
def do_dcterms_available(self):
return eater()
@@ -96,11 +94,11 @@ def do_dcterms_extent(self):
# def do_dcterms_medium(self):
# spec defines it as something that should never be used
-# undefined element'll do for now
+# undefined element'll do for now
def do_dcterms_isVersionOf(self):
return rdfResourceURI() # duplicates allowed
-
+
def do_dcterms_hasVersion(self):
return rdfResourceURI() # duplicates allowed
@@ -138,7 +136,7 @@ def do_dcterms_conformsTo(self):
return rdfResourceURI() # duplicates allowed
def do_dcterms_spatial(self):
- return eater()
+ return eater()
def do_dcterms_temporal(self):
return eater()
@@ -282,7 +280,7 @@ def do_l_link(self):
class extension_item(extension_channel_item):
def do_annotate_reference(self):
return rdfResourceURI(), noduplicates()
-
+
def do_ag_source(self):
return text(), noduplicates()
@@ -777,7 +775,7 @@ def do_gml_posList(self):
class gml_posList(text):
def validate(self):
if self.value.find(',')>=0:
- # ensure that commas are only used to separate lat and long
+ # ensure that commas are only used to separate lat and long
if not re.match('^[-+.0-9]+[, ][-+.0-9]( [-+.0-9]+[, ][-+.0-9])+$',
value.strip()):
return self.log(InvalidCoordList({'value':self.value}))
@@ -886,7 +884,7 @@ def validate(self):
if (self.lNS, 'type') in self.attrs.getNames():
self.value=self.attrs.getValue((self.lNS, 'type'))
MimeType.validate(self)
- return rdfResourceURI.validate(self)
+ return rdfResourceURI.validate(self)
class l_link(rdfResourceURI, MimeType):
lNS = u'http://purl.org/rss/1.0/modules/link/'
@@ -1112,14 +1110,14 @@ def validate(self):
class g_locationType(text):
def validate(self):
- if len(self.value.split(',')) not in [2,3]:
+ if len(self.value.split(',')) not in [2,3]:
self.log(InvalidLocation({"parent":self.parent.name, "element":self.name,
"attr": ':'.join(self.name.split('_',1)), "value":self.value}))
class g_full_locationType(text):
def validate(self):
fields = self.value.split(',')
- if len(fields) != 5 or 0 in [len(f.strip()) for f in fields]:
+ if len(fields) != 5 or 0 in [len(f.strip()) for f in fields]:
self.log(InvalidFullLocation({"parent":self.parent.name, "element":self.name,
"attr": ':'.join(self.name.split('_',1)), "value":self.value}))
@@ -1236,7 +1234,7 @@ def validate(self):
class Questionable(extension_everywhere):
children = []
-
+
def do_atom_author(self):
from author import author
return author()
12 src/feedvalidator/feed.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -20,7 +18,7 @@ def getExpectedAttrNames(self):
def prevalidate(self):
self.links = []
self.validate_optional_attribute((u'urn:atom-extension:indexing', u'index'), yesno)
-
+
def missingElement(self, params):
offset = [self.line - self.dispatcher.locator.getLineNumber(),
self.col - self.dispatcher.locator.getColumnNumber()]
@@ -89,7 +87,7 @@ def validate_metadata(self):
# archives should have current links
if not current and ('fh_complete' not in self.children):
self.log(MissingCurrentInArchive({}))
-
+
if self.itunes: itunes_channel.validate(self)
def metadata(self):
@@ -107,7 +105,7 @@ def validate(self):
self.dispatcher.loggedEvents = [event
for event in self.dispatcher.loggedEvents
if not isinstance(event,DuplicateEntries)]
-
+
if not 'entry' in self.children:
self.validate_metadata()
@@ -153,12 +151,12 @@ def do_title(self):
self.metadata()
from content import textConstruct
return textConstruct(), noduplicates()
-
+
def do_subtitle(self):
self.metadata()
from content import textConstruct
return textConstruct(), noduplicates()
-
+
def do_rights(self):
self.metadata()
from content import textConstruct
2  src/feedvalidator/formatter/__init__.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
6 src/feedvalidator/formatter/application_test.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -9,7 +7,7 @@
from base import BaseFormatter
import feedvalidator
import os
-LANGUAGE = os.environ.get('LANGUAGE', 'en').split(':')[1]
+LANGUAGE = os.environ.get('LANGUAGE', 'en_US:en').split(':')[1]
lang = __import__('feedvalidator.i18n.%s' % LANGUAGE, globals(), locals(), LANGUAGE)
class Formatter(BaseFormatter):
@@ -21,7 +19,7 @@ def getMessage(self, event):
classes = classes + list(classes[0].__bases__)
del classes[0]
return None
-
+
def format(self, event):
"""returns the formatted representation of a single event"""
return self.getMessage(event)
12 src/feedvalidator/formatter/base.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -8,7 +6,7 @@
from UserList import UserList
import os
-LANGUAGE = os.environ.get('LANGUAGE', 'en').split(':')[-1]
+LANGUAGE = os.environ.get('LANGUAGE', 'en_US:en').split(':')[-1]
lang = __import__('feedvalidator.i18n.%s' % LANGUAGE, globals(), locals(), LANGUAGE)
from feedvalidator.logging import Info, Warning, Error
@@ -36,13 +34,13 @@ def getLineAndColumn(self, event):
if not line: return ''
column = self.getColumn(event)
return '%s, %s:' % (line, column)
-
+
def getCount(self, event):
if not event.params.has_key('msgcount'): return ''
count = int(event.params['msgcount'])
if count <= 1: return ''
return lang.occurances % event.params
-
+
def getMessageClass(self, event):
classes = [event.__class__]
while len(classes):
@@ -51,7 +49,7 @@ def getMessageClass(self, event):
classes = classes + list(classes[0].__bases__)
del classes[0]
return "Undefined message: %s[%s]" % (event.__class__, event.params)
-
+
def getMessage(self, event):
classes = [event.__class__]
while len(classes):
@@ -63,7 +61,7 @@ def getMessage(self, event):
classes = classes + list(classes[0].__bases__)
del classes[0]
return "Undefined message: %s[%s]" % (event.__class__, event.params)
-
+
def format(self, event):
"""returns the formatted representation of a single event"""
return `event`
10 src/feedvalidator/formatter/text_html.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -31,11 +29,11 @@ def escapeAndMark(x):
class Formatter(BaseFormatter):
FRAGMENTLEN = 80
-
+
def __init__(self, events, rawdata):
BaseFormatter.__init__(self, events)
self.rawdata = rawdata
-
+
def getRootClass(self, aClass):
base = aClass.__bases__[0]
if base == Message: return aClass
@@ -51,7 +49,7 @@ def getHelpURL(self, event):
# messageClass = self.getMessageClass(event).__name__.split('.')[-1]
messageClass = event.__class__.__name__.split('.')[-1]
return DOCSURL + '/' + rootClass + '/' + messageClass
-
+
def mostSeriousClass(self):
ms=0
for event in self.data:
@@ -61,7 +59,7 @@ def mostSeriousClass(self):
if isinstance(event,Error): level = 3
ms = max(ms, level)
return [None, Info, Warning, Error][ms]
-
+
def header(self):
return '<ul>'
2  src/feedvalidator/formatter/text_plain.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
4 src/feedvalidator/formatter/text_xml.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -39,7 +37,7 @@ def format(self, event):
if key in order:
order.remove(key)
order.insert(0,key)
-
+
# output the elements
result = "<%s>\n" % level
for key in order:
2  src/feedvalidator/generator.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
2  src/feedvalidator/i18n/__init__.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
3  src/feedvalidator/i18n/en.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -256,6 +254,7 @@
UseZeroForUnknown: "Use zero for unknown length",
UnknownHost: "Unknown host",
UnknownNamespace: "Use of unknown namespace: %(namespace)s",
+ UnsupportedNamespace: "Unable to validate namespace: %(namespace)s. See the %(name)s specification at %(specification)s",
IntegerOverflow: "%(element)s value too large",
InvalidNSS: "Invalid Namespace Specific String: %(element)s",
SinceAfterUntil: "Since After until",
8 src/feedvalidator/image.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -47,7 +45,7 @@ def do_height(self):
def do_description(self):
return nonhtml(), noduplicates()
-
+
def do_dc_creator(self):
return text()
@@ -66,7 +64,7 @@ def validate(self):
if hasattr(self.parent.parent, 'link') and \
self.parent.parent.link and self.parent.parent.link != self.value:
self.log(ImageLinkDoesntMatch({"parent":self.parent.name, "element":self.name}))
-
+
class url(rfc2396_full):
def validate(self):
rfc2396_full.validate(self)
@@ -74,7 +72,7 @@ def validate(self):
ext = self.value.split('.')[-1].lower()
if re.match("^\w+$", ext) and ext not in ['jpg','jpeg','gif','png']:
self.log(ImageUrlFormat({"parent":self.parent.name, "element":self.name}))
-
+
class title(nonhtml, noduplicates):
def validate(self):
if not self.value.strip():
4 src/feedvalidator/iso639codes.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -193,7 +191,7 @@
'zu': 'Zulu',
'x' : 'a user-defined language',
'xx': 'a user-defined language',
-
+
'abk': 'Abkhazian',
'ace': 'Achinese',
'ach': 'Acoli',
12 src/feedvalidator/item.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -28,10 +26,10 @@ def validate(self):
self.log(SlashDate({}))
if self.itunes: itunes_item.validate(self)
-
+
def do_link(self):
return rfc2396_full(), noduplicates()
-
+
def do_title(self):
return nonhtml(), nonblank(), noduplicates()
@@ -90,7 +88,7 @@ def do_atom_published(self):
if "published" in self.children:
self.log(DuplicateItemSemantics({"core":"pubDate", "ext":"atom:published"}))
return rfc3339(), noduplicates()
-
+
def do_atom_updated(self):
return rfc3339(), noduplicates()
@@ -125,7 +123,7 @@ def do_comments(self):
def do_enclosure(self):
return enclosure(), noduplicates(DuplicateEnclosure)
-
+
def do_pubDate(self):
if "dc_date" in self.children:
self.log(DuplicateItemSemantics({"core":"pubDate", "ext":"dc:date"}))
@@ -247,7 +245,7 @@ def prevalidate(self):
self.validate_required_attribute((None,'url'), httpURL)
if self.attrs.has_key((None,u"url")):
- if hasattr(self.parent,'setEnclosure'):
+ if hasattr(self.parent,'setEnclosure'):
self.parent.setEnclosure(self.attrs.getValue((None, 'url')))
return validatorBase.prevalidate(self)
24 src/feedvalidator/itunes.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -48,7 +46,7 @@ def setItunes(self, value):
self.log(NotUTF8({"parent":self.parent.name, "element":self.name}))
if self.getFeedType() == TYPE_ATOM and 'entry' in self.children:
self.validate()
-
+
self.itunes |= value
def do_itunes_owner(self):
@@ -72,7 +70,7 @@ def validate(self):
pass
def setItunes(self, value):
- if value and not self.itunes:
+ if value and not self.itunes:
self.parent.setItunes(True)
self.itunes = value
if hasattr(self, 'enclosures'):
@@ -87,7 +85,7 @@ def setEnclosure(self, url):
if ext not in itunes_item.supported_formats:
from logging import UnsupportedItunesFormat
self.log(UnsupportedItunesFormat({"parent":self.parent.name, "element":self.name, "extension":ext}))
-
+
if not hasattr(self, 'enclosures'): self.enclosures = []
self.enclosures.append(url)
@@ -97,7 +95,7 @@ def do_itunes_duration(self):
class owner(validatorBase):
def validate(self):
if not "itunes_email" in self.children:
- self.log(MissingElement({"parent":self.name.replace("_",":"),
+ self.log(MissingElement({"parent":self.name.replace("_",":"),
"element":"itunes:email"}))
def do_itunes_email(self):
@@ -121,16 +119,16 @@ def prevalidate(self):
self.text=self.attrs.getValue((None, "text"))
if not self.text in self.newlist:
if self.text in self.oldlist:
- self.log(ObsoleteItunesCategory({"parent":self.parent.name.replace("_",":"),
- "element":self.name.replace("_",":"),
+ self.log(ObsoleteItunesCategory({"parent":self.parent.name.replace("_",":"),
+ "element":self.name.replace("_",":"),
"text":self.text}))
else:
- self.log(InvalidItunesCategory({"parent":self.parent.name.replace("_",":"),
- "element":self.name.replace("_",":"),
+ self.log(InvalidItunesCategory({"parent":self.parent.name.replace("_",":"),
+ "element":self.name.replace("_",":"),
"text":self.text}))
except KeyError:
- self.log(MissingAttribute({"parent":self.parent.name.replace("_",":"),
- "element":self.name.replace("_",":"),
+ self.log(MissingAttribute({"parent":self.parent.name.replace("_",":"),
+ "element":self.name.replace("_",":"),
"attr":"text"}))
class image(validatorBase):
@@ -149,7 +147,7 @@ def do_itunes_category(self):
if not self.text: return eater()
return subcategory(valid_itunes_categories.get(self.text,[]),
old_itunes_categories.get(self.text,[]))
-
+
valid_itunes_categories = {
"Arts": [
"Design",
2  src/feedvalidator/kml.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Gregor J. Rothfuss <http://greg.abstrakt.ch/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
8 src/feedvalidator/link.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -54,7 +52,7 @@ def getExpectedAttrNames(self):
(u'http://purl.org/syndication/thread/1.0', u'count'),
(u'http://purl.org/syndication/thread/1.0', u'when'),
(u'http://purl.org/syndication/thread/1.0', u'updated')]
-
+
def validate(self):
self.type = ""
self.rel = "alternate"
@@ -65,10 +63,10 @@ def validate(self):
if self.attrs.has_key((None, "rel")):
self.value = self.rel = self.attrs.getValue((None, "rel"))
- if self.rel.startswith('http://www.iana.org/assignments/relation/'):
+ if self.rel.startswith('http://www.iana.org/assignments/relation/'):
self.rel=self.rel[len('http://www.iana.org/assignments/relation/'):]
- if self.rel in self.validRelations:
+ if self.rel in self.validRelations:
self.log(ValidAtomLinkRel({"parent":self.parent.name, "element":self.name, "attr":"rel", "value":self.rel}))
elif rfc2396_full.rfc2396_re.match(self.rel.encode('idna')):
self.log(ValidAtomLinkRel({"parent":self.parent.name, "element":self.name, "attr":"rel", "value":self.rel}))
21 src/feedvalidator/logging.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -210,12 +208,12 @@ class ContainsEmail(Warning): pass
class ContainsHTML(Warning): pass
class ContainsUndeclaredHTML(ContainsHTML): pass
-class MissingSelf(Warning): pass
-class SelfDoesntMatchLocation(Warning): pass
-class RelativeSelf(Warning): pass
+class MissingSelf(Warning): pass
+class SelfDoesntMatchLocation(Warning): pass
+class RelativeSelf(Warning): pass
-class MissingSourceElement(Warning): pass
-class MissingTypeAttr(Warning): pass
+class MissingSourceElement(Warning): pass
+class MissingTypeAttr(Warning): pass
class DuplicateIds(Error): pass
class DuplicateEntries(Warning): pass
@@ -248,6 +246,7 @@ class MisplacedXHTMLContent(Warning): pass
class SchemeNotIANARegistered(Warning): pass
class AvoidNamespacePrefix(Warning): pass
class UnknownNamespace(Warning): pass
+class UnsupportedNamespace(Warning): pass
class MissingRecommendedAttribute(Warning): pass
@@ -273,10 +272,10 @@ class Uncompressed(Info): pass
class ObsoleteVersion(Warning): pass
class ObsoleteNamespace(Error): pass
-class ConflictingCatAttr(Error): pass
-class ConflictingCatChildren(Error): pass
-class InvalidMediaRange(Error): pass
-class UndefinedParam(Warning): pass
+class ConflictingCatAttr(Error): pass
+class ConflictingCatChildren(Error): pass
+class InvalidMediaRange(Error): pass
+class UndefinedParam(Warning): pass
class InvalidURI(InvalidValue) : pass
class InvalidURN(InvalidValue): pass
5 src/feedvalidator/mediaTypes.py
View
@@ -1,5 +1,4 @@
"""
-$Id$
This module deals with valid internet media types for feeds.
"""
@@ -14,7 +13,7 @@
'text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml',
'application/atom+xml', 'text/x-opml', 'application/xrds+xml',
'application/opensearchdescription+xml', 'application/vnd.google-earth.kml+xml', 'application/vnd.google-earth.kmz',
- 'application/atomsvc+xml', 'application/atomcat+xml',
+ 'application/atomsvc+xml', 'application/atomcat+xml',
]
# Is the Content-Type correct?
@@ -82,7 +81,7 @@ def contentSniffing(mediaType, rawdata, loggedEvents):
if block.find('<opml') >= 0: return
if block.find('<kml') >= 0: return
if block.find('<OpenSearchDescription') >= 0: return
- if (block.find('<rdf:RDF') >=0 and
+ if (block.find('<rdf:RDF') >=0 and
block.find('http://www.w3.org/1999/02/22-rdf-syntax-ns#') >= 0 and
block.find( 'http://purl.org/rss/1.0/')): return
12 src/feedvalidator/opml.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -24,7 +22,7 @@ def validate(self):
self.log(InvalidOPMLVersion({"parent":self.parent.name, "element":self.name, "value":self.attrs[(None,'version')]}))
elif self.name != 'outlineDocument':
self.log(MissingAttribute({"parent":self.parent.name, "element":self.name, "attr":"version"}))
-
+
if 'head' not in self.children:
self.log(MissingElement({"parent":self.name, "element":"head"}))
@@ -104,9 +102,9 @@ def getExpectedAttrNames(self):
(None, u'isBreakpoint'),
(None, u'isComment'),
(None, u'language'),
- (None, u'text'),
+ (None, u'text'),
(None, u'title'),
- (None, u'type'),
+ (None, u'type'),
(None, u'url'),
(None, u'version'),
(None, u'xmlUrl'),
@@ -142,7 +140,7 @@ def validate(self):
if (None,'version') in self.attrs.getNames():
if self.attrs[(None,'version')] not in opmlOutline.versionList:
self.log(InvalidOutlineVersion({"parent":self.parent.name, "element":self.name, "value":self.attrs[(None,'version')]}))
-
+
if len(self.attrs)>1 and not (None,u'type') in self.attrs.getNames():
for name in u'description htmlUrl language title version xmlUrl'.split():
if (None, name) in self.attrs.getNames():
@@ -164,6 +162,6 @@ def characters(self, string):
if string.strip():
self.log(UnexpectedText({"element":self.name,"parent":self.parent.name}))
self.value = string
-
+
def do_outline(self):
return opmlOutline()
8 src/feedvalidator/rdf.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -34,7 +32,7 @@ def _withAbout(self,v):
return v
else:
return v, rdfAbout()
-
+
def do_item(self):
from item import rss10Item
return self._withAbout(rss10Item())
@@ -45,7 +43,7 @@ def do_textinput(self):
def do_image(self):
return self._withAbout(rss10Image())
-
+
def do_cc_License(self):
return eater()
@@ -57,7 +55,7 @@ def do_rdf_Description(self):
def prevalidate(self):
self.setFeedType(TYPE_RSS1)
-
+
def validate(self):
if not "channel" in self.children and not "rss090_channel" in self.children:
self.log(MissingElement({"parent":self.name.replace('_',':'), "element":"channel"}))
6 src/feedvalidator/root.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -18,7 +16,7 @@
kml22_namespace='http://www.opengis.net/kml/2.2'
#
-# Main document.
+# Main document.
# Supports rss, rdf, pie, kml, and ffkar
#
class root(validatorBase):
@@ -190,7 +188,7 @@ def do_opensearch_OpenSearchDescription(self):
def do_xrds_XRDS(self):
from xrd import xrds
return xrds()
-
+
def do_rdf_RDF(self):
from rdf import rdf
self.dispatcher.defaultNamespaces.append(purl1_namespace)
6 src/feedvalidator/rss.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -36,8 +34,8 @@ def prevalidate(self):
self.version = self.attrs[(None,'version')]
if self.version not in ['0.91', '0.92', '2.0']:
self.log(InvalidRSSVersion({"parent":self.parent.name, "element":self.name, "value":self.version}))
-
-
+
+
def validate(self):
if not "channel" in self.children:
self.log(MissingElement({"parent":self.name, "element":"channel"}))
4 src/feedvalidator/skipDays.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -12,7 +10,7 @@
# skipDays element
#
class skipDays(validatorBase):
-
+
def __init__(self):
self.days = []
validatorBase.__init__(self)
4 src/feedvalidator/skipHours.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
@@ -12,7 +10,7 @@
# skipHours element
#
class skipHours(validatorBase):
-
+
def __init__(self):
self.hours = []
validatorBase.__init__(self)
6 src/feedvalidator/sse.py
View
@@ -5,7 +5,7 @@
class Sharing(validatorBase):
def getExpectedAttrNames(self):
- return [ (None, u'expires'), (None, u'since'), (None, u'until') ]
+ return [ (None, u'expires'), (None, u'since'), (None, u'until') ]
def prevalidate(self):
if self.attrs.has_key((None,'until')):
@@ -89,9 +89,9 @@ def validate(self):
self.log(InvalidNSS({"element":self.name,"parent":self.parent.name}))
class Conflicts(validatorBase):
- def do_entry(self):
+ def do_entry(self):
from entry import entry
return entry()
- def do_item(self):
+ def do_item(self):
from item import item
return item()
2  src/feedvalidator/textInput.py
View
@@ -1,5 +1,3 @@
-"""$Id$"""
-
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision$"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
34 src/feedvalidator/timeoutsocket.py
View
@@ -1,10 +1,8 @@
-#$Id
-
####
# Copyright 2000,2001 by Timothy O'Malley <timo@alum.mit.edu>
-#
+#
# All Rights Reserved
-#
+#
# Permission to use, copy, modify, and distribute this software
# and its documentation for any purpose and without fee is hereby
# granted, provided that the above copyright notice appear in all
@@ -12,8 +10,8 @@
# notice appear in supporting documentation, and that the name of
# Timothy O'Malley not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
-# prior permission.
-#
+# prior permission.
+#
# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR
@@ -21,7 +19,7 @@
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
-# PERFORMANCE OF THIS SOFTWARE.
+# PERFORMANCE OF THIS SOFTWARE.
#
####
@@ -43,7 +41,7 @@
The timeout applies to the socket functions that normally block on
-execution: read, write, connect, and accept. If any of these
+execution: read, write, connect, and accept. If any of these
operations exceeds the specified timeout, the exception Timeout
will be raised.
@@ -58,7 +56,7 @@
The TimeoutSocket class defines a socket-like object that attempts to
avoid the condition where a socket may block indefinitely. The
TimeoutSocket class raises a Timeout exception whenever the
-current operation delays too long.
+current operation delays too long.
The TimeoutFile class defines a file-like object that uses the TimeoutSocket
class. When the makefile() method of TimeoutSocket is called, it returns
@@ -163,7 +161,7 @@ class TimeoutSocket:
_copies = 0
_blocking = 1
-
+
def __init__(self, sock, timeout):
self._sock = sock
self._timeout = timeout
@@ -194,7 +192,7 @@ def connect_ex(self, addr):
errcode = why[0]
return errcode
# end connect_ex
-
+
def connect(self, addr, port=None, dumbhack=None):
# In case we were called as connect(host, port)
if port != None: addr = (addr, port)
@@ -213,11 +211,11 @@ def connect(self, addr, port=None, dumbhack=None):
except Error, why:
# Set the socket's blocking mode back
sock.setblocking(blocking)
-
+
# If we are not blocking, re-raise
if not blocking:
raise
-
+
# If we are already connected, then return success.
# If we got a genuine error, re-raise it.
errcode = why[0]
@@ -225,7 +223,7 @@ def connect(self, addr, port=None, dumbhack=None):
return
elif errcode not in _ConnectBusy:
raise
-
+
# Now, wait for the connect to happen
# ONLY if dumbhack indicates this is pass number one.
# If select raises an error, we pass it on.
@@ -263,12 +261,12 @@ def accept(self, dumbhack=None):
# If we are not supposed to block, then re-raise
if not blocking:
raise
-
+
# If we got a genuine error, re-raise it.
errcode = why[0]
if errcode not in _AcceptBusy:
raise
-
+
# Now, wait for the accept to happen
# ONLY if dumbhack indicates this is pass number one.
# If select raises an error, we pass it on.