Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

added a hardcopy of the feedvalidator from https://github.com/rubys/feedvalidator

  • Loading branch information...
commit ba7f78375ad6643bca4f2281971d30e0c1259dc5 1 parent c36a173
@tgpfeiffer authored
Showing with 10,490 additions and 0 deletions.
  1. +345 −0 feedvalidator/__init__.py
  2. +52 −0 feedvalidator/author.py
  3. +569 −0 feedvalidator/base.py
  4. +24 −0 feedvalidator/categories.py
  5. +22 −0 feedvalidator/category.py
  6. +20 −0 feedvalidator/cf.py
  7. +349 −0 feedvalidator/channel.py
  8. +40 −0 feedvalidator/compatibility.py
  9. +181 −0 feedvalidator/content.py
  10. +134 −0 feedvalidator/entry.py
  11. +1,305 −0 feedvalidator/extension.py
  12. +179 −0 feedvalidator/feed.py
  13. +7 −0 feedvalidator/formatter/__init__.py
  14. +27 −0 feedvalidator/formatter/application_test.py
  15. +69 −0 feedvalidator/formatter/base.py
  16. +112 −0 feedvalidator/formatter/text_html.py
  17. +15 −0 feedvalidator/formatter/text_plain.py
  18. +55 −0 feedvalidator/formatter/text_xml.py
  19. +24 −0 feedvalidator/generator.py
  20. +5 −0 feedvalidator/i18n/__init__.py
  21. +266 −0 feedvalidator/i18n/en.py
  22. +110 −0 feedvalidator/image.py
  23. +726 −0 feedvalidator/iso639codes.py
  24. +305 −0 feedvalidator/item.py
  25. +310 −0 feedvalidator/itunes.py
  26. +1,085 −0 feedvalidator/kml.py
  27. +172 −0 feedvalidator/link.py
  28. +427 −0 feedvalidator/logging.py
  29. +356 −0 feedvalidator/media.py
  30. +90 −0 feedvalidator/mediaTypes.py
  31. +148 −0 feedvalidator/opensearch.py
  32. +169 −0 feedvalidator/opml.py
  33. +162 −0 feedvalidator/rdf.py
  34. +217 −0 feedvalidator/root.py
  35. +43 −0 feedvalidator/rss.py
  36. +55 −0 feedvalidator/service.py
  37. +37 −0 feedvalidator/skipDays.py
  38. +44 −0 feedvalidator/skipHours.py
  39. +97 −0 feedvalidator/sse.py
  40. +46 −0 feedvalidator/textInput.py
  41. +425 −0 feedvalidator/timeoutsocket.py
  42. +238 −0 feedvalidator/uri.py
  43. +1,113 −0 feedvalidator/validators.py
  44. +288 −0 feedvalidator/xmlEncoding.py
  45. +27 −0 feedvalidator/xrd.py
View
345 feedvalidator/__init__.py
@@ -0,0 +1,345 @@
+"""$Id$"""
+
+__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
+__version__ = "$Revision$"
+__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
+
+import socket
+if hasattr(socket, 'setdefaulttimeout'):
+ socket.setdefaulttimeout(10)
+ Timeout = socket.timeout
+else:
+ import timeoutsocket
+ timeoutsocket.setDefaultSocketTimeout(10)
+ Timeout = timeoutsocket.Timeout
+
+import urllib2
+import logging
+from logging import *
+from xml.sax import SAXException
+from xml.sax.xmlreader import InputSource
+import re
+import xmlEncoding
+import mediaTypes
+from httplib import BadStatusLine
+
+MAXDATALENGTH = 2000000
+
def sniffPossibleFeed(rawdata):
  """Use wild heuristics to detect something that might be intended as a feed.

  Returns True if the data looks like RSS/Atom/RDF/KML, False otherwise.
  """
  # Bail out early on HTML documents.  The data is lowercased before the
  # comparison, so the prefix tested against must be lowercase too: the
  # original compared against '<!DOCTYPE html', which could never match
  # and let HTML pages fall through to the tag sniffing below.
  if rawdata.lower().startswith('<!doctype html'):
    return False

  # Strip comments so a commented-out tag doesn't trigger a false positive.
  rawdata=re.sub('<!--.*?-->','',rawdata)
  firstPart = rawdata[:512]
  for tag in ['<rss', '<feed', '<rdf:RDF', '<kml']:
    if tag in firstPart:
      return True

  # Fall back: does the document *end* like a feed?
  lastline = rawdata.strip().split('\n')[-1].strip()
  return lastline in ['</rss>','</feed>','</rdf:RDF>', '</kml>']
+
def _validate(aString, firstOccurrenceOnly, loggedEvents, base, encoding, selfURIs=None, mediaType=None):
  """validate RSS from string, returns validator object"""
  # NOTE: Python 2 module; aString is expected to be unicode by this point.
  from xml.sax import make_parser, handler
  from base import SAXDispatcher
  from exceptions import UnicodeError
  from cStringIO import StringIO

  # Special-case a common WordPress misconfiguration: a blank line before
  # the XML declaration.  Log it, then move the declaration to the front
  # so the rest of the document can still be checked.
  if re.match("^\s+<\?xml",aString) and re.search("<generator.*wordpress.*</generator>",aString):
    lt = aString.find('<'); gt = aString.find('>')
    if lt > 0 and gt > 0 and lt < gt:
      loggedEvents.append(logging.WPBlankLine({'line':1,'column':1}))
      # rearrange so that other errors can be found
      aString = aString[lt:gt+1]+aString[0:lt]+aString[gt+1:]

  # By now, aString should be Unicode
  source = InputSource()
  source.setByteStream(StringIO(xmlEncoding.asUTF8(aString)))

  validator = SAXDispatcher(base, selfURIs or [base], encoding)
  validator.setFirstOccurrenceOnly(firstOccurrenceOnly)

  # APP service/category documents are identified by media type alone.
  if mediaType == 'application/atomsvc+xml':
    validator.setFeedType(TYPE_APP_SERVICE)
  elif mediaType == 'application/atomcat+xml':
    validator.setFeedType(TYPE_APP_CATEGORIES)

  validator.loggedEvents += loggedEvents

  # experimental RSS-Profile support
  # Per-line flags: does the line contain a hexadecimal character reference?
  validator.rssCharData = [s.find('&#x')>=0 for s in aString.split('\n')]

  # Only XML version 1.0 is accepted.
  xmlver = re.match("^<\?\s*xml\s+version\s*=\s*['\"]([-a-zA-Z0-9_.:]*)['\"]",aString)
  if xmlver and xmlver.group(1)<>'1.0':
    validator.log(logging.BadXmlVersion({"version":xmlver.group(1)}))

  try:
    # Prefer an expat parser configured to tolerate a foreign/external DTD.
    from xml.sax.expatreader import ExpatParser
    class fake_dtd_parser(ExpatParser):
      def reset(self):
        ExpatParser.reset(self)
        self._parser.UseForeignDTD(1)
    parser = fake_dtd_parser()
  except:
    parser = make_parser()

  parser.setFeature(handler.feature_namespaces, 1)
  parser.setContentHandler(validator)
  parser.setErrorHandler(validator)
  parser.setEntityResolver(validator)
  if hasattr(parser, '_ns_stack'):
    # work around bug in built-in SAX parser (doesn't recognize xml: namespace)
    # PyXML doesn't have this problem, and it doesn't have _ns_stack either
    parser._ns_stack.append({'http://www.w3.org/XML/1998/namespace':'xml'})

  def xmlvalidate(log):
    # Optional DTD-validation pass via libxml2; invoked lazily from
    # SAXDispatcher.resolveEntity when a doctype is present.
    import libxml2
    from StringIO import StringIO
    from random import random

    # A random "filename" prefix lets us pick our own messages out of
    # libxml2's error stream.
    prefix="...%s..." % str(random()).replace('0.','')
    msg=[]
    libxml2.registerErrorHandler(lambda msg,str: msg.append(str), msg)

    input = libxml2.inputBuffer(StringIO(xmlEncoding.asUTF8(aString)))
    reader = input.newTextReader(prefix)
    reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)
    ret = reader.Read()
    while ret == 1: ret = reader.Read()

    msg=''.join(msg)
    for line in msg.splitlines():
      if line.startswith(prefix): log(line.split(':',4)[-1].strip())
  validator.xmlvalidator=xmlvalidate

  try:
    parser.parse(source)
  except SAXException:
    # Fatal parse errors have already been logged by the error handler.
    pass
  except UnicodeError:
    import sys
    exctype, value = sys.exc_info()[:2]
    validator.log(logging.UnicodeError({"exception":value}))

  if validator.getFeedType() == TYPE_RSS1:
    # Second pass for RSS 1.0: let rdflib verify the RDF structure.
    # Best effort only - silently skipped if rdflib is unavailable.
    try:
      from rdflib.syntax.parsers.RDFXMLHandler import RDFXMLHandler

      class Handler(RDFXMLHandler):
        ns_prefix_map = {}
        prefix_ns_map = {}
        def add(self, triple): pass
        def __init__(self, dispatcher):
          RDFXMLHandler.__init__(self, self)
          self.dispatcher=dispatcher
        def error(self, message):
          self.dispatcher.log(InvalidRDF({"message": message}))

      source.getByteStream().reset()
      parser.reset()
      parser.setContentHandler(Handler(parser.getContentHandler()))
      parser.setErrorHandler(handler.ErrorHandler())
      parser.parse(source)
    except:
      pass

  return validator
+
def validateStream(aFile, firstOccurrenceOnly=0, contentType=None, base=""):
  """Validate a feed read from an open file-like object.

  Returns a dict with 'feedType' and 'loggedEvents'.  Raises
  ValidationFailure when the stream exceeds MAXDATALENGTH bytes.
  """
  events = []

  mediaType = charset = None
  if contentType:
    mediaType, charset = mediaTypes.checkValid(contentType, events)

  rawdata = aFile.read(MAXDATALENGTH)
  # Anything left past the cap means the feed is too large to validate.
  if aFile.read(1):
    raise ValidationFailure(logging.ValidatorLimit({'limit': 'feed length > ' + str(MAXDATALENGTH) + ' bytes'}))

  encoding, rawdata = xmlEncoding.decode(mediaType, charset, rawdata, events, fallback='utf-8')

  validator = _validate(rawdata, firstOccurrenceOnly, events, base, encoding, mediaType=mediaType)

  # Warn about mismatches between media type and detected feed version.
  if mediaType and validator.feedType:
    mediaTypes.checkAgainstFeedType(mediaType, validator.feedType, validator.loggedEvents)

  return {"feedType": validator.feedType, "loggedEvents": validator.loggedEvents}
+
def validateString(aString, firstOccurrenceOnly=0, fallback=None, base=""):
  """validate a feed held in a string; returns a dict with 'feedType' and
  'loggedEvents' (or just 'loggedEvents' if decoding failed)."""
  loggedEvents = []
  # Python 2: byte strings are decoded first; unicode input is assumed
  # already decoded.
  if type(aString) != unicode:
    encoding, aString = xmlEncoding.decode("", None, aString, loggedEvents, fallback)
  else:
    encoding = "utf-8" # setting a sane (?) default

  if aString is not None:
    validator = _validate(aString, firstOccurrenceOnly, loggedEvents, base, encoding)
    return {"feedType":validator.feedType, "loggedEvents":validator.loggedEvents}
  else:
    return {"loggedEvents": loggedEvents}
+
def validateURL(url, firstOccurrenceOnly=1, wantRawData=0):
  """validate RSS from URL, returns events list, or (events, rawdata) tuple"""
  # Fetches the URL (handling gzip/deflate, KMZ unwrapping, redirects and
  # HTTP errors that might still contain a feed), then delegates to
  # _validate.  Raises ValidationFailure on unrecoverable fetch problems.
  loggedEvents = []
  request = urllib2.Request(url)
  request.add_header("Accept-encoding", "gzip, deflate")
  request.add_header("User-Agent", "FeedValidator/1.3")
  usock = None
  try:
    try:
      usock = urllib2.urlopen(request)
      rawdata = usock.read(MAXDATALENGTH)
      if usock.read(1):
        raise ValidationFailure(logging.ValidatorLimit({'limit': 'feed length > ' + str(MAXDATALENGTH) + ' bytes'}))

      # check for temporary redirects
      if usock.geturl()<>request.get_full_url():
        from httplib import HTTPConnection
        spliturl=url.split('/',3)
        if spliturl[0]=="http:":
          # Re-request the original URL directly to see the redirect status.
          conn=HTTPConnection(spliturl[2])
          conn.request("GET",'/'+spliturl[3].split("#",1)[0])
          resp=conn.getresponse()
          if resp.status<>301:
            loggedEvents.append(TempRedirect({}))

    except BadStatusLine, status:
      raise ValidationFailure(logging.HttpError({'status': status.__class__}))

    except urllib2.HTTPError, status:
      # An HTTP error page may still carry the feed body; keep it when it
      # is large enough (or compressed) to plausibly be one.
      rawdata = status.read()
      if len(rawdata) < 512 or 'content-encoding' in status.headers:
        loggedEvents.append(logging.HttpError({'status': status}))
        usock = status
      else:
        rawdata=re.sub('<!--.*?-->','',rawdata)
        # NOTE(review): 'lastline' is computed but unused here -
        # sniffPossibleFeed recomputes it; confirm it can be removed.
        lastline = rawdata.strip().split('\n')[-1].strip()
        if sniffPossibleFeed(rawdata):
          loggedEvents.append(logging.HttpError({'status': status}))
          loggedEvents.append(logging.HttpErrorWithPossibleFeed({}))
          usock = status
        else:
          raise ValidationFailure(logging.HttpError({'status': status}))
    except urllib2.URLError, x:
      raise ValidationFailure(logging.HttpError({'status': x.reason}))
    except Timeout, x:
      raise ValidationFailure(logging.IOError({"message": 'Server timed out', "exception":x}))
    except Exception, x:
      raise ValidationFailure(logging.IOError({"message": x.__class__.__name__,
        "exception":x}))

    # No Content-Encoding at all: suggest compression.
    if usock.headers.get('content-encoding', None) == None:
      loggedEvents.append(Uncompressed({}))

    if usock.headers.get('content-encoding', None) == 'gzip':
      import gzip, StringIO
      try:
        rawdata = gzip.GzipFile(fileobj=StringIO.StringIO(rawdata)).read()
      except:
        import sys
        exctype, value = sys.exc_info()[:2]
        event=logging.IOError({"message": 'Server response declares Content-Encoding: gzip', "exception":value})
        raise ValidationFailure(event)

    if usock.headers.get('content-encoding', None) == 'deflate':
      import zlib
      try:
        # -MAX_WBITS: raw deflate stream (no zlib header).
        rawdata = zlib.decompress(rawdata, -zlib.MAX_WBITS)
      except:
        import sys
        exctype, value = sys.exc_info()[:2]
        event=logging.IOError({"message": 'Server response declares Content-Encoding: deflate', "exception":value})
        raise ValidationFailure(event)

    # KMZ is a zip archive; validate the first .kml member found.
    if usock.headers.get('content-type', None) == 'application/vnd.google-earth.kmz':
      import tempfile, zipfile, os
      try:
        (fd, tempname) = tempfile.mkstemp()
        os.write(fd, rawdata)
        os.close(fd)
        zfd = zipfile.ZipFile(tempname)
        namelist = zfd.namelist()
        for name in namelist:
          if name.endswith('.kml'):
            rawdata = zfd.read(name)
        zfd.close()
        os.unlink(tempname)
      except:
        import sys
        value = sys.exc_info()[:1]
        event=logging.IOError({"message": 'Problem decoding KMZ', "exception":value})
        raise ValidationFailure(event)

    mediaType = None
    charset = None

    # Is the Content-Type correct?
    contentType = usock.headers.get('content-type', None)
    if contentType:
      (mediaType, charset) = mediaTypes.checkValid(contentType, loggedEvents)

    # Check for malformed HTTP headers
    for (h, v) in usock.headers.items():
      if (h.find(' ') >= 0):
        loggedEvents.append(HttpProtocolError({'header': h}))

    selfURIs = [request.get_full_url()]
    baseURI = usock.geturl()
    if not baseURI in selfURIs: selfURIs.append(baseURI)

    # Get baseURI from content-location and/or redirect information
    if usock.headers.get('content-location', None):
      from urlparse import urljoin
      baseURI=urljoin(baseURI,usock.headers.get('content-location', ""))
    elif usock.headers.get('location', None):
      from urlparse import urljoin
      baseURI=urljoin(baseURI,usock.headers.get('location', ""))

    if not baseURI in selfURIs: selfURIs.append(baseURI)
    usock.close()
    usock = None

    mediaTypes.contentSniffing(mediaType, rawdata, loggedEvents)

    encoding, rawdata = xmlEncoding.decode(mediaType, charset, rawdata, loggedEvents, fallback='utf-8')

    if rawdata is None:
      return {'loggedEvents': loggedEvents}

    rawdata = rawdata.replace('\r\n', '\n').replace('\r', '\n') # normalize EOL
    validator = _validate(rawdata, firstOccurrenceOnly, loggedEvents, baseURI, encoding, selfURIs, mediaType=mediaType)

    # Warn about mismatches between media type and feed version
    if mediaType and validator.feedType:
      mediaTypes.checkAgainstFeedType(mediaType, validator.feedType, validator.loggedEvents)

    params = {"feedType":validator.feedType, "loggedEvents":validator.loggedEvents}
    if wantRawData:
      params['rawdata'] = rawdata
    return params

  finally:
    # Always release the socket, even on ValidationFailure.
    try:
      if usock: usock.close()
    except:
      pass
+
# Public names exported by `from feedvalidator import *`.
# NOTE(review): 'util' is listed but no util module appears in this commit -
# confirm it exists elsewhere in the package.
__all__ = ['base',
  'channel',
  'compatibility',
  'image',
  'item',
  'logging',
  'rdf',
  'root',
  'rss',
  'skipHours',
  'sniffPossibleFeed',
  'textInput',
  'util',
  'validators',
  'validateURL',
  'validateString']
View
52 feedvalidator/author.py
@@ -0,0 +1,52 @@
+"""$Id$"""
+
+__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
+__version__ = "$Revision$"
+__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
+
+from base import validatorBase
+from validators import *
+
+#
+# author element.
+#
class author(validatorBase):
  """Validator for an author/contributor person construct."""

  def getExpectedAttrNames(self):
    # Only rdf:parseType is anticipated on this element.
    return [(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'parseType')]

  def validate(self):
    # A person construct must carry a name (plain or Atom-namespaced).
    has_name = "name" in self.children or "atom_name" in self.children
    if not has_name:
      self.log(MissingElement({"parent":self.name, "element":"name"}))

  def do_name(self):
    return nonhtml(), nonemail(), nonblank(), noduplicates()

  def do_email(self):
    return addr_spec(), noduplicates()

  def do_uri(self):
    return nonblank(), rfc3987(), nows(), noduplicates()

  # The FOAF homepage-style properties all take an rdf:resource URI.
  def do_foaf_workplaceHomepage(self):
    return rdfResourceURI()

  do_foaf_homepage = do_foaf_workplaceHomepage
  do_foaf_weblog = do_foaf_workplaceHomepage

  def do_foaf_plan(self):
    return text()

  def do_foaf_firstName(self):
    return text()

  def do_xhtml_div(self):
    from content import diveater
    return diveater()

  # RSS/Atom support: Atom-namespaced children behave like the plain ones.
  do_atom_name = do_name
  do_atom_email = do_email
  do_atom_uri = do_uri
View
569 feedvalidator/base.py
@@ -0,0 +1,569 @@
+"""$Id$"""
+
+__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
+__version__ = "$Revision$"
+__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
+
+from xml.sax.handler import ContentHandler
+from xml.sax.xmlreader import Locator
+from logging import NonCanonicalURI, NotUTF8
+import re
+
+# references:
+# http://web.resource.org/rss/1.0/modules/standard.html
+# http://web.resource.org/rss/1.0/modules/proposed.html
+# http://dmoz.org/Reference/Libraries/Library_and_Information_Science/Technical_Services/Cataloguing/Metadata/RDF/Applications/RSS/Specifications/RSS1.0_Modules/
# Canonical namespace URI -> preferred prefix.  Note several URIs may
# intentionally share one prefix (e.g. the two 'thr' and two 'wiki' entries).
namespaces = {
  "http://www.bloglines.com/about/specs/fac-1.0": "access",
  "http://webns.net/mvcb/": "admin",
  "http://purl.org/rss/1.0/modules/aggregation/": "ag",
  "http://purl.org/rss/1.0/modules/annotate/": "annotate",
  "http://www.w3.org/2007/app": "app",
  "http://media.tangent.org/rss/1.0/": "audio",
  "http://backend.userland.com/blogChannelModule": "blogChannel",
  "http://web.resource.org/cc/": "cc",
  "http://www.microsoft.com/schemas/rss/core/2005": "cf",
  "http://backend.userland.com/creativeCommonsRssModule": "creativeCommons",
  "http://purl.org/rss/1.0/modules/company": "company",
  "http://purl.org/rss/1.0/modules/content/": "content",
  "http://conversationsnetwork.org/rssNamespace-1.0/": "conversationsNetwork",
  "http://my.theinfo.org/changed/1.0/rss/": "cp",
  "http://purl.org/dc/elements/1.1/": "dc",
  "http://purl.org/dc/terms/": "dcterms",
  "http://purl.org/rss/1.0/modules/email/": "email",
  "http://purl.org/rss/1.0/modules/event/": "ev",
  "http://purl.org/syndication/history/1.0": "fh",
  "http://www.w3.org/2003/01/geo/wgs84_pos#": "geo",
  "http://geourl.org/rss/module/": "geourl",
  "http://www.georss.org/georss": "georss",
  "http://www.opengis.net/gml": "gml",
  "http://postneo.com/icbm": "icbm",
  "http://purl.org/rss/1.0/modules/image/": "image",
  "urn:atom-extension:indexing": "indexing",
  "http://www.itunes.com/dtds/podcast-1.0.dtd": "itunes",
  "http://rssnamespace.org/feedburner/ext/1.0": "feedburner",
  "http://xmlns.com/foaf/0.1/": "foaf",
  "http://purl.org/rss/1.0/modules/link/": "l",
  "http://search.yahoo.com/mrss/": "media",
  "http://www.w3.org/1998/Math/MathML": "mathml",
  "http://a9.com/-/spec/opensearchrss/1.0/": "opensearch10",
  "http://a9.com/-/spec/opensearch/1.1/": "opensearch",
  "http://www.opml.org/spec2": "opml",
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
  "http://www.w3.org/2000/01/rdf-schema#": "rdfs",
  "http://purl.org/rss/1.0/modules/reference/": "ref",
  "http://purl.org/rss/1.0/modules/richequiv/": "reqv",
  "http://purl.org/rss/1.0/modules/rss091#": "rss091",
  "http://purl.org/rss/1.0/modules/search/": "search",
  "http://purl.org/rss/1.0/modules/slash/": "slash",
  "http://purl.org/rss/1.0/modules/servicestatus/": "ss",
  "http://hacks.benhammersley.com/rss/streaming/": "str",
  "http://purl.org/rss/1.0/modules/subscription/": "sub",
  "http://feedsync.org/2007/feedsync": "sx",
  "http://www.w3.org/2000/svg": "svg",
  "http://purl.org/rss/1.0/modules/syndication/": "sy",
  "http://purl.org/rss/1.0/modules/taxonomy/": "taxo",
  "http://purl.org/rss/1.0/modules/threading/": "thr",
  "http://purl.org/syndication/thread/1.0": "thr",
  "http://madskills.com/public/xml/rss/module/trackback/": "trackback",
  "http://wellformedweb.org/CommentAPI/": "wfw",
  "http://purl.org/rss/1.0/modules/wiki/": "wiki",
  "http://www.usemod.com/cgi-bin/mb.pl?ModWiki": "wiki",
  "http://schemas.xmlsoap.org/soap/envelope/": "soap",
  "http://www.w3.org/2005/Atom": "atom",
  "http://www.w3.org/1999/xhtml": "xhtml",
  "http://my.netscape.com/rdf/simple/0.9/": "rss090",
  "http://purl.org/rss/1.0/": "rss1",
  "http://purl.org/net/rss1.1#": "rss11",
  "http://base.google.com/ns/1.0": "g",
  "http://www.w3.org/XML/1998/namespace": "xml",
  "http://openid.net/xmlns/1.0": "openid",
  "http://earth.google.com/kml/2.0": "kml20",
  "http://earth.google.com/kml/2.1": "kml21",
  "http://www.opengis.net/kml/2.2": "kml22",
  "http://www.w3.org/1999/xlink": "xlink",
  "xri://$xrd*($v*2.0)": "xrd",
  "xri://$xrds": "xrds",
}
+
def near_miss(ns):
  """Normalize a namespace URI for fuzzy comparison.

  Strips trailing non-word characters (e.g. '/' or '#') and lowercases the
  result, so e.g. 'http://purl.org/rss/1.0/' and 'http://purl.org/rss/1.0'
  compare equal.  Inputs with no word character at all, or non-string
  inputs (callers pass None qnames), are returned unchanged.
  """
  try:
    return re.match(r".*\w", ns).group().lower()
  except (AttributeError, TypeError):
    # AttributeError: no match (no word character); TypeError: ns is not a
    # string (e.g. None).  Previously a bare except; narrowed so real bugs
    # are no longer swallowed.
    return ns
+
# Fuzzy lookup table: near_miss-normalized namespace URI -> preferred prefix.
nearly_namespaces = dict([(near_miss(u),p) for u,p in namespaces.items()])

# Attributes from the xml: namespace that are allowed on any element.
stdattrs = [(u'http://www.w3.org/XML/1998/namespace', u'base'),
            (u'http://www.w3.org/XML/1998/namespace', u'id'),
            (u'http://www.w3.org/XML/1998/namespace', u'lang'),
            (u'http://www.w3.org/XML/1998/namespace', u'space')]
+
+#
+# From the SAX parser's point of view, this class is the one responsible for
+# handling SAX events. In actuality, all this class does is maintain a
+# pushdown stack of the *real* content handlers, and delegates sax events
+# to the current one.
+#
class SAXDispatcher(ContentHandler):
  """Top-level SAX handler.

  Maintains a pushdown stack of the *real* per-element content handlers
  and delegates every SAX event to the handlers currently on top.  Also
  owns the list of logged events and the detected feed type.
  """

  # When set, repeated occurrences of the same event are collapsed into a
  # single logged event carrying a 'msgcount'.
  firstOccurrenceOnly = 0

  def __init__(self, base, selfURIs, encoding):
    from root import root
    ContentHandler.__init__(self)
    self.lastKnownLine = 1
    self.lastKnownColumn = 0
    self.loggedEvents = []
    self.feedType = 0
    try:
      # IDNA-encode the base URI when possible (non-ASCII hostnames).
      self.xmlBase = base.encode('idna')
    except:
      self.xmlBase = base
    self.selfURIs = selfURIs
    self.encoding = encoding
    # Stack of lists of active handlers; the document root starts it off.
    self.handler_stack=[[root(self, base)]]
    self.defaultNamespaces = []

    # experimental RSS-Profile support
    self.rssCharData = []

  def setDocumentLocator(self, locator):
    self.locator = locator
    ContentHandler.setDocumentLocator(self, self.locator)

  def setFirstOccurrenceOnly(self, firstOccurrenceOnly=1):
    self.firstOccurrenceOnly = firstOccurrenceOnly

  def startPrefixMapping(self, prefix, uri):
    # Record the mapping on every active handler, then warn about
    # suspicious prefix/namespace combinations.
    for handler in iter(self.handler_stack[-1]):
      handler.namespace[prefix] = uri
    if uri and len(uri.split())>1:
      # Namespace URIs must not contain whitespace.
      from xml.sax import SAXException
      self.error(SAXException('Invalid Namespace: %s' % uri))
    if prefix in namespaces.values():
      # A well-known prefix bound to something other than its usual URI.
      if not namespaces.get(uri,'') == prefix and prefix:
        from logging import ReservedPrefix, MediaRssNamespace
        preferredURI = [key for key, value in namespaces.items() if value == prefix][0]
        if uri == 'http://search.yahoo.com/mrss':
          self.log(MediaRssNamespace({'prefix':prefix, 'ns':preferredURI}))
        else:
          self.log(ReservedPrefix({'prefix':prefix, 'ns':preferredURI}))
    elif prefix=='wiki' and uri.find('usemod')>=0:
      from logging import ObsoleteWikiNamespace
      self.log(ObsoleteWikiNamespace({'preferred':namespaces[uri], 'ns':uri}))
    elif prefix in ['atom','xhtml']:
      from logging import TYPE_ATOM, AvoidNamespacePrefix
      if self.getFeedType() == TYPE_ATOM:
        self.log(AvoidNamespacePrefix({'prefix':prefix}))
    elif namespaces.has_key(uri):
      # Known namespace bound to a non-standard prefix.
      if not namespaces[uri] == prefix and prefix:
        from logging import NonstdPrefix
        self.log(NonstdPrefix({'preferred':namespaces[uri], 'ns':uri}))
        if namespaces[uri] in ['atom', 'xhtml']:
          from logging import TYPE_UNKNOWN, TYPE_ATOM, AvoidNamespacePrefix
          if self.getFeedType() in [TYPE_ATOM,TYPE_UNKNOWN]:
            self.log(AvoidNamespacePrefix({'prefix':prefix}))
    elif uri == 'http://search.yahoo.com/mrss':
      from logging import MediaRssNamespace
      uri = 'http://search.yahoo.com/mrss/'
      self.log(MediaRssNamespace({'prefix':prefix, 'ns':uri}))
    else:
      # Completely unknown namespace: only report it when the URI is at
      # least a syntactically valid IRI (or empty).
      from validators import rfc3987
      rule=rfc3987()
      rule.setElement('xmlns:'+str(prefix), {}, self.handler_stack[-1][0])
      rule.value=uri
      if not uri or rule.validate():
        from logging import UnknownNamespace
        self.log(UnknownNamespace({'namespace':uri}))

  def namespaceFor(self, prefix):
    # Dispatcher terminates the handler chain's namespace lookup.
    return None

  def startElementNS(self, name, qname, attrs):
    self.lastKnownLine = self.locator.getLineNumber()
    self.lastKnownColumn = self.locator.getColumnNumber()
    # SAX hands us (namespaceURI, localname); unpack accordingly.
    qname, name = name
    for handler in iter(self.handler_stack[-1]):
      handler.startElementNS(name, qname, attrs)

    if len(attrs):
      # Flag attributes nobody expected, unless they live in a known (or
      # nearly-known) extension namespace.
      present = attrs.getNames()
      unexpected = filter(lambda x: x not in stdattrs, present)
      for handler in iter(self.handler_stack[-1]):
        ean = handler.getExpectedAttrNames()
        if ean: unexpected = filter(lambda x: x not in ean, unexpected)
      for u in unexpected:
        if u[0] and near_miss(u[0]) not in nearly_namespaces:
          feedtype=self.getFeedType()
          if (not qname) and feedtype and (feedtype==TYPE_RSS2):
            from logging import UseOfExtensionAttr
            self.log(UseOfExtensionAttr({"attribute":u, "element":name}))
          continue
        from logging import UnexpectedAttribute
        if not u[0]: u=u[1]
        self.log(UnexpectedAttribute({"parent":name, "attribute":u, "element":name}))

  def resolveEntity(self, publicId, systemId):
    # Internal subset only: nothing to fetch.
    if not publicId and not systemId:
      import cStringIO
      return cStringIO.StringIO()

    try:
      # The first external entity triggers the (optional) libxml2 DTD
      # validation pass installed by _validate; run it at most once.
      def log(exception):
        from logging import SAXError
        self.log(SAXError({'exception':str(exception)}))
      if self.xmlvalidator:
        self.xmlvalidator(log)
      self.xmlvalidator=0
    except:
      pass

    if (publicId=='-//Netscape Communications//DTD RSS 0.91//EN' and
      systemId=='http://my.netscape.com/publish/formats/rss-0.91.dtd'):
      from logging import ValidDoctype, DeprecatedDTD
      self.log(ValidDoctype({}))
      self.log(DeprecatedDTD({}))
    elif (publicId=='-//Netscape Communications//DTD RSS 0.91//EN' and
      systemId=='http://www.rssboard.org/rss-0.91.dtd'):
      from logging import ValidDoctype
      self.log(ValidDoctype({}))
    else:
      from logging import ContainsSystemEntity
      self.lastKnownLine = self.locator.getLineNumber()
      self.lastKnownColumn = self.locator.getColumnNumber()
      self.log(ContainsSystemEntity({}))
    # Never actually fetch external entities; resolve to an empty stream.
    from StringIO import StringIO
    return StringIO()

  def skippedEntity(self, name):
    from logging import ValidDoctype
    # With a known-valid doctype, standard HTML entities are acceptable.
    if [e for e in self.loggedEvents if e.__class__ == ValidDoctype]:
      from htmlentitydefs import name2codepoint
      if name in name2codepoint: return
    from logging import UndefinedNamedEntity
    self.log(UndefinedNamedEntity({'value':name}))

  def characters(self, string):
    self.lastKnownLine = self.locator.getLineNumber()
    self.lastKnownColumn = self.locator.getColumnNumber()
    for handler in iter(self.handler_stack[-1]):
      handler.characters(string)

  def endElementNS(self, name, qname):
    self.lastKnownLine = self.locator.getLineNumber()
    self.lastKnownColumn = self.locator.getColumnNumber()
    qname, name = name
    for handler in iter(self.handler_stack[-1]):
      handler.endElementNS(name, qname)
    # Pop this element's handlers off the stack.
    del self.handler_stack[-1]

  def push(self, handlers, name, attrs, parent):
    # Accept a single handler or an iterable of handlers; initialize each
    # and make them the new top of stack.
    if hasattr(handlers,'__iter__'):
      for handler in iter(handlers):
        handler.setElement(name, attrs, parent)
        handler.value=""
        handler.prevalidate()
    else:
      handlers.setElement(name, attrs, parent)
      handlers.value=""
      handlers.prevalidate()
      handlers = [handlers]
    self.handler_stack.append(handlers)

  def log(self, event, offset=(0,0)):
    # Record a validation event, normalizing the element name and (when
    # firstOccurrenceOnly is set) collapsing duplicates into a msgcount.
    def findDuplicate(self, event):
      duplicates = [e for e in self.loggedEvents if e.__class__ == event.__class__]
      if duplicates and (event.__class__ in [NonCanonicalURI]):
        return duplicates[0]

      for dup in duplicates:
        for k, v in event.params.items():
          if k != 'value':
            if not k in dup.params or dup.params[k] != v: break
        else:
          # All non-'value' params matched: it's the same event.
          return dup

    if event.params.has_key('element') and event.params['element']:
      if not isinstance(event.params['element'],tuple):
        # Convert internal prefix_local form back to prefix:local.
        event.params['element']=':'.join(event.params['element'].split('_', 1))
      elif event.params['element'][0]==u'http://www.w3.org/XML/1998/namespace':
        event.params['element'] = 'xml:' + event.params['element'][-1]
    if self.firstOccurrenceOnly:
      dup = findDuplicate(self, event)
      if dup:
        dup.params['msgcount'] = dup.params['msgcount'] + 1
        return
    event.params['msgcount'] = 1
    try:
      line = self.locator.getLineNumber() + offset[0]
      backupline = self.lastKnownLine
      column = (self.locator.getColumnNumber() or 0) + offset[1]
      backupcolumn = self.lastKnownColumn
    except AttributeError:
      # No locator yet (e.g. events logged before parsing starts).
      line = backupline = column = backupcolumn = 1
    event.params['line'] = line
    event.params['backupline'] = backupline
    event.params['column'] = column
    event.params['backupcolumn'] = backupcolumn
    self.loggedEvents.append(event)

  def error(self, exception):
    # Log, then re-raise so parsing stops; warnings are treated equally.
    from logging import SAXError
    self.log(SAXError({'exception':str(exception)}))
    raise exception
  fatalError=error
  warning=error

  def getFeedType(self):
    return self.feedType

  def setFeedType(self, feedType):
    self.feedType = feedType
+
+#
+# This base class for content handlers keeps track of such administrative
+# details as the parent of the current element, and delegating both log
+# and push events back up the stack. It will also concatenate up all of
+# the SAX events associated with character data into a value, handing such
+# things as CDATA and entities.
+#
+# Subclasses are expected to declare "do_name" methods for every
+# element that they support. These methods are expected to return the
+# appropriate handler for the element.
+#
+# The name of the element and the names of the children processed so
+# far are also maintained.
+#
+# Hooks are also provided for subclasses to do "prevalidation" and
+# "validation".
+#
+from logging import TYPE_RSS2
+
class validatorBase(ContentHandler):
  """Base class for all element validators.

  Tracks this element's name, accumulated character data ('value'), and
  child element names seen so far, and delegates logging and handler
  pushes up to the SAXDispatcher.  Subclasses declare do_<name> methods
  returning the handler(s) for each supported child element, plus optional
  prevalidate()/validate() hooks.
  """

  def __init__(self):
    ContentHandler.__init__(self)
    self.value = ""       # accumulated character data
    self.attrs = None     # set by setElement
    self.children = []    # names of child elements processed so far
    self.isValid = 1      # cleared by log(); gates ValidElement reporting
    self.name = None
    self.itunes = False   # True when inside an itunes:* context
    self.namespace = {}   # prefix -> uri declared on this element

  def setElement(self, name, attrs, parent):
    # Wire this handler into the tree; returns self for chaining.
    self.name = name
    self.attrs = attrs
    self.parent = parent
    self.dispatcher = parent.dispatcher
    self.line = self.dispatcher.locator.getLineNumber()
    self.col = self.dispatcher.locator.getColumnNumber()
    self.xmlLang = parent.xmlLang

    # xml:base: validate it, then resolve against the parent's base.
    if attrs and attrs.has_key((u'http://www.w3.org/XML/1998/namespace', u'base')):
      self.xmlBase=attrs.getValue((u'http://www.w3.org/XML/1998/namespace', u'base'))
      from validators import rfc3987
      self.validate_attribute((u'http://www.w3.org/XML/1998/namespace',u'base'),
        rfc3987)
      from urlparse import urljoin
      self.xmlBase = urljoin(parent.xmlBase, self.xmlBase)
    else:
      self.xmlBase = parent.xmlBase

    return self

  def simplename(self, name):
    # Collapse a (uri, localname) pair to "prefix:localname" for display.
    if not name[0]: return name[1]
    return namespaces.get(name[0], name[0]) + ":" + name[1]

  def namespaceFor(self, prefix):
    # Walk up the handler chain looking for a declaration of 'prefix'.
    if self.namespace.has_key(prefix):
      return self.namespace[prefix]
    elif self.parent:
      return self.parent.namespaceFor(prefix)
    else:
      return None

  def validate_attribute(self, name, rule):
    # Apply a validator (instance or class) to one attribute's value.
    if not isinstance(rule,validatorBase): rule = rule()
    if isinstance(name,str): name = (None,name)
    rule.setElement(self.simplename(name), {}, self)
    rule.value=self.attrs.getValue(name)
    rule.validate()

  def validate_required_attribute(self, name, rule):
    if self.attrs and self.attrs.has_key(name):
      self.validate_attribute(name, rule)
    else:
      from logging import MissingAttribute
      self.log(MissingAttribute({"attr": self.simplename(name)}))

  def validate_optional_attribute(self, name, rule):
    if self.attrs and self.attrs.has_key(name):
      self.validate_attribute(name, rule)

  def getExpectedAttrNames(self):
    # Default: no expected attributes (implicitly returns None).
    None

  def unknown_starttag(self, name, qname, attrs):
    # Namespaced child we don't recognize: swallow it permissively.
    from validators import any
    return any(self, name, qname, attrs)

  def startElementNS(self, name, qname, attrs):
    # xml:lang is inherited, and validated wherever it appears.
    if attrs.has_key((u'http://www.w3.org/XML/1998/namespace', u'lang')):
      self.xmlLang=attrs.getValue((u'http://www.w3.org/XML/1998/namespace', u'lang'))
      if self.xmlLang:
        from validators import iso639_validate
        iso639_validate(self.log, self.xmlLang, "xml:lang", name)

    from validators import eater
    feedtype=self.getFeedType()
    # Un-namespaced elements are only meaningful in RSS 2.0 documents.
    if (not qname) and feedtype and (feedtype!=TYPE_RSS2):
      from logging import UndeterminableVocabulary
      self.log(UndeterminableVocabulary({"parent":self.name, "element":name, "namespace":'""'}))
      qname="null"
    if qname in self.dispatcher.defaultNamespaces: qname=None

    # Map known (even slightly misspelled) namespaces to their canonical
    # prefix and requalify the element name as prefix_localname.
    nm_qname = near_miss(qname)
    if nearly_namespaces.has_key(nm_qname):
      prefix = nearly_namespaces[nm_qname]
      qname, name = None, prefix + "_" + name
      if prefix == 'itunes' and not self.itunes and not self.parent.itunes:
        if hasattr(self, 'setItunes'): self.setItunes(True)

    # ensure all attribute namespaces are properly defined
    for (namespace,attr) in attrs.keys():
      if ':' in attr and not namespace:
        from logging import MissingNamespace
        self.log(MissingNamespace({"parent":self.name, "element":attr}))

    if qname=='http://purl.org/atom/ns#':
      from logging import ObsoleteNamespace
      self.log(ObsoleteNamespace({"element":"feed"}))

    # Flag windows-1252 code points and replacement chars in attributes.
    for key, string in attrs.items():
      for c in string:
        if 0x80 <= ord(c) <= 0x9F or c == u'\ufffd':
          from validators import BadCharacters
          self.log(BadCharacters({"parent":name, "element":key[-1]}))

    if qname:
      # Still namespace-qualified: treat as an unknown extension element.
      handler = self.unknown_starttag(name, qname, attrs)
      name="unknown_"+name
      self.child=name
    else:
      try:
        self.child=name
        if name.startswith('dc_'):
          # handle "Qualified" Dublin Core
          handler = getattr(self, "do_" + name.replace("-","_").split('.')[0])()
        else:
          handler = getattr(self, "do_" + name.replace("-","_"))()
      except AttributeError:
        if name.find(':') != -1:
          from logging import MissingNamespace
          self.log(MissingNamespace({"parent":self.name, "element":name}))
          handler = eater()
        elif name.startswith('xhtml_'):
          from logging import MisplacedXHTMLContent
          self.log(MisplacedXHTMLContent({"parent": ':'.join(self.name.split("_",1)), "element":name}))
          handler = eater()
        else:
          try:
            from extension import Questionable

            # requalify the name with the default namespace
            qname = name
            from logging import TYPE_APP_CATEGORIES, TYPE_APP_SERVICE
            if self.getFeedType() in [TYPE_APP_CATEGORIES, TYPE_APP_SERVICE]:
              if qname.startswith('app_'): qname=qname[4:]

            if name.find('_')<0 and self.name.find('_')>=0:
              if 'http://www.w3.org/2005/Atom' in self.dispatcher.defaultNamespaces:
                qname='atom_'+qname

            # is this element questionable?
            handler = getattr(Questionable(), "do_" + qname.replace("-","_"))()
            from logging import QuestionableUsage
            self.log(QuestionableUsage({"parent": ':'.join(self.name.split("_",1)), "element":qname}))

          except AttributeError:
            from logging import UndefinedElement
            self.log(UndefinedElement({"parent": ':'.join(self.name.split("_",1)), "element":name}))
            handler = eater()

    self.push(handler, name, attrs)

    # MAP - always append name, even if already exists (we need this to
    # check for too many hour elements in skipHours, and it doesn't
    # hurt anything else)
    self.children.append(self.child)

  def normalizeWhitespace(self):
    self.value = self.value.strip()

  def endElementNS(self, name, qname):
    self.normalizeWhitespace()
    self.validate()
    # Report ValidElement only if no event was logged against this element.
    if self.isValid and self.name:
      from validators import ValidElement
      self.log(ValidElement({"parent":self.parent.name, "element":name}))

  def textOK(self):
    # Called when character data appears; subclasses that accept text
    # override this to suppress the warning.
    from validators import UnexpectedText
    self.log(UnexpectedText({"element":self.name,"parent":self.parent.name}))

  def characters(self, string):
    if string.strip(): self.textOK()

    line=column=0
    pc=' '  # previous character, for the double-encoding check
    for c in string:

      # latin characters double encoded as utf-8
      if 0x80 <= ord(c) <= 0xBF:
        if 0xC2 <= ord(pc) <= 0xC3:
          try:
            # Only log when the round-trip proves a double encoding.
            string.encode('iso-8859-1').decode('utf-8')
            from validators import BadCharacters
            self.log(BadCharacters({"parent":self.parent.name, "element":self.name}), offset=(line,max(1,column-1)))
          except:
            pass
      pc = c

      # win1252
      if 0x80 <= ord(c) <= 0x9F or c == u'\ufffd':
        from validators import BadCharacters
        self.log(BadCharacters({"parent":self.parent.name, "element":self.name}), offset=(line,column))
      column=column+1
      if ord(c) in (10,13):
        column=0
        line=line+1

    self.value = self.value + string

  def log(self, event, offset=(0,0)):
    # Attach this element's name, forward to the dispatcher, and mark the
    # element invalid so ValidElement is not reported for it.
    if not event.params.has_key('element'):
      event.params['element'] = self.name
    self.dispatcher.log(event, offset)
    self.isValid = 0

  def setFeedType(self, feedType):
    self.dispatcher.setFeedType(feedType)

  def getFeedType(self):
    return self.dispatcher.getFeedType()

  def push(self, handler, name, value):
    self.dispatcher.push(handler, name, value, self)

  def leaf(self):
    # Default handler for leaf elements: plain text.
    from validators import text
    return text()

  def prevalidate(self):
    # Hook: called before child processing begins.
    pass

  def validate(self):
    # Hook: called after the end tag is seen.
    pass
View
24 feedvalidator/categories.py
@@ -0,0 +1,24 @@
+from base import validatorBase
+from category import category
+from validators import yesno
+from logging import ConflictingCatAttr, ConflictingCatChildren
+
class categories(validatorBase):
  """Validator for an APP <categories> element.

  An out-of-line element (one with href) may carry neither fixed nor
  scheme, and must have no children.
  """

  def getExpectedAttrNames(self):
    return [(None,u'scheme'),(None,u'fixed'),(None,u'href')]

  def prevalidate(self):
    self.validate_optional_attribute((None,'fixed'), yesno)

    if self.attrs.has_key((None,'href')):
      # href conflicts with both of the in-line attributes
      for conflicting in ('fixed', 'scheme'):
        if self.attrs.has_key((None, conflicting)):
          self.log(ConflictingCatAttr({'attr': conflicting}))

  def validate(self):
    # out-of-line categories must be empty
    if self.children and self.attrs.has_key((None,'href')):
      self.log(ConflictingCatChildren({}))

  def do_atom_category(self):
    return category()
View
22 feedvalidator/category.py
@@ -0,0 +1,22 @@
+"""$Id$"""
+
+__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
+__version__ = "$Revision$"
+__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
+
+from base import validatorBase
+from validators import *
+
+#
+# author element.
+#
class category(validatorBase):
  """Validator for atom:category: a required term plus optional
  scheme (an IRI) and label (plain text) attributes."""

  def getExpectedAttrNames(self):
    return [(None,u'term'),(None,u'scheme'),(None,u'label')]

  def prevalidate(self):
    # atom:category is always empty; seed children so nested content
    # triggers "mixed" content warnings
    self.children.append(True)

    self.validate_required_attribute((None,'term'), nonblank)
    for attr, rule in ((u'scheme', rfc3987_full), (u'label', nonhtml)):
      self.validate_optional_attribute((None, attr), rule)
View
20 feedvalidator/cf.py
@@ -0,0 +1,20 @@
+# http://msdn.microsoft.com/XML/rss/sle/default.aspx
+
+from base import validatorBase
+from validators import eater, text
+
class sort(validatorBase):
  # cf:sort carries only attributes; no child content to validate.
  def getExpectedAttrNames(self):
    names = (u'data-type', u'default', u'element', u'label', u'ns')
    return [(None, n) for n in names]
+
class group(validatorBase):
  # cf:group carries only attributes; no child content to validate.
  def getExpectedAttrNames(self):
    return [(None, n) for n in (u'element', u'label', u'ns')]
+
class listinfo(validatorBase):
  # Microsoft Simple List Extensions <cf:listinfo>: a container for
  # sort and group column declarations.
  def do_cf_sort(self):
    return sort()
  def do_cf_group(self):
    return group()

# cf:treatAs holds simple text content ("list").
class treatAs(text): pass
View
349 feedvalidator/channel.py
@@ -0,0 +1,349 @@
+"""$Id$"""
+
+__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
+__version__ = "$Revision$"
+__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
+
+from base import validatorBase
+from logging import *
+from validators import *
+from itunes import itunes_channel
+from extension import *
+
+#
+# channel element.
+#
class channel(validatorBase, rfc2396, extension_channel, itunes_channel):
  """Validator for an RSS <channel> element.

  Child validators (link, title, docs below) record their values on
  this instance; atom:link handlers are collected in self.links for
  the rel="self" check.  validate() enforces required children and
  at-most-once restrictions.  Subclassed by rss10Channel/rss20Channel.
  """
  def getExpectedAttrNames(self):
    return [(u'urn:atom-extension:indexing', u'index')]
  def prevalidate(self):
    self.validate_optional_attribute((u'urn:atom-extension:indexing', u'index'), yesno)

  def __init__(self):
    # state filled in by child element validators
    self.link=None
    self.docs=''
    self.links = []
    self.title=None
    validatorBase.__init__(self)
  def validate(self):
    # required simple children
    if not "description" in self.children:
      self.log(MissingDescription({"parent":self.name,"element":"description"}))
    if not "link" in self.children:
      self.log(MissingLink({"parent":self.name, "element":"link"}))
    if not "title" in self.children:
      self.log(MissingTitle({"parent":self.name, "element":"title"}))
    # language may be supplied by <language>, <dc:language>, or xml:lang
    if not "dc_language" in self.children and not "language" in self.children:
      if not self.xmlLang:
        self.log(MissingDCLanguage({"parent":self.name, "element":"language"}))
    # children that may appear at most once
    if self.children.count("image") > 1:
      self.log(DuplicateElement({"parent":self.name, "element":"image"}))
    if self.children.count("textInput") > 1:
      self.log(DuplicateElement({"parent":self.name, "element":"textInput"}))
    if self.children.count("skipHours") > 1:
      self.log(DuplicateElement({"parent":self.name, "element":"skipHours"}))
    if self.children.count("skipDays") > 1:
      self.log(DuplicateElement({"parent":self.name, "element":"skipDays"}))
    # RSS 1.0: rdf:about must be a valid URI and <items> is required
    if self.attrs.has_key((rdfNS,"about")):
      self.value = self.attrs.getValue((rdfNS, "about"))
      rfc2396.validate(self, extraParams={"attr": "rdf:about"})
      if not "items" in self.children:
        self.log(MissingElement({"parent":self.name, "element":"items"}))

    # RSS 2.0 feeds should carry an atom:link rel="self"
    if self.parent.name == 'rss' and self.parent.version == '2.0':
      for link in self.links:
        if link.rel=='self': break
      else:
        self.log(MissingAtomSelfLink({}))

    if self.itunes: itunes_channel.validate(self)

    # don't warn about use of extension attributes for rss-board compliant feeds
    if self.docs == 'http://www.rssboard.org/rss-specification':
      self.dispatcher.loggedEvents = [event for
        event in self.dispatcher.loggedEvents
        if not isinstance(event,UseOfExtensionAttr)]

  def metadata(self):
    # hook: rss20Channel overrides this to flag items appearing before
    # channel metadata
    pass

  def do_image(self):
    self.metadata()
    from image import image
    return image(), noduplicates()

  def do_textInput(self):
    self.metadata()
    from textInput import textInput
    return textInput(), noduplicates()

  def do_textinput(self):
    self.metadata()
    if not self.attrs.has_key((rdfNS,"about")):
      # optimize for RSS 2.0. If it is not valid RDF, assume that it is
      # a simple misspelling (in other words, the error message will be
      # less than helpful on RSS 1.0 feeds.
      self.log(UndefinedElement({"parent":self.name, "element":"textinput"}))
    return eater(), noduplicates()

  def do_link(self):
    self.metadata()
    return link(), noduplicates()

  def do_title(self):
    self.metadata()
    return title(), noduplicates(), nonblank()

  def do_description(self):
    self.metadata()
    return nonhtml(), noduplicates()

  def do_blink(self):
    return blink(), noduplicates()

  def do_atom_author(self):
    from author import author
    return author()

  def do_atom_category(self):
    from category import category
    return category()

  def do_atom_contributor(self):
    from author import author
    return author()

  def do_atom_generator(self):
    from generator import generator
    return generator(), nonblank(), noduplicates()

  def do_atom_id(self):
    return rfc2396_full(), noduplicates()

  def do_atom_icon(self):
    return nonblank(), rfc2396(), noduplicates()

  def do_atom_link(self):
    # collect the link validator so validate() can look for rel="self"
    self.metadata()
    from link import link
    self.links.append(link())
    return self.links[-1]

  def do_atom_logo(self):
    return nonblank(), rfc2396(), noduplicates()

  def do_atom_title(self):
    from content import textConstruct
    return textConstruct(), noduplicates()

  def do_atom_subtitle(self):
    from content import textConstruct
    return textConstruct(), noduplicates()

  def do_atom_rights(self):
    from content import textConstruct
    return textConstruct(), noduplicates()

  def do_atom_updated(self):
    return rfc3339(), noduplicates()

  def do_dc_creator(self):
    if "managingEditor" in self.children:
      self.log(DuplicateSemantics({"core":"managingEditor", "ext":"dc:creator"}))
    return text() # duplicates allowed

  def do_dc_subject(self):
    if "category" in self.children:
      self.log(DuplicateSemantics({"core":"category", "ext":"dc:subject"}))
    return text() # duplicates allowed

  def do_dc_date(self):
    if "pubDate" in self.children:
      self.log(DuplicateSemantics({"core":"pubDate", "ext":"dc:date"}))
    return w3cdtf(), noduplicates()

  def do_cc_license(self):
    if "creativeCommons_license" in self.children:
      self.log(DuplicateSemantics({"core":"creativeCommons:license", "ext":"cc:license"}))
    return eater()

  def do_creativeCommons_license(self):
    if "cc_license" in self.children:
      self.log(DuplicateSemantics({"core":"creativeCommons:license", "ext":"cc:license"}))
    return rfc2396_full()
+
class rss20Channel(channel):
  """RSS 2.0 flavor of channel.

  Records the document position of each <item>; metadata() (invoked by
  every metadata element handler) then flags items that appeared before
  channel metadata with a position-corrected MisplacedItem event.
  """
  def __init__(self):
    # (line, column) positions of items seen so far
    self.itemlocs=[]
    channel.__init__(self)

  def metadata(self):
    # flag all items seen before this metadata element, using an offset
    # relative to the current SAX locator position
    locator=self.dispatcher.locator
    for line,col in self.itemlocs:
      offset=(line - locator.getLineNumber(), col - locator.getColumnNumber())
      self.log(MisplacedItem({"parent":self.name, "element":"item"}), offset)
    self.itemlocs = []

  def do_textInput(self):
    self.log(AvoidTextInput({}))
    return channel.do_textInput(self)

  def do_item(self):
    # remember where this item started for later MisplacedItem reports
    locator=self.dispatcher.locator
    self.itemlocs.append((locator.getLineNumber(), locator.getColumnNumber()))
    from item import rss20Item
    return rss20Item()

  def do_category(self):
    self.metadata()
    return category()

  def do_cloud(self):
    self.metadata()
    return cloud(), noduplicates()

  do_rating = validatorBase.leaf # TODO test cases?!?

  def do_ttl(self):
    self.metadata()
    return positiveInteger(), nonblank(), noduplicates()

  def do_docs(self):
    self.metadata()
    return docs(), noduplicates()

  def do_generator(self):
    self.metadata()
    if "admin_generatorAgent" in self.children:
      self.log(DuplicateSemantics({"core":"generator", "ext":"admin:generatorAgent"}))
    return text(), noduplicates()

  def do_pubDate(self):
    self.metadata()
    if "dc_date" in self.children:
      self.log(DuplicateSemantics({"core":"pubDate", "ext":"dc:date"}))
    return rfc822(), noduplicates()

  def do_managingEditor(self):
    self.metadata()
    if "dc_creator" in self.children:
      self.log(DuplicateSemantics({"core":"managingEditor", "ext":"dc:creator"}))
    return email_with_name(), noduplicates()

  def do_webMaster(self):
    self.metadata()
    if "dc_publisher" in self.children:
      self.log(DuplicateSemantics({"core":"webMaster", "ext":"dc:publisher"}))
    return email_with_name(), noduplicates()

  def do_language(self):
    self.metadata()
    if "dc_language" in self.children:
      self.log(DuplicateSemantics({"core":"language", "ext":"dc:language"}))
    return iso639(), noduplicates()

  def do_copyright(self):
    self.metadata()
    if "dc_rights" in self.children:
      self.log(DuplicateSemantics({"core":"copyright", "ext":"dc:rights"}))
    return nonhtml(), noduplicates()

  def do_lastBuildDate(self):
    self.metadata()
    if "dcterms_modified" in self.children:
      self.log(DuplicateSemantics({"core":"lastBuildDate", "ext":"dcterms:modified"}))
    return rfc822(), noduplicates()

  def do_skipHours(self):
    self.metadata()
    from skipHours import skipHours
    return skipHours()

  def do_skipDays(self):
    self.metadata()
    from skipDays import skipDays
    return skipDays()
+
class rss10Channel(channel):
  """RSS 1.0 flavor of channel: identified by rdf:about, with its item
  list carried in an <items> rdf:Seq."""

  def getExpectedAttrNames(self):
    # rdf:about is the only expected attribute.  (Previously this tuple
    # was listed twice; the duplicate entry was redundant.)
    return [(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'about')]

  def prevalidate(self):
    if self.attrs.has_key((rdfNS,"about")):
      # record every rdf:about so rdf:resource references can later be
      # cross-checked against declared resources
      if not "abouts" in self.dispatcher.__dict__:
        self.dispatcher.__dict__["abouts"] = []
      self.dispatcher.__dict__["abouts"].append(self.attrs[(rdfNS,"about")])

  def do_items(self): # this actually should be from the rss1.0 ns
    if not self.attrs.has_key((rdfNS,"about")):
      self.log(MissingAttribute({"parent":self.name, "element":self.name, "attr":"rdf:about"}))
    from item import items
    return items(), noduplicates()

  def do_rdfs_label(self):
    return text()

  def do_rdfs_comment(self):
    return text()
+
+
class link(rfc2396_full):
  # channel/link: remember the value on the parent (for cross-element
  # checks), then validate as a full URI.
  def validate(self):
    self.parent.link = self.value
    rfc2396_full.validate(self)

class title(nonhtml):
  # channel/title: remember the value on the parent, then check for HTML.
  def validate(self):
    self.parent.title = self.value
    nonhtml.validate(self)

class docs(rfc2396_full):
  # channel/docs: remember the value on the parent (used to suppress
  # extension-attribute warnings for rssboard.org feeds), then validate
  # as a full URI.
  def validate(self):
    self.parent.docs = self.value
    rfc2396_full.validate(self)

class blink(text):
  # <blink> is always bogus; unconditionally flag it.
  def validate(self):
    self.log(NoBlink({}))

class category(nonhtml):
  # RSS 2.0 <category>: plain text plus an optional domain attribute.
  def getExpectedAttrNames(self):
    return [(None, u'domain')]
+
class cloud(validatorBase):
  """Validator for the RSS 2.0 <cloud/> element.

  All five attributes (domain, port, path, registerProcedure, protocol)
  are required; port must additionally parse as a positive integer.
  Every attribute yields either a ValidCloud event or a
  Missing/InvalidIntegerAttribute event, in the original fixed order.
  """
  def getExpectedAttrNames(self):
    return [(None, u'domain'), (None, u'path'), (None, u'registerProcedure'),
            (None, u'protocol'), (None, u'port')]

  def _checkPresence(self, attr):
    # Log MissingAttribute or ValidCloud for a simple required attribute.
    if (None, attr) not in self.attrs.getNames():
      self.log(MissingAttribute({"parent":self.parent.name, "element":self.name, "attr":attr}))
    else:
      self.log(ValidCloud({"parent":self.parent.name, "element":self.name, "attr":attr}))

  def prevalidate(self):
    self._checkPresence('domain')

    # port: required, and must be a positive integer
    try:
      if int(self.attrs.getValue((None, 'port'))) <= 0:
        self.log(InvalidIntegerAttribute({"parent":self.parent.name, "element":self.name, "attr":'port'}))
      else:
        self.log(ValidCloud({"parent":self.parent.name, "element":self.name, "attr":'port'}))
    except KeyError:
      # attribute absent entirely
      self.log(MissingAttribute({"parent":self.parent.name, "element":self.name, "attr":'port'}))
    except ValueError:
      # present but not numeric
      self.log(InvalidIntegerAttribute({"parent":self.parent.name, "element":self.name, "attr":'port'}))

    for attr in ('path', 'registerProcedure', 'protocol'):
      self._checkPresence(attr)
    ## TODO - is there a list of accepted protocols for this thing?

    return validatorBase.prevalidate(self)
View
40 feedvalidator/compatibility.py
@@ -0,0 +1,40 @@
+"""$Id$"""
+
+__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
+__version__ = "$Revision$"
+__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
+
+from logging import *
+
def _must(event):
  # MUST violations are reported as Errors.
  return isinstance(event, Error)

def _should(event):
  # SHOULD violations are reported as Warnings.
  return isinstance(event, Warning)

def _may(event):
  # MAY suggestions are reported as Infos.
  return isinstance(event, Info)
+
def A(events):
  # compliance level A: MUST violations only
  return [ev for ev in events if _must(ev)]

def AA(events):
  # compliance level AA: MUST and SHOULD violations
  return [ev for ev in events if _must(ev) or _should(ev)]

def AAA(events):
  # compliance level AAA: MUST, SHOULD, and MAY items
  return [ev for ev in events if _must(ev) or _should(ev) or _may(ev)]

def AAAA(events):
  # compliance level AAAA: everything, unfiltered
  return events
+
def analyze(events, rawdata):
  """Guess whether the document is really HTML rather than a feed.

  Returns 'html' when the raw data or the logged events indicate an
  HTML document; otherwise None.
  """
  head = rawdata[0:512].strip().upper()
  for marker in ('<HTML', '<!DOCTYPE HTML'):
    if head.startswith(marker):
      return 'html'

  # an unknown html/xhtml:html element at the document root is also a
  # strong hint that this is an HTML page
  for event in events:
    if isinstance(event,UndefinedElement):
      if event.params['parent'] == 'root':
        if event.params['element'].lower() in ['html','xhtml:html']:
          return "html"
  return None
View
181 feedvalidator/content.py
@@ -0,0 +1,181 @@
+"""$Id$"""
+
+__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
+__version__ = "$Revision$"
+__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
+
+from base import validatorBase, namespaces
+from validators import *
+from logging import *
+#
+# item element.
+#
class textConstruct(validatorBase,rfc2396,nonhtml):
  """Validator for Atom text constructs (atom:title, atom:summary, ...).

  Handles the type attribute ('text', 'html', 'xhtml', or a MIME type
  for atom:content subclass), the out-of-line src attribute, base64
  payloads, and escaped-HTML detection.
  """
  from validators import mime_re
  import re

  def getExpectedAttrNames(self):
    return [(None, u'type'),(None, u'src')]

  def normalizeWhitespace(self):
    # whitespace is significant in text constructs; keep it
    pass

  def maptype(self):
    # plain text constructs may not carry a MIME type (content overrides)
    if self.type.find('/') > -1:
      self.log(InvalidTextType({"parent":self.parent.name, "element":self.name, "attr":"type", "value":self.type}))

  def prevalidate(self):
    nonhtml.start(self)
    if self.attrs.has_key((None,"src")):
      # out-of-line content: no default type
      self.type=''
    else:
      self.type='text'
      if self.getFeedType() == TYPE_RSS2 and self.name != 'atom_summary':
        self.log(DuplicateDescriptionSemantics({"element":self.name}))

    if self.attrs.has_key((None,"type")):
      self.type=self.attrs.getValue((None,"type"))
      if not self.type:
        self.log(AttrNotBlank({"parent":self.parent.name, "element":self.name, "attr":"type"}))

    self.maptype()

    if self.attrs.has_key((None,"src")):
      self.children.append(True) # force warnings about "mixed" content
      # temporarily borrow self.value to URI-validate the src attribute
      self.value=self.attrs.getValue((None,"src"))
      rfc2396.validate(self, errorClass=InvalidURIAttribute, extraParams={"attr": "src"})
      self.value=""

      if not self.attrs.has_key((None,"type")):
        self.log(MissingTypeAttr({"parent":self.parent.name, "element":self.name, "attr":"type"}))

    # anything other than the three keywords must be a valid MIME type
    if self.type in ['text','html','xhtml'] and not self.attrs.has_key((None,"src")):
      pass
    elif self.type and not self.mime_re.match(self.type):
      self.log(InvalidMIMEType({"parent":self.parent.name, "element":self.name, "attr":"type", "value":self.type}))
    else:
      self.log(ValidMIMEAttribute({"parent":self.parent.name, "element":self.name, "attr":"type", "value":self.type}))

    if not self.xmlLang:
      self.log(MissingDCLanguage({"parent":self.name, "element":"xml:lang"}))

  def validate(self):
    if self.type in ['text','xhtml']:
      if self.type=='xhtml':
        nonhtml.validate(self, NotInline)
      else:
        nonhtml.validate(self, ContainsUndeclaredHTML)
    else:
      # binary MIME types arrive base64 encoded; try to decode them
      if self.type.find('/') > -1 and not (
         self.type.endswith('+xml') or self.type.endswith('/xml') or
         self.type.startswith('text/')):
        import base64
        try:
          self.value=base64.decodestring(self.value)
          if self.type.endswith('/html'): self.type='html'
        except:
          self.log(NotBase64({"parent":self.parent.name, "element":self.name,"value":self.value}))

      if self.type=='html' or self.type.endswith("/html"):
        self.validateSafe(self.value)

        if self.type.endswith("/html"):
          if self.value.find("<html")<0 and not self.attrs.has_key((None,"src")):
            self.log(HtmlFragment({"parent":self.parent.name, "element":self.name,"value":self.value, "type":self.type}))
      else:
        nonhtml.validate(self, ContainsUndeclaredHTML)

    if not self.value and len(self.children)==0 and not self.attrs.has_key((None,"src")):
      self.log(NotBlank({"parent":self.parent.name, "element":self.name}))

  def textOK(self):
    # text mixed with child elements is only a problem once children exist
    if self.children: validatorBase.textOK(self)

  def characters(self, string):
    for c in string:
      if 0x80 <= ord(c) <= 0x9F or c == u'\ufffd':
        from validators import BadCharacters
        self.log(BadCharacters({"parent":self.parent.name, "element":self.name}))
    # xhtml constructs must wrap everything in a single div
    if (self.type=='xhtml') and string.strip() and not self.value.strip():
      self.log(MissingXhtmlDiv({"parent":self.parent.name, "element":self.name}))
    validatorBase.characters(self,string)

  def startElementNS(self, name, qname, attrs):
    if (self.type<>'xhtml') and not (
        self.type.endswith('+xml') or self.type.endswith('/xml')):
      self.log(UndefinedElement({"parent":self.name, "element":name}))

    if self.type=="xhtml":
      if name<>'div' and not self.value.strip():
        self.log(MissingXhtmlDiv({"parent":self.parent.name, "element":self.name}))
      elif qname not in ["http://www.w3.org/1999/xhtml"]:
        self.log(NotHtml({"parent":self.parent.name, "element":self.name, "message":"unexpected namespace", "value": qname}))

    if self.type=="application/xhtml+xml":
      if name<>'html':
        self.log(HtmlFragment({"parent":self.parent.name, "element":self.name,"value":self.value, "type":self.type}))
      elif qname not in ["http://www.w3.org/1999/xhtml"]:
        self.log(NotHtml({"parent":self.parent.name, "element":self.name, "message":"unexpected namespace", "value":qname}))

    if self.attrs.has_key((None,"mode")):
      if self.attrs.getValue((None,"mode")) == 'escaped':
        self.log(NotEscaped({"parent":self.parent.name, "element":self.name}))

    # xhtml:div content is inspected in detail; everything else is eaten
    if name=="div" and qname=="http://www.w3.org/1999/xhtml":
      handler=diveater()
    else:
      handler=eater()
    self.children.append(handler)
    self.push(handler, name, attrs)
+
+# treat xhtml:div as part of the content for purposes of detecting escaped html
class diveater(eater):
  """Consumes an xhtml:div, treating its text as part of the parent's
  content (for escaped-HTML detection) and screening child elements
  against the XHTML/SVG/MathML whitelists."""
  def __init__(self):
    eater.__init__(self)
    # set once any child element is seen; plain text only propagates
    # to the parent when the div is not mixed content
    self.mixed = False
  def textOK(self):
    # text inside a div is expected
    pass
  def characters(self, string):
    validatorBase.characters(self, string)
  def startElementNS(self, name, qname, attrs):
    if not qname:
      self.log(MissingNamespace({"parent":"xhtml:div", "element":name}))
    elif qname == 'http://www.w3.org/1999/xhtml':
      # screen tag and attributes against the XHTML whitelist
      if name not in HTMLValidator.htmltags:
        self.log(NotHtml({'message':'Non-XHTML element', 'value':name}))
      elif name not in HTMLValidator.acceptable_elements:
        self.log(SecurityRisk({'tag':name}))
      for ns,attr in attrs.getNames():
        if not ns and attr not in HTMLValidator.acceptable_attributes:
          if attr == 'style':
            for value in checkStyle(attrs.get((ns,attr))):
              self.log(DangerousStyleAttr({"attr":attr, "value":value}))
          else:
            self.log(SecurityRiskAttr({'attr':attr}))
    elif qname == 'http://www.w3.org/2000/svg':
      if name not in HTMLValidator.svg_elements:
        self.log(SecurityRisk({'tag':name}))
      for ns,attr in attrs.getNames():
        if not ns and attr not in HTMLValidator.svg_attributes:
          self.log(SecurityRiskAttr({'attr':attr}))
    elif qname == 'http://www.w3.org/1998/Math/MathML':
      if name not in HTMLValidator.mathml_elements:
        self.log(SecurityRisk({'tag':name}))
      for ns,attr in attrs.getNames():
        if not ns and attr not in HTMLValidator.mathml_attributes:
          self.log(SecurityRiskAttr({'attr':attr}))
    elif namespaces.has_key(qname):
      # known feed namespace nested inside content: undefined, unless
      # we're inside svg metadata
      if self.name != 'metadata':
        self.log(UndefinedElement({"parent": self.name, "element":namespaces[qname] + ":" + name}))
      self.push(eater(), name, attrs)
      return

    self.mixed = True
    eater.startElementNS(self, name, qname, attrs)
  def validate(self):
    # propagate pure-text div content up for escaped-html detection
    if not self.mixed: self.parent.value += self.value
+
class content(textConstruct):
  """atom:content: a text construct whose type may be any MIME type,
  except that multipart/alternative is explicitly disallowed."""
  def maptype(self):
    if self.type != 'multipart/alternative':
      return
    self.log(InvalidMIMEType({"parent":self.parent.name, "element":self.name, "attr":"type", "value":self.type}))
View
134 feedvalidator/entry.py
@@ -0,0 +1,134 @@
+"""$Id$"""
+
+__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
+__version__ = "$Revision$"
+__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
+
+from base import validatorBase
+from validators import *
+from logging import *
+from itunes import itunes_item
+from extension import extension_entry
+
+#
+# pie/echo entry element.
+#
class entry(validatorBase, extension_entry, itunes_item):
  """Validator for an atom:entry.

  Collects atom:link validators in self.links and the atom:content
  validator in self.content; validate() enforces required children,
  summary requirements, and at most one alternate link per
  (type, hreflang) pair.
  """
  def getExpectedAttrNames(self):
    return [(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'parseType')]

  def prevalidate(self):
    self.links=[]
    self.content=None

  def validate(self):
    if not 'title' in self.children:
      self.log(MissingElement({"parent":self.name, "element":"title"}))
    # an author may be inherited from the feed
    if not 'author' in self.children and not 'author' in self.parent.children:
      self.log(MissingElement({"parent":self.name, "element":"author"}))
    if not 'id' in self.children:
      self.log(MissingElement({"parent":self.name, "element":"id"}))
    if not 'updated' in self.children:
      self.log(MissingElement({"parent":self.name, "element":"updated"}))

    if self.content:
      # out-of-line or non-textual content requires a summary
      if not 'summary' in self.children:
        if self.content.attrs.has_key((None,"src")):
          self.log(MissingSummary({"parent":self.parent.name, "element":self.name}))
        ctype = self.content.type
        if ctype.find('/') > -1 and not (
           ctype.endswith('+xml') or ctype.endswith('/xml') or
           ctype.startswith('text/')):
          self.log(MissingSummary({"parent":self.parent.name, "element":self.name}))
    else:
      # no content at all: need a summary, and an alternate link
      if not 'summary' in self.children:
        self.log(MissingTextualContent({"parent":self.parent.name, "element":self.name}))
      for link in self.links:
        if link.rel == 'alternate': break
      else:
        self.log(MissingContentOrAlternate({"parent":self.parent.name, "element":self.name}))

    # can only have one alternate per type
    types={}
    for link in self.links:
      if not link.rel=='alternate': continue
      if not link.type in types: types[link.type]=[]
      if link.hreflang in types[link.type]:
        self.log(DuplicateAtomLink({"parent":self.name, "element":"link", "type":link.type, "hreflang":link.hreflang}))
      else:
        types[link.type] += [link.hreflang]

    if self.itunes: itunes_item.validate(self)

  def do_author(self):
    from author import author
    return author()

  def do_category(self):
    from category import category
    return category()

  def do_content(self):
    # keep a handle on the content validator for validate() above
    from content import content
    self.content=content()
    return self.content, noduplicates()

  def do_contributor(self):
    from author import author
    return author()

  def do_id(self):
    return canonicaluri(), nows(), noduplicates(), unique('id',self.parent,DuplicateEntries)

  def do_link(self):
    from link import link
    self.links += [link()]
    return self.links[-1]

  def do_published(self):
    return rfc3339(), nows(), noduplicates()

  def do_source(self):
    return source(), noduplicates()

  def do_rights(self):
    from content import textConstruct
    return textConstruct(), noduplicates()

  def do_summary(self):
    from content import textConstruct
    return textConstruct(), noduplicates()

  def do_title(self):
    from content import textConstruct
    return textConstruct(), noduplicates()

  def do_updated(self):
    return rfc3339(), nows(), noduplicates(), unique('updated',self.parent,DuplicateUpdated)

  def do_app_edited(self):
    return rfc3339(), nows(), noduplicates()

  def do_app_control(self):
    return app_control(), noduplicates()
+
class app_control(validatorBase):
  # app:control may contain a single app:draft with a yes/no value.
  def do_app_draft(self):
    checks = (yesno(), noduplicates())
    return checks
+
+from feed import feed
class source(feed):
  """atom:source inside an entry: validated like a feed, but only its
  metadata is checked, and nested entries are rejected."""

  def missingElement(self, params):
    # downgrade feed-level missing-element reports to the
    # source-specific event
    self.log(MissingSourceElement(params))

  def validate(self):
    # only feed metadata applies to atom:source
    self.validate_metadata()

  def do_author(self):
    # an author in the source satisfies the entry's author requirement
    if 'author' not in self.parent.children:
      self.parent.children.append('author')
    return feed.do_author(self)

  def do_entry(self):
    # feeds may not nest inside entries
    self.log(UndefinedElement({"parent":self.name, "element":"entry"}))
    return eater()
View
1,305 feedvalidator/extension.py
@@ -0,0 +1,1305 @@
+"""$Id$"""
+
+__author__ = "Sam Ruby <http://intertwingly.net>, Mark Pilgrim <http://diveintomark.org/> and Phil Ringnalda <http://philringnalda.com>"
+__version__ = "$Revision$"
+__copyright__ = "Copyright (c) 2002 Sam Ruby, Mark Pilgrim and Phil Ringnalda"
+
+from validators import *
+from logging import *
+
+########################################################################
+# Extensions that are valid everywhere #
+########################################################################
+
class extension_everywhere:
  """Mix-in of extension-element handlers valid at every level of a
  feed: Dublin Core and DC Terms, RDF, geo/GeoRSS/ICBM location data,
  and OPML ownership metadata.

  Each do_* method maps one namespaced child element to the
  validator(s) for its content; adding noduplicates() restricts the
  element to a single occurrence.
  """
  def do_dc_title(self):
    return text(), noduplicates()

  def do_dc_description(self):
    return text(), noduplicates()

  def do_dc_publisher(self):
    if "webMaster" in self.children:
      self.log(DuplicateSemantics({"core":"webMaster", "ext":"dc:publisher"}))
    return text() # duplicates allowed

  def do_dc_contributor(self):
    return text() # duplicates allowed

  def do_dc_type(self):
    return text(), noduplicates()

  def do_dc_format(self):
    return text(), noduplicates()

  def do_dc_identifier(self):
    return text()

  def do_dc_source(self):
    if "source" in self.children:
      self.log(DuplicateItemSemantics({"core":"source", "ext":"dc:source"}))
    return text(), noduplicates()

  def do_dc_language(self):
    if "language" in self.children:
      self.log(DuplicateSemantics({"core":"language", "ext":"dc:language"}))
    return iso639(), noduplicates()

  def do_dc_relation(self):
    return text(), # duplicates allowed

  def do_dc_coverage(self):
    return text(), # duplicates allowed

  def do_dc_rights(self):
    if "copyright" in self.children:
      self.log(DuplicateSemantics({"core":"copyright", "ext":"dc:rights"}))
    return nonhtml(), noduplicates()

  def do_dcterms_alternative(self):
    return text() #duplicates allowed

  def do_dcterms_abstract(self):
    return text(), noduplicates()

  def do_dcterms_tableOfContents(self):
    return rdfResourceURI(), noduplicates()

  def do_dcterms_created(self):
    return w3cdtf(), noduplicates()

  def do_dcterms_valid(self):
    return eater()

  def do_dcterms_available(self):
    return eater()

  def do_dcterms_issued(self):
    return w3cdtf(), noduplicates()

  def do_dcterms_modified(self):
    if "lastBuildDate" in self.children:
      self.log(DuplicateSemantics({"core":"lastBuildDate", "ext":"dcterms:modified"}))
    return w3cdtf(), noduplicates()

  def do_dcterms_dateAccepted(self):
    return text(), noduplicates()

  def do_dcterms_dateCopyrighted(self):
    return text(), noduplicates()

  def do_dcterms_dateSubmitted(self):
    return text(), noduplicates()

  def do_dcterms_extent(self):
    return positiveInteger(), nonblank(), noduplicates()

# def do_dcterms_medium(self):
# spec defines it as something that should never be used
# undefined element'll do for now

  def do_dcterms_isVersionOf(self):
    return rdfResourceURI() # duplicates allowed

  def do_dcterms_hasVersion(self):
    return rdfResourceURI() # duplicates allowed

  def do_dcterms_isReplacedBy(self):
    return rdfResourceURI() # duplicates allowed

  def do_dcterms_replaces(self):
    return rdfResourceURI() # duplicates allowed

  def do_dcterms_isRequiredBy(self):
    return rdfResourceURI() # duplicates allowed

  def do_dcterms_requires(self):
    return rdfResourceURI() # duplicates allowed

  def do_dcterms_isPartOf(self):
    return rdfResourceURI() # duplicates allowed

  def do_dcterms_hasPart(self):
    return rdfResourceURI() # duplicates allowed

  def do_dcterms_isReferencedBy(self):
    return rdfResourceURI() # duplicates allowed

  def do_dcterms_references(self):
    return rdfResourceURI() # duplicates allowed

  def do_dcterms_isFormatOf(self):
    return rdfResourceURI() # duplicates allowed

  def do_dcterms_hasFormat(self):
    return rdfResourceURI() # duplicates allowed

  def do_dcterms_conformsTo(self):
    return rdfResourceURI() # duplicates allowed

  def do_dcterms_spatial(self):
    return eater()

  def do_dcterms_temporal(self):
    return eater()

  def do_dcterms_audience(self):
    return text()

  def do_dcterms_mediator(self):
    return text(), noduplicates()

  # added to DMCI, but no XML mapping has been defined
  def do_dcterms_accessRights(self):
    return eater()

  def do_dcterms_accrualMethod(self):
    return eater()

  def do_dcterms_accrualPeriodicity(self):
    return eater()

  def do_dcterms_accrualPolicy(self):
    return eater()

  def do_dcterms_bibliographicCitation(self):
    return eater()

  def do_dcterms_educationLevel(self):
    return eater()

  def do_dcterms_instructionalMethod(self):
    return eater()

  def do_dcterms_license(self):
    return eater()

  def do_dcterms_provenance(self):
    return eater()

  def do_dcterms_rightsHolder(self):
    return eater()

  # embedded RDF is accepted but not inspected
  def do_rdf_RDF(self):
    return eater()

  def do_rdf_type(self):
    return eater()

  def do_rdf_Description(self):
    return eater()

  def do_rdfs_seeAlso(self):
    return rdfResourceURI() # duplicates allowed

  # geo (W3C Basic Geo), geourl, GeoRSS, and ICBM location elements
  def do_geo_Point(self):
    return geo_point()

  def do_geo_lat(self):
    return latitude()

  def do_geo_long(self):
    return longitude()

  def do_geo_alt(self):
    return decimal()

  def do_geourl_latitude(self):
    return latitude()

  def do_geourl_longitude(self):
    return longitude()

  def do_georss_where(self):
    return georss_where()

  def do_georss_point(self):
    return gml_pos()

  def do_georss_line(self):
    return gml_posList()

  def do_georss_polygon(self):
    return gml_posList()

  def do_georss_featureTypeTag(self):
    return text()

  def do_georss_relationshipTag(self):
    return text()

  def do_georss_featureName(self):
    return text()

  def do_georss_elev(self):
    return decimal()

  def do_georss_floor(self):
    return Integer()

  def do_georss_radius(self):
    return Float()

  def do_icbm_latitude(self):
    return latitude()

  def do_icbm_longitude(self):
    return longitude()

  # OPML ownership/date metadata
  def do_opml_dateCreated(self):
    return rfc822(), noduplicates()

  def do_opml_dateModified(self):
    return rfc822(), noduplicates()

  def do_opml_ownerName(self):
    return safeHtml(), noduplicates()

  def do_opml_ownerEmail(self):
    return email(), noduplicates()

  def do_opml_ownerId(self):
    return httpURL(), noduplicates()
+
+
+########################################################################
+# Extensions that are valid at either the channel or item levels #
+########################################################################
+
+from media import media_elements, media_content, media_group
class extension_channel_item(extension_everywhere, media_elements):
  # Extensions valid at both the channel and item levels; also pulls
  # in the Media RSS element handlers via media_elements.
  def do_taxo_topics(self):
    return eater()

  def do_l_link(self):
    return l_link()
+
+
+########################################################################
+# Extensions that are valid at only at the item level #
+########################################################################
+
+class extension_item(extension_channel_item):
  # -- annotate:, ag: (aggregation) and ev: (event) extension elements ---

  def do_annotate_reference(self):
    # must carry an rdf:resource URI; at most one per item
    return rdfResourceURI(), noduplicates()

  def do_ag_source(self):
    return text(), noduplicates()

  def do_ag_sourceURL(self):
    # full (absolute) RFC 2396 URI required
    return rfc2396_full(), noduplicates()

  def do_ag_timestamp(self):
    return iso8601(), noduplicates()

  def do_ev_startdate(self):
    # unbounded_iso8601: presumably ISO 8601 without the usual range
    # restrictions — confirm against the validator implementation
    return unbounded_iso8601(), noduplicates()

  def do_ev_enddate(self):
    return unbounded_iso8601(), noduplicates()

  def do_ev_location(self):
    return eater()

  def do_ev_organizer(self):
    return eater()

  def do_ev_type(self):
    return text(), noduplicates()
+
  # -- feedburner: and foaf: extension elements --------------------------
  # All FeedBurner elements require a full (absolute) RFC 2396 URI and
  # may appear at most once.

  def do_feedburner_awareness(self):
    return rfc2396_full(), noduplicates()

  def do_feedburner_origEnclosureLink(self):
    return rfc2396_full(), noduplicates()

  def do_feedburner_origLink(self):
    return rfc2396_full(), noduplicates()

  def do_foaf_maker(self):
    # structured FOAF content accepted without deep validation
    return eater()

  def do_foaf_primaryTopic(self):
    return eater()
+
  # -- Slashcode (slash:) extension elements -----------------------------

  def do_slash_comments(self):
    # comment count: non-negative integer, at most once per item
    return nonNegativeInteger(), noduplicates()

  def do_slash_section(self):
    return text()

  def do_slash_department(self):
    return text()

  def do_slash_hit_parade(self):
    # e.g. "10,9,8,..." — comma-separated integer list
    return commaSeparatedIntegers(), noduplicates()
+
+ def do_thr_children(self):
+ if self.getFeedType() != TYPE_RSS1:
+ self.log(UndefinedElement({'parent':self.name,"element":"thr:children"}))
+ return eater()
+
  # -- thr: (threading) and wfw: (Well-Formed Web) extension elements ----

  def do_thr_total(self):
    # total reply count: non-negative integer, at most once
    return nonNegativeInteger(), noduplicates()

  def do_thr_in_reply_to(self):
    # note: element name thr:in-reply-to maps to underscores here
    return in_reply_to()

  def do_wfw_comment(self):
    return rfc2396_full(), noduplicates()

  def do_wfw_commentRss(self):
    # correctly capitalized form (see the commentRSS variant handler)
    return rfc2396_full(), noduplicates()
+
+ def do_wfw_commentRSS(self):
+ self.log(CommentRSS({"parent":self.parent.name, "element":self.name}))
+ return rfc2396_full(), noduplicates()
+
  # -- wiki: extension elements ------------------------------------------
  # All plain-text content; no duplicate restriction on any of them.

  def do_wiki_diff(self):
    return text()

  def do_wiki_history(self):
    return text()

  def do_wiki_importance(self):
    return text()

  def do_wiki_status(self):
    return text()

  def do_wiki_version(self):
    return text()
+
  # -- g: namespace item attributes (apparently Google Base metadata) ----
  # Every element is limited to at most one occurrence per item; most
  # content is checked as non-HTML text, with typed validators where the
  # attribute is numeric, a date range, or a currency/unit value.

  def do_g_actor(self):
    return nonhtml(), noduplicates()

  def do_g_age(self):
    return nonNegativeInteger(), noduplicates()

  def do_g_agent(self):
    return nonhtml(), noduplicates()

  def do_g_area(self):
    return nonhtml(), noduplicates() # intUnit

  def do_g_apparel_type(self):
    return nonhtml(), noduplicates()

  def do_g_artist(self):
    return nonhtml(), noduplicates()

  def do_g_author(self):
    return nonhtml(), noduplicates()

  def do_g_bathrooms(self):
    return nonNegativeInteger(), noduplicates()

  def do_g_bedrooms(self):
    return nonNegativeInteger(), noduplicates()

  def do_g_brand(self):
    return nonhtml(), noduplicates()

  def do_g_calories(self):
    return g_float(), noduplicates()

  def do_g_cholesterol(self):
    return g_float(), noduplicates()

  def do_g_color(self):
    return nonhtml(), noduplicates()

  def do_g_cooking_time(self):
    return g_float(), noduplicates()

  def do_g_condition(self):
    return nonhtml(), noduplicates()

  def do_g_course(self):
    return nonhtml(), noduplicates()

  def do_g_course_date_range(self):
    return g_dateTimeRange(), noduplicates()

  def do_g_course_number(self):
    return nonhtml(), noduplicates()

  def do_g_course_times(self):
    return nonhtml(), noduplicates()

  def do_g_cuisine(self):
    return nonhtml(), noduplicates()

  def do_g_currency(self):
    # ISO 4217 currency code
    return iso4217(), noduplicates()

  def do_g_delivery_notes(self):
    return nonhtml(), noduplicates()

  def do_g_delivery_radius(self):
    # numeric value with a unit suffix (floatUnit validator)
    return floatUnit(), noduplicates()

  def do_g_education(self):
    return nonhtml(), noduplicates()

  def do_g_employer(self):
    return nonhtml(), noduplicates()
+
+