Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100755 355 lines (302 sloc) 14.52 kB
b2c3302 @rubys Initial revision
authored
#!/usr/bin/env python
# Site configuration; the names used below (PYDIR, WEBDIR, SRCDIR, ...) are
# expected to come from this star import.
from config import *

import cgi
import sys
import os
import urlparse
import re
import urllib

# Enable cgitb's handling of uncaught exceptions for this script.
import cgitb
cgitb.enable()

# Wrap stdout so that text written to it is encoded as ENCODING.
import codecs
ENCODING='UTF-8'
sys.stdout = codecs.getwriter(ENCODING)(sys.stdout)

# Used for CGI parameters
decUTF8 = codecs.getdecoder('utf-8')
decW1252 = codecs.getdecoder('windows-1252')

# Put the configured directories at the front of the module search path.
# Each one is inserted at position 0, so the last directory listed here
# ends up being searched first.
for _libdir in (PYDIR, WEBDIR, SRCDIR):
    if _libdir not in sys.path:
        sys.path.insert(0, _libdir)
del _libdir

import feedvalidator
from feedvalidator.logging import FEEDTYPEDISPLAY, VALIDFEEDGRAPHIC

from feedvalidator.logging import Info, Warning, Error, ValidationFailure
from feedvalidator.logging import TYPE_ATOM_ENTRY, TYPE_OPENSEARCH, TYPE_XRD
from feedvalidator.logging import TYPE_APP_SERVICE, TYPE_APP_CATEGORIES
963235c @rubys Fix for [ 923703 ] CGI should not flag warnings as failure
authored
30
b2c3302 @rubys Initial revision
authored
def applyTemplate(templateFile, params={}):
    """Fill in a template file and return the result encoded as UTF-8.

    templateFile -- name of a file inside the 'templates' directory of WEBDIR
    params       -- mapping used for the '%' substitution; it is not modified

    CSSURL is always made available to the template under the key 'CSSURL'.
    """
    # Substitute from a private copy. Writing CSSURL straight into `params`
    # would alter the caller's dictionary and, when the argument is omitted,
    # the single default dictionary shared by every call.
    values = dict(params)
    values['CSSURL'] = CSSURL

    fsock = open(os.path.join(WEBDIR, 'templates', templateFile))
    try:
        template = fsock.read()
    finally:
        # release the handle even when the read fails
        fsock.close()

    return (template % values).encode('utf-8')
b2c3302 @rubys Initial revision
authored
37
def sanitizeURL(url):
    """Turn a user-supplied address into an http(s) URL without credentials.

    Surrounding whitespace is removed, a leading 'feed:' is dropped, 'http://'
    is prepended when the text does not start with an http or https scheme,
    and any 'user:password@' part is removed.
    """
    # Trim before looking at the scheme. Doing it afterwards meant that a
    # leading blank hid the scheme and ' http://x' became 'http:// http://x'.
    url = url.strip()

    # Allow feed: URIs, as described by draft-obasanjo-feed-URI-scheme-02
    if url.lower().startswith('feed:'):
        url = url[5:]
        if url.startswith('//'):
            url = 'http:' + url

    if url.split(':')[0].lower() not in ['http', 'https']:
        url = 'http://%s' % url

    # strip user and password
    url = re.sub(r'^(\w*://)[-+.\w]*(:[-+.\w]+)?@', r'\1', url)

    return url
53
a578c76 @rubys Ensure URIs are properly escaped
authored
def escapeURL(url):
    """Return url percent-encoded and HTML-escaped for inclusion in a page.

    Each component of the URL is unquoted and quoted again, so text that is
    already percent-encoded is not encoded twice. The result is then passed
    through cgi.escape and, when possible, decoded with the 'idna' codec.
    If that decoding fails the escaped string is returned as it is.
    """
    # Characters left unescaped in, respectively: scheme, netloc, path,
    # params, query and fragment.
    safe = ['/', '/:@', '/', '/', '/?&=;', '/']
    parts = urlparse.urlparse(url)
    quoted = [urllib.quote(urllib.unquote(part), keep)
              for part, keep in zip(parts, safe)]
    url = cgi.escape(urlparse.urlunparse(quoted))
    try:
        return url.decode('idna')
    except Exception:
        # Don't blow up on invalid IRIs; a bare except here would also have
        # swallowed KeyboardInterrupt and SystemExit.
        return url
a578c76 @rubys Ensure URIs are properly escaped
authored
64
3a86424 Decode Unicode before parsing XML, to cover cases Expat doesn't deal …
Joseph Walton authored
65 import feedvalidator.formatter.text_html
66
03fe5f2 @rubys FastCGI support
authored
def buildCodeListing(events, rawdata, url):
    """Render the line-numbered listing of rawdata with applyTemplate.

    events  -- logged events; the 'line' entry of each event's params marks
               a line of the listing (0 is used when an event has none)
    rawdata -- text of the document, split on newlines for the listing
    url     -- address of the document, escaped and passed to the template

    Returns the filled-in 'code_listing.tmpl'.
    """
    # A set makes the per-line membership test constant time; with a list the
    # listing cost grew with (number of lines) x (number of events).
    linesWithErrors = set([e.params.get('line', 0) for e in events])

    codelines = []
    for index, line in enumerate(rawdata.split('\n')):
        linenum = index + 1
        line = feedvalidator.formatter.text_html.escapeAndMark(line)
        if not line:
            line = ' '
        # linetype "b" for a line that has an event, "a" for every other line
        if linenum in linesWithErrors:
            linetype = "b"
        else:
            linetype = "a"
        rendered = applyTemplate('code_listing_line.tmpl', {"line":line, "linenum":linenum, "linetype":linetype})
        # applyTemplate returns UTF-8; decode so the pieces can be joined and
        # substituted into the outer template as text
        codelines.append(rendered.decode('utf-8'))

    codelisting = "".join(codelines)
    return applyTemplate('code_listing.tmpl', {"codelisting":codelisting, "url":escapeURL(url)})
b2c3302 @rubys Initial revision
authored
80
03fe5f2 @rubys FastCGI support
authored
def yieldEventList(output):
    """Yield the pieces of the error list followed by the warning list.

    output -- object providing header(), footer(), getErrors() and
              getWarnings(); each error and warning is encoded as UTF-8

    When there are both errors and warnings, the two lists are separated by
    a footer, the 'andwarn1.tmpl' (exactly one warning) or 'andwarn2.tmpl'
    template, and a new header.
    """
    # Ask the formatter once and reuse the results for both the emptiness
    # checks and the iteration.
    errors, warnings = output.getErrors(), output.getWarnings()

    yield output.header()
    for item in errors:
        yield item.encode('utf-8')

    if errors and warnings:
        yield output.footer()
        if len(warnings) == 1:
            yield applyTemplate('andwarn1.tmpl')
        else:
            yield applyTemplate('andwarn2.tmpl')
        yield output.header()

    for item in warnings:
        yield item.encode('utf-8')
    yield output.footer()
607caf6 Catch and show ValidationFailure in check.cgi. Changed text_html.py
Joseph Walton authored
97
98 from feedvalidator.formatter.text_html import Formatter
99
b2c3302 @rubys Initial revision
authored
def postvalidate(url, events, rawdata, feedType, autofind=1):
    """Filter and format validation events, retrying once via feed discovery.

    Returns a dictionary with the keys 'url', 'events', 'rawdata', 'output',
    'specialCase' and 'feedType'.

    When the analysis reports the 'html' special case and autofind is true,
    the first link reported by feedfinder is validated instead and the
    result of post-validating that link (with autofind switched off) is
    returned. Any failure along the way is ignored and the values gathered
    so far are returned.
    """
    # filter based on compatibility level
    from feedvalidator import compatibility
    levelFilter = compatibility.AA # hardcoded for now
    events = levelFilter(events)

    formattedOutput = Formatter(events, rawdata)
    specialCase = None
    if formattedOutput:
        # check for special cases
        specialCase = compatibility.analyze(events, rawdata)
        if (specialCase == 'html') and autofind:
            try:
                try:
                    import feedfinder
                    candidates = feedfinder.getLinks(rawdata,url)
                except:
                    # discovery unavailable or failed: retry the same address
                    candidates = [url]
                if candidates:
                    # The arguments are rebound one by one, so a failure part
                    # way through leaves the updated values in the result.
                    url = candidates[0]
                    revalidated = feedvalidator.validateURL(url, firstOccurrenceOnly=1, wantRawData=1)
                    events = revalidated['loggedEvents']
                    rawdata = revalidated['rawdata']
                    feedType = revalidated['feedType']
                    return postvalidate(url, events, rawdata, feedType, autofind=0)
            except:
                pass

    result = {}
    result["url"] = url
    result["events"] = events
    result["rawdata"] = rawdata
    result["output"] = formattedOutput
    result["specialCase"] = specialCase
    result["feedType"] = feedType
    return result
130
03fe5f2 @rubys FastCGI support
authored
def _soapResponse(environ, method, contentType, url, rawdata):
    """Validate for output=soap12 and return a (status, headers, body) tuple.

    Any failure, including a request that supplies nothing to validate, is
    turned into a '500 Internal Error' response rendered from 'fault.tmpl'.
    """
    try:
        if (method == 'post') and (not rawdata):
            # Under WSGI the request body is wsgi.input; plain CGI passes
            # os.environ, which has no such key, so fall back to stdin.
            stream = environ.get('wsgi.input', None)
            if stream is None:
                stream = sys.stdin
            params = feedvalidator.validateStream(stream, contentType=contentType)
        elif rawdata:
            params = feedvalidator.validateString(rawdata, firstOccurrenceOnly=1)
        elif url:
            url = sanitizeURL(url)
            params = feedvalidator.validateURL(url, firstOccurrenceOnly=1, wantRawData=1)

        events = params['loggedEvents']

        # filter based on compatibility level
        from feedvalidator import compatibility
        filterFunc = compatibility.AA # hardcoded for now
        events = filterFunc(events)

        # format as xml
        from feedvalidator.formatter.text_xml import Formatter as xmlformat
        output = xmlformat(events)

        events_error = []
        events_warn = []
        events_info = []
        for event in events:
            # three separate tests: an event is added to every list whose
            # class it is an instance of
            if isinstance(event,Error): events_error.append(output.format(event))
            if isinstance(event,Warning): events_warn.append(output.format(event))
            if isinstance(event,Info): events_info.append(output.format(event))

        if events_error:
            validation_bool = "false"
        else:
            validation_bool = "true"

        from datetime import datetime
        validationtime = str(datetime.now().isoformat())

        body = applyTemplate('soap.tmpl', {
            'errorlist':"\n".join( events_error), 'errorcount': str(len(events_error)),
            'warninglist':"\n".join( events_warn), 'warningcount': str(len(events_warn)),
            'infolist':"\n".join( events_info), 'infocount': str(len(events_info)),
            'home_url': HOMEURL, 'url': url, 'date_time': validationtime, 'validation_bool': validation_bool
        })
        return ('200 OK', [('Content-type', 'application/soap+xml; charset=' + ENCODING)], body)
    except:
        # Deliberately broad: whatever went wrong is reported as a fault.
        import traceback
        tb = ''.join(traceback.format_exception(*sys.exc_info()))

        from feedvalidator.formatter.text_xml import xmlEncode
        body = applyTemplate('fault.tmpl', {'code':sys.exc_info()[0],
            'string':sys.exc_info()[1], 'traceback':xmlEncode(tb)})
        return ('500 Internal Error', [('Content-type', 'text/xml; charset=' + ENCODING)], body)


def _failurePage(url, formTemplate, formParams, failure=None):
    """Yield the page shown when validation could not be carried out.

    formTemplate and formParams choose and fill the input form that is shown
    again; failure, when given, is the ValidationFailure whose event is
    listed before the generic error text.
    """
    yield applyTemplate('header.tmpl', {'title':'Feed Validator Results: %s' % escapeURL(url)})
    yield applyTemplate(formTemplate, formParams)
    if failure is not None:
        output = Formatter([failure.event], None)
        for item in yieldEventList(output):
            yield item
    yield applyTemplate('error.tmpl')


def _validationPage(url, rawdata, manual):
    """Validate rawdata (preferred) or url and yield the HTML results page."""
    if rawdata:
        # validate raw data (from text form)
        try:
            params = feedvalidator.validateString(rawdata, firstOccurrenceOnly=1)
            events = params['loggedEvents']
            feedType = params['feedType']
        except ValidationFailure:
            # Show the submitted text again; the form field previously
            # received the (usually empty) URL instead.
            form = {'rawdata':cgi.escape(rawdata)}
            for chunk in _failurePage(url, 'manual.tmpl', form, sys.exc_info()[1]):
                yield chunk
            return
        except:
            form = {'rawdata':cgi.escape(rawdata)}
            for chunk in _failurePage(url, 'manual.tmpl', form):
                yield chunk
            return
    else:
        url = sanitizeURL(url)
        try:
            params = feedvalidator.validateURL(url, firstOccurrenceOnly=1, wantRawData=1)
            events = params['loggedEvents']
            rawdata = params['rawdata']
            feedType = params['feedType']
        except ValidationFailure:
            form = {'value':escapeURL(url)}
            for chunk in _failurePage(url, 'index.tmpl', form, sys.exc_info()[1]):
                yield chunk
            return
        except:
            form = {'value':escapeURL(url)}
            for chunk in _failurePage(url, 'index.tmpl', form):
                yield chunk
            return

    # post-validate (will do RSS autodiscovery if needed)
    validationData = postvalidate(url, events, rawdata, feedType)

    # write output header
    url = validationData['url']
    feedType = validationData['feedType']
    rawdata = validationData['rawdata']

    htmlUrl = escapeURL(urllib.quote(url))
    try:
        htmlUrl = htmlUrl.encode('idna')
    except Exception:
        pass
    docType = 'feed'
    if feedType == TYPE_ATOM_ENTRY: docType = 'entry'
    if feedType == TYPE_XRD: docType = 'document'
    if feedType == TYPE_APP_CATEGORIES: docType = 'Document'
    if feedType == TYPE_APP_SERVICE: docType = 'Document'
    if feedType == TYPE_OPENSEARCH: docType = 'description document'

    yield applyTemplate('header.tmpl', {'title':'Feed Validator Results: %s' % escapeURL(url)})
    if manual:
        yield applyTemplate('manual.tmpl', {'rawdata':cgi.escape(rawdata)})
    else:
        yield applyTemplate('index.tmpl', {'value':escapeURL(url)})

    output = validationData.get('output', None)

    # print special case, if any
    specialCase = validationData.get('specialCase', None)
    if specialCase:
        yield applyTemplate('%s.tmpl' % specialCase)

    msc = output.mostSeriousClass()

    # Explain the overall verdict
    if msc == Error:
        from feedvalidator.logging import ObsoleteNamespace
        if len(output.getErrors())==1 and \
           isinstance(output.data[0],ObsoleteNamespace):
            yield applyTemplate('notsupported.tmpl')
        elif specialCase != 'html':
            yield applyTemplate('invalid.tmpl')
    else:
        yield applyTemplate('congrats.tmpl', {"feedType":FEEDTYPEDISPLAY[feedType], "graphic":VALIDFEEDGRAPHIC[feedType], "docType":docType})
        if msc == Warning:
            yield applyTemplate('warning.tmpl')
        elif msc == Info:
            yield applyTemplate('info.tmpl')

    # Print any issues, whether or not the overall feed is valid
    if output:
        if specialCase != 'html':
            for item in yieldEventList(output):
                yield item

        # print code listing
        yield buildCodeListing(validationData['events'], validationData['rawdata'], url)

    # As long as there were no errors, show that the feed is valid
    if msc != Error:
        # valid
        yield applyTemplate('valid.tmpl', {"url":htmlUrl, "srcUrl":htmlUrl, "feedType":FEEDTYPEDISPLAY[feedType], "graphic":VALIDFEEDGRAPHIC[feedType], "HOMEURL":HOMEURL, "docType":docType})


def checker_app(environ, start_response):
    """WSGI application: validate a feed and yield the response body.

    environ        -- WSGI/CGI environment; REQUEST_METHOD is required
    start_response -- WSGI callable taking the status line and header list

    The form fields 'url', 'rawdata', 'manual' and 'output' are read for GET
    requests and for bodies sent as application/x-www-form-urlencoded; any
    other request is treated as having no input. With output=soap12 a single
    SOAP document is produced, otherwise an HTML page is yielded piece by
    piece.
    """
    method = environ['REQUEST_METHOD'].lower()
    contentType = environ.get('CONTENT_TYPE', None)
    output_option = ''

    if (method == 'get') or (contentType and cgi.parse_header(contentType)[0].lower() == 'application/x-www-form-urlencoded'):
        fs = cgi.FieldStorage(fp=environ.get('wsgi.input',None), environ=environ)
        # getfirst: a repeated parameter yields its first value rather than a
        # list, which the string handling below cannot cope with
        url = fs.getfirst("url") or ''
        try:
            if url: url = url.decode('utf-8').encode('idna')
        except Exception:
            # keep the address exactly as submitted when it cannot be converted
            pass
        manual = fs.getfirst("manual") or 0
        rawdata = fs.getfirst("rawdata") or ''
        output_option = fs.getfirst("output") or ''

        # XXX Should use 'charset'
        try:
            rawdata = decUTF8(rawdata)[0]
        except UnicodeError:
            rawdata = decW1252(rawdata)[0]

        rawdata = rawdata[:feedvalidator.MAXDATALENGTH].replace('\r\n', '\n').replace('\r', '\n')
    else:
        url = None
        manual = None
        rawdata = None

    if (output_option == "soap12"):
        # SOAP. The response is built completely before anything is yielded,
        # so an error can never lead to a second start_response call, and
        # closing the generator is not mistaken for a validation failure.
        status, headers, body = _soapResponse(environ, method, contentType, url, rawdata)
        start_response(status, headers)
        yield body
        return

    start_response('200 OK', [('Content-type', 'text/html; charset=' + ENCODING)])

    if url or rawdata:
        # validate
        for chunk in _validationPage(url, rawdata, manual):
            yield chunk
    else:
        # nothing to validate, just write basic form
        yield applyTemplate('header.tmpl', {'title':'Feed Validator for Atom and RSS'})
        if manual:
            yield applyTemplate('manual.tmpl', {'rawdata':''})
        else:
            yield applyTemplate('index.tmpl', {'value':'http://'})
        yield applyTemplate('special.tmpl', {})

    yield applyTemplate('navbar.tmpl')
    yield applyTemplate('footer.tmpl')
338
if __name__ == "__main__":
    # A numeric first argument selects FastCGI mode listening on that port;
    # with no argument, or a non-numeric one, a single CGI request is served.
    runAsCGI = len(sys.argv)==1 or not sys.argv[1].isdigit()
    if not runAsCGI:
        # export HTTP_HOST=http://feedvalidator.org/
        # export SCRIPT_NAME=check.cgi
        # export SCRIPT_FILENAME=/home/rubys/svn/feedvalidator/check.cgi
        import fcgi
        port=int(sys.argv[1])
        fcgi.WSGIServer(checker_app, bindAddress=("127.0.0.1", port)).run()
    else:
        def start_response(status, headers):
            # CGI header block: status line, one line per header, then the
            # line that separates the headers from the body
            sys.stdout.write('Status: %s\r\n' % status)
            for header,value in headers:
                sys.stdout.write('%s: %s\r\n' % (header, value))
            sys.stdout.write('\n')
        for chunk in checker_app(os.environ, start_response):
            # chunks are UTF-8 encoded; stdout expects text and encodes it
            sys.stdout.write(chunk.decode('utf-8'))
            sys.stdout.write('\n')
Something went wrong with that request. Please try again.