Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100755 349 lines (298 sloc) 14.161 kB
b2c3302 @rubys Initial revision
authored
#!/usr/bin/env python
# CGI / FastCGI front end of the feed validator.

# Site configuration comes first; presumably it is what supplies PYDIR,
# WEBDIR and SRCDIR (used below) -- confirm against config.py.
from config import *

import cgi
import sys
import os
import urlparse
import re
import urllib

# Show tracebacks of uncaught exceptions in the browser.
import cgitb
cgitb.enable()

import codecs

# Everything written to stdout goes through a UTF-8 encoding writer.
ENCODING = 'UTF-8'
sys.stdout = codecs.getwriter(ENCODING)(sys.stdout)

# Used for CGI parameters
decUTF8 = codecs.getdecoder('utf-8')
decW1252 = codecs.getdecoder('windows-1252')

# Put the configured directories at the front of the module search path.
# Each one is inserted at position 0, so the last one listed is searched first.
for _dir in (PYDIR, WEBDIR, SRCDIR):
    if _dir not in sys.path:
        sys.path.insert(0, _dir)
del _dir

import feedvalidator
from feedvalidator.logging import FEEDTYPEDISPLAY, VALIDFEEDGRAPHIC
from feedvalidator.logging import Info, Warning, Error, ValidationFailure
from feedvalidator.logging import TYPE_ATOM_ENTRY, TYPE_OPENSEARCH
963235c @rubys Fix for [ 923703 ] CGI should not flag warnings as failure
authored
29
b2c3302 @rubys Initial revision
authored
def applyTemplate(templateFile, params=None):
    """Fill in a template from WEBDIR/templates and return it UTF-8 encoded.

    templateFile -- file name of the template inside the 'templates'
                    directory under WEBDIR
    params       -- optional dict of values for the template's %-style
                    placeholders; it is left unmodified

    The substitution values always include 'CSSURL' (from the configuration),
    which overrides any entry of that name in params.  Errors raised while
    opening or reading the file, or by the %-substitution itself, propagate
    to the caller.
    """
    # Substitute from a private copy so that neither the caller's dict nor a
    # default shared between calls is ever written to.
    values = dict(params or {})
    values['CSSURL'] = CSSURL

    fsock = open(os.path.join(WEBDIR, 'templates', templateFile))
    try:
        template = fsock.read()
    finally:
        # Release the handle even if reading fails.
        fsock.close()

    return (template % values).encode('utf-8')
b2c3302 @rubys Initial revision
authored
36
def sanitizeURL(url):
    """Turn a user-supplied feed address into a plain http(s) URL.

    url -- the address as typed or pasted by the user

    Surrounding whitespace is removed, a leading 'feed:' is dropped
    ('feed://host/...' becomes 'http://host/...'), 'http://' is prepended
    when the address does not already start with an http or https scheme,
    and any 'user:password@' part is removed.  Returns the cleaned-up URL.
    """
    # Trim before looking at the scheme: with leading whitespace still in
    # place the checks below would not recognise 'feed:' or 'http:' and a
    # second scheme would be prepended.
    url = url.strip()

    # Allow feed: URIs, as described by draft-obasanjo-feed-URI-scheme-02
    if url.lower().startswith('feed:'):
        url = url[5:]
        if url.startswith('//'):
            url = 'http:' + url

    if url.split(':')[0].lower() not in ('http', 'https'):
        url = 'http://%s' % url

    # strip user and password
    return re.sub(r'^(\w*://)[-+.\w]*(:[-+.\w]+)?@', r'\1', url)
52
a578c76 @rubys Ensure URIs are properly escaped
authored
def escapeURL(url):
    """Return url with each component percent-encoded, then HTML-escaped.

    The URL is split with urlparse; every component is unquoted and quoted
    again with a component-specific set of characters left as they are, and
    the reassembled URL is passed through cgi.escape.  Finally a decode with
    the 'idna' codec is attempted; if that fails for any reason the escaped
    URL is returned unchanged.
    """
    # Characters left unquoted, in urlparse order:
    # scheme, netloc, path, params, query, fragment.
    keep = ('/', '/:@', '/', '/', '/?&=;', '/')
    pieces = urlparse.urlparse(url)
    requoted = [urllib.quote(urllib.unquote(piece), ok)
                for piece, ok in zip(pieces, keep)]
    escaped = cgi.escape(urlparse.urlunparse(requoted))
    try:
        return escaped.decode('idna')
    except:
        return escaped
a578c76 @rubys Ensure URIs are properly escaped
authored
63
3a86424 Decode Unicode before parsing XML, to cover cases Expat doesn't deal …
Joseph Walton authored
64 import feedvalidator.formatter.text_html
65
03fe5f2 @rubys FastCGI support
authored
def buildCodeListing(events, rawdata, url):
    """Render the feed source as a numbered listing that flags reported lines.

    events  -- logged events; the 'line' entry of each event's params mapping
               (0 when absent) is taken as the source line it refers to
    rawdata -- text of the feed; it is split into lines on newline characters
    url     -- address of the feed, escaped and passed to the outer template

    Every source line is rendered through 'code_listing_line.tmpl' with line
    type "b" when an event refers to it and "a" otherwise; the joined result
    is wrapped in 'code_listing.tmpl', whose output is returned.
    """
    # Collected into a set once so the per-line lookup below does not rescan
    # the whole event list for every line of the feed.
    linesWithErrors = set([e.params.get('line', 0) for e in events])

    codelines = []
    for index, line in enumerate(rawdata.split('\n')):
        linenum = index + 1  # listing is numbered from 1
        line = feedvalidator.formatter.text_html.escapeAndMark(line)
        if not line:
            line = ' '
        if linenum in linesWithErrors:
            linetype = "b"
        else:
            linetype = "a"
        rendered = applyTemplate('code_listing_line.tmpl',
                                 {"line":line, "linenum":linenum, "linetype":linetype})
        # applyTemplate returns UTF-8 bytes; decode so the joined listing can
        # be substituted into the outer template as text.
        codelines.append(rendered.decode('utf-8'))

    codelisting = "".join(codelines)
    return applyTemplate('code_listing.tmpl', {"codelisting":codelisting, "url":escapeURL(url)})
b2c3302 @rubys Initial revision
authored
79
03fe5f2 @rubys FastCGI support
authored
def yieldEventList(output):
    """Yield the formatted errors, then the formatted warnings, of output.

    output -- formatter object providing header(), footer(), getErrors() and
              getWarnings()

    The sequence is: header, every error (UTF-8 encoded), every warning
    (UTF-8 encoded), footer.  When there are both errors and warnings the
    two lists are separated by footer, the 'andwarn1.tmpl' (exactly one
    warning) or 'andwarn2.tmpl' template, and a fresh header.
    """
    # Fetch each list once and reuse it for both the tests and the loops.
    errors, warnings = output.getErrors(), output.getWarnings()

    yield output.header()
    for message in errors:
        yield message.encode('utf-8')

    if errors and warnings:
        # Close the error list and open a separate one for the warnings.
        yield output.footer()
        if len(warnings) == 1:
            yield applyTemplate('andwarn1.tmpl')
        else:
            yield applyTemplate('andwarn2.tmpl')
        yield output.header()

    for message in warnings:
        yield message.encode('utf-8')
    yield output.footer()
607caf6 Catch and show ValidationFailure in check.cgi. Changed text_html.py
Joseph Walton authored
96
97 from feedvalidator.formatter.text_html import Formatter
98
b2c3302 @rubys Initial revision
authored
def _findFeedURLs(url, rawdata):
    """Return the feed URLs feedfinder discovers for url, or [url] on any failure."""
    try:
        import feedfinder

        class NotARobot:
            # Serves the page that was already fetched from memory and
            # fetches anything else directly.
            base = url

            def get(self, url):
                if url == self.base:
                    return rawdata
                return urllib.urlopen(url).read()

        feedfinder._gatekeeper = NotARobot()
        return feedfinder.getFeeds(url)
    except:
        return [url]


def postvalidate(url, events, rawdata, feedType, autofind=1):
    """Filter and format validation results, following HTML pages to their feed.

    Returns a dictionary with the keys 'url', 'events', 'rawdata', 'output',
    'specialCase' and 'feedType'.

    The events are run through the compatibility filter and wrapped in a
    Formatter.  When the analysis reports the special case 'html' and
    autofind is true, the first discovered feed URL is validated and
    post-validated in turn (with autofind off); any error on that path is
    ignored and the result falls through to the dictionary built from the
    current values.
    """
    from feedvalidator import compatibility

    # filter based on compatibility level (AA is hardcoded for now)
    events = compatibility.AA(events)

    specialCase = None
    formattedOutput = Formatter(events, rawdata)
    if formattedOutput:
        # check for special cases
        specialCase = compatibility.analyze(events, rawdata)
        if autofind and specialCase == 'html':
            try:
                candidates = _findFeedURLs(url, rawdata)
                if candidates:
                    url = candidates[0]
                    revalidated = feedvalidator.validateURL(url, firstOccurrenceOnly=1, wantRawData=1)
                    events = revalidated['loggedEvents']
                    rawdata = revalidated['rawdata']
                    feedType = revalidated['feedType']
                    return postvalidate(url, events, rawdata, feedType, autofind=0)
            except:
                pass

    result = {}
    result["url"] = url
    result["events"] = events
    result["rawdata"] = rawdata
    result["output"] = formattedOutput
    result["specialCase"] = specialCase
    result["feedType"] = feedType
    return result
135
03fe5f2 @rubys FastCGI support
authored
def _failurePage(url, formTemplate, formParams, failure=None):
    """Yield the page fragments shown when validation could not be carried out.

    url          -- address shown (escaped) in the page title
    formTemplate -- template of the input form to show again
    formParams   -- substitution values for that form
    failure      -- optional ValidationFailure; when given, its event is
                    formatted and listed before the generic error notice
    """
    yield applyTemplate('header.tmpl', {'title':'Feed Validator Results: %s' % escapeURL(url)})
    yield applyTemplate(formTemplate, formParams)
    if failure is not None:
        for item in yieldEventList(Formatter([failure.event], None)):
            yield item
    yield applyTemplate('error.tmpl')


def checker_app(environ, start_response):
    """WSGI application: validate a feed and yield the response in pieces.

    environ        -- request environment; 'REQUEST_METHOD' is required,
                      'CONTENT_TYPE' and 'wsgi.input' are used when present
    start_response -- WSGI callable; it is invoked once, before any part of
                      the body is yielded

    For GET requests and form-encoded POSTs the fields 'url', 'rawdata',
    'manual' and 'output' are read.  With output=soap12 the result is a SOAP
    document (or a fault document with status 500 when anything goes wrong);
    otherwise an HTML page is produced: the validation report when a URL or
    raw data was supplied, the empty input form when not.
    """
    method = environ['REQUEST_METHOD'].lower()
    contentType = environ.get('CONTENT_TYPE', None)
    output_option = ''

    if (method == 'get') or (contentType and cgi.parse_header(contentType)[0].lower() == 'application/x-www-form-urlencoded'):
        fs = cgi.FieldStorage(fp=environ.get('wsgi.input',None), environ=environ)
        url = fs.getvalue("url") or ''
        try:
            if url:
                url = url.decode('utf-8').encode('idna')
        except Exception:
            # Best effort only: keep the URL as submitted when it cannot be
            # converted.
            pass
        manual = fs.getvalue("manual") or 0
        rawdata = fs.getvalue("rawdata") or ''
        output_option = fs.getvalue("output") or ''

        # XXX Should use 'charset'
        try:
            rawdata = decUTF8(rawdata)[0]
        except UnicodeError:
            rawdata = decW1252(rawdata)[0]

        rawdata = rawdata[:feedvalidator.MAXDATALENGTH].replace('\r\n', '\n').replace('\r', '\n')
    else:
        url = None
        manual = None
        rawdata = None

    if output_option == "soap12":
        # SOAP
        try:
            if method == 'post' and not rawdata:
                # NOTE(review): this reads sys.stdin rather than
                # environ['wsgi.input']; confirm that is what the FastCGI
                # deployment expects.
                params = feedvalidator.validateStream(sys.stdin, contentType=contentType)
            elif rawdata:
                params = feedvalidator.validateString(rawdata, firstOccurrenceOnly=1)
            elif url:
                url = sanitizeURL(url)
                params = feedvalidator.validateURL(url, firstOccurrenceOnly=1, wantRawData=1)
            # With nothing to validate params stays unbound; the NameError
            # raised on the next line is reported through the fault document.

            events = params['loggedEvents']

            # filter based on compatibility level
            from feedvalidator import compatibility
            filterFunc = compatibility.AA # hardcoded for now
            events = filterFunc(events)

            # format as xml
            from feedvalidator.formatter.text_xml import Formatter as xmlformat
            output = xmlformat(events)

            events_error = []
            events_warn = []
            events_info = []
            for event in events:
                if isinstance(event, Error): events_error.append(output.format(event))
                if isinstance(event, Warning): events_warn.append(output.format(event))
                if isinstance(event, Info): events_info.append(output.format(event))

            if events_error:
                validation_bool = "false"
            else:
                validation_bool = "true"

            from datetime import datetime
            validationtime = str(datetime.now().isoformat())

            body = applyTemplate('soap.tmpl', {
                'errorlist':"\n".join(events_error), 'errorcount': str(len(events_error)),
                'warninglist':"\n".join(events_warn), 'warningcount': str(len(events_warn)),
                'infolist':"\n".join(events_info), 'infocount': str(len(events_info)),
                'home_url': HOMEURL, 'url': url, 'date_time': validationtime, 'validation_bool': validation_bool
            })
        except Exception:
            import traceback
            tb = ''.join(traceback.format_exception(*sys.exc_info()))

            from feedvalidator.formatter.text_xml import xmlEncode
            start_response('500 Internal Error', [('Content-type', 'text/xml; charset=' + ENCODING)])

            yield applyTemplate('fault.tmpl', {'code':sys.exc_info()[0],
                'string':sys.exc_info()[1], 'traceback':xmlEncode(tb)})
        else:
            # Announce the headers through start_response, as WSGI requires,
            # instead of embedding a header line in the body.  The body is
            # yielded outside the try block so that closing the generator at
            # this point is not mistaken for a validation error.
            start_response('200 OK', [('Content-type', 'application/soap+xml; charset=' + ENCODING)])
            yield body

    else:
        start_response('200 OK', [('Content-type', 'text/html; charset=' + ENCODING)])

        if url or rawdata:
            # validate
            goon = 0
            if rawdata:
                # validate raw data (from text form)
                try:
                    params = feedvalidator.validateString(rawdata, firstOccurrenceOnly=1)
                    events = params['loggedEvents']
                    feedType = params['feedType']
                    goon = 1
                except ValidationFailure:
                    vfv = sys.exc_info()[1]
                    # Put the submitted data back into the form and list the
                    # reported failure.
                    for item in _failurePage(url, 'manual.tmpl', {'rawdata':cgi.escape(rawdata)}, vfv):
                        yield item
                except Exception:
                    for item in _failurePage(url, 'manual.tmpl', {'rawdata':cgi.escape(rawdata)}):
                        yield item
            else:
                url = sanitizeURL(url)
                try:
                    params = feedvalidator.validateURL(url, firstOccurrenceOnly=1, wantRawData=1)
                    events = params['loggedEvents']
                    rawdata = params['rawdata']
                    feedType = params['feedType']
                    goon = 1
                except ValidationFailure:
                    vfv = sys.exc_info()[1]
                    for item in _failurePage(url, 'index.tmpl', {'value':escapeURL(url)}, vfv):
                        yield item
                except Exception:
                    for item in _failurePage(url, 'index.tmpl', {'value':escapeURL(url)}):
                        yield item

            if goon:
                # post-validate (will do RSS autodiscovery if needed)
                validationData = postvalidate(url, events, rawdata, feedType)

                # write output header
                url = validationData['url']
                feedType = validationData['feedType']
                rawdata = validationData['rawdata']
                yield applyTemplate('header.tmpl', {'title':'Feed Validator Results: %s' % escapeURL(url)})
                if manual:
                    yield applyTemplate('manual.tmpl', {'rawdata':cgi.escape(rawdata)})
                else:
                    yield applyTemplate('index.tmpl', {'value':escapeURL(url)})

                output = validationData.get('output', None)

                # print special case, if any
                specialCase = validationData.get('specialCase', None)
                if specialCase:
                    yield applyTemplate('%s.tmpl' % specialCase)

                msc = output.mostSeriousClass()

                # Explain the overall verdict
                if msc == Error:
                    from feedvalidator.logging import ObsoleteNamespace
                    if len(output.getErrors())==1 and \
                       isinstance(output.data[0],ObsoleteNamespace):
                        yield applyTemplate('notsupported.tmpl')
                    else:
                        yield applyTemplate('invalid.tmpl')
                elif msc == Warning:
                    yield applyTemplate('warning.tmpl')
                elif msc == Info:
                    yield applyTemplate('info.tmpl')

                # Print any issues, whether or not the overall feed is valid
                if output:
                    for item in yieldEventList(output):
                        yield item

                    # print code listing
                    yield buildCodeListing(validationData['events'], validationData['rawdata'], url)

                # As long as there were no errors, show that the feed is valid
                if msc != Error:
                    # valid
                    htmlUrl = escapeURL(urllib.quote(url))
                    try:
                        htmlUrl = htmlUrl.encode('idna')
                    except Exception:
                        # Best effort only: keep the escaped URL as it is.
                        pass
                    docType = 'feed'
                    if feedType == TYPE_ATOM_ENTRY: docType = 'entry'
                    if feedType == TYPE_OPENSEARCH: docType = 'description document'
                    yield applyTemplate('valid.tmpl', {"url":htmlUrl, "srcUrl":htmlUrl, "feedType":FEEDTYPEDISPLAY[feedType], "graphic":VALIDFEEDGRAPHIC[feedType], "HOMEURL":HOMEURL, "docType":docType})
        else:
            # nothing to validate, just write basic form
            yield applyTemplate('header.tmpl', {'title':'Feed Validator for Atom and RSS'})
            if manual:
                yield applyTemplate('manual.tmpl', {'rawdata':''})
            else:
                yield applyTemplate('index.tmpl', {'value':'http://'})
            yield applyTemplate('special.tmpl', {})

        yield applyTemplate('navbar.tmpl')
        yield applyTemplate('footer.tmpl')
332
if __name__ == "__main__":
    # A numeric first argument selects FastCGI on that port; anything else
    # (or no argument) means the script runs as a plain CGI program.
    runAsCGI = len(sys.argv) == 1 or not sys.argv[1].isdigit()
    if runAsCGI:
        def start_response(status, headers):
            # Write the CGI header block, ended by an empty line.
            sys.stdout.write('Status: %s\r\n' % status)
            for header, value in headers:
                sys.stdout.write('%s: %s\r\n' % (header, value))
            sys.stdout.write('\n')

        for output in checker_app(os.environ, start_response):
            # Fragments arrive UTF-8 encoded; stdout encodes them again on
            # the way out, so decode first.
            sys.stdout.write(output.decode('utf-8'))
            sys.stdout.write('\n')
    else:
        # export HTTP_HOST=http://feedvalidator.org/
        # export SCRIPT_NAME=check.cgi
        # export SCRIPT_FILENAME=/home/rubys/svn/feedvalidator/check.cgi
        import fcgi
        port = int(sys.argv[1])
        fcgi.WSGIServer(checker_app, bindAddress=("127.0.0.1", port)).run()
Something went wrong with that request. Please try again.