diff --git a/sitetest/core/models.py b/sitetest/core/models.py
index a0eef77..8ba11e2 100644
--- a/sitetest/core/models.py
+++ b/sitetest/core/models.py
@@ -55,7 +55,7 @@
     'Accept-Language': 'en-US,en;q=0.8',
     'Connection': 'keep-alive'}
 
-USE_REQUESTS = True
+USE_REQUESTS = False
 
 import ssl
 from functools import wraps
@@ -545,7 +545,7 @@ def __init__(self, url, set, verbose=False):
         self.iframe_links = {}
         self.screenshots = {}
         # self.xhr_links = {}
-        self.url = self.ending_url = url
+        self.url = self.starting_url = self.ending_url = url
         parsed = urlparse.urlparse(url)
         name, extension = os.path.splitext(parsed.path)
         self.starting_type = self.ending_type = set.get_link_type(url)
@@ -558,6 +558,7 @@ def __init__(self, url, set, verbose=False):
         self.html = None
         self.title = url
         self.redirect_path = None
+        self.has_sitemap_entry = False
 
         self.dequeried_url = clear_query_string(self.url)
         self.use_basic_auth = set.use_basic_auth
@@ -701,7 +702,7 @@ def is_200(self):
 
     @property
     def is_redirect_page(self):
-        return (self.url != self.ending_url)
+        return (self.starting_url != self.ending_url)
 
 
 
@@ -1190,7 +1191,7 @@ def trace_path_with_requests(url, is_internal, traced, enable_cookies = False, d
         #-- authorization
 
         #Don't verify cert here if we're testing the site. We'll test that on a separate step.
-        verify_cert = !is_internal
+        verify_cert = not is_internal
 
         if session:
             response = session.get(url, headers=HEADERS, allow_redirects=False, verify=verify_cert, timeout=10)
@@ -1235,7 +1236,7 @@ def trace_path_with_requests(url, is_internal, traced, enable_cookies = False, d
 
         if enable_cookies:
             response_data['pickled_cookies'] = pickle.dumps(session.cookies._cookies)
-
+        response_data['request_headers'] = response.request.headers
 
         traced.append(response_data)
 
@@ -1277,34 +1278,31 @@ def trace_path_with_urllib2(url, is_internal, traced, enable_cookies = False, de
             traced[-1]['error'] = "Redirect loop detected to %s"%(url)
             return traced
 
-
-    if enable_cookies:
+    use_auth = (auth=='basic' and is_internal)
+    if enable_cookies or use_auth:
         if not cj:
-            cj = cookielib.CookieJar()
-
+            cj = cookielib.CookieJar()
 
-    use_password = False
-    if auth=='basic' and is_internal:
-        use_password = True
+    if use_auth:
         password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
         password_manager.add_password(None, url, username, password)
         password_handler = urllib2.HTTPBasicAuthHandler(password_manager)
 
-    if enable_cookies:
-        if use_password:
+    if cj:
+        if use_auth:
             opener = urllib2.build_opener(NoRedirection, urllib2.HTTPCookieProcessor(cj), password_handler)
         else:
             opener = urllib2.build_opener(NoRedirection, urllib2.HTTPCookieProcessor(cj))
     else:
-        if use_password:
-
+        if use_auth:
             opener = urllib2.build_opener(NoRedirection, password_handler)
         else:
             opener = urllib2.build_opener(NoRedirection)
 
     request = urllib2.Request(url, headers=HEADERS)
+    response = None
 
     response_data = {
@@ -1375,6 +1373,8 @@ def trace_path_with_urllib2(url, is_internal, traced, enable_cookies = False, de
         if enable_cookies:
             response_data['pickled_cookies'] = pickle.dumps(cj._cookies)
 
+        response_data['request_headers'] = request.headers
+
         traced.append(response_data)
 
         has_redirect = response_data['redirect']!=None
@@ -1397,6 +1397,7 @@ def parse_trace_response(response, response_data, code, response_header, start_t
 
 def parse_trace_response_with_requests(response, response_data, code, response_header, start_time):
     end_time = datetime.datetime.now()
+    response_data['response_headers'] = response_header
     response_data['response_code'] = code
     response_data['response_content_type'] = response_header.get('Content-Type')
     response_data['response_encoding'] = response_header.get('Content-Encoding')
@@ -1415,8 +1416,9 @@ def parse_trace_response_with_requests(response, response_data, code, response_h
             response_data['redirect'] = response_data['ending_url']
 
 def parse_trace_response_with_urllib2(response, response_data, code, response_header, start_time):
-
+
     end_time = datetime.datetime.now()
+    response_data['response_headers'] = dict(response_header)
     response_data['response_code'] = code
     response_data['response_content_type'] = response_header.getheader('Content-Type')
     response_data['response_encoding'] = response_header.getheader('Content-Encoding')
diff --git a/sitetest/core/sitemap.py b/sitetest/core/sitemap.py
index 589e88f..9e206d0 100755
--- a/sitetest/core/sitemap.py
+++ b/sitetest/core/sitemap.py
@@ -59,6 +59,7 @@ def process_sitemap(self, sitemap_link):
 
         if not sitemap_link.content:
             return
+
        xml = sitemap_link.content.encode('utf-8')
        parser = lxml.etree.XMLParser(ns_clean=True, recover=True, encoding='utf-8')
        tree = lxml.etree.fromstring(xml, parser=parser)
@@ -75,6 +76,6 @@ def process_sitemap(self, sitemap_link):
 
            url = loc.text.strip()
            self.urls.append(url)
-            # print 'Found url %s'%(url)
+            # print 'Found url (%s)'%(url)
 
            link = self.set.get_or_create_link_object(url, sitemap_link)
\ No newline at end of file
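Note on the trace_path_with_urllib2 change above: the patch collapses the separate enable_cookies / use_password branches into a single use_auth flag and reuses the cookie jar check. The sketch below restates the resulting opener wiring in isolation; it is an illustration only, not code from the patch. NoRedirection, HEADERS, username and password are names taken from models.py, while build_trace_opener and the stub NoRedirection class shown here are assumptions added for the example.

    # Sketch only: the opener wiring trace_path_with_urllib2 arrives at after this
    # patch, flattened into a helper. Not part of the patch itself.
    import urllib2
    import cookielib

    class NoRedirection(urllib2.HTTPErrorProcessor):
        # Stand-in for the NoRedirection handler defined in models.py: return 3xx
        # responses untouched so each hop can be recorded separately.
        def http_response(self, request, response):
            return response
        https_response = http_response

    def build_trace_opener(url, username=None, password=None,
                           enable_cookies=False, use_auth=False, cj=None):
        handlers = [NoRedirection]

        # A cookie jar is kept when cookies are enabled, and also when basic auth
        # is in play (mirroring the new "enable_cookies or use_auth" test).
        if enable_cookies or use_auth:
            if not cj:
                cj = cookielib.CookieJar()
            handlers.append(urllib2.HTTPCookieProcessor(cj))

        if use_auth:
            password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
            password_manager.add_password(None, url, username, password)
            handlers.append(urllib2.HTTPBasicAuthHandler(password_manager))

        return urllib2.build_opener(*handlers), cj

Opened with opener.open(urllib2.Request(url, headers=HEADERS)), each response comes back without redirects being followed, which is what lets the new response_data['request_headers'] and response_data['response_headers'] fields record one entry per hop.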

diff --git a/sitetest/templates/partials/result_title.html b/sitetest/templates/partials/result_title.html
index 0f2999a..ff90df5 100644
--- a/sitetest/templates/partials/result_title.html
+++ b/sitetest/templates/partials/result_title.html
@@ -1,7 +1,7 @@
 
-    {% if link.response_code %}
+    {% if link.response_code %}
         {% if link.response_code == 200 %}
             {{link.response_code}}
         {% elif link.response_code == "Unknown" %}
@@ -9,12 +9,15 @@
 
         {% else %}
             {{link.response_code}}
         {% endif %}
-    {% else %}
+    {% else %}
         N/A
-    {% endif %}
+    {% endif %}
+
+    {% if link.is_redirect_page %} Redirected {% endif %}
+    &nbsp;
-    {% if link.is_redirect_page %}Redirected {% endif %}{% if link.title %}{{link.title}} {% endif %}
-    {{link.url|truncate(100, True)}}{% if link.url != link.ending_url %} > {{link.ending_url|truncate(100, True)}} {% endif %}
+    {% if link.title %}{{link.title}} {% endif %}
+    {{link.starting_url|truncate(100, True)}}{% if link.starting_url != link.ending_url %} > {{link.ending_url|truncate(100, True)}} {% endif %}
 
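The result_full.html hunk below renders link.redirect_path, which models.py now populates with request and response headers. For orientation, this is roughly the shape of one entry as the template consumes it; the field names come from the diff, the literal values are invented for the example.

    # Illustrative data only: one redirect_path hop as the new template loop
    # iterates it. Keys match the response_data dict built in models.py.
    example_hop = {
        'url': 'http://example.com/old-page/',
        'response_code': 301,
        'response_content_type': 'text/html',
        'response_load_time': 120,
        'request_headers': {'Accept-Language': 'en-US,en;q=0.8'},
        'response_headers': {'Location': 'http://example.com/new-page/'},
    }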

diff --git a/sitetest/templates/result_full.html b/sitetest/templates/result_full.html
index 40f1443..b1eda6c 100644
--- a/sitetest/templates/result_full.html
+++ b/sitetest/templates/result_full.html
@@ -160,6 +160,37 @@
 
     There are {{link.script_links|length}} javascript files linked in this page
     {% if link.enumerated_source %}
+
+
+    {% for item in link.redirect_path %}
+
+        {{item.response_code}} {{item.url}} [{{item.response_content_type}}] - {{item.response_load_time}} ms
+
+
+
+        Request Headers
+
+        {% for key, value in item.request_headers.iteritems() %}
+            {{ key }}
+            {{ value }}
+        {% endfor %}
+
+
+
+        Response Headers
+
+        {% for key, value in item.response_headers.iteritems() %}
+            {{ key }}
+            {{ value }}
+        {% endfor %}
+
+
+
+    {% endfor %}
+
+
     {{link.enumerated_source|e}}
     {% endif %}
diff --git a/sitetest/templates/results_base.html b/sitetest/templates/results_base.html
index f77a2ae..93b80af 100644
--- a/sitetest/templates/results_base.html
+++ b/sitetest/templates/results_base.html
@@ -98,6 +98,45 @@
         font-weight: normal;
     }
 
+    .box{
+        width:100%;
+        overflow:hidden;
+    }
+    .box .inner{
+        width:50%;
+        float:left;
+        padding-right: 20px;
+    }
+    .box .inner:last-child{
+        padding-right:0;
+    }
+    dl {
+        display: block;
+        padding: 10px;
+        margin: 0 0 10px;
+        color: #333;
+        word-break: break-all;
+        word-wrap: break-word;
+        background-color: #f5f5f5;
+        border: 1px solid #ccc;
+        border-radius: 4px;
+    }
+    dt {
+        float: left;
+        clear: left;
+        width: 25%;
+        margin-right:7px;
+        text-align: right;
+        font-weight: bold;
+    }
+    dt:after {
+        content: ":";
+    }
+    dd {
+        padding: 0 0 0.5em 0;
+        border-bottom: 1px solid #fff;
+    }
+
diff --git a/sitetest/tests/basic_site_quality.py b/sitetest/tests/basic_site_quality.py
index ad4502d..8d3f6e5 100644
--- a/sitetest/tests/basic_site_quality.py
+++ b/sitetest/tests/basic_site_quality.py
@@ -52,13 +52,13 @@ def test_basic_site_quality(site, verbose=False):
 
     # #5 - Verify that sitemap matches up with the actual pages
-    # if sitemap_link.response_code == 200:
     if len(sitemap_links) > 0:
         for sitemap_link in sitemap_links:
             for link_url in sitemap_link.hyper_links:
                 link_item = sitemap_link.hyper_links[link_url]
                 link_item.has_sitemap_entry = True
+
 
     # #6 - Verify that no public pages are blocked by robots.txt
     rp = robotparser.RobotFileParser()
diff --git a/sitetest/tests/pagespeed.py b/sitetest/tests/pagespeed.py
index c0f66e5..e575172 100644
--- a/sitetest/tests/pagespeed.py
+++ b/sitetest/tests/pagespeed.py
@@ -42,8 +42,12 @@ def test_pagespeed(set, credentials, options, max_test_count=1000, verbose=False
             else:
                 url = link.url
 
+
+            testing_url = 'https://www.googleapis.com/pagespeedonline/v1/runPagespeed?url=%s&key=%s'%(url, API_KEY)
+            # print testing_url
+
             request = urllib2.Request(testing_url)
             response = urllib2.urlopen(request)
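A note on the pagespeed.py change: the patch interpolates the page URL straight into the query string. The sketch below shows an equivalent request with the parameters passed through urllib.urlencode, so page URLs containing '&' or '?' survive intact. It is a suggestion sketched against the same v1 endpoint and API_KEY used in the file, not what the patch itself does; run_pagespeed is a hypothetical helper name.

    # Sketch, not part of the patch: the same PageSpeed call with an encoded
    # query string. API_KEY and the endpoint are the ones pagespeed.py uses.
    import json
    import urllib
    import urllib2

    def run_pagespeed(url, api_key):
        params = urllib.urlencode({'url': url, 'key': api_key})
        testing_url = 'https://www.googleapis.com/pagespeedonline/v1/runPagespeed?%s' % params
        request = urllib2.Request(testing_url)
        response = urllib2.urlopen(request)
        return json.loads(response.read())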