Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Fix some test issues.

  • Loading branch information...
commit ab2d1a807d901ba0ae7874115272f85fcab8e621 1 parent ec41167
@carljm carljm authored
View
1  requirements/pure.txt
@@ -6,3 +6,4 @@ w3lib==1.2
zope.interface==4.0.1
argparse==1.2.1
cssutils==0.9.10b1
+django-inmemorystorage==0.1.1
View
2  runtests.py
@@ -11,7 +11,7 @@
from spade.vendor import add_vendor_lib
def main():
- os.environ.setdefault("DJANGO_SETTINGS_MODULE", "spade.settings.default")
+ os.environ["DJANGO_SETTINGS_MODULE"] = "spade.tests.settings"
add_vendor_lib()
args = sys.argv[1:]
if not args:
View
101 spade/model/models.py
@@ -3,6 +3,7 @@
"""
from datetime import datetime
+
from django.db import models
@@ -38,8 +39,8 @@ def js_filename(instance, filename):
""" Models for scraper """
-class UserAgent(models.Model):
- """ A user-agent string we will use for scanning. """
+class BaseUserAgent(models.Model):
+ """Base common class for UserAgent and BatchUserAgent."""
DESKTOP = 0
MOBILE = 1
UA_TYPE_CHOICES = (
@@ -56,6 +57,14 @@ class UserAgent(models.Model):
def __unicode__(self):
return self.ua_string
+ class Meta:
+ abstract = True
+
+
+class UserAgent(BaseUserAgent):
+ """ A user-agent string we will use for scanning. """
+ pass
+
class Batch(models.Model):
""" A batch of sites scanned in one run. """
@@ -75,27 +84,16 @@ class Meta:
verbose_name_plural = u"Batches"
-class BatchUserAgent(models.Model):
- """ A user agent from a given batch """
- batch = models.ForeignKey(Batch)
-
- # The following clones the UserAgent model, so that we can retain history
- # for scan UAs while allowing the user to add/remove/modify user agents
- DESKTOP = 0
- MOBILE = 1
- UA_TYPE_CHOICES = (
- (DESKTOP, 'desktop'),
- (MOBILE, 'mobile'),
- )
+class BatchUserAgent(BaseUserAgent):
+ """
+ A user agent from a given batch.
- ua_string = models.CharField(max_length=250)
- ua_type = models.IntegerField(max_length=1,
- choices=UA_TYPE_CHOICES,
- default=DESKTOP)
- primary_ua = models.BooleanField(default=False)
+ Clones the UserAgent model, so that we can retain history for scan UAs
+ while allowing the user to add/remove/modify user agents for future
+ batches.
- def __unicode__(self):
- return self.ua_string
+ """
+ batch = models.ForeignKey(Batch)
class Meta:
unique_together = [("batch", "ua_string")]
@@ -172,7 +170,7 @@ class LinkedCSS(models.Model):
max_length=500, upload_to=css_filename)
def __unicode__(self):
- return self.raw_css.name
+ return self.url
class Meta:
verbose_name_plural = "Linked CSS"
@@ -188,7 +186,7 @@ class LinkedJS(models.Model):
max_length=500, upload_to=js_filename)
def __unicode__(self):
- return self.raw_js.name
+ return self.url
class Meta:
verbose_name_plural = "Linked JS"
@@ -225,18 +223,19 @@ class BatchData(models.Model):
batch = models.OneToOneField(Batch)
# Other metrics
- num_rules = models.IntegerField(max_length=50)
- num_properties = models.IntegerField(max_length=50)
- scanned_pages = models.IntegerField(max_length=50)
+ num_rules = models.IntegerField()
+ num_properties = models.IntegerField()
+ scanned_pages = models.IntegerField()
# Aggregate number of css issues from all scans in all user agents
- css_issues = models.IntegerField(max_length=50)
+ css_issues = models.IntegerField()
# Aggregate number of UA issues in this batch
- ua_issues = models.IntegerField(max_length=50)
+ ua_issues = models.IntegerField()
def __unicode__(self):
- return u"'Scan batch has ({0}) css issues and ({1}) ua issues".format(self.css_issues, self.ua_issues)
+ return u"'{0}' has ({1}) css issues and ({2}) ua issues".format(
+ self.batch, self.css_issues, self.ua_issues)
class SiteScanData(models.Model):
@@ -244,19 +243,20 @@ class SiteScanData(models.Model):
sitescan = models.OneToOneField(SiteScan)
# Other metrics
- num_rules = models.IntegerField(max_length=50)
- num_properties = models.IntegerField(max_length=50)
- scanned_pages = models.IntegerField(max_length=50)
+ num_rules = models.IntegerField()
+ num_properties = models.IntegerField()
+ scanned_pages = models.IntegerField()
# Aggregate number of css issues from all scans in all user agents
- css_issues = models.IntegerField(max_length=50)
+ css_issues = models.IntegerField()
# Aggregate number of sniffing issues detected in this site scan
- ua_issues = models.IntegerField(max_length=50)
+ ua_issues = models.IntegerField()
def __unicode__(self):
- return u"'Site scanned has ({0}) css issues and ({1}) ua issues".format(self.css_issues, self.ua_issues)
+ return u"'{0}' has ({1}) css issues and ({2}) ua issues".format(
+ self.sitescan, self.css_issues, self.ua_issues)
class URLScanData(models.Model):
@@ -267,19 +267,20 @@ class URLScanData(models.Model):
urlscan = models.OneToOneField(URLScan)
# Other metrics
- num_rules = models.IntegerField(max_length=50)
- num_properties = models.IntegerField(max_length=50)
- scanned_pages = models.IntegerField(max_length=50)
+ num_rules = models.IntegerField()
+ num_properties = models.IntegerField()
+ scanned_pages = models.IntegerField()
# Aggregate css_issues from all linked css stylesheets
- css_issues = models.IntegerField(max_length=50)
+ css_issues = models.IntegerField()
# If the url scan had a user agent issue (recognized non-primary mobile ua
# but not the primary mobile ua)
- ua_issues = models.BooleanField(default=False)
+ ua_issue = models.BooleanField(default=False)
def __unicode__(self):
- return u"'URL scanned has ({0}) css issues and ({1}) ua issues".format(self.css_issues, self.ua_issues)
+ return u"'{0}' has ({1}) css issues".format(
+ self.urlscan, self.css_issues)
class URLContentData(models.Model):
@@ -290,15 +291,15 @@ class URLContentData(models.Model):
urlcontent = models.OneToOneField(URLContent)
# Other metrics
- num_rules = models.IntegerField(max_length=50)
- num_properties = models.IntegerField(max_length=50)
+ num_rules = models.IntegerField()
+ num_properties = models.IntegerField()
# Aggregate css_issues from all linked css stylesheets
css_issues = models.IntegerField(max_length=50)
def __unicode__(self):
- return u"'Page scanned with user agent '{0}' has ({1}) css issues".format(
- self.urlcontent.user_agent, self.css_issues)
+ return u"{0} has ({1}) css issues".format(
+ self.urlcontent, self.css_issues)
class LinkedCSSData(models.Model):
@@ -306,12 +307,12 @@ class LinkedCSSData(models.Model):
linked_css = models.OneToOneField(LinkedCSS)
# These seem to be useful statistics to collect that we can drill down to
- num_rules = models.IntegerField(max_length=50)
- num_properties = models.IntegerField(max_length=50)
+ num_rules = models.IntegerField()
+ num_properties = models.IntegerField()
# Number of places where a rule used a prefixed property but no moz prefix
- css_issues = models.IntegerField(max_length=50)
+ css_issues = models.IntegerField()
def __unicode__(self):
- return u"'Linked CSS stylesheet has ({0}) css issues".format(
- self.css_issues)
+ return u"'{0}' has ({1}) css issues".format(
+ self.linked_css, self.css_issues)
View
23 spade/tests/model/factories.py
@@ -1,15 +1,16 @@
# Factories for testing objects
-import factory
-
from datetime import datetime
-from django.utils.timezone import utc
from hashlib import sha256
+
+from django.core.files.uploadedfile import SimpleUploadedFile
+from django.utils.timezone import utc
+import factory
+
from spade.model import models
MOCK_DATE = datetime(2012, 6, 29, 21, 10, 24, 10848, tzinfo=utc)
-MOCK_CSS_URL = (u"http://www.sammyliu.com/wp-content/themes/polaroid-perfect/"
- u"style.css")
-MOCK_JS_URL = u"http://code.jquery.com/jquery-1.7.2.min.js"
+MOCK_CSS_URL = u"http://example.com/test.css"
+MOCK_JS_URL = u"http://example.com/test.js"
class BatchFactory(factory.Factory):
@@ -54,7 +55,8 @@ class URLContentFactory(factory.Factory):
FACTORY_FOR = models.URLContent
url_scan = factory.SubFactory(URLScanFactory)
user_agent = factory.SubFactory(BatchUserAgentFactory)
- raw_markup = u"<html>hello world</html>"
+ raw_markup = SimpleUploadedFile(
+ "test.html", "<html>hello world</html>", "text/html")
headers = u""
@@ -64,7 +66,7 @@ class LinkedCSSFactory(factory.Factory):
batch = factory.SubFactory(BatchFactory)
url = MOCK_CSS_URL
url_hash = sha256(MOCK_CSS_URL).hexdigest()
- raw_css = u"body{color:#000}"
+ raw_css = SimpleUploadedFile("test.css", "body{color:#000}", "text/css")
class LinkedJSFactory(factory.Factory):
@@ -73,7 +75,8 @@ class LinkedJSFactory(factory.Factory):
batch = factory.SubFactory(BatchFactory)
url = MOCK_JS_URL
url_hash = sha256(MOCK_JS_URL).hexdigest()
- raw_js = u"document.write('hello world')"
+ raw_js = SimpleUploadedFile(
+ "test.js", "document.write('hello world')", "application/javascript")
class CSSRuleFactory(factory.Factory):
@@ -128,7 +131,7 @@ class URLScanDataFactory(factory.Factory):
num_properties = 10
scanned_pages = 1
css_issues = 3
- ua_issues = 1
+ ua_issue = True
class URLContentDataFactory(factory.Factory):
View
2  spade/tests/model/test_batch_data.py
@@ -17,4 +17,4 @@ def test_unicode():
batchdata = factories.BatchDataFactory.create(
batch=batch, num_rules=1, num_properties=2, css_issues=3, ua_issues=4)
- assert unicode(batchdata) == u"'Scan batch has (3) css issues and (4) ua issues"
+ assert unicode(batchdata) == u"''Batch started at 2012-06-29 21:10:24.010848+00:00' has (3) css issues and (4) ua issues"
View
4 spade/tests/model/test_linkedcontent.py
@@ -6,9 +6,9 @@
def test_css_unicode():
linkedcss = factories.LinkedCSSFactory()
- assert unicode(linkedcss) == u"body{color:#000}"
+ assert unicode(linkedcss) == u"http://example.com/test.css"
def test_js_unicode():
linkedjs = factories.LinkedJSFactory()
- assert unicode(linkedjs) == u"document.write('hello world')"
+ assert unicode(linkedjs) == u"http://example.com/test.js"
View
2  spade/tests/model/test_linkedcss_data.py
@@ -12,5 +12,5 @@ def test_unicode():
linkedcss_data = factories.LinkedCSSDataFactory.create(
linked_css=linkedcss, css_issues=3)
- assert unicode(linkedcss_data) == u"'Linked CSS stylesheet has (3) css issues"
+ assert unicode(linkedcss_data) == u"'http://example.com/test.css' has (3) css issues"
View
4 spade/tests/model/test_sitescan_data.py
@@ -13,5 +13,5 @@ def test_unicode():
css_issues=3,
ua_issues=4)
- assert unicode(sitescan_data) == (u"'Site scanned has (3) css issues and"
- u" (4) ua issues")
+ assert unicode(sitescan_data) == (
+ u"'http://www.mozilla.com' has (3) css issues and (4) ua issues")
View
4 spade/tests/model/test_urlcontent_data.py
@@ -14,5 +14,5 @@ def test_unicode():
urlcontent_data = factories.URLContentDataFactory.create(
urlcontent=urlcontent, css_issues=3)
- assert unicode(urlcontent_data) == (u"'Page scanned with user agent "
- u"'Mozilla / 5.0' has (3) css issues")
+ assert unicode(urlcontent_data) == (
+ u"'http://www.mozilla.com' scanned with 'Mozilla / 5.0' has (3) css issues")
View
8 spade/tests/model/test_urlscan_data.py
@@ -10,8 +10,8 @@ def test_unicode():
urlscan = factories.URLScanFactory.create()
urlscan_data = factories.URLScanDataFactory.create(urlscan=urlscan,
- css_issues=3,
- ua_issues=4)
+ css_issues=3,
+ ua_issue=True)
- assert unicode(urlscan_data) == (u"'URL scanned has (3) css issues and (4) "
- u"ua issues")
+ assert unicode(urlscan_data) == (
+ u"'http://www.mozilla.com' has (3) css issues")
View
3  spade/tests/settings.py
@@ -0,0 +1,3 @@
+from spade.settings.default import *
+
+DEFAULT_FILE_STORAGE = "inmemorystorage.InMemoryStorage"
View
20 spade/utils/data_aggregator.py
@@ -103,7 +103,7 @@ def aggregate_batch(self, batch):
# Aggregate data for each sitescan
for sitescan in sitescans:
- sitescan_data = aggregate_sitescan(sitescan)
+ sitescan_data = self.aggregate_sitescan(sitescan)
total_rules += sitescan_data.num_rules
total_properties += sitescan_data.num_properties
total_pages_scanned += sitescan_data.scanned_pages
@@ -139,12 +139,13 @@ def aggregate_sitescan(self, sitescan):
# Aggregate data for each urlscan
for urlscan in urlscans:
- urlscan_data = aggregate_urlscan(urlscan)
+ urlscan_data = self.aggregate_urlscan(urlscan)
total_rules += urlscan_data.num_rules
total_properties += urlscan_data.num_properties
total_pages_scanned += urlscan_data.scanned_pages
total_css_issues += urlscan_data.css_issues
- total_ua_issues += urlscan_data.ua_issues
+ if urlscan_data.ua_issue:
+ total_ua_issues += 1
# Actually update the sitescan field
sitescandata = models.SiteScanData.objects.create(sitescan=sitescan)
@@ -168,7 +169,7 @@ def aggregate_urlscan(self, urlscan):
total_properties = 0
total_pages_scanned = 0
total_css_issues = 0
- total_ua_issues = 0
+ ua_issue = False
# TODO: determine # pages scanned by counting urlcontents belonging to
# this urlscan belonging to a single ua? or all? how??
@@ -178,13 +179,13 @@ def aggregate_urlscan(self, urlscan):
# Detect user agent sniffing issues via the class function
if self.detect_ua_issue(urlscan):
- total_ua_issues += 1
+ ua_issue = True
# Aggregate data for each urlcontent
# TODO: add a filter that uses user_agent so that we can aggregate
# data to only particular user agents rather than all user agents
for urlcontent in urlcontents:
- urlcontent_data = aggregate_urlcontent(urlcontent)
+ urlcontent_data = self.aggregate_urlcontent(urlcontent)
total_rules += urlcontent_data.num_rules
total_properties += urlcontent_data.num_properties
total_css_issues += urlcontent_data.css_issues
@@ -195,7 +196,7 @@ def aggregate_urlscan(self, urlscan):
urlscandata.num_properties = total_properties
urlscandata.scanned_pages = total_pages_scanned
urlscandata.css_issues = total_css_issues
- urlscandata.ua_issues = total_ua_issues
+ urlscandata.ua_issue = ua_issue
urlscandata.save()
return urlscandata
@@ -213,7 +214,7 @@ def aggregate_urlcontent(self, urlcontent):
# Aggregate data for each linked css stylesheet
for linkedcss in linkedstyles:
- linkedcss_data = aggregate_linkedcss(linkedcss)
+ linkedcss_data = self.aggregate_linkedcss(linkedcss)
total_rules += linkedcss_data.num_rules
total_properties += linkedcss_data.num_properties
total_css_issues += linkedcss_data.css_issues
@@ -240,7 +241,8 @@ def aggregate_linkedcss(self, linkedcss):
# TODO: Detect how many rules, properties, and css issues exist.
# Update this linkedcss's data model
- linkedcssdata = models.LinkedCSSData.objects.create(urlscan=urlscan)
+ linkedcssdata = models.LinkedCSSData.objects.create(
+ linked_css=linkedcss)
linkedcssdata.num_rules = total_rules
linkedcssdata.num_properties = total_properties
linkedcssdata.css_issues = total_css_issues
View
1  vendor/inmemorystorage/__init__.py
@@ -0,0 +1 @@
+from .storage import InMemoryStorage
View
113 vendor/inmemorystorage/storage.py
@@ -0,0 +1,113 @@
+from django.core.files.storage import Storage
+from django.core.files.base import ContentFile
+
+class PathDoesNotExist(Exception):
+ pass
+
+class InMemoryNode(object):
+ """
+ Base class for files and directories.
+ """
+ parent = None
+
+ def add_child(self, name, child):
+ child.parent = self
+ self.children[name] = child
+
+class InMemoryFile(InMemoryNode):
+ """
+ Stores contents of file and stores reference to parent.
+ """
+ def __init__(self, contents='', parent=None):
+ self.contents = contents
+ self.parent = parent
+
+class InMemoryDir(InMemoryNode):
+ """
+ Stores dictionary of child directories/files and reference to parent.
+ """
+ def __init__(self, dirs=None, files=None, parent=None):
+ self.children = {}
+ self.parent = parent
+
+ def resolve(self, path, create=False):
+ path_bits = path.strip('/').split('/', 1)
+ current = path_bits[0]
+ rest = path_bits[1] if len(path_bits) > 1 else None
+ if not rest:
+ if current == '':
+ return self
+ if current in self.children.keys():
+ return self.children[current]
+ if not create:
+ raise PathDoesNotExist()
+ node = InMemoryFile()
+ self.add_child(current, node)
+ return node
+ if current in self.children.keys():
+ return self.children[current].resolve(rest, create=create)
+ if not create:
+ raise PathDoesNotExist()
+ node = InMemoryDir()
+ self.add_child(current, node)
+ return self.children[current].resolve(rest, create)
+
+ def ls(self, path=''):
+ return self.resolve(path).children.keys()
+
+ def listdir(self, dir):
+ nodes = tuple(self.resolve(dir).children.iteritems())
+ dirs = [k for (k, v) in nodes if isinstance(v, InMemoryDir)]
+ files = [k for (k, v) in nodes if isinstance(v, InMemoryFile)]
+ return [dirs, files]
+
+ def delete(self, path):
+ node = self.resolve(path)
+ for name, child in node.parent.children.iteritems():
+ if child is node:
+ del node.parent.children[name]
+ break
+
+ def exists(self, name):
+ try:
+ self.resolve(name)
+ except PathDoesNotExist:
+ return False
+ else:
+ return True
+
+ def size(self, name):
+ return len(self.resolve(name).contents)
+
+ def open(self, path):
+ return ContentFile(self.resolve(path, create=True).contents)
+
+ def save(self, path, content):
+ file = self.resolve(path, create=True)
+ file.contents = content
+ return path
+
+class InMemoryStorage(Storage):
+ """
+ Django storage class for in-memory filesystem.
+ """
+ def __init__(self, filesystem=None):
+ self.filesystem = filesystem or InMemoryDir()
+
+ def listdir(self, dir):
+ return self.filesystem.listdir(dir)
+
+ def delete(self, path):
+ return self.filesystem.delete(path)
+
+ def exists(self, name):
+ return self.filesystem.exists(name)
+
+ def size(self, name):
+ return self.filesystem.size(name)
+
+ def _open(self, name, mode=None):
+ return self.filesystem.open(name)
+
+ def _save(self, name, content):
+ return self.filesystem.save(name, content.read())
Please sign in to comment.
Something went wrong with that request. Please try again.