Skip to content
This repository has been archived by the owner on Dec 28, 2020. It is now read-only.

Commit

Permalink
Smoothed out the tests
Browse files Browse the repository at this point in the history
  • Loading branch information
palewire committed Jul 20, 2014
1 parent ded94a8 commit 83fe311
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 6 deletions.
79 changes: 76 additions & 3 deletions storytracker/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(self, url, timestamp, html):
# Attributes that come in handy below
self.archive_path = None
self._hyperlinks = []
self._images = []

def __eq__(self, other):
"""
Expand Down Expand Up @@ -105,7 +106,7 @@ def get_hyperlinks(self, force=False):
if self._hyperlinks and not force:
return self._hyperlinks

# Target the <body> tag if it exists since
# Target the <body> tag if it exists since
# we don't care what's in the <head>
target = self.soup
if hasattr(target, 'body'):
Expand Down Expand Up @@ -135,6 +136,40 @@ def get_hyperlinks(self, force=False):
return link_list
hyperlinks = property(get_hyperlinks)

def get_images(self, force=False):
    """
    Parse the archived HTML for images and return them as a list
    of Image objects.

    The list is cached on `self._images` after it is first built.
    Set the `force` kwarg to True to regenerate it from scratch.
    """
    # If we already have the image list, return it. The guard has to
    # look at the image cache: checking the hyperlink cache would hand
    # back hyperlinks whenever those had been parsed first.
    if self._images and not force:
        return self._images

    # Target the <body> tag if it exists since
    # we don't care what's in the <head>.
    # A document without a <body> may expose the attribute as None
    # (presumably BeautifulSoup's behaviour for absent tags), so fall
    # back to the whole document in that case too.
    body = getattr(self.soup, 'body', None)
    target = self.soup if body is None else body

    # Convert every <img> tag that has a src attribute to an Image object
    image_list = [
        Image(img["src"]) for img in target.findAll("img", {"src": True})
    ]

    # Stuff that list in our cache and then pass it out
    self._images = image_list
    return image_list
images = property(get_images)


class ArchivedURLSet(list):
"""
Expand Down Expand Up @@ -182,9 +217,28 @@ def __init__(self, href, string, images=[]):
self.domain = urlparse(href).netloc
self.images = images

def __eq__(self, other):
"""
Tests whether this object is equal to something else.
"""
if not isinstance(other, Image):
return NotImplemented
if self.href == other.href:
return True
return False

def __ne__(self, other):
"""
Tests whether this object is unequal to something else.
"""
result = self.__eq__(other)
if result is NotImplemented:
return result
return not result

def __unicode__(self):
    """
    Return the href as text, cut to its first 40 characters followed
    by "..." when it is longer than that.
    """
    if len(self.href) > 40:
        return six.text_type("%s..." % self.href[:40])
    return six.text_type(self.href)

Expand All @@ -196,8 +250,27 @@ class Image(UnicodeMixin):
def __init__(self, src):
self.src = src

def __eq__(self, other):
    """
    Tests whether this object is equal to something else.

    Another Image is equal when its `src` matches ours; any other type
    yields NotImplemented.
    """
    if isinstance(other, Image):
        return bool(self.src == other.src)
    return NotImplemented

def __ne__(self, other):
"""
Tests whether this object is unequal to something else.
"""
result = self.__eq__(other)
if result is NotImplemented:
return result
return not result

def __unicode__(self):
    """
    Return the src as text, cut to its first 40 characters followed
    by "..." when it is longer than that.
    """
    if len(self.src) > 40:
        return six.text_type("%s..." % self.src[:40])
    return six.text_type(self.src)
24 changes: 21 additions & 3 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from bs4 import BeautifulSoup
from storytracker.analysis import ArchivedURL
from storytracker.analysis import ArchivedURLSet
from storytracker.analysis import Hyperlink
from storytracker.analysis import Hyperlink, Image


class NullDevice():
Expand Down Expand Up @@ -111,19 +111,37 @@ def test_url_creation(self):
obj.write_gzip_to_directory(self.tmpdir)

def test_url_hyperlinks(self):
    """
    Archive the test URL and smoke-test the hyperlinks parsed from it.
    """
    # One archive call is enough: only its return value is inspected
    obj = storytracker.archive(self.url)

    # The private cache starts empty and should match the public
    # property once that property has been read
    self.assertEqual(obj._hyperlinks, [])
    self.assertTrue(isinstance(obj.hyperlinks, list))
    self.assertEqual(obj._hyperlinks, obj.hyperlinks)
    for link in obj.hyperlinks:
        self.assertTrue(isinstance(link, Hyperlink))

    # Touch each attribute and text method on the first link so that
    # a missing or broken one raises
    a = obj.hyperlinks[0]
    a.href
    a.contents
    a.string
    a.domain
    if a.images:
        for i in a.images:
            self.assertTrue(isinstance(i, Image))
            i.src
            i.__unicode__()
    a.__unicode__()
    a.__str__()
    a.__repr__()

def test_url_images(self):
    """
    Archive the test URL and smoke-test the images parsed from it.
    """
    archived = storytracker.archive(self.url)

    # The private cache starts empty and should match the public
    # property once that property has been read
    self.assertEqual(archived._images, [])
    self.assertTrue(len(archived.images) > 0)
    self.assertTrue(isinstance(archived.images, list))
    self.assertEqual(archived._images, archived.images)
    for candidate in archived.images:
        self.assertTrue(isinstance(candidate, Image))

    # Touch the attribute and text methods on the first image so that
    # a missing or broken one raises
    first = archived.images[0]
    first.src
    first.__unicode__()
    first.__str__()
    first.__repr__()

def test_urlset_creation(self):
obj = ArchivedURL(self.url, datetime.now(), "foobar")
obj2 = ArchivedURL(self.url, datetime.now(), "foobar")
Expand Down

0 comments on commit 83fe311

Please sign in to comment.