Smoothed out the tests

palewire · Jul 20, 2014 · 83fe311 · 83fe311
1 parent ded94a8
commit 83fe311
Show file tree

Hide file tree

Showing 2 changed files with 97 additions and 6 deletions.
diff --git a/storytracker/analysis.py b/storytracker/analysis.py
@@ -24,6 +24,7 @@ def __init__(self, url, timestamp, html):
         # Attributes that come in handy below
         self.archive_path = None
         self._hyperlinks = []
+        self._images = []
 
     def __eq__(self, other):
         """
@@ -105,7 +106,7 @@ def get_hyperlinks(self, force=False):
         if self._hyperlinks and not force:
             return self._hyperlinks
 
-        # Target the <body> tag if it exists since 
+        # Target the <body> tag if it exists since
         # we don't care what's in the <head>
         target = self.soup
         if hasattr(target, 'body'):
@@ -135,6 +136,40 @@ def get_hyperlinks(self, force=False):
         return link_list
     hyperlinks = property(get_hyperlinks)
 
+    def get_images(self, force=False):
+        """
+        Parse the archived HTML for images and return them as a list
+        of Image objects.
+
+        The list is cached in `_images` after it is first built.
+
+        Set the `force` kwarg to True to regenerate it from scratch.
+        """
+        # Serve the cached image list (not the hyperlink cache) if we can
+        if self._images and not force:
+            return self._images
+
+        # Target the <body> tag if it exists since
+        # we don't care what's in the <head>
+        target = self.soup
+        if hasattr(target, 'body'):
+            target = target.body
+
+        # Loop through all <img> tags with src attributes
+        # and convert them to Image objects
+        image_list = []
+        for img in target.findAll("img", {"src": True}):
+            # Create the Image object
+            image_obj = Image(img["src"])
+
+            # Add to the image list
+            image_list.append(image_obj)
+
+        # Stuff that list in our cache and then pass it out
+        self._images = image_list
+        return image_list
+    images = property(get_images)
+
 
 class ArchivedURLSet(list):
     """
@@ -182,9 +217,28 @@ def __init__(self, href, string, images=[]):
         self.domain = urlparse(href).netloc
         self.images = images
 
+    def __eq__(self, other):
+        """
+        Tests whether this hyperlink is equal to something else.
+
+        Two Hyperlink objects are equal when their href values match.
+        """
+        if not isinstance(other, Hyperlink):
+            return NotImplemented
+        return self.href == other.href
+
+    def __ne__(self, other):
+        """
+        Tests whether this object is unequal to something else by
+        negating __eq__, passing NotImplemented through untouched.
+        """
+        verdict = self.__eq__(other)
+        # NotImplemented goes back as-is so it is never negated
+        return verdict if verdict is NotImplemented else not verdict
+
     def __unicode__(self):
+        # Show at most the first 40 characters of the href,
+        # followed by "..." when it has been cut short
         if len(self.href) > 40:
-            return six.text_type(self.href[:40] + "...")
+            return six.text_type("%s..." % self.href[:40])
         else:
             return six.text_type(self.href)
 
@@ -196,8 +250,27 @@ class Image(UnicodeMixin):
     def __init__(self, src):
+        # Keep the src value; equality and display are both based on it
         self.src = src
 
+    def __eq__(self, other):
+        """
+        Tests whether this image is equal to something else.
+
+        Two Image objects are equal when their src values match.
+        """
+        if isinstance(other, Image):
+            return bool(self.src == other.src)
+        return NotImplemented
+
+    def __ne__(self, other):
+        """
+        Tests whether this image is unequal to something else by
+        inverting the answer from __eq__.
+        """
+        answer = self.__eq__(other)
+        # Hand NotImplemented back untouched rather than negating it
+        return answer if answer is NotImplemented else not answer
+
     def __unicode__(self):
+        # Show at most the first 40 characters of the src,
+        # followed by "..." when it has been cut short
         if len(self.src) > 40:
-            return six.text_type(self.src[:40] + "...")
+            return six.text_type("%s..." % self.src[:40])
         else:
             return six.text_type(self.src)
diff --git a/test.py b/test.py
@@ -9,7 +9,7 @@
 from bs4 import BeautifulSoup
 from storytracker.analysis import ArchivedURL
 from storytracker.analysis import ArchivedURLSet
-from storytracker.analysis import Hyperlink
+from storytracker.analysis import Hyperlink, Image
 
 
 class NullDevice():
@@ -111,19 +111,37 @@ def test_url_creation(self):
         obj.write_gzip_to_directory(self.tmpdir)
 
     def test_url_hyperlinks(self):
-        obj = storytracker.archive(self.url, output_dir=self.tmpdir)
+        obj = storytracker.archive(self.url)
+        # The hyperlink cache should start out empty
         self.assertEqual(obj._hyperlinks, [])
         self.assertTrue(isinstance(obj.hyperlinks, list))
         self.assertEqual(obj._hyperlinks, obj.hyperlinks)
         [self.assertTrue(isinstance(a, Hyperlink)) for a in obj.hyperlinks]
+        # Touch each attribute and string method to make sure none raise
         a = obj.hyperlinks[0]
         a.href
-        a.contents
+        a.string
         a.domain
+        if a.images:
+            for i in a.images:
+                self.assertTrue(isinstance(i, Image))
+                i.src
+                i.__unicode__()
         a.__unicode__()
         a.__str__()
         a.__repr__()
 
+    def test_url_images(self):
+        obj = storytracker.archive(self.url)
+        # The image cache should start out empty
+        self.assertEqual(obj._images, [])
+        self.assertTrue(len(obj.images) > 0)
+        self.assertTrue(isinstance(obj.images, list))
+        self.assertEqual(obj._images, obj.images)
+        # Every entry handed back must be an Image object
+        for i in obj.images:
+            self.assertTrue(isinstance(i, Image))
+        # Touch each attribute and string method to make sure none raise
+        img = obj.images[0]
+        img.src
+        img.__unicode__()
+        img.__str__()
+        img.__repr__()
+
     def test_urlset_creation(self):
         obj = ArchivedURL(self.url, datetime.now(), "foobar")
         obj2 = ArchivedURL(self.url, datetime.now(), "foobar")