Skip to content
This repository has been archived by the owner on Dec 28, 2020. It is now read-only.

Commit

Permalink
Created unicode methods for the URLSet and Hyperlink classes, as well…
Browse files Browse the repository at this point in the history
… as a new trick for Hyperlink. Fixes #16
  • Loading branch information
palewire committed Jul 17, 2014
1 parent a975ca5 commit 7ff3fc3
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 4 deletions.
1 change: 1 addition & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ include =
storytracker/analysis.py
storytracker/files.py
storytracker/get.py
storytracker/toolbox.py
20 changes: 16 additions & 4 deletions storytracker/analysis.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import os
import six
import copy
import gzip
import storytracker
from six import BytesIO
from bs4 import BeautifulSoup
from .toolbox import UnicodeMixin


class ArchivedURL(object):
class ArchivedURL(UnicodeMixin):
"""
An URL's archived HTML with tools for analysis
"""
Expand Down Expand Up @@ -40,6 +42,9 @@ def __ne__(self, other):
return result
return not result

def __unicode__(self):
    """
    Return the text form of this object: its url and timestamp joined by "@"
    """
    label = "%s@%s" % (self.url, self.timestamp)
    return six.text_type(label)

@property
def archive_filename(self):
"""
Expand Down Expand Up @@ -100,7 +105,10 @@ def get_hyperlinks(self, force=False):
# and convert them to Hyperlink objects
link_list = []
for a in self.soup.findAll("a", {"href": True}):
obj = Hyperlink(a["href"])
obj = Hyperlink(
a["href"],
a.contents
)
link_list.append(obj)

# Stuff that list in our cache and then pass it out
Expand Down Expand Up @@ -145,9 +153,13 @@ def append(self, obj):
super(ArchivedURLSet, self).append(copy.copy(obj))


class Hyperlink(object):
class Hyperlink(UnicodeMixin):
"""
A hyperlink extracted from an archived URL with tools for analysis
"""
def __init__(self, href):
def __init__(self, href, contents):
self.href = href
self.contents = contents

def __unicode__(self):
    """
    Return this link's href as a text string
    """
    target = self.href
    return six.text_type(target)
20 changes: 20 additions & 0 deletions storytracker/toolbox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env python
import six


class UnicodeMixin(object):
    """
    Mixin that builds __str__ and __repr__ on top of __unicode__.

    This class defines no __unicode__ of its own; both methods call
    self.__unicode__(), so it has to be supplied by the class that uses
    the mixin. Which __str__ gets defined is decided once, when the class
    body runs, by checking six.PY3.
    """
    if not six.PY3:
        # Python 2: hand back the text encoded as UTF-8
        def __str__(self):
            text = self.__unicode__()
            return text.encode('utf8')
    else:
        # Python 3: the result of __unicode__ is returned as is
        def __str__(self):
            text = self.__unicode__()
            return text

    def __repr__(self):
        # Rendered as <ClassName: str-of-object>
        class_name = self.__class__.__name__
        return '<%s: %s>' % (class_name, self.__str__())
9 changes: 9 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ def test_url_creation(self):
obj.html
obj.soup
obj.gzip
obj.__unicode__()
obj.__str__()
obj.__repr__()
self.assertEqual(obj.archive_path, None)
obj.write_gzip_to_directory(self.tmpdir)

Expand All @@ -113,6 +116,12 @@ def test_url_hyperlinks(self):
self.assertTrue(isinstance(obj.hyperlinks, list))
self.assertEqual(obj._hyperlinks, obj.hyperlinks)
[self.assertTrue(isinstance(a, Hyperlink)) for a in obj.hyperlinks]
a = obj.hyperlinks[0]
a.href
a.contents
a.__unicode__()
a.__str__()
a.__repr__()

def test_urlset_creation(self):
obj = ArchivedURL(self.url, datetime.now(), "foobar")
Expand Down

0 comments on commit 7ff3fc3

Please sign in to comment.