From 02b1b25046c435941727dbf175b8340f2de13395 Mon Sep 17 00:00:00 2001 From: palewire Date: Wed, 16 Jul 2014 21:03:33 -0700 Subject: [PATCH] Hyperlink class now parses out the domain as well --- storytracker/analysis.py | 5 +++++ test.py | 1 + 2 files changed, 6 insertions(+) diff --git a/storytracker/analysis.py b/storytracker/analysis.py index f54b877..4c14b37 100644 --- a/storytracker/analysis.py +++ b/storytracker/analysis.py @@ -6,6 +6,10 @@ from six import BytesIO from bs4 import BeautifulSoup from .toolbox import UnicodeMixin +try: + from urlparse import urlparse +except ImportError: + from six.moves.urllib.parse import urlparse class ArchivedURL(UnicodeMixin): @@ -160,6 +164,7 @@ class Hyperlink(UnicodeMixin): def __init__(self, href, contents): self.href = href self.contents = contents + self.domain = urlparse(href).netloc def __unicode__(self): return six.text_type(self.href) diff --git a/test.py b/test.py index 00e1e56..4e18139 100644 --- a/test.py +++ b/test.py @@ -119,6 +119,7 @@ def test_url_hyperlinks(self): a = obj.hyperlinks[0] a.href a.contents + a.domain a.__unicode__() a.__str__() a.__repr__()