From c969040019c3063741917bfdb10e2f0382601fc0 Mon Sep 17 00:00:00 2001 From: palewire Date: Sun, 24 Aug 2014 14:46:36 -0700 Subject: [PATCH] Extract height and width of hyperlinks. Fixes #44. --- storytracker/analysis.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/storytracker/analysis.py b/storytracker/analysis.py index 13ae545..0d86d72 100644 --- a/storytracker/analysis.py +++ b/storytracker/analysis.py @@ -217,15 +217,18 @@ def get_hyperlinks(self, force=False): except ValueError: pass # Create the Hyperlink object - location = a.location + alocation = a.location + asize = a.size hyperlink_obj = Hyperlink( a.get_attribute("href"), a.text, i, images=image_obj_list, - x=location['x'], - y=location['y'], - cell=self.get_cell(location['x'], location['y']), + width=asize['width'], + height=asize['height'], + x=alocation['x'], + y=alocation['y'], + cell=self.get_cell(alocation['x'], alocation['y']), font_size=a.value_of_css_property("font-size"), ) # Add to the link list @@ -314,6 +317,8 @@ def write_hyperlinks_csv_to_file(self, file): "url_string", "url_index", "url_is_story", + "url_width", + "url_height", "url_x", "url_y", "url_cell", @@ -406,7 +411,9 @@ class Hyperlink(UnicodeMixin): A hyperlink extracted from an archived URL. """ def __init__( - self, href, string, index, images=[], x=None, y=None, + self, href, string, index, images=[], + x=None, y=None, + width=None, height=None, cell=None, font_size=None ): self.href = href @@ -414,6 +421,8 @@ def __init__( self.index = index self.domain = urlparse(href).netloc self.images = images + self.width = width + self.height = height self.x = x self.y = y self.cell = cell @@ -454,6 +463,8 @@ def __csv__(self): self.string or '', self.index, self.is_story, + self.width, + self.height, self.x, self.y, self.cell,