Skip to content

Commit

Permalink
Find ketoconnect image when opengraph isn't set
Browse files Browse the repository at this point in the history
  • Loading branch information
mtlynch committed Oct 1, 2017
1 parent 22dc239 commit 9f82bcb
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 1 deletion.
6 changes: 6 additions & 0 deletions ketohub/spiders.py
Expand Up @@ -44,6 +44,12 @@ def find_ketoconnect_image_url(response):
opengraph_url = _find_opengraph_image(response)
if opengraph_url:
return opengraph_url

for image_url in response.xpath(
'//div[@id="tve_editor"]/img/@src').extract():
if image_url.endswith('.jpg'):
return image_url

raise NoImageFound('Could not find image in source HTML: %s' % response.url)


Expand Down
15 changes: 14 additions & 1 deletion tests/test_spiders.py
Expand Up @@ -7,7 +7,7 @@

class FindKetoConnectImageUrlTest(unittest.TestCase):

def test_finds_correct_image_on_simple_page(self):
def test_finds_image_from_opengraph(self):
self.assertEqual(
spiders.find_ketoconnect_image_url(
http.TextResponse(
Expand All @@ -18,6 +18,19 @@ def test_finds_correct_image_on_simple_page(self):
<meta property="og:image" content="https://mock.com/recipe-image.jpg" />
</html>""")), 'https://mock.com/recipe-image.jpg')

def test_finds_image_when_opengraph_image_is_not_set(self):
    """Check the image URL is taken from the tve_editor <img> tag.

    The mock page has no og:image meta tag; its only image is an <img>
    element directly inside the div with id "tve_editor".
    """
    page_url = 'https://www.foo.com'
    page_html = """
<html>
<div id="tve_editor">
<img class="tve_image" alt="" style="width: 400px;" src="https://mock.com/recipe-image.jpg" width="400" height="600" data-attachment-id="9282">
</div>
</html>"""
    mock_response = http.TextResponse(
        url=page_url, request=http.Request(page_url), body=page_html)

    found_url = spiders.find_ketoconnect_image_url(mock_response)

    self.assertEqual(found_url, 'https://mock.com/recipe-image.jpg')

def test_raises_error_when_image_not_found(self):
with self.assertRaises(spiders.NoImageFound):
spiders.find_ketoconnect_image_url(
Expand Down

0 comments on commit 9f82bcb

Please sign in to comment.