Skip to content

Commit

Permalink
Changing spider settings to use global settings
Browse files (browse the repository at this point in the history)
Refactoring so that the spider reads DOWNLOAD_ROOT from Scrapy's global settings (scrapy.conf.settings) instead of the per-spider self.settings. It's ugly, but it should
make it easier to pull logic out of the spiders.
  • Loading branch information
mtlynch committed Sep 30, 2017
1 parent dc63d60 commit 0b35d83
Show file tree
Hide file tree
Showing 4 changed files with 3 additions and 24 deletions.
3 changes: 2 additions & 1 deletion ketohub/spiders/raw_content_spider.py
Expand Up @@ -2,6 +2,7 @@
import os
import urllib

from scrapy import conf
from scrapy import spiders

from ketohub import persist
Expand Down Expand Up @@ -47,7 +48,7 @@ def _get_recipe_main_image_url(self, response):
pass

def _make_content_saver(self, url):
download_root = self.settings.get('DOWNLOAD_ROOT')
download_root = conf.settings.get('DOWNLOAD_ROOT')
if not download_root:
raise MissingDownloadDirectory(
'Make sure you\'re providing a download directory.')
Expand Down
1 change: 0 additions & 1 deletion tests/test_ketoconnect_crawl_spider.py
Expand Up @@ -20,7 +20,6 @@ def test_get_recipe_main_image_url_returns_second_image(self):
body=file_content)

spider = ketoconnect_crawl_spider.KetoconnectCrawlSpider()
spider.settings = self.mock_settings
spider.download_recipe_contents(response)

self.urlopen_patch.assert_called_with('images/right_image.jpg')
22 changes: 1 addition & 21 deletions tests/test_raw_content_spider.py
Expand Up @@ -38,9 +38,6 @@ def setUp(self):
self.addCleanup(mock_get_recipe_main_image.stop)
self.get_image_patch = mock_get_recipe_main_image.start()

self.mock_settings = mock.Mock(spec=['get'])
self.mock_settings.get.return_value = 'dummy_download_root'

def test_download_recipe_contents_with_a_simple_response(self):
"""Tests that download_recipe_contents works as expected for a simple response."""
response = http.TextResponse(
Expand All @@ -51,11 +48,10 @@ def test_download_recipe_contents_with_a_simple_response(self):
self.get_image_patch.return_value = 'https://mock.com/test_image.jpg'
self.urlopen_patch.return_value = io.BytesIO('dummy image data')
spider = raw_content_spider.RawContentSpider()
spider.settings = self.mock_settings
spider.download_recipe_contents(response)

self.content_saver_patch.assert_called_once_with(
'dummy_download_root/20170102/030405Z/foo-com')
'download_output/20170102/030405Z/foo-com')
self.mock_saver.save_recipe_html.assert_called_once_with(
'<html></html>')
self.mock_saver.save_metadata.assert_called_once_with({
Expand All @@ -76,22 +72,6 @@ def test_download_recipe_contents_with_an_empty_response(self):

self.get_image_patch.side_effect = IndexError
spider = raw_content_spider.RawContentSpider()
spider.settings = self.mock_settings

with self.assertRaises(raw_content_spider.UnexpectedResponse):
spider.download_recipe_contents(response)

def test_that_undefined_download_folder_location_raises_error(self):
"""Tests that download_recipe_contents raises an error with an undefined download folder."""
response = http.TextResponse(
url='https://www.foo.com',
request=http.Request('https://www.foo.com'),
body='')

mock_settings = mock.Mock()
mock_settings.get.return_value = None
spider = raw_content_spider.RawContentSpider()
spider.settings = mock_settings

with self.assertRaises(raw_content_spider.MissingDownloadDirectory):
spider.download_recipe_contents(response)
1 change: 0 additions & 1 deletion tests/test_ruled_me_crawl_spider.py
Expand Up @@ -20,7 +20,6 @@ def test_get_recipe_main_image_url_returns_first_image(self):
body=file_content)

spider = ruled_me_crawl_spider.RuledMeCrawlSpider()
spider.settings = self.mock_settings
spider.download_recipe_contents(response)

self.urlopen_patch.assert_called_once_with('images/right_image.jpg')

0 comments on commit 0b35d83

Please sign in to comment.