Skip to content

Commit

Permalink
Fixing _write_to_file
Browse files Browse the repository at this point in the history
The os.path.dirname call in _write_to_file was being applied to a path that
was already the output directory, so it only created the first n - 1 of the n
required directories in the path. Now the function just takes the full file
path and deduces the parent directory from that.
  • Loading branch information
mtlynch committed Sep 28, 2017
1 parent bb4b9a4 commit 5adfb90
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 17 deletions.
26 changes: 13 additions & 13 deletions ketohub/spiders/raw_content_spider.py
Expand Up @@ -31,10 +31,10 @@ def _ensure_directory_exists(directory_path):
os.makedirs(directory_path)


def _write_to_file(filepath, filename, content):
def _write_to_file(filepath, content):
"""Writes content to a local file."""
_ensure_directory_exists(os.path.dirname(filepath))
open(os.path.join(filepath, filename), 'w').write(content)
open(filepath, 'w').write(content)


class RawContentSpider(spiders.CrawlSpider):
Expand Down Expand Up @@ -83,25 +83,25 @@ def download_recipe_contents(self, response):
if not self._filepath_prefix:
self._set_download_root()

filepath = os.path.join(self._filepath_prefix,
self._format_recipe_key(response.url))
output_dir = os.path.join(self._filepath_prefix,
self._format_recipe_key(response.url))

# Write response body to file
_write_to_file(filepath, 'index.html', response.text.encode('utf8'))
_write_to_file(
os.path.join(output_dir, 'index.html'),
response.text.encode('utf8'))

# Write url to metadata file
_write_to_file(filepath, 'metadata.json',
json.dumps(
{
'url': response.url
},
indent=4,
separators=(',', ':')))
_write_to_file(
os.path.join(output_dir, 'metadata.json'),
json.dumps({
'url': response.url
}, indent=4, separators=(',', ':')))

# Find image and save it
try:
image_location = self._get_recipe_main_image_url(response)
except IndexError:
raise UnexpectedResponse('Could not extract image from page.')

urllib.urlretrieve(image_location, os.path.join(filepath, 'main.jpg'))
urllib.urlretrieve(image_location, os.path.join(output_dir, 'main.jpg'))
9 changes: 5 additions & 4 deletions tests/test_raw_content_spider.py
Expand Up @@ -40,10 +40,11 @@ def test_download_recipe_contents_with_a_simple_response(self):
spider.download_recipe_contents(response)

self.write_to_file_patch.assert_has_calls([
mock.call('/foo/download/root/20170102/030405Z/foo-com',
'index.html', '<html></html>'),
mock.call('/foo/download/root/20170102/030405Z/foo-com',
'metadata.json', '{\n "url":"https://www.foo.com"\n}')
mock.call('/foo/download/root/20170102/030405Z/foo-com/index.html',
'<html></html>'),
mock.call(
'/foo/download/root/20170102/030405Z/foo-com/metadata.json',
'{\n "url":"https://www.foo.com"\n}')
])

self.urllib_patch.assert_called_with(
Expand Down

0 comments on commit 5adfb90

Please sign in to comment.