Skip to content

Commit

Permalink
Merge pull request #36 from srirams6/master
Browse files: browse the repository at this point in the history.
Futurized the code. Fixes #35.
  • Loading branch information
sananth12 committed Mar 7, 2015
2 parents fb75ac7 + 4afd06c commit 2f2632e
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 29 deletions.
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ python:
#install: pip install -r requirements.txt --use-mirrors
install:
- pip install ImageScraper
- pip install future

# command to run tests
script:
- nosetests
Expand Down
3 changes: 2 additions & 1 deletion image_scraper/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from mains import console_main, scrape_images
from __future__ import absolute_import
from .mains import console_main, scrape_images
44 changes: 25 additions & 19 deletions image_scraper/mains.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
from past.utils import old_div


def console_main():
import sys
from progressbar import *
from utils import (process_links, get_html, get_img_list,
download_image, process_download_path, get_arguments)
from exceptions import *
from .progressbar import *
from .utils import (process_links, get_html, get_img_list,
download_image, process_download_path, get_arguments)
from .exceptions import *
URL, no_to_download, format_list, download_path, max_filesize, dump_urls, scrape_reverse, use_ghost = get_arguments()
print "\nImageScraper\n============\nRequesting page....\n"
print("\nImageScraper\n============\nRequesting page....\n")

page_html, page_url = get_html(URL, use_ghost)
images = get_img_list(page_html, page_url, format_list)
Expand All @@ -15,23 +21,23 @@ def console_main():
if no_to_download == 0:
no_to_download = len(images)

print "Found {0} images: ".format(len(images))
print("Found {0} images: ".format(len(images)))

try:
process_download_path(download_path)
except DirectoryAccessError:
print "Sorry, the directory can't be accessed."
print("Sorry, the directory can't be accessed.")
sys.exit()
except DirectoryCreateError:
print "Sorry, the directory can't be created."
print("Sorry, the directory can't be created.")
sys.exit()

if scrape_reverse:
images.reverse()

if dump_urls:
for img_url in images:
print img_url
print(img_url)

count = 0
percent = 0.0
Expand All @@ -50,13 +56,13 @@ def console_main():
over_max_filesize += 1

count += 1
percent = percent + 100.0 / no_to_download
percent = percent + old_div(100.0, no_to_download)
pbar.update(percent % 100)
if count == no_to_download:
break

pbar.finish()
print "\nDone!\nDownloaded {0} images\nFailed: {1}\n".format(count-failed-over_max_filesize, failed)
print("\nDone!\nDownloaded {0} images\nFailed: {1}\n".format(count-failed-over_max_filesize, failed))
return


Expand All @@ -66,8 +72,8 @@ def scrape_images(url, no_to_download=0,
dump_urls=False, use_ghost=False):
import sys
import os
from utils import (process_links, get_html, get_img_list, download_image,
process_download_path, get_arguments)
from .utils import (process_links, get_html, get_img_list, download_image,
process_download_path, get_arguments)
page_html, page_url = get_html(url, use_ghost)
images = get_img_list(page_html, page_url, format_list)

Expand All @@ -85,12 +91,12 @@ def scrape_images(url, no_to_download=0,
over_max_filesize = 0

for img_url in images:
flag, size_flag = download_image(img_url, download_path, max_filesize)
if not flag:
if not size_flag:
failed += 1
else:
over_max_filesize += 1
try:
download_image(img_url, download_path, max_filesize)
except ImageDownloadError:
failed += 1
except ImageSizeError:
over_max_filesize += 1
count += 1
if count == no_to_download:
break
Expand Down
21 changes: 13 additions & 8 deletions image_scraper/progressbar.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
from __future__ import division
from __future__ import print_function
from builtins import range
from past.utils import old_div
from builtins import object
import sys
import time
from array import array
Expand Down Expand Up @@ -74,7 +79,7 @@ def update(self, pbar):
if pbar.seconds_elapsed < 2e-6: # == 0:
bps = 0.0
else:
bps = float(pbar.currval) / pbar.seconds_elapsed
bps = old_div(float(pbar.currval), pbar.seconds_elapsed)
spd = bps
for u in self.units:
if spd < 1000:
Expand Down Expand Up @@ -110,7 +115,7 @@ def __init__(self, marker='#', left='|', right='|'):
self.right = right

def _format_marker(self, pbar):
if isinstance(self.marker, (str, unicode)):
if isinstance(self.marker, str):
return self.marker
else:
return self.marker.update(pbar)
Expand Down Expand Up @@ -208,15 +213,15 @@ def _format_widgets(self):
r.append(w)
hfill_inds.append(i)
num_hfill += 1
elif isinstance(w, (str, unicode)):
elif isinstance(w, str):
r.append(w)
currwidth += len(w)
else:
weval = w.update(self)
currwidth += len(weval)
r.append(weval)
for iw in hfill_inds:
r[iw] = r[iw].update(self, (self.term_width-currwidth)/num_hfill)
r[iw] = r[iw].update(self, old_div((self.term_width-currwidth), num_hfill))
return r

def _format_line(self):
Expand Down Expand Up @@ -273,7 +278,7 @@ def example1():
# do something
pbar.update(10*i+1)
pbar.finish()
print
print()

def example2():
class CrazyFileTransferSpeed(FileTransferSpeed):
Expand All @@ -292,7 +297,7 @@ def update(self, pbar):
# do something
pbar.update(5*i+1)
pbar.finish()
print
print()

def example3():
widgets = [Bar('>'), ' ', ETA(), ' ', ReverseBar('<')]
Expand All @@ -301,7 +306,7 @@ def example3():
# do something
pbar.update(10*i+1)
pbar.finish()
print
print()

def example4():
widgets = ['Test: ', Percentage(), ' ',
Expand All @@ -313,7 +318,7 @@ def example4():
time.sleep(0.2)
pbar.update(i)
pbar.finish()
print
print()

example1()
example2()
Expand Down
2 changes: 1 addition & 1 deletion image_scraper/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def download_image(img_url, download_path, max_filesize):

if int(img_request.headers['content-length']) < max_filesize:
img_content = img_request.content
with open(os.path.join(download_path.decode("utf-8"), img_url.split('/')[-1]), 'w') as f:
with open(os.path.join(download_path, img_url.split('/')[-1]), 'w') as f:
f.write(img_content)
else:
raise ImageSizeError(img_request.headers['content-length'])
Expand Down
Binary file modified tests/test.pyc
Binary file not shown.

0 comments on commit 2f2632e

Please sign in to comment.