Merge pull request #26 from srirams6/master
Typo and PEP8.
sananth12 committed Mar 3, 2015
2 parents 063b28c + 8027ec9 commit 8e991bd
Showing 4 changed files with 15 additions and 9 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -30,7 +30,7 @@ $ image-scraper [OPTIONS] URL
You can also use it in your python scripts.
```py
import image_scraper
-image_scraper.scrape_image(URL)
+image_scraper.scrape_images(URL)
```

Options
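The rename matters because `image_scraper/__init__.py` exports `scrape_images` (next file), so the old README call `scrape_image` would raise an `AttributeError`. A minimal usage sketch based on this diff; the URL is illustrative, and the `(count, failed)` return value comes from the `return count, failed` line in `mains.py` below:

```py
import image_scraper

# Illustrative URL; with no other arguments, images are saved under
# ./images, the download_path default in the new signature below.
count, failed = image_scraper.scrape_images("http://example.com/gallery")
print("%d attempted, %d failed" % (count, failed))
```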
2 changes: 1 addition & 1 deletion image_scraper/__init__.py
@@ -1 +1 @@
-from mains import console_main, scrape_images
+from mains import console_main, scrape_images
14 changes: 9 additions & 5 deletions image_scraper/mains.py
@@ -1,8 +1,8 @@
def console_main():
    import sys
    from progressbar import *
-    from utils import (process_links, get_html, get_img_list, download_image,
-                       process_download_path, get_arguments)
+    from utils import (process_links, get_html, get_img_list,
+                       download_image, process_download_path, get_arguments)
    URL, no_to_download, format_list, download_path, max_filesize, dump_urls, scrape_reverse, use_ghost = get_arguments()
    print "\nImageScraper\n============\nRequesting page....\n"

@@ -52,8 +52,13 @@ def console_main():
    print "\nDone!\nDownloaded %s images" % (count-failed-over_max_filesize)
    return

-def scrape_images(url, no_to_download=0, format_list=["jpg", "png", "gif", "svg", "jpeg"], download_path='images', max_filesize=100000000, dump_urls=False, use_ghost=False):
-    import sys, os
+
+def scrape_images(url, no_to_download=0,
+                  format_list=["jpg", "png", "gif", "svg", "jpeg"],
+                  download_path='images', max_filesize=100000000,
+                  dump_urls=False, use_ghost=False):
+    import sys
+    import os
    from utils import (process_links, get_html, get_img_list, download_image,
                       process_download_path, get_arguments)
    page_html, page_url = get_html(url, use_ghost)
@@ -85,4 +90,3 @@ def scrape_images(url, no_to_download=0, format_list=["jpg", "png", "gif", "svg"
        if count == no_to_download:
            break
    return count, failed
-
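The rewrap of `scrape_images` is purely cosmetic, so the call contract is unchanged; with this many defaults, keyword arguments keep call sites readable. A hedged sketch, with illustrative values and parameter names taken from the signature above:

```py
import image_scraper

count, failed = image_scraper.scrape_images(
    "http://example.com/gallery",   # illustrative URL
    no_to_download=10,              # the loop above breaks once count reaches this
    format_list=["jpg", "png"],
    download_path="images",
    max_filesize=100000000,         # bytes
    dump_urls=False,
    use_ghost=False)
```

One PEP8 item the hunk leaves alone: a mutable `format_list=[...]` default is shared across calls, so a later cleanup could switch to `format_list=None` and fall back to the full list inside the function.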
6 changes: 4 additions & 2 deletions image_scraper/utils.py
@@ -16,7 +16,8 @@ def process_links(links, formats=["jpg", "png", "gif", "svg", "jpeg"]):


def get_arguments():
-    parser = argparse.ArgumentParser(description='Dowloads images form given URL')
+    parser = argparse.ArgumentParser(
+        description='Dowloads images form given URL')
    parser.add_argument('url2scrape', nargs=1, help="URL to scrape")
    parser.add_argument('-m', '--max-images', type=int, default=0,
                        help="Limit on number of images\n")
@@ -37,7 +38,8 @@ def get_arguments():
    if not re.match(r'^[a-zA-Z]+://', URL):
        URL = 'http://' + URL
    no_to_download = args.max_images
-    save_dir = args.save_dir + '_{uri.netloc}'.format(uri=urlparse.urlparse(URL))
+    save_dir = args.save_dir + '_{uri.netloc}'.format(
+        uri=urlparse.urlparse(URL))
    if args.save_dir != "images":
        save_dir = args.save_dir
    download_path = os.path.join(os.getcwd(), save_dir)
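The rewrapped `save_dir` line is the one non-obvious expression here: `str.format` can dereference attributes of its arguments, so `'{uri.netloc}'` reads `netloc` off the `ParseResult`. A small sketch of what it evaluates to (Python 2 `urlparse`, matching the repo's `print` statements; the URL is illustrative):

```py
import urlparse  # Python 2 stdlib; urllib.parse in Python 3

uri = urlparse.urlparse('http://example.com/pics/cats')
# '{uri.netloc}' is attribute access inside a format field, so the
# default save_dir of 'images' becomes 'images_example.com'.
print('images' + '_{uri.netloc}'.format(uri=uri))  # images_example.com
```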
