Commit 43704fc: Merge branch 'vigneshmanix-title'
ssundarraj committed Apr 1, 2015
2 parents c70e371 + 44ebdf6 commit 43704fc
Showing 6 changed files with 59 additions and 19 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -20,7 +20,7 @@ You can also download using pip:
$ pip install ImageScraper
```
####**Dependencies**
Note that ``ImageScraper`` depends on ``lxml``, ``requests``, and ``future``.
Note that ``ImageScraper`` depends on ``lxml``, ``requests``, ``setproctitle``, and ``future``.
If you run into problems in the compilation of ``lxml`` through ``pip``, install the ``libxml2-dev`` and ``libxslt-dev`` packages on your system.

Usage
56 changes: 47 additions & 9 deletions README.rst
@@ -1,8 +1,7 @@
ImageScraper
============
ImageScraper :page\_with\_curl:
===============================

A cool command line tool to download images in the given
webpage.
A cool command line tool which downloads images from the given webpage.

+------------------+--------------------+--------------------+
| Build Status | Version | Downloads |
@@ -37,9 +36,10 @@ You can also download using pip:
**Dependencies**
^^^^^^^^^^^^^^^^

Note that ``ImageScraper`` depends on ``lxml`` and ``requests``. If you
run into problems in the compilation of ``lxml`` through ``pip``,
install the ``libxml2-dev`` and ``libxslt-dev`` packages on your system.
Note that ``ImageScraper`` depends on ``lxml``, ``requests``,
``setproctitle``, and ``future``. If you run into problems in the
compilation of ``lxml`` through ``pip``, install the ``libxml2-dev`` and
``libxslt-dev`` packages on your system.
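As a quick sanity check, all four runtime dependencies are importable under the same names they are installed under; a minimal sketch (the printed message is purely illustrative):

```python
# Minimal check that ImageScraper's runtime dependencies are installed;
# each distribution is importable under its PyPI name.
import lxml
import requests
import future
import setproctitle

print("lxml, requests, future and setproctitle are all importable")
```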

Usage
-----
@@ -48,7 +48,7 @@ Usage
$ image-scraper [OPTIONS] URL
You can also use it in your python scripts.
You can also use it in your Python scripts.

.. code:: py
@@ -107,10 +107,48 @@ NOTE:
By default, a new folder called "images\_" will be created in the
working directory, containing all the downloaded images.

Issues
------

Q: Why were some images not downloaded?

A: The content may have been injected into the page via JavaScript;
this scraper does not run JavaScript.

Contribute
----------

If you want to add or improve features, or report issues, feel free to
send a pull request!

Contributors
~~~~~~~~~~~~

- `sananth12 <https://github.com/sananth12>`__ (`Anantha
  Natarajan <http://ananth.co.in>`__)
- `srirams6 <https://github.com/srirams6>`__ (Sriram Sundarraj)
- `vigneshmanix <https://github.com/vigneshmanix>`__ (Vignesh M)
- `osborne6 <https://github.com/osborne6>`__
- `tsleyson <https://github.com/tsleyson>`__

|Throughput Graph|

License
-------

.. figure:: https://raw.githubusercontent.com/sananth12/ImageScraper/master/images/gpl.png
   :alt: GPL V3

   GPL V3

|Analytics|
.. |Build Status| image:: https://travis-ci.org/sananth12/ImageScraper.svg?branch=master
:target: https://travis-ci.org/sananth12/ImageScraper
.. |Latest Version| image:: https://pypip.in/v/ImageScraper/badge.png
:target: https://pypi.python.org/pypi/ImageScraper/
.. |PyPi downloads| image:: http://img.shields.io/badge/downloads-7.5k%20total-blue.svg
.. |PyPi downloads| image:: http://img.shields.io/badge/downloads-8k%20total-blue.svg
:target: https://pypi.python.org/pypi/ImageScraper
.. |Throughput Graph| image:: https://graphs.waffle.io/sananth12/ImageScraper/throughput.svg
:target: https://waffle.io/sananth12/ImageScraper/metrics
.. |Analytics| image:: https://ga-beacon.appspot.com/UA-60764448-1/ImageScraper/README.md
:target: https://github.com/igrigorik/ga-beacon
4 changes: 4 additions & 0 deletions image_scraper/mains.py
@@ -6,8 +6,11 @@
from .progressbar import *
from .utils import ImageScraper
from .exceptions import *
from setproctitle import setproctitle


def console_main():
setproctitle('image-scraper')
scraper = ImageScraper()
scraper.get_arguments()
print("\nImageScraper\n============\nRequesting page....\n")
@@ -67,6 +70,7 @@ def console_main():
print("\nDone!\nDownloaded {0} images\nFailed: {1}\n".format(count-failed-over_max_filesize, failed))
return


def scrape_images(url, no_to_download=None,
format_list=["jpg", "png", "gif", "svg", "jpeg"],
download_path='images', max_filesize=100000000,
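For context, ``scrape_images`` (its remaining keyword arguments are elided in this diff) is the programmatic counterpart of the ``image-scraper`` command. A minimal usage sketch, assuming the parameters not shown above keep their defaults, with a hypothetical URL and folder name:

```python
# Usage sketch: fetch up to ten jpg/png images from a (hypothetical) page
# into ./images_example. Parameters not set here keep their defaults.
from image_scraper.mains import scrape_images

scrape_images(
    "http://example.com/gallery",   # hypothetical page URL
    no_to_download=10,
    format_list=["jpg", "png"],
    download_path="images_example",
)
```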
11 changes: 4 additions & 7 deletions image_scraper/utils.py
@@ -9,6 +9,7 @@
import re
from image_scraper.exceptions import *


class ImageScraper:
url = None
no_to_download = 0
@@ -59,7 +60,7 @@ def get_arguments(self):
self.url = 'http://' + self.url
self.no_to_download = args.max_images
save_dir = args.save_dir + '_{uri.netloc}'.format(
uri = urlparse(self.url))
uri=urlparse(self.url))
if args.save_dir != "images":
save_dir = args.save_dir
self.download_path = os.path.join(os.getcwd(), save_dir)
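The ``uri=urlparse(self.url)`` change is only a whitespace fix, but the surrounding lines are what give the default download folder its ``images_<domain>`` name mentioned in the README. A small sketch of that derivation, shown here with the Python 3 ``urllib.parse`` import:

```python
# How the default save directory name is built: the default "images"
# prefix, an underscore, and the network location of the target URL.
from urllib.parse import urlparse

url = "http://example.com/gallery"
save_dir = "images" + "_{uri.netloc}".format(uri=urlparse(url))
print(save_dir)  # -> images_example.com
```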
@@ -71,7 +72,6 @@ def get_arguments(self):
return (self.url, self.no_to_download, self.format_list, self.download_path, self.max_filesize,
self.dump_urls, self.scrape_reverse, self.use_ghost)


def get_html(self):
if self.use_ghost:
self.url = urljoin("http://", self.url)
@@ -99,7 +99,6 @@ def get_html(self):
self.page_url = page_url
return (self.page_html, self.page_url)


def get_img_list(self):
tree = html.fromstring(self.page_html)
img = tree.xpath('//img/@src')
@@ -135,13 +134,13 @@ def download_image(self, img_url):
except:
raise ImageDownloadError()

if img_url[-3:] == "svg" :
if img_url[-3:] == "svg":
img_content = img_request.content
with open(os.path.join(self.download_path, img_url.split('/')[-1]), 'wb') as f:
byte_image = bytes(img_content)
f.write(byte_image)

elif int(img_request.headers['content-length']) < self.max_filesize :
elif int(img_request.headers['content-length']) < self.max_filesize:
img_content = img_request.content
with open(os.path.join(self.download_path, img_url.split('/')[-1]), 'wb') as f:
byte_image = bytes(img_content)
@@ -150,11 +149,9 @@ def download_image(self, img_url):
raise ImageSizeError(img_request.headers['content-length'])
return True


def process_links(self, links):
x = []
for l in links:
if os.path.splitext(l)[1][1:].strip().lower() in self.format_list:
x.append(l)
return x

1 change: 1 addition & 0 deletions requirements.txt
@@ -1,3 +1,4 @@
lxml>=3.2.3
requests>=2.1.0
future>=0.14.3
setproctitle>=1.1.8
4 changes: 2 additions & 2 deletions setup.py
@@ -10,8 +10,8 @@
extra['use_2to3'] = True

setup(name='ImageScraper',
version='2.0.5',
install_requires=['lxml', 'requests', 'future'],
version='2.0.7',
install_requires=[r for r in open('requirements.txt', 'r').read().split('\n') if r],
author='Anantha Natarajan S',
author_email='sananthanatarajan12@gmail.com',
packages=['image_scraper'],
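The new ``install_requires`` line derives the dependency list from requirements.txt instead of hard-coding it, so the two can no longer drift apart (this does assume requirements.txt ships with the source distribution). A quick sketch of what the comprehension evaluates to, using the requirements.txt content from this commit:

```python
# What the new install_requires expression produces: one entry per
# non-empty line of requirements.txt.
requirements_txt = """lxml>=3.2.3
requests>=2.1.0
future>=0.14.3
setproctitle>=1.1.8
"""

install_requires = [r for r in requirements_txt.split("\n") if r]
print(install_requires)
# ['lxml>=3.2.3', 'requests>=2.1.0', 'future>=0.14.3', 'setproctitle>=1.1.8']
```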
