Permalink
Browse files

initial import

  • Loading branch information...
0 parents commit 4e54b1a74c36ea07a7a3fc3e7092c55e60a92f47 @keul committed Dec 30, 2012
@@ -0,0 +1,3 @@
+*.egg
+src/*.egg-info
+
@@ -0,0 +1,6 @@
+include *.rst
+
+recursive-include docs *
+recursive-include src *
+
+global-exclude *.pyc
@@ -0,0 +1 @@
+TODO
@@ -0,0 +1,5 @@
+Changelog
+=========
+
+0.1 (Unreleased)
+----------------
@@ -0,0 +1,222 @@
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
@@ -0,0 +1,15 @@
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA.
@@ -0,0 +1,3 @@
+[nosetests]
+verbosity=2
+nocapture=1
@@ -0,0 +1,43 @@
+from setuptools import setup, find_packages
+import sys, os
+
+setup(name='Allanon',
+ # scripts=['src/allanon',],
+ version="0.1",
+ description="A crawler for visit a predictable set of URLs, "
+ "and download resources inside visited pages",
+ long_description=open("README.rst").read() + "\n" +
+ open(os.path.join("docs", "HISTORY.txt")).read(),
+ classifiers=["Development Status :: 3 - Alpha",
+ "Intended Audience :: End Users/Desktop",
+ "License :: OSI Approved :: GNU General Public License (GPL)",
+ "Operating System :: OS Independent",
+ "Programming Language :: Python",
+ "Topic :: Utilities",
+ "Topic :: Internet :: WWW/HTTP",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 2.7",
+ ], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
+ keywords='crawler robot spider downloader parser',
+ author='keul',
+ author_email='luca@keul.it',
+ url='https://github.com/keul/Allanon',
+ license='GPL',
+ packages=find_packages('src', exclude=['ez_setup',]),
+ #py_modules=['allanon',],
+ package_dir = {'': 'src'},
+ include_package_data=True,
+ #package_data={'': ['example_profile.cfg', ]},
+ #data_files=[('profiles', ['src/example_profile.cfg'])],
+ tests_require=['nose', 'httpretty',],
+ setup_requires=['nose>=1.0'],
+ test_suite="nose.collector",
+ zip_safe=False,
+ install_requires=[
+ 'setuptools',
+ 'pyquery',
+ 'requests>=1.0.0',
+ ],
+ entry_points={'console_scripts': ['allanon = allanon.main:main', ]}
+ )
+
@@ -0,0 +1,3 @@
+import logging
+
+logger = logging.getLogger("Allanon")
@@ -0,0 +1,75 @@
+# -*- coding: utf8 -*-
+
+import os
+
+from optparse import OptionParser
+
+from allanon.url_generator import generate
+from allanon.resouce_grabber import ResourceGrabber
+
+VERSION = "0.1"
+DESCRIPTION = """
+Crawl a replicable set of URLs, then download resources from them.
+
+URLs can be composed by a variable range(s) like:
+
+ http://foo.org/{1:10}/page?section={1:4}
+
+This will make this utility to crawl through a set of URLs like this:
+
+ http://foo.org/1/page?section=1
+ http://foo.org/1/page?section=2
+ ...
+ http://foo.org/2/page?section=1
+ ...
+ http://foo.org/10/page?section=4
+
+""".strip()
+
+parser = OptionParser(usage="Usage: %prog [option, ...] urls_model [urls_model, ...]",
+ version="%prog " + VERSION,
+ description=DESCRIPTION,
+ prog="allanon")
+
+parser.remove_option("--help")
+parser.add_option('--help', '-h',
+ action="store_true", default=False,
+ help='show this help message and exit')
+
+parser.add_option('--search', '-s', dest="search", default=None,
+ help="Look for this resource inside every URLs and download it.\n"
+ "See the pyquery documentation for more info about the query "
+ "format (http://packages.python.org/pyquery/).")
+
+parser.add_option('--directory', '-d', dest="destination_directory", default=os.getcwd(),
+ metavar="TARGET_DIR",
+ help="Directory where to store all resources that will be downloaded.\n"
+ "Default if the current directory")
+
+
+def get_urls(raw_urls):
+ for raw_url in raw_urls:
+ for url in generate(raw_url):
+ yield url
+
+
+def main():
+ options, args = parser.parse_args()
+
+ if len(args)<1 or options.help:
+ # personal version of the help, to being able to keep \n in description
+ result = []
+ result.append(parser.get_usage())
+ result.append(DESCRIPTION+"\n")
+ result.append(parser.format_option_help(parser.formatter))
+ print "\n".join(result)
+
+ urls = get_urls(args)
+ for url in urls:
+ rg = ResourceGrabber(url)
+ if not options.search:
+ rg.download(options.destination_directory)
+
+
+if __name__ == '__main__':
+ main()
Oops, something went wrong.

0 comments on commit 4e54b1a

Please sign in to comment.