From 4c7466fbc7b7f0e9fb0a5907f2fb6f74a5f4a8d6 Mon Sep 17 00:00:00 2001 From: culshoefer Date: Sat, 30 Sep 2017 21:24:19 -0400 Subject: [PATCH 1/3] ADDED: First CLI parsing stub --- pyarxiv-cli.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 pyarxiv-cli.py diff --git a/pyarxiv-cli.py b/pyarxiv-cli.py new file mode 100644 index 0000000..18549eb --- /dev/null +++ b/pyarxiv-cli.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +""" +Example usage: +pyarxiv download 1409.6041 1709.1337 +pyarxiv download --target-folder=papers --use-title-for-filename=true --append-id=true 1501.1729 + +pyarxiv query cat:cs.AI --max-results=3 +--ids=13,14,1 +--title='a new approach' +--authors='Andrej Karpathy' +--abstract='lol' +--journal_ref='a' +max_results=100, ids=[], categories=[], + title='', authors='', abstract='', journal_ref='', + querystring='' + +You can also chain things, e.g.: + +pyarxiv download $(pyarxiv query cat:cs.AI --max-results=50) +""" +from argparse import ArgumentParser + + +def main(): + parser = ArgumentParser("argparse [download | query] -h for help on subcommands") + subparsers = parser.add_subparsers(help='download|query arXiv') + + parser_query = subparsers.add_parser('query', help='query arXiv') + parser_query.add_argument('ids', metavar='N', type=str, nargs='*', help='ids of arXiv papers to download/query') + + parser_download = subparsers.add_parser('download', help='download arXiv.org papers') + parser_download.add_argument('ids', metavar='N', type=str, nargs='*', help='ids of arXiv papers to download/query') + # parser.add_argument('query', help="Query arXiv.org for a certain set of papers") + print(parser.parse_args()) + print(parser_download.parse_args()) + +if __name__ == "__main__": + main() From e61d5c0d2d323abface962053a67951ece3fdbe0 Mon Sep 17 00:00:00 2001 From: culshoefer Date: Sun, 1 Oct 2017 22:58:01 -0400 Subject: [PATCH 2/3] ADDED: Other CLI args --- pyarxiv-cli.py | 82 
+++++++++++++++++++++++++++++++++++++++++++--- pyarxiv/__init__.py | 4 +-- 2 files changed, 80 insertions(+), 6 deletions(-) diff --git a/pyarxiv-cli.py b/pyarxiv-cli.py index 18549eb..c2fd4e4 100644 --- a/pyarxiv-cli.py +++ b/pyarxiv-cli.py @@ -5,7 +5,7 @@ pyarxiv download --target-folder=papers --use-title-for-filename=true --append-id=true 1501.1729 pyarxiv query cat:cs.AI --max-results=3 ---ids=13,14,1 +--ids=13 14 1 --title='a new approach' --authors='Andrej Karpathy' --abstract='lol' @@ -20,19 +20,93 @@ """ from argparse import ArgumentParser +import pyarxiv + + +def progress_callback(elem, exc): + if not exc is None: + print(exc) + else: + tup = pyarxiv.get_arxiv_id(elem) + i = tup[0] + if not tup[1] is None: + i += 'v' + tup[1] + print('Downloaded %s' % i) + def main(): parser = ArgumentParser("argparse [download | query] -h for help on subcommands") subparsers = parser.add_subparsers(help='download|query arXiv') parser_query = subparsers.add_parser('query', help='query arXiv') + parser_query.set_defaults(which='query') parser_query.add_argument('ids', metavar='N', type=str, nargs='*', help='ids of arXiv papers to download/query') + parser_query.add_argument('--title', '-t', type=str, nargs='?', help='Title of paper') + parser_query.add_argument('--max-results', '-m', type=int, nargs='?', help='Max number of results to fetch') + parser_query.add_argument('--authors', '-au', type=str, nargs='?', help='Title of paper') + parser_query.add_argument('--abstract', '-abs', type=str, nargs='?', help='Abstract of paper') + parser_query.add_argument('--journalref', '-jr', type=str, nargs='?', help='Journal reference of paper') + parser_query.add_argument('--querystring', '-q', type=str, nargs='?', help='Query string') parser_download = subparsers.add_parser('download', help='download arXiv.org papers') + parser_download.set_defaults(which='download') parser_download.add_argument('ids', metavar='N', type=str, nargs='*', help='ids of arXiv papers to download/query') - # 
parser.add_argument('query', help="Query arXiv.org for a certain set of papers") - print(parser.parse_args()) - print(parser_download.parse_args()) + parser_download.add_argument('--target-folder', '-t', type=str, nargs='?', help='Target folder') + parser_download.add_argument('--use-title-for-filename', '-u', + help='Use title of paper for filename', action='store_true') + parser_download.add_argument('--append-id', '-a', + help='If using use-title-for-filename, append id', action='store_true') + parser_download.add_argument('--silent', '-s', + help='Do not show progress', action='store_true') + + args = parser.parse_args() + + if args.which == 'query': + max_r = 100 + title = '' + authors = '' + abstract = '' + journal_ref = '' + querystring = '' + if not args.max_results is None: + max_r = args.max_results + if not args.title is None: + title = args.title + if not args.authors is None: + authors = args.authors + if not args.abstract is None: + abstract = args.abstract + if not args.journalref is None: + journal_ref = args.journalref + if not args.querystring is None: + querystring = args.querystring + + ids = pyarxiv.query(ids=args.ids, + max_results=max_r, + title=title, + authors=authors, + abstract=abstract, + journal_ref=journal_ref, + querystring=querystring) + tuples = list(map(lambda x: x[0] if x[1] is None else x[0] + 'v' + x[1], + map(lambda x: pyarxiv.get_arxiv_id(x), ids)) + ) + print("\n".join(tuples)) + else: + target = '.' 
+ prog = lambda x, y: id(x) + if not args.target_folder is None: + target = args.target_folder + if not args.silent: + prog = progress_callback + pyarxiv.download_entries(args.ids, + target_folder=target, + use_title_for_filename=args.use_title_for_filename, + append_id=args.append_id, + progress_callback=prog) + # print(parser_query.parse_args()) + # print(parser_download.parse_args()) + if __name__ == "__main__": main() diff --git a/pyarxiv/__init__.py b/pyarxiv/__init__.py index ca17fb2..0c38d91 100644 --- a/pyarxiv/__init__.py +++ b/pyarxiv/__init__.py @@ -24,9 +24,9 @@ def retrieve(url, file): if sys.version_info <= (3, 0): # pragma: no-cover - urllib.retrieve(url, file) + urllib.urlretrieve(url, file) else: - urllib.requests.urlretrieve(url, file) + urllib.request.urlretrieve(url, file) class ArxivQueryError(Exception): From b68e65ced95348096f4d434d2b2f2d5390c0be95 Mon Sep 17 00:00:00 2001 From: culshoefer Date: Tue, 3 Oct 2017 20:01:25 -0400 Subject: [PATCH 3/3] ADDED: setup.py install of CLI --- README.md | 1 - pyarxiv-cli.py => scripts/pyarxiv-cli | 0 setup.py | 5 +++-- 3 files changed, 3 insertions(+), 3 deletions(-) rename pyarxiv-cli.py => scripts/pyarxiv-cli (100%) mode change 100644 => 100755 diff --git a/README.md b/README.md index 8112bab..6ea0606 100644 --- a/README.md +++ b/README.md @@ -11,5 +11,4 @@ TODO - Query the arXiv API (atom feed) in your code - Use enums for arXiv categories - Download papers in your code as PDF - TODO - Do the above in the commandline diff --git a/pyarxiv-cli.py b/scripts/pyarxiv-cli old mode 100644 new mode 100755 similarity index 100% rename from pyarxiv-cli.py rename to scripts/pyarxiv-cli diff --git a/setup.py b/setup.py index 35c576a..70f9512 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -from distutils.core import setup, find_packages +from distutils.core import setup with open('requirements.txt') as f: requirements = f.read().splitlines() @@ -11,7 +11,8 @@ description='Python Client 
Library and CLI client for the ArXiv.org API', author='Christoph Ulshoefer', author_email='c@culshoefer.com', - packages=find_packages(), + packages=['pyarxiv'], url='https://github.com/culshoefer/pyarxiv/', + scripts=['scripts/pyarxiv-cli'], test_suite='tests' )