Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added --overwrite-output (-O) option #716

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 7 additions & 0 deletions scrapy/commands/crawl.py
Expand Up @@ -20,6 +20,8 @@ def add_options(self, parser):
help="set spider argument (may be repeated)")
parser.add_option("-o", "--output", metavar="FILE",
help="dump scraped items into FILE (use - for stdout)")
parser.add_option("-O", "--overwrite-output", metavar="FILE",
help="overwrite scraped items into FILE")
parser.add_option("-t", "--output-format", metavar="FORMAT",
help="format to use for dumping items with -o")

Expand All @@ -29,6 +31,11 @@ def process_options(self, args, opts):
opts.spargs = arglist_to_dict(opts.spargs)
except ValueError:
raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
if opts.overwrite_output:
if opts.output:
raise UsageError("Please use only one of --output and --overwrite-output")
opts.output = opts.overwrite_output
self.settings.overrides['FEED_URI_OVERWRITE'] = True
if opts.output:
if opts.output == '-':
self.settings.overrides['FEED_URI'] = 'stdout:'
Expand Down
7 changes: 7 additions & 0 deletions scrapy/commands/runspider.py
Expand Up @@ -43,6 +43,8 @@ def add_options(self, parser):
help="set spider argument (may be repeated)")
parser.add_option("-o", "--output", metavar="FILE",
help="dump scraped items into FILE (use - for stdout)")
parser.add_option("-O", "--overwrite-output", metavar="FILE",
help="overwrite scraped items into FILE")
parser.add_option("-t", "--output-format", metavar="FORMAT",
help="format to use for dumping items with -o")

Expand All @@ -52,6 +54,11 @@ def process_options(self, args, opts):
opts.spargs = arglist_to_dict(opts.spargs)
except ValueError:
raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
if opts.overwrite_output:
if opts.output:
raise UsageError("Please use only one of --output and --overwrite-output")
opts.output = opts.overwrite_output
self.settings.overrides['FEED_URI_OVERWRITE'] = True
if opts.output:
if opts.output == '-':
self.settings.overrides['FEED_URI'] = 'stdout:'
Expand Down
4 changes: 3 additions & 1 deletion scrapy/contrib/feedexport.py
Expand Up @@ -67,13 +67,15 @@ class FileFeedStorage(object):
implements(IFeedStorage)

def __init__(self, uri):
from scrapy.conf import settings
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Importing settings globally is highly discouraged (and will soon be dropped); please switch to a from_crawler class method (or similar) to obtain the settings.

self.path = file_uri_to_path(uri)
self.overwrite = settings['FEED_URI_OVERWRITE']
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FEED_OVERWRITE makes more sense to me as the setting name.


def open(self, spider):
dirname = os.path.dirname(self.path)
if dirname and not os.path.exists(dirname):
os.makedirs(dirname)
return open(self.path, 'ab')
return open(self.path, 'wb' if self.overwrite else 'ab')

def store(self, file):
file.close()
Expand Down
1 change: 1 addition & 0 deletions scrapy/settings/default_settings.py
Expand Up @@ -119,6 +119,7 @@
}

FEED_URI = None
FEED_URI_OVERWRITE = False
FEED_URI_PARAMS = None # a function to extend uri arguments
FEED_FORMAT = 'jsonlines'
FEED_STORE_EMPTY = False
Expand Down