Skip to content

Commit

Permalink
enable configuration of timeout and number of retries for resource download attempts
Browse files Browse the repository at this point in the history
  • Loading branch information
Mark Allen Matney, Jr committed Oct 2, 2018
1 parent beabd5b commit 956c9ea
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 13 deletions.
10 changes: 10 additions & 0 deletions bin/resync
Expand Up @@ -173,6 +173,12 @@ def main():
opt.add_option('--eval', '-e', action='store_true',
help="output evaluation of source/client synchronization performance... "
"be warned, this is very verbose")
opt.add_option('--tries', '-t', type=int, action='store', metavar='TRIES',
help="set number of tries to TRIES. The default is to retry 20 times, "
"with the exception of fatal errors like \"connection refused\" "
"or \"not found\" (404), which are not retried.")
opt.add_option('--timeout', '-T', type=int, action='store', metavar='SECONDS',
help="set the request timeout for resource downloads to SECONDS seconds")

(args, map) = p.parse_args()

Expand Down Expand Up @@ -227,6 +233,10 @@ def main():
c.max_sitemap_entries = args.max_sitemap_entries
if (args.ignore_failures):
c.ignore_failures = args.ignore_failures
if (args.tries):
c.tries = args.tries
if (args.timeout):
c.timeout = args.timeout

# Links apply to anything that writes sitemaps
links = parse_links(args.link)
Expand Down
45 changes: 32 additions & 13 deletions resync/client.py
Expand Up @@ -60,6 +60,8 @@ def __init__(self, hashes=None, verbose=False, dryrun=False):
self.ignore_failures = False
self.pretty_xml = True
self.fake_input = None
self.tries = 20
self.timeout = None
# Default file names
self.status_file = '.resync-client-status.cfg'
self.default_resource_dump = 'resourcedump.zip'
Expand Down Expand Up @@ -496,19 +498,36 @@ def update_resource(self, resource, filename, change=None):
(resource.uri, filename))
else:
# 1. GET
try:
r = requests.get(resource.uri, stream=True)
with open(filename, 'wb') as fd:
for chunk in r.iter_content(chunk_size=1024):
fd.write(chunk)
num_updated += 1
except IOError as e:
msg = "Failed to GET %s -- %s" % (resource.uri, str(e))
if (self.ignore_failures):
self.logger.warning(msg)
return(num_updated)
else:
raise ClientFatalError(msg)
for try_i in range(1, self.tries + 1):
try:
r = requests.get(resource.uri, timeout=self.timeout, stream=True)
# Fail on 4xx or 5xx
r.raise_for_status()
with open(filename, 'wb') as fd:
for chunk in r.iter_content(chunk_size=1024):
fd.write(chunk)
num_updated += 1
break
except requests.Timeout as e:
if try_i < self.tries:
msg = 'Download timed out, retrying...'
self.logger.info(msg)
# Continue loop
else:
# No more tries left, so fail
msg = "Failed to GET %s after %s tries -- %s" % (resource.uri, self.tries, str(e))
if (self.ignore_failures):
self.logger.warning(msg)
return(num_updated)
else:
raise ClientFatalError(msg)
except (requests.RequestException, IOError) as e:
msg = "Failed to GET %s -- %s" % (resource.uri, str(e))
if (self.ignore_failures):
self.logger.warning(msg)
return(num_updated)
else:
raise ClientFatalError(msg)
# 2. set timestamp if we have one
if (resource.timestamp is not None):
unixtime = int(resource.timestamp) # no fractional
Expand Down

0 comments on commit 956c9ea

Please sign in to comment.