Skip to content
This repository has been archived by the owner on Sep 30, 2021. It is now read-only.

Commit

Permalink
recognize ES node loss, and anticipate resulting unresponsiveness
Browse files Browse the repository at this point in the history
  • Loading branch information
Kyle Lahnakoski committed Aug 18, 2016
1 parent 6777894 commit 560dc70
Showing 1 changed file with 12 additions and 2 deletions.
14 changes: 12 additions & 2 deletions motreeherder/treeherder.py
@@ -75,6 +75,9 @@ def _get_job_results_from_th(self, branch, revision):
if str(response.status_code)[0] == b'2':
results = convert.json2value(convert.utf82unicode(response.content)).results
break
elif str(response.status_code)[0] == b'5':
# WE MAY HAVE CRUSHED TH
Log.error(TRY_AGAIN_LATER, reason="HTTP " + unicode(response.status) + " ERROR")
elif response.status_code == 404:
if branch not in ["hg.mozilla.org"]:
Log.warning("{{branch}} rev {{revision}} returns 404 NOT FOUND", branch=branch, revision=revision)
@@ -289,10 +292,17 @@ def _get_markup_from_es(self, branch, revision, task_id=None, buildername=None,
docs = self.cache.search(query, timeout=600).hits.hits
break
except Exception, e:
if "EsRejectedExecutionException[rejected execution (queue capacity" not in e:
e = Except.wrap(e)
if "NodeNotConnectedException" in e:
# WE LOST A NODE, THIS MAY TAKE A WHILE
Thread.sleep(seconds=Random.int(5 * 60))
continue
elif "EsRejectedExecutionException[rejected execution (queue capacity" in e:
Thread.sleep(seconds=Random.int(30))
continue
else:
Log.warning("Bad ES call, fall back to TH", cause=e)
return None
Thread.sleep(seconds=Random.int(30))

if not docs:
if DEBUG:
Expand Down

0 comments on commit 560dc70

Please sign in to comment.