Permalink
Browse files

Update to allow custodian to set polling_time_step and monitor_freq.

  • Loading branch information...
1 parent 8ecac88 commit 1d32dbd4e32e4aebcdbfbac94f32da8639f2466d @shyuep shyuep committed Apr 3, 2013
Showing with 921 additions and 397 deletions.
  1. +51 −8 custodian/custodian.py
  2. BIN docs/_build/doctrees/changelog.doctree
  3. BIN docs/_build/doctrees/custodian.ansible.doctree
  4. BIN docs/_build/doctrees/custodian.doctree
  5. BIN docs/_build/doctrees/custodian.vasp.doctree
  6. BIN docs/_build/doctrees/environment.pickle
  7. BIN docs/_build/doctrees/index.doctree
  8. BIN docs/_build/doctrees/modules.doctree
  9. +2 −2 docs/_build/html/.buildinfo
  10. +7 −6 docs/_build/html/_modules/custodian/ansible/actions.html
  11. +7 −6 docs/_build/html/_modules/custodian/ansible/intepreter.html
  12. +105 −25 docs/_build/html/_modules/custodian/custodian.html
  13. +116 −6 docs/_build/html/_modules/custodian/vasp/handlers.html
  14. +8 −7 docs/_build/html/_modules/custodian/vasp/jobs.html
  15. +7 −6 docs/_build/html/_modules/index.html
  16. +6 −0 docs/_build/html/_sources/index.txt
  17. +1 −1 docs/_build/html/_static/basic.css
  18. +14 −2 docs/_build/html/_static/doctools.js
  19. +154 −4 docs/_build/html/_static/jquery.js
  20. +1 −1 docs/_build/html/_static/nature.css
  21. +148 −210 docs/_build/html/_static/searchtools.js
  22. +21 −29 docs/_build/html/_static/underscore.js
  23. +1 −1 docs/_build/html/_static/websupport.js
  24. +8 −7 docs/_build/html/changelog.html
  25. +8 −7 docs/_build/html/custodian.ansible.html
  26. +37 −12 docs/_build/html/custodian.html
  27. +89 −7 docs/_build/html/custodian.vasp.html
  28. +83 −13 docs/_build/html/genindex.html
  29. +22 −12 docs/_build/html/index.html
  30. +8 −7 docs/_build/html/modules.html
  31. BIN docs/_build/html/objects.inv
  32. +8 −7 docs/_build/html/py-modindex.html
  33. +8 −9 docs/_build/html/search.html
  34. +1 −1 docs/_build/html/searchindex.js
  35. BIN docs/{ → _static}/favicon.ico
  36. +0 −1 fabfile.py
@@ -29,13 +29,42 @@ class Custodian(object):
1. Let's say you have defined a list of jobs as [job1, job2, job3, ...] and
you have defined a list of possible error handlers as [err1, err2, ...]
- 2. Custodian will run the jobs in the order of job1, job2, ...
+ 2. Custodian will run the jobs in the order of job1, job2, ... During each
+ job, custodian will monitor for errors using the handlers that have
+ is_monitor == True. If an error is detected, corrective measures are
+ taken and the particular job is rerun.
3. At the end of each individual job, Custodian will run through the list of
- error handlers. If an error is detected, corrective measures are taken
- and the particular job is rerun.
+ error handlers that have is_monitor == False. If an error is detected,
+ corrective measures are taken and the particular job is rerun.
+
+ .. attribute:: max_errors
+
+ Maximum number of errors allowed.
+
+ .. attribute:: handlers
+
+ Error handlers that are not Monitors.
+
+ .. attribute:: monitors
+
+ Error handlers that are Monitors, i.e., handlers that monitor a job
+ as it is being run.
+
+ .. attribute:: polling_time_step
+
+ The length of time in seconds between steps in which a job is
+ checked for completion.
+
+ .. attribute:: monitor_freq
+
+ The number of polling steps before monitoring occurs. For example,
+ if you have a polling_time_step of 10 seconds and a monitor_freq of
+ 30, this means that Custodian uses the monitors to check for errors
+ every 30 x 10 = 300 seconds, i.e., 5 minutes.
"""
- def __init__(self, handlers, jobs, max_errors=1):
+ def __init__(self, handlers, jobs, max_errors=1, polling_time_step=10,
+ monitor_freq=30):
"""
Args:
handlers:
@@ -46,11 +75,22 @@ def __init__(self, handlers, jobs, max_errors=1):
double-relaxation.
max_errors:
Maximum number of errors allowed before exiting.
+ polling_time_step:
+ The length of time in seconds between steps in which a
+ job is checked for completion. Defaults to 10 seconds.
+ monitor_freq:
+ The number of polling steps before monitoring occurs. For
+ example, if you have a polling_time_step of 10 seconds and a
+ monitor_freq of 30, this means that Custodian uses the
+ monitors to check for errors every 30 x 10 = 300 seconds,
+ i.e., 5 minutes.
"""
self.max_errors = max_errors
self.jobs = jobs
self.handlers = filter(lambda x: not x.is_monitor, handlers)
self.monitors = filter(lambda x: x.is_monitor, handlers)
+ self.polling_time_step = polling_time_step
+ self.monitor_freq = monitor_freq
def run(self):
"""
@@ -67,8 +107,7 @@ def run(self):
for attempt in xrange(self.max_errors):
logging.info(
"Starting job no. {} ({}) attempt no. {}. Errors thus far"
- " = {}.".format(i + 1, job.name, attempt + 1,
- total_errors))
+ " = {}.".format(i + 1, job.name, attempt + 1, total_errors))
# If this is the start of the job, do the setup.
if not run_log[-1]["corrections"]:
@@ -79,15 +118,17 @@ def run(self):
# corrections.
error = False
+ # While the job is running, we use the handlers that are
+ # monitors to monitor the job.
if isinstance(p, subprocess.Popen):
if self.monitors:
n = 0
while True:
n += 1
- time.sleep(10)
+ time.sleep(self.polling_time_step)
if p.poll() is not None:
break
- if n % 30 == 0:
+ if n % self.monitor_freq == 0:
for h in self.monitors:
if h.check():
p.terminate()
@@ -99,6 +140,8 @@ def run(self):
else:
p.wait()
+ # If there are no errors *during* the run, we now check for
+ # errors *after* the run using handlers that are not monitors.
if not error:
for h in self.handlers:
if h.check():
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 2e05ab5fe7ad09614b28c350746b3bc5
-tags: a205e9ed8462ae86fdd2f73488852ba9
+config: 5cb0be43734da8edda5b762e55bd67d5
+tags: fbb0d17656682115ca4d033fb2f83ba1
@@ -1,3 +1,4 @@
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
@@ -6,15 +7,15 @@
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
- <title>custodian.ansible.actions &mdash; custodian 0.3.3 documentation</title>
+ <title>custodian.ansible.actions &mdash; custodian 0.3.4b documentation</title>
<link rel="stylesheet" href="../../../_static/proBlue.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
<script type="text/javascript">
var DOCUMENTATION_OPTIONS = {
URL_ROOT: '../../../',
- VERSION: '0.3.3',
+ VERSION: '0.3.4b',
COLLAPSE_INDEX: false,
FILE_SUFFIX: '.html',
HAS_SOURCE: true
@@ -24,7 +25,7 @@
<script type="text/javascript" src="../../../_static/underscore.js"></script>
<script type="text/javascript" src="../../../_static/doctools.js"></script>
<link rel="shortcut icon" href="../../../_static/favicon.ico"/>
- <link rel="top" title="custodian 0.3.3 documentation" href="../../../index.html" />
+ <link rel="top" title="custodian 0.3.4b documentation" href="../../../index.html" />
<link rel="up" title="Module code" href="../../index.html" />
<script type="text/javascript">
@@ -44,7 +45,7 @@
<li class="right" >
<a href="../../../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
- <li><a href="../../../index.html">custodian 0.3.3 documentation</a> &raquo;</li>
+ <li><a href="../../../index.html">custodian 0.3.4b documentation</a> &raquo;</li>
<li><a href="../../index.html" accesskey="U">Module code</a> &raquo;</li>
</ul>
</div>
@@ -314,14 +315,14 @@
<li class="right" >
<a href="../../../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
- <li><a href="../../../index.html">custodian 0.3.3 documentation</a> &raquo;</li>
+ <li><a href="../../../index.html">custodian 0.3.4b documentation</a> &raquo;</li>
<li><a href="../../index.html" >Module code</a> &raquo;</li>
</ul>
</div>
<div class="footer">
&copy; Copyright 2013, Shyue Ping Ong.
- Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.1.3.
</div>
<div class="footer">This page uses <a href="http://analytics.google.com/">
Google Analytics</a> to collect statistics. You can disable it by blocking
@@ -1,3 +1,4 @@
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
@@ -6,15 +7,15 @@
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
- <title>custodian.ansible.intepreter &mdash; custodian 0.3.3 documentation</title>
+ <title>custodian.ansible.intepreter &mdash; custodian 0.3.4b documentation</title>
<link rel="stylesheet" href="../../../_static/proBlue.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
<script type="text/javascript">
var DOCUMENTATION_OPTIONS = {
URL_ROOT: '../../../',
- VERSION: '0.3.3',
+ VERSION: '0.3.4b',
COLLAPSE_INDEX: false,
FILE_SUFFIX: '.html',
HAS_SOURCE: true
@@ -24,7 +25,7 @@
<script type="text/javascript" src="../../../_static/underscore.js"></script>
<script type="text/javascript" src="../../../_static/doctools.js"></script>
<link rel="shortcut icon" href="../../../_static/favicon.ico"/>
- <link rel="top" title="custodian 0.3.3 documentation" href="../../../index.html" />
+ <link rel="top" title="custodian 0.3.4b documentation" href="../../../index.html" />
<link rel="up" title="Module code" href="../../index.html" />
<script type="text/javascript">
@@ -44,7 +45,7 @@
<li class="right" >
<a href="../../../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
- <li><a href="../../../index.html">custodian 0.3.3 documentation</a> &raquo;</li>
+ <li><a href="../../../index.html">custodian 0.3.4b documentation</a> &raquo;</li>
<li><a href="../../index.html" accesskey="U">Module code</a> &raquo;</li>
</ul>
</div>
@@ -191,14 +192,14 @@
<li class="right" >
<a href="../../../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
- <li><a href="../../../index.html">custodian 0.3.3 documentation</a> &raquo;</li>
+ <li><a href="../../../index.html">custodian 0.3.4b documentation</a> &raquo;</li>
<li><a href="../../index.html" >Module code</a> &raquo;</li>
</ul>
</div>
<div class="footer">
&copy; Copyright 2013, Shyue Ping Ong.
- Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
+ Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.1.3.
</div>
<div class="footer">This page uses <a href="http://analytics.google.com/">
Google Analytics</a> to collect statistics. You can disable it by blocking
Oops, something went wrong.

0 comments on commit 1d32dbd

Please sign in to comment.