Skip to content

Commit

Permalink
Update to allow custodian to set polling_time_step and monitor_freq.
Browse files Browse the repository at this point in the history
  • Loading branch information
shyuep committed Apr 3, 2013
1 parent 8ecac88 commit 1d32dbd
Show file tree
Hide file tree
Showing 36 changed files with 921 additions and 397 deletions.
59 changes: 51 additions & 8 deletions custodian/custodian.py
Expand Up @@ -29,13 +29,42 @@ class Custodian(object):
1. Let's say you have defined a list of jobs as [job1, job2, job3, ...] and
you have defined a list of possible error handlers as [err1, err2, ...]
2. Custodian will run the jobs in the order of job1, job2, ...
2. Custodian will run the jobs in the order of job1, job2, ... During each
job, custodian will monitor for errors using the handlers that have
is_monitor == True. If an error is detected, corrective measures are
taken and the particular job is rerun.
3. At the end of each individual job, Custodian will run through the list of
error handlers. If an error is detected, corrective measures are taken
and the particular job is rerun.
error handlers that have is_monitor == False. If an error is detected,
corrective measures are taken and the particular job is rerun.
.. attribute:: max_errors
Maximum number of errors allowed.
.. attribute:: handlers
Error handlers that are not Monitors.
.. attribute:: monitors
Error handlers that are Monitors, i.e., handlers that monitor a job
as it is being run.
.. attribute:: polling_time_step
The length of time in seconds between steps in which a job is
checked for completion.
.. attribute:: monitor_freq
The number of polling steps before monitoring occurs. For example,
if you have a polling_time_step of 10 seconds and a monitor_freq of
30, this means that Custodian uses the monitors to check for errors
every 30 x 10 = 300 seconds, i.e., 5 minutes.
"""

def __init__(self, handlers, jobs, max_errors=1):
def __init__(self, handlers, jobs, max_errors=1, polling_time_step=10,
monitor_freq=30):
"""
Args:
handlers:
Expand All @@ -46,11 +75,22 @@ def __init__(self, handlers, jobs, max_errors=1):
double-relaxation.
max_errors:
Maximum number of errors allowed before exiting.
polling_time_step:
The length of time in seconds between steps in which a
job is checked for completion. Defaults to 10 seconds.
monitor_freq:
The number of polling steps before monitoring occurs. For
example, if you have a polling_time_step of 10 seconds and a
monitor_freq of 30, this means that Custodian uses the
monitors to check for errors every 30 x 10 = 300 seconds,
i.e., 5 minutes.
"""
self.max_errors = max_errors
self.jobs = jobs
self.handlers = filter(lambda x: not x.is_monitor, handlers)
self.monitors = filter(lambda x: x.is_monitor, handlers)
self.polling_time_step = polling_time_step
self.monitor_freq = monitor_freq

def run(self):
"""
Expand All @@ -67,8 +107,7 @@ def run(self):
for attempt in xrange(self.max_errors):
logging.info(
"Starting job no. {} ({}) attempt no. {}. Errors thus far"
" = {}.".format(i + 1, job.name, attempt + 1,
total_errors))
" = {}.".format(i + 1, job.name, attempt + 1, total_errors))

# If this is the start of the job, do the setup.
if not run_log[-1]["corrections"]:
Expand All @@ -79,15 +118,17 @@ def run(self):
# corrections.
error = False

# While the job is running, we use the handlers that are
# monitors to monitor the job.
if isinstance(p, subprocess.Popen):
if self.monitors:
n = 0
while True:
n += 1
time.sleep(10)
time.sleep(self.polling_time_step)
if p.poll() is not None:
break
if n % 30 == 0:
if n % self.monitor_freq == 0:
for h in self.monitors:
if h.check():
p.terminate()
Expand All @@ -99,6 +140,8 @@ def run(self):
else:
p.wait()

# If there are no errors *during* the run, we now check for
# errors *after* the run using handlers that are not monitors.
if not error:
for h in self.handlers:
if h.check():
Expand Down
Binary file modified docs/_build/doctrees/changelog.doctree
Binary file not shown.
Binary file modified docs/_build/doctrees/custodian.ansible.doctree
Binary file not shown.
Binary file modified docs/_build/doctrees/custodian.doctree
Binary file not shown.
Binary file modified docs/_build/doctrees/custodian.vasp.doctree
Binary file not shown.
Binary file modified docs/_build/doctrees/environment.pickle
Binary file not shown.
Binary file modified docs/_build/doctrees/index.doctree
Binary file not shown.
Binary file modified docs/_build/doctrees/modules.doctree
Binary file not shown.
4 changes: 2 additions & 2 deletions docs/_build/html/.buildinfo
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 2e05ab5fe7ad09614b28c350746b3bc5
tags: a205e9ed8462ae86fdd2f73488852ba9
config: 5cb0be43734da8edda5b762e55bd67d5
tags: fbb0d17656682115ca4d033fb2f83ba1
13 changes: 7 additions & 6 deletions docs/_build/html/_modules/custodian/ansible/actions.html
@@ -1,3 +1,4 @@

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

Expand All @@ -6,15 +7,15 @@
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />

<title>custodian.ansible.actions &mdash; custodian 0.3.3 documentation</title>
<title>custodian.ansible.actions &mdash; custodian 0.3.4b documentation</title>

<link rel="stylesheet" href="../../../_static/proBlue.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />

<script type="text/javascript">
var DOCUMENTATION_OPTIONS = {
URL_ROOT: '../../../',
VERSION: '0.3.3',
VERSION: '0.3.4b',
COLLAPSE_INDEX: false,
FILE_SUFFIX: '.html',
HAS_SOURCE: true
Expand All @@ -24,7 +25,7 @@
<script type="text/javascript" src="../../../_static/underscore.js"></script>
<script type="text/javascript" src="../../../_static/doctools.js"></script>
<link rel="shortcut icon" href="../../../_static/favicon.ico"/>
<link rel="top" title="custodian 0.3.3 documentation" href="../../../index.html" />
<link rel="top" title="custodian 0.3.4b documentation" href="../../../index.html" />
<link rel="up" title="Module code" href="../../index.html" />

<script type="text/javascript">
Expand All @@ -44,7 +45,7 @@ <h3>Navigation</h3>
<li class="right" >
<a href="../../../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
<li><a href="../../../index.html">custodian 0.3.3 documentation</a> &raquo;</li>
<li><a href="../../../index.html">custodian 0.3.4b documentation</a> &raquo;</li>
<li><a href="../../index.html" accesskey="U">Module code</a> &raquo;</li>
</ul>
</div>
Expand Down Expand Up @@ -314,14 +315,14 @@ <h3>Navigation</h3>
<li class="right" >
<a href="../../../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
<li><a href="../../../index.html">custodian 0.3.3 documentation</a> &raquo;</li>
<li><a href="../../../index.html">custodian 0.3.4b documentation</a> &raquo;</li>
<li><a href="../../index.html" >Module code</a> &raquo;</li>
</ul>
</div>

<div class="footer">
&copy; Copyright 2013, Shyue Ping Ong.
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.1.3.
</div>
<div class="footer">This page uses <a href="http://analytics.google.com/">
Google Analytics</a> to collect statistics. You can disable it by blocking
Expand Down
13 changes: 7 additions & 6 deletions docs/_build/html/_modules/custodian/ansible/intepreter.html
@@ -1,3 +1,4 @@

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

Expand All @@ -6,15 +7,15 @@
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />

<title>custodian.ansible.intepreter &mdash; custodian 0.3.3 documentation</title>
<title>custodian.ansible.intepreter &mdash; custodian 0.3.4b documentation</title>

<link rel="stylesheet" href="../../../_static/proBlue.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />

<script type="text/javascript">
var DOCUMENTATION_OPTIONS = {
URL_ROOT: '../../../',
VERSION: '0.3.3',
VERSION: '0.3.4b',
COLLAPSE_INDEX: false,
FILE_SUFFIX: '.html',
HAS_SOURCE: true
Expand All @@ -24,7 +25,7 @@
<script type="text/javascript" src="../../../_static/underscore.js"></script>
<script type="text/javascript" src="../../../_static/doctools.js"></script>
<link rel="shortcut icon" href="../../../_static/favicon.ico"/>
<link rel="top" title="custodian 0.3.3 documentation" href="../../../index.html" />
<link rel="top" title="custodian 0.3.4b documentation" href="../../../index.html" />
<link rel="up" title="Module code" href="../../index.html" />

<script type="text/javascript">
Expand All @@ -44,7 +45,7 @@ <h3>Navigation</h3>
<li class="right" >
<a href="../../../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
<li><a href="../../../index.html">custodian 0.3.3 documentation</a> &raquo;</li>
<li><a href="../../../index.html">custodian 0.3.4b documentation</a> &raquo;</li>
<li><a href="../../index.html" accesskey="U">Module code</a> &raquo;</li>
</ul>
</div>
Expand Down Expand Up @@ -191,14 +192,14 @@ <h3>Navigation</h3>
<li class="right" >
<a href="../../../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
<li><a href="../../../index.html">custodian 0.3.3 documentation</a> &raquo;</li>
<li><a href="../../../index.html">custodian 0.3.4b documentation</a> &raquo;</li>
<li><a href="../../index.html" >Module code</a> &raquo;</li>
</ul>
</div>

<div class="footer">
&copy; Copyright 2013, Shyue Ping Ong.
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.2b1.
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.1.3.
</div>
<div class="footer">This page uses <a href="http://analytics.google.com/">
Google Analytics</a> to collect statistics. You can disable it by blocking
Expand Down

0 comments on commit 1d32dbd

Please sign in to comment.