Skip to content

Commit

Permalink
Merge pull request #15 from pypr/minor-fix
Browse files Browse the repository at this point in the history
BUG: Fix more subtle errors.
  • Loading branch information
prabhuramachandran committed Oct 24, 2018
2 parents 170a9cf + d7c33f7 commit bb33e7b
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 22 deletions.
62 changes: 43 additions & 19 deletions automan/automation.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,14 +103,17 @@ def _check_status_of_requires(self, task):
return 'running'

def _check_status_of_task(self, task):
complete = False
try:
complete = task.complete()
self.task_status[task] = 'done' if complete else 'running'
except Exception:
complete = 'error'
self.task_status[task] = 'error'
return complete
if self.task_status.get(task) == 'not started':
return False
else:
complete = False
try:
complete = task.complete()
self.task_status[task] = 'done' if complete else 'running'
except Exception:
complete = 'error'
self.task_status[task] = 'error'
return complete

def _get_tasks_with_status(self, status):
return [
Expand All @@ -127,7 +130,8 @@ def _is_output_registered(self, task):
self.repeat_tasks.add(task)
return True
else:
self.task_outputs.add(output_str)
if output:
self.task_outputs.add(output_str)
return False

def _run(self, task):
Expand Down Expand Up @@ -160,9 +164,9 @@ def _wait_for_running_tasks(self, wait):
time.sleep(wait)
running = self._get_tasks_with_status('running')
errors = self._get_tasks_with_status('error')
print("{n_err} jobs had errors.".format(n_err=len(errors)))
print("Please fix the issues and re-run.")
return len(errors)
n_err = len(errors)
print("{n_err} jobs had errors.".format(n_err=n_err))
return n_err

# #### Public protocol ##############################################

Expand Down Expand Up @@ -213,6 +217,8 @@ def run(self, wait=5):
n_errors = self._wait_for_running_tasks(wait)
if n_errors == 0:
print("Finished!")
else:
print("Please fix the issues and re-run.")
return n_errors


Expand Down Expand Up @@ -256,17 +262,22 @@ def __init__(self, command, output_dir, job_info=None, depends=None):
self._job = None

def __str__(self):
return ('%s with output directory: %s ' %
(self.__class__.__name__, os.path.basename(self.output_dir)))
return ('%s, output in: %s ' %
(self.__class__.__name__, self.output_dir))

# #### Public protocol ###########################################

def complete(self):
"""Should return True/False indicating success of task.
"""
job_proxy = self.job_proxy
if job_proxy is None or self._finished:
if job_proxy is None:
return self._is_done()
elif self._finished:
if os.path.exists(self._error_status_file):
raise RuntimeError(
'Error in task with output in %s.' % self.output_dir
)
else:
return self._copy_output_and_check_status()

Expand Down Expand Up @@ -382,11 +393,18 @@ def _is_done(self):
"""
if not os.path.exists(self.output_dir):
return False
info_fname = self._get_info_filename()
if not info_fname or not os.path.exists(info_fname):
job_status = self.job.status()
if job_status == 'error':
# If job information exists, it trumps everything else
# as it stores the process exit status which is usually
# a much better indicator of the job status.
return False
d = json.load(open(info_fname))
return d.get('completed')
else:
info_fname = self._get_info_filename()
if not info_fname or not os.path.exists(info_fname):
return False
d = json.load(open(info_fname))
return d.get('completed')

def _get_info_filename(self):
files = glob.glob(os.path.join(self.output_dir, '*.info'))
Expand Down Expand Up @@ -836,6 +854,12 @@ def _make_task(self, obj):
def __str__(self):
return 'Problem named %s' % self.problem.get_name()

def complete(self):
if len(self.match) == 0:
return super(SolveProblem, self).complete()
else:
return all(r.complete() for r in self.requires())

def output(self):
return self.problem.get_outputs()

Expand Down
50 changes: 47 additions & 3 deletions automan/tests/test_automation.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,47 @@ def test_task_runner_checks_for_error_in_running_tasks(self, m_t_cores):
self.assertTrue(os.path.exists(ct2_dir))
self.assertFalse(os.path.exists(ct1_dir))

@mock.patch('automan.jobs.total_cores', return_value=2)
def test_task_runner_doesnt_block_on_problem_with_error(self, m_t_cores):
# Given
class A(Problem):
def get_requires(self):
cmd = ('python -c "import sys, time; time.sleep(0.1); '
'sys.exit(1)"')
ct = CommandTask(cmd, output_dir=self.input_path())
return [('task1', ct)]

def run(self):
self.make_output_dir()

s = self._make_scheduler()

# When
task = RunAll(
simulation_dir=self.sim_dir, output_dir=self.output_dir,
problem_classes=[A]
)
t = TaskRunner(tasks=[task], scheduler=s)
n_error = t.run(wait=0.1)

# Then
self.assertTrue(n_error > 0)

# For the case where there is a match expression
# When
problem = A(self.sim_dir, self.output_dir)
problem.clean()

task = RunAll(
simulation_dir=self.sim_dir, output_dir=self.output_dir,
problem_classes=[A], match='*task1'
)
t = TaskRunner(tasks=[task], scheduler=s)
n_error = t.run(wait=0.1)

# Then
self.assertTrue(n_error > 0)

def test_task_runner_does_not_add_repeated_tasks(self):
# Given
s = self._make_scheduler()
Expand Down Expand Up @@ -459,7 +500,7 @@ def test_job_with_error_is_handled_correctly(self):
if s == 'done':
self.assertTrue(t.complete())
if s == 'error':
self.assertFalse(t.complete())
self.assertRaises(RuntimeError, t.complete)


class TestCommandTask(TestAutomationBase):
Expand Down Expand Up @@ -518,13 +559,16 @@ def test_command_tasks_handles_errors_correctly(self):
pass

# Then
self.assertFalse(t.complete())
self.assertRaises(RuntimeError, t.complete)
self.assertEqual(t.job_proxy.status(), 'error')

# A new command task should still detect that the run failed, even
# though the output directory exists.
# though the output directory exists. In this case, it should
# return False and not raise an error.

# Given
t = CommandTask(cmd, output_dir=self.sim_dir)

# When/Then
self.assertFalse(t.complete())

Expand Down

0 comments on commit bb33e7b

Please sign in to comment.