Merge pull request #15 from pypr/minor-fix

BUG: Fix more subtle errors.
pypr · Oct 24, 2018 · bb33e7b · bb33e7b
2 parents 170a9cf + d7c33f7
commit bb33e7b
Show file tree

Hide file tree

Showing 2 changed files with 90 additions and 22 deletions.
diff --git a/automan/automation.py b/automan/automation.py
@@ -103,14 +103,17 @@ def _check_status_of_requires(self, task):
             return 'running'
 
     def _check_status_of_task(self, task):
-        complete = False
-        try:
-            complete = task.complete()
-            self.task_status[task] = 'done' if complete else 'running'
-        except Exception:
-            complete = 'error'
-            self.task_status[task] = 'error'
-        return complete
+        if self.task_status.get(task) == 'not started':
+            return False
+        else:
+            complete = False
+            try:
+                complete = task.complete()
+                self.task_status[task] = 'done' if complete else 'running'
+            except Exception:
+                complete = 'error'
+                self.task_status[task] = 'error'
+            return complete
 
     def _get_tasks_with_status(self, status):
         return [
@@ -127,7 +130,8 @@ def _is_output_registered(self, task):
             self.repeat_tasks.add(task)
             return True
         else:
-            self.task_outputs.add(output_str)
+            if output:
+                self.task_outputs.add(output_str)
             return False
 
     def _run(self, task):
@@ -160,9 +164,9 @@ def _wait_for_running_tasks(self, wait):
             time.sleep(wait)
             running = self._get_tasks_with_status('running')
         errors = self._get_tasks_with_status('error')
-        print("{n_err} jobs had errors.".format(n_err=len(errors)))
-        print("Please fix the issues and re-run.")
-        return len(errors)
+        n_err = len(errors)
+        print("{n_err} jobs had errors.".format(n_err=n_err))
+        return n_err
 
     # #### Public protocol  ##############################################
 
@@ -213,6 +217,8 @@ def run(self, wait=5):
         n_errors = self._wait_for_running_tasks(wait)
         if n_errors == 0:
             print("Finished!")
+        else:
+            print("Please fix the issues and re-run.")
         return n_errors
 
 
@@ -256,17 +262,22 @@ def __init__(self, command, output_dir, job_info=None, depends=None):
         self._job = None
 
     def __str__(self):
-        return ('%s with output directory: %s ' %
-                (self.__class__.__name__, os.path.basename(self.output_dir)))
+        return ('%s, output in: %s ' %
+                (self.__class__.__name__, self.output_dir))
 
     # #### Public protocol ###########################################
 
     def complete(self):
         """Should return True/False indicating success of task.
         """
         job_proxy = self.job_proxy
-        if job_proxy is None or self._finished:
+        if job_proxy is None:
             return self._is_done()
+        elif self._finished:
+            if os.path.exists(self._error_status_file):
+                raise RuntimeError(
+                    'Error in task with output in %s.' % self.output_dir
+                )
         else:
             return self._copy_output_and_check_status()
 
@@ -382,11 +393,18 @@ def _is_done(self):
         """
         if not os.path.exists(self.output_dir):
             return False
-        info_fname = self._get_info_filename()
-        if not info_fname or not os.path.exists(info_fname):
+        job_status = self.job.status()
+        if job_status == 'error':
+            # If job information exists, it trumps everything else
+            # as it stores the process exit status which is usually
+            # a much better indicator of the job status.
             return False
-        d = json.load(open(info_fname))
-        return d.get('completed')
+        else:
+            info_fname = self._get_info_filename()
+            if not info_fname or not os.path.exists(info_fname):
+                return False
+            d = json.load(open(info_fname))
+            return d.get('completed')
 
     def _get_info_filename(self):
         files = glob.glob(os.path.join(self.output_dir, '*.info'))
@@ -836,6 +854,12 @@ def _make_task(self, obj):
     def __str__(self):
         return 'Problem named %s' % self.problem.get_name()
 
+    def complete(self):
+        if len(self.match) == 0:
+            return super(SolveProblem, self).complete()
+        else:
+            return all(r.complete() for r in self.requires())
+
     def output(self):
         return self.problem.get_outputs()
 

diff --git a/automan/tests/test_automation.py b/automan/tests/test_automation.py
@@ -164,6 +164,47 @@ def test_task_runner_checks_for_error_in_running_tasks(self, m_t_cores):
         self.assertTrue(os.path.exists(ct2_dir))
         self.assertFalse(os.path.exists(ct1_dir))
 
+    @mock.patch('automan.jobs.total_cores', return_value=2)
+    def test_task_runner_doesnt_block_on_problem_with_error(self, m_t_cores):
+        # Given
+        class A(Problem):
+            def get_requires(self):
+                cmd = ('python -c "import sys, time; time.sleep(0.1); '
+                       'sys.exit(1)"')
+                ct = CommandTask(cmd, output_dir=self.input_path())
+                return [('task1', ct)]
+
+            def run(self):
+                self.make_output_dir()
+
+        s = self._make_scheduler()
+
+        # When
+        task = RunAll(
+            simulation_dir=self.sim_dir, output_dir=self.output_dir,
+            problem_classes=[A]
+        )
+        t = TaskRunner(tasks=[task], scheduler=s)
+        n_error = t.run(wait=0.1)
+
+        # Then
+        self.assertTrue(n_error > 0)
+
+        # For the case where there is a match expression
+        # When
+        problem = A(self.sim_dir, self.output_dir)
+        problem.clean()
+
+        task = RunAll(
+            simulation_dir=self.sim_dir, output_dir=self.output_dir,
+            problem_classes=[A], match='*task1'
+        )
+        t = TaskRunner(tasks=[task], scheduler=s)
+        n_error = t.run(wait=0.1)
+
+        # Then
+        self.assertTrue(n_error > 0)
+
     def test_task_runner_does_not_add_repeated_tasks(self):
         # Given
         s = self._make_scheduler()
@@ -459,7 +500,7 @@ def test_job_with_error_is_handled_correctly(self):
             if s == 'done':
                 self.assertTrue(t.complete())
             if s == 'error':
-                self.assertFalse(t.complete())
+                self.assertRaises(RuntimeError, t.complete)
 
 
 class TestCommandTask(TestAutomationBase):
@@ -518,13 +559,16 @@ def test_command_tasks_handles_errors_correctly(self):
             pass
 
         # Then
-        self.assertFalse(t.complete())
+        self.assertRaises(RuntimeError, t.complete)
         self.assertEqual(t.job_proxy.status(), 'error')
 
         # A new command task should still detect that the run failed, even
-        # though the output directory exists.
+        # though the output directory exists.  In this case, it should
+        # return False and not raise an error.
+
         # Given
         t = CommandTask(cmd, output_dir=self.sim_dir)
+
         # When/Then
         self.assertFalse(t.complete())