From 0821c2a6fafee39c253f5338a2c1e47868cc2950 Mon Sep 17 00:00:00 2001
From: Vasileios Karakasis <karakasis@cscs.ch>
Date: Mon, 9 Sep 2019 22:33:23 +0200
Subject: [PATCH 1/4] Improve and extend topological sort of test cases

- Algorithm's complexity is now O(V+E)
- Return additional information that will help execution policies in executing
  the test cases. The nodes per level are returned as well as the nodes arranged
  per level that can be safely cleaned up.
---
 reframe/frontend/dependency.py | 112 +++++++++++++++------------------
 unittests/test_policies.py     |   2 +-
 2 files changed, 53 insertions(+), 61 deletions(-)

diff --git a/reframe/frontend/dependency.py b/reframe/frontend/dependency.py
index a1f1ffbfee..7084d4981e 100644
--- a/reframe/frontend/dependency.py
+++ b/reframe/frontend/dependency.py
@@ -145,69 +145,61 @@ def _reverse_deps(graph):
 
 
 def toposort(graph):
-    # NOTES on implementation:
-    #
-    # 1. This function assumes a directed acyclic graph.
-    # 2. The purpose of this function is to topologically sort the test cases,
-    #    not only the tests. However, since we do not allow cycles between
-    #    tests in any case (even if this could be classified a
-    #    pseudo-dependency), we first do a topological sort of the tests and we
-    #    subsequently sort the test cases by partition and by programming
-    #    environment.
-    # 3. To achieve this 3-step sorting with a single sort operations, we rank
-    #    the test cases by associating them with an integer key based on the
-    #    result of the topological sort of the tests and by choosing an
-    #    arbitrary ordering of the partitions and the programming environment.
-
     test_deps = _reduce_deps(graph)
     rev_deps  = _reverse_deps(test_deps)
+    levels = {}
+    visited = util.OrderedSet()
+
+    def dfs_visit(node, lvl):
+        lvl += 1
+        try:
+            levels[node] = max(levels[node], lvl)
+        except KeyError:
+            levels[node] = lvl
+
+        for adj in rev_deps[node]:
+            if adj not in visited:
+                dfs_visit(adj, lvl)
+
+        visited.add(node)
 
-    # We do a BFS traversal from each root
-    visited = {}
     roots = set(t for t, deps in test_deps.items() if not deps)
     for r in roots:
-        unvisited = util.OrderedSet([r])
-        visited[r] = util.OrderedSet()
-        while unvisited:
-            # Next node is one whose all dependencies are already visited
-            # FIXME: This makes sorting's complexity O(V^2)
-            node = None
-            for n in unvisited:
-                if test_deps[n] <= visited[r]:
-                    node = n
-                    break
-
-            # If node is None, graph has a cycle and this is a bug; this
-            # function assumes acyclic graphs only
-            assert node is not None
-
-            unvisited.remove(node)
-            adjacent = rev_deps[node]
-            unvisited |= util.OrderedSet(
-                n for n in adjacent if n not in visited
-            )
-            visited[r].add(node)
-
-    # Combine all individual sequences into a single one
-    ordered_tests = util.OrderedSet()
-    for tests in visited.values():
-        ordered_tests |= tests
-
-    # Get all partitions and programming environments from test cases
-    partitions = util.OrderedSet()
-    environs = util.OrderedSet()
+        dfs_visit(r, -1)
+
+    # Group by level number
+    nodes_per_level = {}
+    for node, lvl in levels.items():
+        try:
+            nodes_per_level[lvl].append(node)
+        except KeyError:
+            nodes_per_level[lvl] = [node]
+
+    # Index test cases by test name
+    cases_by_name = {}
     for c in graph.keys():
-        partitions.add(c.partition.fullname)
-        environs.add(c.environ.name)
-
-    # Rank test cases; we first need to calculate the base for the rank number
-    base = max(len(partitions), len(environs)) + 1
-    ranks = {}
-    for i, test in enumerate(ordered_tests):
-        for j, part in enumerate(partitions):
-            for k, env in enumerate(environs):
-                ranks[test, part, env] = i*base**2 + j*base + k
-
-    return sorted(graph.keys(),
-                  key=lambda x: ranks[x.check.name,
-                                      x.partition.fullname, x.environ.name])
+        try:
+            cases_by_name[c.check.name].append(c)
+        except KeyError:
+            cases_by_name[c.check.name] = [c]
+
+    # Now arrange the test cases based on the topologically sorted tests
+    ret = []
+    for lvl, nodes in nodes_per_level.items():
+        for n in nodes:
+            ret += cases_by_name[n]
+
+    # Assign nodes to the levels that they can be safely cleaned up
+    cleanup_seq = {}
+    for node, deps in rev_deps.items():
+        if deps:
+            lvl = max(levels[n] for n in deps)
+        else:
+            lvl = levels[node]
+
+        try:
+            cleanup_seq[lvl].append(node)
+        except KeyError:
+            cleanup_seq[lvl] = [node]
+
+    return ret, nodes_per_level, cleanup_seq
diff --git a/unittests/test_policies.py b/unittests/test_policies.py
index 0da4acde96..b26db4aff9 100644
--- a/unittests/test_policies.py
+++ b/unittests/test_policies.py
@@ -742,7 +742,7 @@ def test_toposort(self):
             executors.generate_testcases([t0, t1, t2, t3, t4,
                                           t5, t6, t7, t8])
         )
-        cases = dependency.toposort(deps)
+        cases, _, _ = dependency.toposort(deps)
         cases_order = []
         tests = util.OrderedSet()
         visited_tests = set()

From 6fef25d90128503fe8ea6f91e407b3760324d00d Mon Sep 17 00:00:00 2001
From: Vasileios Karakasis <karakasis@cscs.ch>
Date: Mon, 9 Sep 2019 22:48:16 +0200
Subject: [PATCH 2/4] Minor style improvement

---
 unittests/test_policies.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unittests/test_policies.py b/unittests/test_policies.py
index b26db4aff9..7ac76f5fae 100644
--- a/unittests/test_policies.py
+++ b/unittests/test_policies.py
@@ -742,7 +742,7 @@ def test_toposort(self):
             executors.generate_testcases([t0, t1, t2, t3, t4,
                                           t5, t6, t7, t8])
         )
-        cases, _, _ = dependency.toposort(deps)
+        cases, *_ = dependency.toposort(deps)
         cases_order = []
         tests = util.OrderedSet()
         visited_tests = set()

From d3acbc146b2d16c124aae8eba0258d56f7c27c8b Mon Sep 17 00:00:00 2001
From: Vasileios Karakasis <karakasis@cscs.ch>
Date: Wed, 11 Sep 2019 00:04:05 +0200
Subject: [PATCH 3/4] Simplify topological sort algorithm and fix a bug in its
 impl

- The algorithm is just a DFS visit of the nodes. The trick is to mark a node as
  visited after all of its outcoming edges have been explored.
- The algorithm operates directly on the test dependencies and does not
  construct the reverse dependency graph.
---
 reframe/frontend/dependency.py | 70 +++++++---------------------------
 unittests/test_policies.py     |  2 +-
 2 files changed, 15 insertions(+), 57 deletions(-)

diff --git a/reframe/frontend/dependency.py b/reframe/frontend/dependency.py
index 7084d4981e..982da42bf0 100644
--- a/reframe/frontend/dependency.py
+++ b/reframe/frontend/dependency.py
@@ -2,6 +2,9 @@
 # Test case graph functionality
 #
 
+import collections
+import itertools
+
 import reframe as rfm
 import reframe.utility as util
 from reframe.core.exceptions import DependencyError
@@ -46,7 +49,9 @@ def resolve_dep(target, from_map, *args):
     # e stands for environment
     # t stands for target
 
-    graph = {}
+    # We use an ordered dict here, because we need to keep the order of
+    # partitions and environments
+    graph = collections.OrderedDict()
     for c in cases:
         cname = c.check.name
         pname = c.partition.fullname
@@ -131,49 +136,21 @@ def validate_deps(graph):
         sources -= visited
 
 
-def _reverse_deps(graph):
-    ret = {}
-    for n, deps in graph.items():
-        ret.setdefault(n, util.OrderedSet({}))
-        for d in deps:
-            try:
-                ret[d] |= {n}
-            except KeyError:
-                ret[d] = util.OrderedSet({n})
-
-    return ret
-
-
 def toposort(graph):
     test_deps = _reduce_deps(graph)
-    rev_deps  = _reverse_deps(test_deps)
-    levels = {}
     visited = util.OrderedSet()
 
-    def dfs_visit(node, lvl):
-        lvl += 1
-        try:
-            levels[node] = max(levels[node], lvl)
-        except KeyError:
-            levels[node] = lvl
-
-        for adj in rev_deps[node]:
+    def visit(node):
+        # Do a DFS visit of all the adjacent nodes
+        for adj in test_deps[node]:
             if adj not in visited:
-                dfs_visit(adj, lvl)
+                visit(adj)
 
         visited.add(node)
 
-    roots = set(t for t, deps in test_deps.items() if not deps)
-    for r in roots:
-        dfs_visit(r, -1)
-
-    # Group by level number
-    nodes_per_level = {}
-    for node, lvl in levels.items():
-        try:
-            nodes_per_level[lvl].append(node)
-        except KeyError:
-            nodes_per_level[lvl] = [node]
+    for r in test_deps.keys():
+        if r not in visited:
+            visit(r)
 
     # Index test cases by test name
     cases_by_name = {}
@@ -183,23 +160,4 @@ def dfs_visit(node, lvl):
         except KeyError:
             cases_by_name[c.check.name] = [c]
 
-    # Now arrange the test cases based on the topologically sorted tests
-    ret = []
-    for lvl, nodes in nodes_per_level.items():
-        for n in nodes:
-            ret += cases_by_name[n]
-
-    # Assign nodes to the levels that they can be safely cleaned up
-    cleanup_seq = {}
-    for node, deps in rev_deps.items():
-        if deps:
-            lvl = max(levels[n] for n in deps)
-        else:
-            lvl = levels[node]
-
-        try:
-            cleanup_seq[lvl].append(node)
-        except KeyError:
-            cleanup_seq[lvl] = [node]
-
-    return ret, nodes_per_level, cleanup_seq
+    return list(itertools.chain(*(cases_by_name[n] for n in visited)))
diff --git a/unittests/test_policies.py b/unittests/test_policies.py
index 7ac76f5fae..0da4acde96 100644
--- a/unittests/test_policies.py
+++ b/unittests/test_policies.py
@@ -742,7 +742,7 @@ def test_toposort(self):
             executors.generate_testcases([t0, t1, t2, t3, t4,
                                           t5, t6, t7, t8])
         )
-        cases, *_ = dependency.toposort(deps)
+        cases = dependency.toposort(deps)
         cases_order = []
         tests = util.OrderedSet()
         visited_tests = set()

From 18904fee89e57bc56a1b791bf944c516e1dbedeb Mon Sep 17 00:00:00 2001
From: Vasileios Karakasis <karakasis@cscs.ch>
Date: Sat, 14 Sep 2019 18:57:28 +0200
Subject: [PATCH 4/4] Add path detection to the topological sort

Passing cyclic graphs is a bug, so we just raise and AssertionError
---
 reframe/frontend/dependency.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/reframe/frontend/dependency.py b/reframe/frontend/dependency.py
index 982da42bf0..192ccb7424 100644
--- a/reframe/frontend/dependency.py
+++ b/reframe/frontend/dependency.py
@@ -140,17 +140,23 @@ def toposort(graph):
     test_deps = _reduce_deps(graph)
     visited = util.OrderedSet()
 
-    def visit(node):
+    def visit(node, path):
+        # We assume an acyclic graph
+        assert node not in path
+
+        path.add(node)
+
         # Do a DFS visit of all the adjacent nodes
         for adj in test_deps[node]:
             if adj not in visited:
-                visit(adj)
+                visit(adj, path)
 
+        path.pop()
         visited.add(node)
 
     for r in test_deps.keys():
         if r not in visited:
-            visit(r)
+            visit(r, util.OrderedSet())
 
     # Index test cases by test name
     cases_by_name = {}