Modify connectome-based benchmark scripts to enable varying the number of LPUs from Shih et al. used in the benchmark, taken in order of their maximal contribution to the total number of connections.
lebedov committed Oct 2, 2015
1 parent eff0259 commit 9af04bd
Showing 3 changed files with 125 additions and 8 deletions.
71 changes: 71 additions & 0 deletions examples/timing_connectome/get_index_order.py
@@ -0,0 +1,71 @@
#!/usr/bin/env python

"""
Given a matrix of integers x that represent the number of connections between row
i and column j (where x[i, i] = 0) and starting with the indices i and j for
which x[i, j]+x[j, i] is maximal over all (i, j), order the pairs of indices
(i, j) such that each x[i_curr, j_curr]+x[j_curr, i_curr] increases the total
sum of x[i, j]+x[j, i] over already seen indices by the maximum amount possible
for the remaining index pairs.
"""

import itertools

import numpy as np

def interleave(a, b):
    """
    Interleave elements of two lists of equal length, e.g.
    interleave([1, 3], [2, 4]) == [1, 2, 3, 4].
    """

    return list(itertools.chain.from_iterable(itertools.izip(a, b)))

def get_index_order(x):
    x = np.asarray(x)
    assert x.shape[0] == x.shape[1]
    N = x.shape[0]

    # Find the indices (i, j) for which x[i, j]+x[j, i] is maximal (diagonal
    # elements x[i, i] are ignored). Interleaving the (i, j) and (j, i) pairs
    # lets the argmax over the pairwise sums be mapped back to an index pair:
    first = [(i, j) for i, j in itertools.product(xrange(N), xrange(N)) if i > j]
    second = [(j, i) for i, j in itertools.product(xrange(N), xrange(N)) if i > j]

    inds = interleave(first, second)
    x_ordered = [x[i, j] for i, j in inds]
    x_ordered_summed = [a+b for a, b in zip(x_ordered[::2], x_ordered[1::2])]
    i_ordered_summed_max = np.argmax(x_ordered_summed)
    i_ordered_max = i_ordered_summed_max*2
    ind_max = inds[i_ordered_max]

    # Indices already added:
    added_inds = [ind_max[0], ind_max[1]]

    # Remaining indices to consider:
    remaining_inds = range(N)
    for ind in added_inds:
        remaining_inds.remove(ind)

    while remaining_inds:

        # For each remaining index i and each index j already added, compute
        # the increase due to adding values x[i, j]+x[j, i]:
        sums = []
        for i in remaining_inds:
            s = sum([x[i, j]+x[j, i] for j in added_inds])
            sums.append(s)

        # Add the index corresponding to the maximum increase to added_inds
        # and remove it from remaining_inds:
        i_max = np.argmax(sums)
        added_inds.append(remaining_inds[i_max])
        del remaining_inds[i_max]
    return np.asarray(added_inds)

if __name__ == '__main__':
    # Create example matrix:
    N = 10
    x = np.random.randint(0, 20, (N, N))
    for i in xrange(N):
        x[i, i] = 0

    add_inds = get_index_order(x)
    print add_inds
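
As a sanity check, here is a minimal sketch of the greedy ordering applied to a small hand-built matrix (the 3x3 values below are made up for illustration, not benchmark data; Python 2, since the module relies on itertools.izip and xrange):

import numpy as np
from get_index_order import get_index_order

# Hypothetical connection-count matrix; the pair (1, 2) has the largest
# combined count, x[1, 2]+x[2, 1] = 4+6 = 10:
x = np.array([[0, 5, 1],
              [2, 0, 4],
              [3, 6, 0]])

print get_index_order(x)   # -> [2 1 0]: the maximal pair comes first,
                           # then the only remaining index, 0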
31 changes: 27 additions & 4 deletions examples/timing_connectome/timing_connectome_demo_gpu.py
@@ -507,10 +507,11 @@ def emulate(conn_mat, scaling, n_gpus, steps, use_mps, cache_file='cache.db'):
 
 if __name__ == '__main__':
     import neurokernel.mpi_relaunch
+    import get_index_order
 
     conn_mat_file = 's2.xlsx'
     scaling = 1
-    max_steps = 50
+    max_steps = 500
     n_gpus = 4
     use_mps = False
 
@@ -526,6 +527,8 @@ def emulate(conn_mat, scaling, n_gpus, steps, use_mps, cache_file='cache.db'):
                          help='Connection number scaling factor [default: %s]' % scaling)
     parser.add_argument('-m', '--max_steps', default=max_steps, type=int,
                         help='Maximum number of steps [default: %s]' % max_steps)
+    parser.add_argument('-u', '--lpus', default=n_gpus, type=int,
+                        help='Number of LPUs [default: %s]' % n_gpus)
     parser.add_argument('-g', '--gpus', default=n_gpus, type=int,
                         help='Number of GPUs [default: %s]' % n_gpus)
     parser.add_argument('-p', '--use_mps', action='store_true',
@@ -542,7 +545,27 @@ def emulate(conn_mat, scaling, n_gpus, steps, use_mps, cache_file='cache.db'):
                                 mpi_comm=MPI.COMM_WORLD,
                                 multiline=True)
 
-    conn_mat = pd.read_excel('s2.xlsx',
-                             sheetname='Connectivity Matrix').astype(int).as_matrix()
+    df = pd.read_excel('s2.xlsx',
+                       sheetname='Connectivity Matrix')
 
-    print (args.gpus,)+emulate(conn_mat, args.scaling, args.gpus, args.max_steps, args.use_mps)
+    # Select only the main LPUs in the olfaction, vision, and central complex systems:
+    lpu_list = ['AL', 'al', 'MB', 'mb', 'LH', 'lh', 'MED', 'med', 'LOB', 'lob',
+                'LOP', 'lop', 'OG', 'og', 'EB', 'FB', 'NOD', 'nod', 'PCB']
+    conn_mat = df.ix[lpu_list][lpu_list].astype(int).as_matrix()
+
+    # Get the order in which LPUs (denoted by index into `conn_mat`) should be
+    # added to maximize the number of ports added by each additional LPU:
+    ind_order = get_index_order.get_index_order(conn_mat)
+
+    # Make sure the specified number of LPUs to partition over the GPUs is at
+    # least as large as the number of GPUs and no larger than the list of
+    # allowed LPUs above:
+    assert args.lpus >= args.gpus
+    assert args.lpus <= len(lpu_list)
+
+    # Select which LPUs to partition over the GPUs:
+    conn_mat_sub = conn_mat[ind_order[:args.lpus, None], ind_order[:args.lpus]]
+
+    print (args.gpus, args.lpus)+emulate(conn_mat_sub, args.scaling,
+                                         args.gpus, args.max_steps,
+                                         args.use_mps)
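
The submatrix selection above relies on NumPy broadcasting: indexing with a column of row indices (shape (n, 1)) against a flat array of column indices (shape (n,)) pulls out the full submatrix over the chosen indices in the chosen order. A minimal standalone sketch (values made up for illustration):

import numpy as np

conn_mat = np.arange(16).reshape(4, 4)   # stand-in for the real matrix
ind_order = np.array([2, 0, 3, 1])       # e.g. an output of get_index_order
n = 2                                    # keep the first n LPUs of the order

# ind_order[:n, None] has shape (n, 1); broadcast against the (n,) column
# index array, it selects rows and columns 2 and 0 simultaneously:
sub = conn_mat[ind_order[:n, None], ind_order[:n]]
print sub   # -> [[10  8]
            #     [ 2  0]]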
31 changes: 27 additions & 4 deletions examples/timing_connectome/timing_connectome_demo_gpu_slow.py
@@ -518,10 +518,11 @@ def emulate(conn_mat, scaling, n_gpus, steps, use_mps, cache_file='cache.db'):
 
 if __name__ == '__main__':
     import neurokernel.mpi_relaunch
+    import get_index_order
 
     conn_mat_file = 's2.xlsx'
     scaling = 1
-    max_steps = 50
+    max_steps = 500
     n_gpus = 4
     use_mps = False
 
@@ -537,6 +538,8 @@ def emulate(conn_mat, scaling, n_gpus, steps, use_mps, cache_file='cache.db'):
                          help='Connection number scaling factor [default: %s]' % scaling)
     parser.add_argument('-m', '--max_steps', default=max_steps, type=int,
                         help='Maximum number of steps [default: %s]' % max_steps)
+    parser.add_argument('-u', '--lpus', default=n_gpus, type=int,
+                        help='Number of LPUs [default: %s]' % n_gpus)
     parser.add_argument('-g', '--gpus', default=n_gpus, type=int,
                         help='Number of GPUs [default: %s]' % n_gpus)
     parser.add_argument('-p', '--use_mps', action='store_true',
@@ -553,7 +556,27 @@ def emulate(conn_mat, scaling, n_gpus, steps, use_mps, cache_file='cache.db'):
                                 mpi_comm=MPI.COMM_WORLD,
                                 multiline=True)
 
-    conn_mat = pd.read_excel('s2.xlsx',
-                             sheetname='Connectivity Matrix').astype(int).as_matrix()
+    df = pd.read_excel('s2.xlsx',
+                       sheetname='Connectivity Matrix')
 
-    print (args.gpus,)+emulate(conn_mat, args.scaling, args.gpus, args.max_steps, args.use_mps)
+    # Select only the main LPUs in the olfaction, vision, and central complex systems:
+    lpu_list = ['AL', 'al', 'MB', 'mb', 'LH', 'lh', 'MED', 'med', 'LOB', 'lob',
+                'LOP', 'lop', 'OG', 'og', 'EB', 'FB', 'NOD', 'nod', 'PCB']
+    conn_mat = df.ix[lpu_list][lpu_list].astype(int).as_matrix()
+
+    # Get the order in which LPUs (denoted by index into `conn_mat`) should be
+    # added to maximize the number of ports added by each additional LPU:
+    ind_order = get_index_order.get_index_order(conn_mat)
+
+    # Make sure the specified number of LPUs to partition over the GPUs is at
+    # least as large as the number of GPUs and no larger than the list of
+    # allowed LPUs above:
+    assert args.lpus >= args.gpus
+    assert args.lpus <= len(lpu_list)
+
+    # Select which LPUs to partition over the GPUs:
+    conn_mat_sub = conn_mat[ind_order[:args.lpus, None], ind_order[:args.lpus]]
+
+    print (args.gpus, args.lpus)+emulate(conn_mat_sub, args.scaling,
+                                         args.gpus, args.max_steps,
+                                         args.use_mps)
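
With these changes both scripts accept -u/--lpus alongside -g/--gpus, so an invocation such as python timing_connectome_demo_gpu.py -u 6 -g 4 partitions the 6 most strongly connected LPUs over 4 GPUs. As a rough sketch of what the ordering buys, the snippet below (hypothetical; it assumes s2.xlsx and get_index_order.py sit in the working directory and reuses the same pandas calls as the scripts above) prints the total number of connections covered by each prefix of the computed order:

import pandas as pd
import get_index_order

lpu_list = ['AL', 'al', 'MB', 'mb', 'LH', 'lh', 'MED', 'med', 'LOB', 'lob',
            'LOP', 'lop', 'OG', 'og', 'EB', 'FB', 'NOD', 'nod', 'PCB']
df = pd.read_excel('s2.xlsx', sheetname='Connectivity Matrix')
conn_mat = df.ix[lpu_list][lpu_list].astype(int).as_matrix()

ind_order = get_index_order.get_index_order(conn_mat)
for n in xrange(2, len(ind_order)+1):
    sub = conn_mat[ind_order[:n, None], ind_order[:n]]
    print n, sub.sum()   # connections among the first n LPUs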
