Commit

Also include average step synchronization time in collected timing data, add scripts for running timing demos scaled over number of LPUs.
lebedov committed Mar 15, 2015
1 parent f0ae794 commit a328961
Showing 7 changed files with 117 additions and 33 deletions.
10 changes: 7 additions & 3 deletions examples/timing/run.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 """
-Run timing test (non-GPU).
+Run timing test (non-GPU) scaled over number of ports.
 """
 
 import numpy as np
@@ -15,16 +15,20 @@
 
 w = csv.writer(sys.stdout)
 for spikes in np.linspace(500, 15000, 10, dtype=int):
+    average_step_sync_time_list = []
     average_throughput_list = []
     total_throughput_list = []
     runtime_list = []
     for i in xrange(2):
         out = subprocess.check_output(['python', script_name,
                                        '-u', '2', '-s', str(spikes), '-g', '0', '-m', '100'])
-        average_throughput, total_throughput, runtime = out.strip('()\n\"').split(', ')
+        average_step_sync_time, average_throughput, total_throughput, runtime = out.strip('()\n\"').split(', ')
+        average_step_sync_time_list.append(float(average_step_sync_time))
         average_throughput_list.append(float(average_throughput))
         total_throughput_list.append(float(total_throughput))
         runtime_list.append(float(runtime))
-    w.writerow([spikes, np.average(average_throughput_list),
+    w.writerow([spikes,
+                np.average(average_step_sync_time_list),
+                np.average(average_throughput_list),
                 np.average(total_throughput_list),
                 np.average(runtime_list)])
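
For readers following the change: each run script consumes the tuple that timing_demo.py prints to stdout, which now carries four fields. A minimal sketch of the parsing step, using a hypothetical output string (Python 2, like the scripts themselves):

# Hypothetical output line from timing_demo.py; actual numbers will differ.
out = '(0.0023, 1250000.0, 1175000.0, 14.2)\n'
# Strip the surrounding parentheses/newline/quotes and split on ', ':
fields = out.strip('()\n\"').split(', ')
average_step_sync_time, average_throughput, total_throughput, runtime = [float(f) for f in fields]
print average_step_sync_time, runtime
# 0.0023 14.2
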
10 changes: 7 additions & 3 deletions examples/timing/run_gpu.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 """
-Run timing test (GPU).
+Run timing test (GPU) scaled over number of ports.
 """
 
 import numpy as np
@@ -15,16 +15,20 @@
 
 w = csv.writer(sys.stdout)
 for spikes in np.linspace(500, 15000, 10, dtype=int):
+    average_step_sync_time_list = []
     average_throughput_list = []
     total_throughput_list = []
     runtime_list = []
     for i in xrange(2):
         out = subprocess.check_output(['python', script_name,
                                        '-u', '2', '-s', str(spikes), '-g', '0', '-m', '100'])
-        average_throughput, total_throughput, runtime = out.strip('()\n\"').split(', ')
+        average_step_sync_time, average_throughput, total_throughput, runtime = out.strip('()\n\"').split(', ')
+        average_step_sync_time_list.append(float(average_step_sync_time))
         average_throughput_list.append(float(average_throughput))
         total_throughput_list.append(float(total_throughput))
         runtime_list.append(float(runtime))
-    w.writerow([spikes, np.average(average_throughput_list),
+    w.writerow([spikes,
+                np.average(average_step_sync_time_list),
+                np.average(average_throughput_list),
                 np.average(total_throughput_list),
                 np.average(runtime_list)])
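
run_gpu.py drives timing_demo_gpu.py over the same port sweep as run.py; only the target script differs. For reference, a quick check of the sweep values the linspace call produces (dtype=int truncates the evenly spaced floats):

import numpy as np

# Ten spiking-port counts between 500 and 15000, as swept by run.py and run_gpu.py:
print np.linspace(500, 15000, 10, dtype=int)
# approximately: [  500  2111  3722  5333  6944  8555 10166 11777 13388 15000]
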
36 changes: 36 additions & 0 deletions examples/timing/run_lpu.py
@@ -0,0 +1,36 @@
#!/usr/bin/env python

"""
Run timing test (non-GPU) scaled over number of LPUs.
"""

import numpy as np

import csv
import re
import subprocess
import sys

script_name = 'timing_demo.py'

w = csv.writer(sys.stdout)
for lpus in xrange(2, 9):
    average_step_sync_time_list = []
    average_throughput_list = []
    total_throughput_list = []
    runtime_list = []
    for i in xrange(3):
        out = subprocess.check_output(['srun', '-n', '1', '-c', str(lpus),
                                       'python', script_name,
                                       '-u', str(lpus), '-s', '1000', '-g', '0',
                                       '-m', '100'])
        average_step_sync_time, average_throughput, total_throughput, runtime = out.strip('()\n\"').split(', ')
        average_step_sync_time_list.append(float(average_step_sync_time))
        average_throughput_list.append(float(average_throughput))
        total_throughput_list.append(float(total_throughput))
        runtime_list.append(float(runtime))
    w.writerow([lpus,
                np.average(average_step_sync_time_list),
                np.average(average_throughput_list),
                np.average(total_throughput_list),
                np.average(runtime_list)])
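
run_lpu.py fixes the port counts and scales the number of LPUs instead, requesting one SLURM task with as many CPU cores as LPUs. A sketch of the command it builds for a single data point (lpus = 4 chosen only for illustration; -u, -s, -g, and -m are the LPU count, spiking ports, graded potential ports, and step count used throughout these scripts):

# Illustrative only: the argument list run_lpu.py hands to subprocess for lpus = 4.
lpus = 4
args = ['srun', '-n', '1', '-c', str(lpus),        # one SLURM task, one core per LPU
        'python', 'timing_demo.py',
        '-u', str(lpus), '-s', '1000', '-g', '0',  # 4 LPUs, 1000 spiking ports, 0 gpot ports
        '-m', '100']                               # 100 execution steps
print ' '.join(args)
# srun -n 1 -c 4 python timing_demo.py -u 4 -s 1000 -g 0 -m 100
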
36 changes: 36 additions & 0 deletions examples/timing/run_lpu_gpu.py
@@ -0,0 +1,36 @@
#!/usr/bin/env python

"""
Run timing test (GPU) scaled over number of LPUs.
"""

import numpy as np

import csv
import re
import subprocess
import sys

script_name = 'timing_demo_gpu.py'

w = csv.writer(sys.stdout)
for lpus in xrange(2,8):
    average_step_sync_time_list = []
    average_throughput_list = []
    total_throughput_list = []
    runtime_list = []
    for i in xrange(2):
        out = subprocess.check_output(['srun', '-n', '1', '-c', str(lpus),
                                       '--gres=gpu:%i' % lpus,
                                       'python', script_name,
                                       '-u', str(lpus), '-s', '1000', '-g', '0', '-m', '100'])
        average_step_sync_time, average_throughput, total_throughput, runtime = out.strip('()\n\"').split(', ')
        average_step_sync_time_list.append(float(average_step_sync_time))
        average_throughput_list.append(float(average_throughput))
        total_throughput_list.append(float(total_throughput))
        runtime_list.append(float(runtime))
    w.writerow([lpus,
                np.average(average_step_sync_time_list),
                np.average(average_throughput_list),
                np.average(total_throughput_list),
                np.average(runtime_list)])
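
The GPU variant additionally reserves one GPU per LPU through SLURM's --gres option; the only new piece of string handling is the integer formatting, so a run with six LPUs requests six GPUs on the allocated node:

# Illustrative only: how the --gres argument is formed for lpus = 4.
lpus = 4
print '--gres=gpu:%i' % lpus
# --gres=gpu:4
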
20 changes: 10 additions & 10 deletions examples/timing/timing_demo.py
@@ -54,11 +54,11 @@ def gen_sels(n_lpu, n_spike, n_gpot):
n_lpu : int
Number of LPUs. Must be at least 2.
n_spike : int
Total number of input and output spiking ports any
Total number of input and output spiking ports any
single LPU exposes to any other LPU. Each LPU will therefore
have 2*n_spike*(n_lpu-1) total spiking ports.
n_gpot : int
Total number of input and output graded potential ports any
Total number of input and output graded potential ports any
single LPU exposes to any other LPU. Each LPU will therefore
have 2*n_gpot*(n_lpu-1) total graded potential ports.
@@ -72,11 +72,11 @@ def gen_sels(n_lpu, n_spike, n_gpot):
Ports in pattern interfaces; the keys are tuples containing the two
module IDs connected by the pattern and the values are pairs of tuples
containing the respective selectors for all source ports, all
destination ports, all input ports connected to the first module,
all output ports connected to the first module, all graded potential ports
destination ports, all input ports connected to the first module,
all output ports connected to the first module, all graded potential ports
connected to the first module, all spiking ports connected to the first
module, all input ports connected to the second module,
all output ports connected to the second module, all graded potential ports
module, all input ports connected to the second module,
all output ports connected to the second module, all graded potential ports
connected to the second module, and all spiking ports connected to the second
module.
"""
@@ -155,11 +155,11 @@ def emulate(n_lpu, n_spike, n_gpot, steps):
Number of LPUs. Must be at least 2 and no greater than the number of
local GPUs.
n_spike : int
Total number of input and output spiking ports any
Total number of input and output spiking ports any
single LPU exposes to any other LPU. Each LPU will therefore
have 2*n_spike*(n_lpu-1) total spiking ports.
n_gpot : int
Total number of input and output graded potential ports any
Total number of input and output graded potential ports any
single LPU exposes to any other LPU. Each LPU will therefore
have 2*n_gpot*(n_lpu-1) total graded potential ports.
steps : int
@@ -182,7 +182,7 @@ def emulate(n_lpu, n_spike, n_gpot, steps):

# Generate selectors for configuring modules and patterns:
mod_sels, pat_sels = gen_sels(n_lpu, n_spike, n_gpot)

# Set up modules:
for i in xrange(n_lpu):
lpu_i = 'lpu%s' % i
@@ -215,7 +215,7 @@ def emulate(n_lpu, n_spike, n_gpot, steps):
man.start(steps=steps)
man.stop()
t = man.get_throughput()
return t[0], t[1], (time.time()-start)
return t[0], t[1], t[2], (time.time()-start)

if __name__ == '__main__':
num_lpus = 2
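
The docstrings above state that each LPU ends up with 2*n_spike*(n_lpu-1) spiking and 2*n_gpot*(n_lpu-1) graded potential ports. A worked check of the formula with illustrative numbers:

# Illustrative numbers only: port counts implied by the docstring formulas.
n_lpu, n_spike, n_gpot = 3, 100, 50
spike_ports_per_lpu = 2*n_spike*(n_lpu-1)   # 100 in + 100 out to each of the 2 other LPUs -> 400
gpot_ports_per_lpu = 2*n_gpot*(n_lpu-1)     # 50 in + 50 out to each of the 2 other LPUs -> 200
print spike_ports_per_lpu, gpot_ports_per_lpu
# 400 200
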
16 changes: 8 additions & 8 deletions examples/timing/timing_demo_gpu.py
@@ -67,11 +67,11 @@ def gen_sels(n_lpu, n_spike, n_gpot):
n_lpu : int
Number of LPUs. Must be at least 2.
n_spike : int
Total number of input and output spiking ports any
Total number of input and output spiking ports any
single LPU exposes to any other LPU. Each LPU will therefore
have 2*n_spike*(n_lpu-1) total spiking ports.
n_gpot : int
Total number of input and output graded potential ports any
Total number of input and output graded potential ports any
single LPU exposes to any other LPU. Each LPU will therefore
have 2*n_gpot*(n_lpu-1) total graded potential ports.
@@ -85,11 +85,11 @@ def gen_sels(n_lpu, n_spike, n_gpot):
Ports in pattern interfaces; the keys are tuples containing the two
module IDs connected by the pattern and the values are pairs of tuples
containing the respective selectors for all source ports, all
destination ports, all input ports connected to the first module,
all output ports connected to the first module, all graded potential ports
destination ports, all input ports connected to the first module,
all output ports connected to the first module, all graded potential ports
connected to the first module, all spiking ports connected to the first
module, all input ports connected to the second module,
all output ports connected to the second module, all graded potential ports
module, all input ports connected to the second module,
all output ports connected to the second module, all graded potential ports
connected to the second module, and all spiking ports connected to the second
module.
"""
@@ -204,7 +204,7 @@ def emulate(n_lpu, n_spike, n_gpot, steps):
# Set up modules:
for i in xrange(n_lpu):
lpu_i = 'lpu%s' % i
sel, sel_in, sel_out, sel_gpot, sel_spike = mod_sels[lpu_i]
sel, sel_in, sel_out, sel_gpot, sel_spike = mod_sels[lpu_i]
m = MyModule(sel, sel_in, sel_out,
sel_gpot, sel_spike,
port_data=man.port_data, port_ctrl=man.port_ctrl,
@@ -233,7 +233,7 @@ def emulate(n_lpu, n_spike, n_gpot, steps):
man.start(steps=steps)
man.stop()
t = man.get_throughput()
return t[0], t[1], (time.time()-start)
return t[0], t[1], t[2], (time.time()-start)

if __name__ == '__main__':
num_lpus = 2
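
Both demo scripts now return four values instead of three: get_throughput() (see the neurokernel/base.py hunks below) hands back a 3-tuple (average step sync time, average per-step throughput, total throughput) instead of a pair, and emulate() appends the wall-clock runtime. A sketch of the mapping, with hypothetical numbers, assuming the demo's __main__ block prints the returned tuple, which is what the run_*.py parsers above expect:

# Hypothetical values: the tuple shape after this change.
t = (0.0023, 1250000.0, 1175000.0)    # (avg step sync time, avg per-step throughput, total throughput)
runtime = 14.2                        # wall-clock seconds measured in emulate()
result = (t[0], t[1], t[2], runtime)  # what emulate() now returns
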
22 changes: 13 additions & 9 deletions neurokernel/base.py
@@ -896,8 +896,6 @@ def __init__(self, port_ctrl, port_time, ids=set()):
assert isinstance(ids, set)
self.ids = ids

self.timing_data = {}

# Queue for returning timing results to parent process:
self.queue = mp.Queue()

@@ -920,6 +918,7 @@ def run(self):
total_nbytes = 0.0
received_data = {}
self.average_throughput = 0.0
self.average_step_sync_time = 0.0
while True:
if sock_time.poll(10):

@@ -940,17 +939,19 @@

# The duration of an execution step is assumed to be the longest of
# the received intervals:
step_time = max([(d[1]-d[0]) for d in received_data[steps].values()])
step_sync_time = max([(d[1]-d[0]) for d in received_data[steps].values()])

# Obtain the total number of bytes received by all of the
# modules during the execution step:
step_nbytes = sum([d[2] for d in received_data[steps].values()])

total_time += step_time
total_time += step_sync_time
total_nbytes += step_nbytes

self.average_throughput = (self.average_throughput*counter+\
step_nbytes/step_time)/(counter+1)
step_nbytes/step_sync_time)/(counter+1)
self.average_step_sync_time = (self.average_step_sync_time*counter+\
step_sync_time)/(counter+1)

# Clear the data for the processed execution step so that
# the received_data dict doesn't consume unnecessary memory:
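
Per the indexing above, each entry of received_data[steps] appears to be a (start, stop, nbytes) record per module (inferred from d[0], d[1], d[2]); the step sync time is the slowest module's interval, and the step's throughput divides the summed bytes by it. A small sketch with made-up records:

# Made-up per-module records for one execution step: (start time, stop time, bytes received).
received = {'lpu0': (10.000, 10.004, 2048),
            'lpu1': (10.001, 10.006, 4096)}
step_sync_time = max(d[1]-d[0] for d in received.values())   # ~0.005 s, set by the slowest module
step_nbytes = sum(d[2] for d in received.values())           # 6144 bytes moved during the step
step_throughput = step_nbytes/step_sync_time                 # bytes/s credited to this step
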
@@ -967,13 +968,16 @@
self.total_throughput = total_nbytes/total_time
else:
self.total_throughput = 0.0
self.log_info('average per-step/total transmission throughputs: %s, %s bytes/s' % \
(self.average_throughput, self.total_throughput))
self.queue.put((self.average_throughput, self.total_throughput))
self.log_info('avg step sync time (s)/avg per-step throughput (b/s)' \
'/total transmission throughput (b/s): %s, %s, %s' % \
(self.average_step_sync_time, self.average_throughput,
self.total_throughput))
self.queue.put((self.average_step_sync_time, self.average_throughput,
self.total_throughput))

def get_throughput(self):
"""
Retrieve average per-step and total transmission throughputs.
Retrieve average step sync time, average per-step throughput, and total transmission throughput.
"""

return self.queue.get()
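
Both running averages in run() use the same incremental update, so no per-step history has to be stored. A standalone sketch of the recurrence (names chosen for illustration):

# Incremental mean, as used above for average_step_sync_time and average_throughput:
# after `counter` samples, fold in the next one without keeping a list of samples.
def update_mean(current_mean, counter, new_sample):
    return (current_mean*counter + new_sample)/(counter + 1)

mean = 0.0
for counter, sample in enumerate([0.002, 0.004, 0.003]):
    mean = update_mean(mean, counter, sample)
print mean
# 0.003
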
