Time execution of inside of Module.run() method excluding port/port i…

…ndex extraction and network setup.
neurokernel · Mar 19, 2015 · 08594b2 · 08594b2
1 parent 3c7bbc9
commit 08594b2
Show file tree

Hide file tree

Showing 8 changed files with 101 additions and 57 deletions.
diff --git a/examples/timing/run.py b/examples/timing/run.py
@@ -20,19 +20,22 @@
     total_throughput_list = []
     runtime_all_list = []
     runtime_main_list = []
+    runtime_loop_list = []
     for i in xrange(2):
         out = subprocess.check_output(['python', script_name,
                         '-u', '2', '-s', str(spikes), '-g', '0', '-m', '50'])
         average_step_sync_time, average_throughput, total_throughput, \
-            runtime_all, runtime_main = out.strip('()\n\"').split(', ')
+            runtime_all, runtime_main, runtime_loop = out.strip('()\n\"').split(', ')
         average_step_sync_time_list.append(float(average_step_sync_time))
         average_throughput_list.append(float(average_throughput))
         total_throughput_list.append(float(total_throughput))
         runtime_all_list.append(float(runtime_all))
         runtime_main_list.append(float(runtime_main))
+        runtime_loop_list.append(float(runtime_loop))
     w.writerow([spikes,
                 np.average(average_step_sync_time_list),
                 np.average(average_throughput_list),
                 np.average(total_throughput_list),
                 np.average(runtime_all_list),
-                np.average(runtime_main_list)])
+                np.average(runtime_main_list),
+                np.average(runtime_loop_list)])
diff --git a/examples/timing/run_gpu.py b/examples/timing/run_gpu.py
@@ -20,19 +20,22 @@
     total_throughput_list = []
     runtime_all_list = []
     runtime_main_list = []
+    runtime_loop_list = []
     for i in xrange(2):
         out = subprocess.check_output(['python', script_name,
                         '-u', '2', '-s', str(spikes), '-g', '0', '-m', '50'])
         average_step_sync_time, average_throughput, total_throughput, \
-            runtime_all, runtime_main = out.strip('()\n\"').split(', ')
+            runtime_all, runtime_main, runtime_loop = out.strip('()\n\"').split(', ')
         average_step_sync_time_list.append(float(average_step_sync_time))
         average_throughput_list.append(float(average_throughput))
         total_throughput_list.append(float(total_throughput))
         runtime_all_list.append(float(runtime_all))
         runtime_main_list.append(float(runtime_main))
+        runtime_loop_list.append(float(runtime_loop))
     w.writerow([spikes,
                 np.average(average_step_sync_time_list),
                 np.average(average_throughput_list),
                 np.average(total_throughput_list),
                 np.average(runtime_all_list),
-                np.average(runtime_main_list)])
+                np.average(runtime_main_list),
+                np.average(runtime_loop_list)])
diff --git a/examples/timing/run_lpu.py b/examples/timing/run_lpu.py
@@ -20,21 +20,24 @@
     total_throughput_list = []
     runtime_all_list = []
     runtime_main_list = []
+    runtime_loop_list = []
     for i in xrange(2):
         out = subprocess.check_output(['srun', '-n', '1', '-c', str(lpus),
                                        'python', script_name,
                                        '-u', str(lpus), '-s', '1000', '-g', '0',
                                        '-m', '50'])
         average_step_sync_time, average_throughput, total_throughput, \
-            runtime_all, runtime_main  = out.strip('()\n\"').split(', ')
+            runtime_all, runtime_main, runtime_loop  = out.strip('()\n\"').split(', ')
         average_step_sync_time_list.append(float(average_step_sync_time))
         average_throughput_list.append(float(average_throughput))
         total_throughput_list.append(float(total_throughput))
         runtime_all_list.append(float(runtime_all))
         runtime_main_list.append(float(runtime_main))
+        runtime_loop_list.append(float(runtime_loop))
     w.writerow([lpus,
                 np.average(average_step_sync_time_list),
                 np.average(average_throughput_list),
                 np.average(total_throughput_list),
                 np.average(runtime_all_list),
-                np.average(runtime_main_list)])
+                np.average(runtime_main_list),
+                np.average(runtime_loop_list)])
diff --git a/examples/timing/run_lpu_gpu.py b/examples/timing/run_lpu_gpu.py
@@ -14,27 +14,31 @@
 script_name = 'timing_demo_gpu.py'
 
 w = csv.writer(sys.stdout)
-for lpus in [8]:
+for lpus in xrange(2, 9):
     average_step_sync_time_list = []
     average_throughput_list = []
     total_throughput_list = []
     runtime_all_list = []
     runtime_main_list = []
+    runtime_loop_list = []
     for i in xrange(2):
         out = subprocess.check_output(['srun', '-n', '1', '-c', str(lpus),
                                        '--gres=gpu:%i' % lpus,
                                        'python', script_name,
-                                       '-u', str(lpus), '-s', '1000', '-g', '0', '-m', '50'])
+                                       '-u', str(lpus), '-s', '1000', '-g', '0',
+                                       '-m', '50'])
         average_step_sync_time, average_throughput, total_throughput, \
-            runtime_all, runtime_main = out.strip('()\n\"').split(', ')
+            runtime_all, runtime_main, runtime_loop = out.strip('()\n\"').split(', ')
         average_step_sync_time_list.append(float(average_step_sync_time))
         average_throughput_list.append(float(average_throughput))
         total_throughput_list.append(float(total_throughput))
         runtime_all_list.append(float(runtime_all))
         runtime_main_list.append(float(runtime_main))
+        runtime_loop_list.append(float(runtime_loop))
     w.writerow([lpus,
                 np.average(average_step_sync_time_list),
                 np.average(average_throughput_list),
                 np.average(total_throughput_list),
                 np.average(runtime_all_list),
-                np.average(runtime_main_list)])
+                np.average(runtime_main_list),
+                np.average(runtime_loop_list)])
diff --git a/examples/timing/timing_demo.py b/examples/timing/timing_demo.py
@@ -219,7 +219,7 @@ def emulate(n_lpu, n_spike, n_gpot, steps):
     man.stop()
     stop_main = time.time()
     t = man.get_throughput()
-    return t[0], t[1], t[2], (time.time()-start_all), (stop_main-start_main)
+    return t[0], t[1], t[2], (time.time()-start_all), (stop_main-start_main), t[3]
 
 if __name__ == '__main__':
     num_lpus = 2

diff --git a/examples/timing/timing_demo_gpu.py b/examples/timing/timing_demo_gpu.py
@@ -236,7 +236,7 @@ def emulate(n_lpu, n_spike, n_gpot, steps):
     man.stop()
     stop_main = time.time()
     t = man.get_throughput()
-    return t[0], t[1], t[2], (time.time()-start_all), (stop_main-start_main)
+    return t[0], t[1], t[2], (time.time()-start_all), (stop_main-start_main), t[3]
 
 if __name__ == '__main__':
     num_lpus = 2

diff --git a/neurokernel/base.py b/neurokernel/base.py
@@ -539,8 +539,8 @@ def _sync(self):
             stop = time.time()
             if self.time_sync:
                 self.log_info('sent timing data to master')
-                self.sock_time.send(msgpack.packb((self.id, self.steps,
-                                                   start, stop, nbytes)))
+                self.sock_time.send(msgpack.packb((self.id, self.steps, 'sync',
+                                                   (start, stop, nbytes))))
 
     def pre_run(self, *args, **kwargs):
         """
@@ -655,6 +655,10 @@ def run(self):
 
             self.running = True
             self.steps = 0
+            if self.time_sync:
+                self.sock_time.send(msgpack.packb((self.id, self.steps, 'start',
+                                                   time.time())))
+                self.log_info('sent start time to master')
             while self.steps < self.max_steps:
                 self.log_info('execution step: %s/%s' % (self.steps, self.max_steps))
 
@@ -698,6 +702,10 @@ def run(self):
                     break
 
                 self.steps += 1
+            if self.time_sync:
+                self.sock_time.send(msgpack.packb((self.id, self.steps, 'stop',
+                                                   time.time())))
+                self.log_info('sent stop time to master')
             self.log_info('maximum number of steps reached')
 
             # Perform any post-emulation operations:
@@ -920,70 +928,84 @@ def run(self):
         self.log_info('time port initialized')
         self.running = True
         counter = 0
-        total_time = 0.0
-        total_nbytes = 0.0
+        total_sync_time = 0.0
+        total_sync_nbytes = 0.0
         received_data = {}
+        self.start_time = 0.0
+        self.stop_time = 0.0
         self.average_throughput = 0.0
         self.average_step_sync_time = 0.0
         while True:
             if sock_time.poll(10):
 
                 # Receive timing data:
                 id, data = sock_time.recv_multipart()
-                id, steps, start, stop, nbytes = msgpack.unpackb(data)
-                self.log_info('time data: %s' % \
-                              str(msgpack.unpackb(data)))
-
-                # Collect timing data for each execution step:
-                if steps not in received_data:
-                    received_data[steps] = {}                    
-                received_data[steps][id] = (start, stop, nbytes)
-
-                # After adding the latest timing data for a specific step, check
-                # whether data from all modules has arrived for that step:
-                if set(received_data[steps].keys()) == self.ids:
-
-                    # The duration an execution is assumed to be the longest of
-                    # the received intervals:
-                    step_sync_time = max([(d[1]-d[0]) for d in received_data[steps].values()])
-
-                    # Obtain the total number of bytes received by all of the
-                    # modules during the execution step:
-                    step_nbytes = sum([d[2] for d in received_data[steps].values()])
+                id, steps, time_type, data = msgpack.unpackb(data)
+                self.log_info('time data: %s, %s, %s, %s' % (id, steps,
+                                                             time_type, str(data)))
+
+                # The time_type may be 'start' (emulation run loop start time), 
+                # 'stop' (emulation loop stop time), or 'sync' (emulation sync
+                # time data):
+                if time_type == 'start':
+                    self.start_time = data
+                elif time_type == 'stop':
+                    self.stop_time = data
+                elif time_type == 'sync':
+                    start, stop, nbytes = data
+
+                    # Collect timing data for each execution step:
+                    if steps not in received_data:
+                        received_data[steps] = {}                    
+                    received_data[steps][id] = (start, stop, nbytes)
+
+                    # After adding the latest timing data for a specific step, check
+                    # whether data from all modules has arrived for that step:
+                    if set(received_data[steps].keys()) == self.ids:
+
+                        # The duration an execution is assumed to be the longest of
+                        # the received intervals:
+                        step_sync_time = max([(d[1]-d[0]) for d in received_data[steps].values()])
+
+                        # Obtain the total number of bytes received by all of the
+                        # modules during the execution step:
+                        step_nbytes = sum([d[2] for d in received_data[steps].values()])
+
+                        total_sync_time += step_sync_time
+                        total_sync_nbytes += step_nbytes
+
+                        self.average_throughput = (self.average_throughput*counter+\
+                                                   step_nbytes/step_sync_time)/(counter+1)
+                        self.average_step_sync_time = (self.average_step_sync_time*counter+\
+                                                       step_sync_time)/(counter+1)
+
+                        # Clear the data for the processed execution step so that
+                        # that the received_data dict doesn't consume unnecessary memory:
+                        del received_data[steps]
+
+                        counter += 1
 
-                    total_time += step_sync_time
-                    total_nbytes += step_nbytes
-
-                    self.average_throughput = (self.average_throughput*counter+\
-                                               step_nbytes/step_sync_time)/(counter+1)
-                    self.average_step_sync_time = (self.average_step_sync_time*counter+\
-                                                   step_sync_time)/(counter+1)
-
-                    # Clear the data for the processed execution step so that
-                    # that the received_data dict doesn't consume unnecessary memory:
-                    del received_data[steps]
-
-                    counter += 1
-
             if not self.running:
                 self.log_info('stopping run loop')
                 break
         self.log_info('done')
 
-        if total_time > 0.0:
-            self.total_throughput = total_nbytes/total_time
+        if total_sync_time > 0.0:
+            self.total_throughput = total_sync_nbytes/total_sync_time
         else:
             self.total_throughput = 0.0
-        self.log_info('avg step sync time (s)/avg per-step throughput (b/s)' \
-                      '/total transmission throughput (bs) : %s, %s, %s' % \
+        self.log_info('avg step sync time/avg per-step throughput' \
+                      '/total transm throughput/run loop duration:' \
+                      '%s, %s, %s, %s' % \
                       (self.average_step_sync_time, self.average_throughput, 
-                       self.total_throughput))
+                       self.total_throughput, self.stop_time-self.start_time))
         self.queue.put((self.average_step_sync_time, self.average_throughput,
-                        self.total_throughput))
+                        self.total_throughput, self.stop_time-self.start_time))
 
     def get_throughput(self):
         """
-        Retrieve average step sync time, average per-step throughput, and total transmission throughput.
+        Retrieve average step sync time, average per-step throughput, total
+        transmission throughput, and run loop duration.
         """
 
         return self.queue.get()

diff --git a/neurokernel/core.py b/neurokernel/core.py
@@ -9,6 +9,7 @@
 import time
 
 import bidict
+import msgpack
 import numpy as np
 import twiggy
 
@@ -423,6 +424,10 @@ def run(self):
 
             self.running = True
             self.steps = 0
+            if self.time_sync:
+                self.sock_time.send(msgpack.packb((self.id, self.steps, 'start',
+                                                   time.time())))
+                self.log_info('sent start time to master')
             while self.steps < self.max_steps:
                 self.log_info('execution step: %s/%s' % (self.steps, self.max_steps))
 
@@ -470,6 +475,10 @@ def run(self):
                     break
 
                 self.steps += 1
+            if self.time_sync:
+                self.sock_time.send(msgpack.packb((self.id, self.steps, 'stop',
+                                                   time.time())))
+                self.log_info('sent stop time to master')
             self.log_info('maximum number of steps reached')
 
             # Perform any post-emulation operations: