Commit

Also include average step synchronization time in collected timing data, add scripts for running timing demos scaled over number of LPUs.
lebedov committed Mar 15, 2015
1 parent f0ae794 commit a328961
Showing 7 changed files with 117 additions and 33 deletions.
10 changes: 7 additions & 3 deletions examples/timing/run.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 """
-Run timing test (non-GPU).
+Run timing test (non-GPU) scaled over number of ports.
 """
 
 import numpy as np
@@ -15,16 +15,20 @@
 
 w = csv.writer(sys.stdout)
 for spikes in np.linspace(500, 15000, 10, dtype=int):
+    average_step_sync_time_list = []
     average_throughput_list = []
     total_throughput_list = []
     runtime_list = []
     for i in xrange(2):
         out = subprocess.check_output(['python', script_name,
                                        '-u', '2', '-s', str(spikes), '-g', '0', '-m', '100'])
-        average_throughput, total_throughput, runtime = out.strip('()\n\"').split(', ')
+        average_step_sync_time, average_throughput, total_throughput, runtime = out.strip('()\n\"').split(', ')
+        average_step_sync_time_list.append(float(average_step_sync_time))
         average_throughput_list.append(float(average_throughput))
         total_throughput_list.append(float(total_throughput))
         runtime_list.append(float(runtime))
-    w.writerow([spikes, np.average(average_throughput_list),
+    w.writerow([spikes,
+                np.average(average_step_sync_time_list),
+                np.average(average_throughput_list),
                 np.average(total_throughput_list),
                 np.average(runtime_list)])
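
For readers following the change: each run script consumes the tuple that timing_demo.py prints to stdout, which now carries four fields. A minimal sketch of the parsing step, using a hypothetical output string (Python 2, like the scripts themselves):

# Hypothetical output line from timing_demo.py; actual numbers will differ.
out = '(0.0023, 1250000.0, 1175000.0, 14.2)\n'
# Strip the surrounding parentheses/newline/quotes and split on ', ':
fields = out.strip('()\n\"').split(', ')
average_step_sync_time, average_throughput, total_throughput, runtime = [float(f) for f in fields]
print average_step_sync_time, runtime
# 0.0023 14.2
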
10 changes: 7 additions & 3 deletions examples/timing/run_gpu.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 """
-Run timing test (GPU).
+Run timing test (GPU) scaled over number of ports.
 """
 
 import numpy as np
@@ -15,16 +15,20 @@
 
 w = csv.writer(sys.stdout)
 for spikes in np.linspace(500, 15000, 10, dtype=int):
+    average_step_sync_time_list = []
     average_throughput_list = []
     total_throughput_list = []
     runtime_list = []
     for i in xrange(2):
         out = subprocess.check_output(['python', script_name,
                                        '-u', '2', '-s', str(spikes), '-g', '0', '-m', '100'])
-        average_throughput, total_throughput, runtime = out.strip('()\n\"').split(', ')
+        average_step_sync_time, average_throughput, total_throughput, runtime = out.strip('()\n\"').split(', ')
+        average_step_sync_time_list.append(float(average_step_sync_time))
         average_throughput_list.append(float(average_throughput))
         total_throughput_list.append(float(total_throughput))
         runtime_list.append(float(runtime))
-    w.writerow([spikes, np.average(average_throughput_list),
+    w.writerow([spikes,
+                np.average(average_step_sync_time_list),
+                np.average(average_throughput_list),
                 np.average(total_throughput_list),
                 np.average(runtime_list)])
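
run_gpu.py drives timing_demo_gpu.py over the same port sweep as run.py; only the target script differs. For reference, a quick check of the sweep values the linspace call produces (dtype=int truncates the evenly spaced floats):

import numpy as np

# Ten spiking-port counts between 500 and 15000, as swept by run.py and run_gpu.py:
print np.linspace(500, 15000, 10, dtype=int)
# approximately: [  500  2111  3722  5333  6944  8555 10166 11777 13388 15000]
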
36 changes: 36 additions & 0 deletions examples/timing/run_lpu.py
@@ -0,0 +1,36 @@
#!/usr/bin/env python

"""
Run timing test (non-GPU) scaled over number of LPUs.
"""

import numpy as np

import csv
import re
import subprocess
import sys

script_name = 'timing_demo.py'

w = csv.writer(sys.stdout)
for lpus in xrange(2, 9):
    average_step_sync_time_list = []
    average_throughput_list = []
    total_throughput_list = []
    runtime_list = []
    for i in xrange(3):
        out = subprocess.check_output(['srun', '-n', '1', '-c', str(lpus),
                                       'python', script_name,
                                       '-u', str(lpus), '-s', '1000', '-g', '0',
                                       '-m', '100'])
        average_step_sync_time, average_throughput, total_throughput, runtime = out.strip('()\n\"').split(', ')
        average_step_sync_time_list.append(float(average_step_sync_time))
        average_throughput_list.append(float(average_throughput))
        total_throughput_list.append(float(total_throughput))
        runtime_list.append(float(runtime))
    w.writerow([lpus,
                np.average(average_step_sync_time_list),
                np.average(average_throughput_list),
                np.average(total_throughput_list),
                np.average(runtime_list)])
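
run_lpu.py fixes the port counts and scales the number of LPUs instead, requesting one SLURM task with as many CPU cores as LPUs. A sketch of the command it builds for a single data point (lpus = 4 chosen only for illustration; -u, -s, -g, and -m are the LPU count, spiking ports, graded potential ports, and step count used throughout these scripts):

# Illustrative only: the argument list run_lpu.py hands to subprocess for lpus = 4.
lpus = 4
args = ['srun', '-n', '1', '-c', str(lpus),        # one SLURM task, one core per LPU
        'python', 'timing_demo.py',
        '-u', str(lpus), '-s', '1000', '-g', '0',  # 4 LPUs, 1000 spiking ports, 0 gpot ports
        '-m', '100']                               # 100 execution steps
print ' '.join(args)
# srun -n 1 -c 4 python timing_demo.py -u 4 -s 1000 -g 0 -m 100
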
36 changes: 36 additions & 0 deletions examples/timing/run_lpu_gpu.py
@@ -0,0 +1,36 @@
#!/usr/bin/env python

"""
Run timing test (GPU) scaled over number of LPUs.
"""

import numpy as np

import csv
import re
import subprocess
import sys

script_name = 'timing_demo_gpu.py'

w = csv.writer(sys.stdout)
for lpus in xrange(2,8):
    average_step_sync_time_list = []
    average_throughput_list = []
    total_throughput_list = []
    runtime_list = []
    for i in xrange(2):
        out = subprocess.check_output(['srun', '-n', '1', '-c', str(lpus),
                                       '--gres=gpu:%i' % lpus,
                                       'python', script_name,
                                       '-u', str(lpus), '-s', '1000', '-g', '0', '-m', '100'])
        average_step_sync_time, average_throughput, total_throughput, runtime = out.strip('()\n\"').split(', ')
        average_step_sync_time_list.append(float(average_step_sync_time))
        average_throughput_list.append(float(average_throughput))
        total_throughput_list.append(float(total_throughput))
        runtime_list.append(float(runtime))
    w.writerow([lpus,
                np.average(average_step_sync_time_list),
                np.average(average_throughput_list),
                np.average(total_throughput_list),
                np.average(runtime_list)])
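
The GPU variant additionally reserves one GPU per LPU through SLURM's --gres option; the only new piece of string handling is the integer formatting, so a run with six LPUs requests six GPUs on the allocated node:

# Illustrative only: how the --gres argument is formed for lpus = 4.
lpus = 4
print '--gres=gpu:%i' % lpus
# --gres=gpu:4
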
20 changes: 10 additions & 10 deletions examples/timing/timing_demo.py
@@ -54,11 +54,11 @@ def gen_sels(n_lpu, n_spike, n_gpot):
n_lpu : int
Number of LPUs. Must be at least 2.
n_spike : int
Total number of input and output spiking ports any
Total number of input and output spiking ports any
single LPU exposes to any other LPU. Each LPU will therefore
have 2*n_spike*(n_lpu-1) total spiking ports.
n_gpot : int
Total number of input and output graded potential ports any
Total number of input and output graded potential ports any
single LPU exposes to any other LPU. Each LPU will therefore
have 2*n_gpot*(n_lpu-1) total graded potential ports.
@@ -72,11 +72,11 @@ def gen_sels(n_lpu, n_spike, n_gpot):
Ports in pattern interfaces; the keys are tuples containing the two
module IDs connected by the pattern and the values are pairs of tuples
containing the respective selectors for all source ports, all
destination ports, all input ports connected to the first module,
all output ports connected to the first module, all graded potential ports
destination ports, all input ports connected to the first module,
all output ports connected to the first module, all graded potential ports
connected to the first module, all spiking ports connected to the first
module, all input ports connected to the second module,
all output ports connected to the second module, all graded potential ports
module, all input ports connected to the second module,
all output ports connected to the second module, all graded potential ports
connected to the second module, and all spiking ports connected to the second
module.
"""
@@ -155,11 +155,11 @@ def emulate(n_lpu, n_spike, n_gpot, steps):
Number of LPUs. Must be at least 2 and no greater than the number of
local GPUs.
n_spike : int
Total number of input and output spiking ports any
Total number of input and output spiking ports any
single LPU exposes to any other LPU. Each LPU will therefore
have 2*n_spike*(n_lpu-1) total spiking ports.
n_gpot : int
Total number of input and output graded potential ports any
Total number of input and output graded potential ports any
single LPU exposes to any other LPU. Each LPU will therefore
have 2*n_gpot*(n_lpu-1) total graded potential ports.
steps : int
@@ -182,7 +182,7 @@ def emulate(n_lpu, n_spike, n_gpot, steps):

# Generate selectors for configuring modules and patterns:
mod_sels, pat_sels = gen_sels(n_lpu, n_spike, n_gpot)

# Set up modules:
for i in xrange(n_lpu):
lpu_i = 'lpu%s' % i
@@ -215,7 +215,7 @@ def emulate(n_lpu, n_spike, n_gpot, steps):
man.start(steps=steps)
man.stop()
t = man.get_throughput()
return t[0], t[1], (time.time()-start)
return t[0], t[1], t[2], (time.time()-start)

if __name__ == '__main__':
num_lpus = 2
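
The docstrings above state that each LPU ends up with 2*n_spike*(n_lpu-1) spiking and 2*n_gpot*(n_lpu-1) graded potential ports. A worked check of the formula with illustrative numbers:

# Illustrative numbers only: port counts implied by the docstring formulas.
n_lpu, n_spike, n_gpot = 3, 100, 50
spike_ports_per_lpu = 2*n_spike*(n_lpu-1)   # 100 in + 100 out to each of the 2 other LPUs -> 400
gpot_ports_per_lpu = 2*n_gpot*(n_lpu-1)     # 50 in + 50 out to each of the 2 other LPUs -> 200
print spike_ports_per_lpu, gpot_ports_per_lpu
# 400 200
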
16 changes: 8 additions & 8 deletions examples/timing/timing_demo_gpu.py
@@ -67,11 +67,11 @@ def gen_sels(n_lpu, n_spike, n_gpot):
n_lpu : int
Number of LPUs. Must be at least 2.
n_spike : int
Total number of input and output spiking ports any
Total number of input and output spiking ports any
single LPU exposes to any other LPU. Each LPU will therefore
have 2*n_spike*(n_lpu-1) total spiking ports.
n_gpot : int
Total number of input and output graded potential ports any
Total number of input and output graded potential ports any
single LPU exposes to any other LPU. Each LPU will therefore
have 2*n_gpot*(n_lpu-1) total graded potential ports.
@@ -85,11 +85,11 @@ def gen_sels(n_lpu, n_spike, n_gpot):
Ports in pattern interfaces; the keys are tuples containing the two
module IDs connected by the pattern and the values are pairs of tuples
containing the respective selectors for all source ports, all
destination ports, all input ports connected to the first module,
all output ports connected to the first module, all graded potential ports
destination ports, all input ports connected to the first module,
all output ports connected to the first module, all graded potential ports
connected to the first module, all spiking ports connected to the first
module, all input ports connected to the second module,
all output ports connected to the second module, all graded potential ports
module, all input ports connected to the second module,
all output ports connected to the second module, all graded potential ports
connected to the second module, and all spiking ports connected to the second
module.
"""
@@ -204,7 +204,7 @@ def emulate(n_lpu, n_spike, n_gpot, steps):
# Set up modules:
for i in xrange(n_lpu):
lpu_i = 'lpu%s' % i
sel, sel_in, sel_out, sel_gpot, sel_spike = mod_sels[lpu_i]
sel, sel_in, sel_out, sel_gpot, sel_spike = mod_sels[lpu_i]
m = MyModule(sel, sel_in, sel_out,
sel_gpot, sel_spike,
port_data=man.port_data, port_ctrl=man.port_ctrl,
@@ -233,7 +233,7 @@ def emulate(n_lpu, n_spike, n_gpot, steps):
man.start(steps=steps)
man.stop()
t = man.get_throughput()
return t[0], t[1], (time.time()-start)
return t[0], t[1], t[2], (time.time()-start)

if __name__ == '__main__':
num_lpus = 2
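
Both demo scripts now return four values instead of three: get_throughput() (see the neurokernel/base.py hunks below) hands back a 3-tuple (average step sync time, average per-step throughput, total throughput) instead of a pair, and emulate() appends the wall-clock runtime. A sketch of the mapping, with hypothetical numbers, assuming the demo's __main__ block prints the returned tuple, which is what the run_*.py parsers above expect:

# Hypothetical values: the tuple shape after this change.
t = (0.0023, 1250000.0, 1175000.0)    # (avg step sync time, avg per-step throughput, total throughput)
runtime = 14.2                        # wall-clock seconds measured in emulate()
result = (t[0], t[1], t[2], runtime)  # what emulate() now returns
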
22 changes: 13 additions & 9 deletions neurokernel/base.py
@@ -896,8 +896,6 @@ def __init__(self, port_ctrl, port_time, ids=set()):
assert isinstance(ids, set)
self.ids = ids

self.timing_data = {}

# Queue for returning timing results to parent process:
self.queue = mp.Queue()

@@ -920,6 +918,7 @@ def run(self):
total_nbytes = 0.0
received_data = {}
self.average_throughput = 0.0
self.average_step_sync_time = 0.0
while True:
if sock_time.poll(10):

@@ -940,17 +939,19 @@

# The duration of an execution step is assumed to be the longest of
# the received intervals:
step_time = max([(d[1]-d[0]) for d in received_data[steps].values()])
step_sync_time = max([(d[1]-d[0]) for d in received_data[steps].values()])

# Obtain the total number of bytes received by all of the
# modules during the execution step:
step_nbytes = sum([d[2] for d in received_data[steps].values()])

total_time += step_time
total_time += step_sync_time
total_nbytes += step_nbytes

self.average_throughput = (self.average_throughput*counter+\
step_nbytes/step_time)/(counter+1)
step_nbytes/step_sync_time)/(counter+1)
self.average_step_sync_time = (self.average_step_sync_time*counter+\
step_sync_time)/(counter+1)

# Clear the data for the processed execution step so that
# the received_data dict doesn't consume unnecessary memory:
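
Per the indexing above, each entry of received_data[steps] appears to be a (start, stop, nbytes) record per module (inferred from d[0], d[1], d[2]); the step sync time is the slowest module's interval, and the step's throughput divides the summed bytes by it. A small sketch with made-up records:

# Made-up per-module records for one execution step: (start time, stop time, bytes received).
received = {'lpu0': (10.000, 10.004, 2048),
            'lpu1': (10.001, 10.006, 4096)}
step_sync_time = max(d[1]-d[0] for d in received.values())   # ~0.005 s, set by the slowest module
step_nbytes = sum(d[2] for d in received.values())           # 6144 bytes moved during the step
step_throughput = step_nbytes/step_sync_time                 # bytes/s credited to this step
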
@@ -967,13 +968,16 @@
self.total_throughput = total_nbytes/total_time
else:
self.total_throughput = 0.0
self.log_info('average per-step/total transmission throughputs: %s, %s bytes/s' % \
(self.average_throughput, self.total_throughput))
self.queue.put((self.average_throughput, self.total_throughput))
self.log_info('avg step sync time (s)/avg per-step throughput (b/s)' \
'/total transmission throughput (b/s): %s, %s, %s' % \
(self.average_step_sync_time, self.average_throughput,
self.total_throughput))
self.queue.put((self.average_step_sync_time, self.average_throughput,
self.total_throughput))

def get_throughput(self):
"""
Retrieve average per-step and total transmission throughputs.
Retrieve average step sync time, average per-step throughput, and total transmission throughput.
"""

return self.queue.get()
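
Both running averages in run() use the same incremental update, so no per-step history has to be stored. A standalone sketch of the recurrence (names chosen for illustration):

# Incremental mean, as used above for average_step_sync_time and average_throughput:
# after `counter` samples, fold in the next one without keeping a list of samples.
def update_mean(current_mean, counter, new_sample):
    return (current_mean*counter + new_sample)/(counter + 1)

mean = 0.0
for counter, sample in enumerate([0.002, 0.004, 0.003]):
    mean = update_mean(mean, counter, sample)
print mean
# 0.003
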
