Permalink
Browse files

SGE tweaks

  • Loading branch information...
1 parent 5bb60ef commit 60a193b21951be6de757ceb7c840d9ef42ece4f5 @minrk committed Jun 9, 2011
@@ -911,7 +911,7 @@ def start(self, n, cluster_dir):
class SGELauncher(PBSLauncher):
"""Sun GridEngine is a PBS clone with slightly different syntax"""
- job_array_regexp = CUnicode('#$$\W+-t\W+[\w\d\-\$]+')
+ job_array_regexp = CUnicode('#\$\$\W+\-t')
job_array_template = CUnicode('#$$ -t 1-$n')
queue_regexp = CUnicode('#$$\W+-q\W+\$?\w+')
queue_template = CUnicode('#$$ -q $queue')
@@ -165,19 +165,19 @@ def prepare_communication (self):
size1 = self.subd_hi_ix[1]-self.subd_lo_ix[1]+1
if self.lower_neighbors[0]>=0:
- self.in_lower_buffers[0] = zeros(size1, float)
- self.out_lower_buffers[0] = zeros(size1, float)
+ self.in_lower_buffers[0] = zeros(size1)
+ self.out_lower_buffers[0] = zeros(size1)
if self.upper_neighbors[0]>=0:
- self.in_upper_buffers[0] = zeros(size1, float)
- self.out_upper_buffers[0] = zeros(size1, float)
+ self.in_upper_buffers[0] = zeros(size1)
+ self.out_upper_buffers[0] = zeros(size1)
size0 = self.subd_hi_ix[0]-self.subd_lo_ix[0]+1
if self.lower_neighbors[1]>=0:
- self.in_lower_buffers[1] = zeros(size0, float)
- self.out_lower_buffers[1] = zeros(size0, float)
+ self.in_lower_buffers[1] = zeros(size0)
+ self.out_lower_buffers[1] = zeros(size0)
if self.upper_neighbors[1]>=0:
- self.in_upper_buffers[1] = zeros(size0, float)
- self.out_upper_buffers[1] = zeros(size0, float)
+ self.in_upper_buffers[1] = zeros(size0)
+ self.out_upper_buffers[1] = zeros(size0)
def get_num_loc_cells(self):
return [self.subd_hi_ix[0]-self.subd_lo_ix[0],\
@@ -220,7 +220,7 @@ def update_internal_boundary (self, solution_array):
else:
for i in xrange(0,loc_ny+1):
self.out_lower_buffers[0][i] = solution_array[1,i]
- mpi.Isend(self.out_lower_buffers[0], lower_x_neigh)
+ mpi.Send(self.out_lower_buffers[0], lower_x_neigh)
if upper_x_neigh>-1:
mpi.Recv(self.in_upper_buffers[0], upper_x_neigh)
@@ -231,7 +231,7 @@ def update_internal_boundary (self, solution_array):
for i in xrange(0,loc_ny+1):
solution_array[loc_nx,i] = self.in_upper_buffers[0][i]
self.out_upper_buffers[0][i] = solution_array[loc_nx-1,i]
- mpi.Isend(self.out_upper_buffers[0], upper_x_neigh)
+ mpi.Send(self.out_upper_buffers[0], upper_x_neigh)
if lower_x_neigh>-1:
mpi.Recv(self.in_lower_buffers[0], lower_x_neigh)
@@ -248,7 +248,7 @@ def update_internal_boundary (self, solution_array):
else:
for i in xrange(0,loc_nx+1):
self.out_lower_buffers[1][i] = solution_array[i,1]
- mpi.Isend(self.out_lower_buffers[1], lower_y_neigh)
+ mpi.Send(self.out_lower_buffers[1], lower_y_neigh)
if upper_y_neigh>-1:
mpi.Recv(self.in_upper_buffers[1], upper_y_neigh)
@@ -259,7 +259,7 @@ def update_internal_boundary (self, solution_array):
for i in xrange(0,loc_nx+1):
solution_array[i,loc_ny] = self.in_upper_buffers[1][i]
self.out_upper_buffers[1][i] = solution_array[i,loc_ny-1]
- mpi.Isend(self.out_upper_buffers[1], upper_y_neigh)
+ mpi.Send(self.out_upper_buffers[1], upper_y_neigh)
if lower_y_neigh>-1:
mpi.Recv(self.in_lower_buffers[1], lower_y_neigh)
@@ -153,6 +153,7 @@ def bc(x,y,t):
view.execute('mpi.barrier()')
# setup remote solvers
view.apply_sync(setup_solver, I,f,c,bc,Lx,Ly,partitioner=Reference('partitioner'), dt=0,implementation=impl)
+ # raise SystemExit(1)
# lambda for calling solver.solve:
_solve = lambda *args, **kwargs: solver.solve(*args, **kwargs)
@@ -161,7 +162,7 @@ def bc(x,y,t):
impl['inner'] = 'scalar'
# run first with element-wise Python operations for each cell
t0 = time.time()
- ar = view.apply_async(_solve, tstop, dt=0, verbose=True, final_test=final_test, user_action=user_action)
+ ar = view.apply_async(_solve, tstop, dt=0, final_test=final_test, user_action=user_action)
if final_test:
# this sum is performed element-wise as results finish
s = sum(ar)
@@ -179,15 +180,21 @@ def bc(x,y,t):
# run again with numpy vectorized inner-implementation
t0 = time.time()
- ar = view.apply_async(_solve, tstop, dt=0, verbose=True, final_test=final_test)#, user_action=wave_saver)
+ ar = view.apply_async(_solve, tstop, dt=0, final_test=final_test)#, user_action=wave_saver)
+ # this sum is performed element-wise as results finish
+ # the L2 norm (RMS) of the result:
+ s = sum([ r[-1] for r in ar ])
if final_test:
- # this sum is performed element-wise as results finish
- s = sum(ar)
- # the L2 norm (RMS) of the result:
norm = sqrt(s/num_cells)
else:
norm = -1
t1 = time.time()
+ # extract times:
+ times = [ r[:-1] for r in ar ]
+ walltimes, usertimes, systimes = zip(*times)
+ print walltimes
+ print usertimes
+ print systimes
print 'vector inner-version, Wtime=%g, norm=%g'%(t1-t0, norm)
# if ns.save is True, then u_hist stores the history of u as a list
@@ -52,6 +52,9 @@ def wave_saver(u, x, y, t):
u_hist.append(1.0*u)
+def print_times(name, times):
+ mean = 1.0*sum(times) / len(times)
+ print "%s time: mean: %.3f, min: %.3f, max: %.3f"%(name, mean, min(times), max(times))
# main program:
if __name__ == '__main__':
@@ -106,7 +109,7 @@ def wave_saver(u, x, y, t):
num_procs = len(rc.ids)
if partition is None:
- partition = [num_procs,1]
+ partition = [1,num_procs]
else:
num_procs = min(num_procs, partition[0]*partition[1])
@@ -170,16 +173,22 @@ def bc(x,y,t):
# run first with element-wise Python operations for each cell
t0 = time.time()
- ar = view.apply_async(_solve, tstop, dt=0, verbose=True, final_test=final_test, user_action=user_action)
+ ar = view.apply_async(_solve, tstop, dt=0, verbose=False, final_test=final_test, user_action=user_action)
+ # this sum is performed element-wise as results finish
+ # the L2 norm (RMS) of the result:
+ s = sum([ r[-1] for r in ar ])
if final_test:
- # this sum is performed element-wise as results finish
- s = sum(ar)
- # the L2 norm (RMS) of the result:
norm = sqrt(s/num_cells)
else:
norm = -1
t1 = time.time()
+ # extract times:
+ times = [ r[:-1] for r in ar ]
+ walltimes, usertimes, systimes = zip(*times)
print 'scalar inner-version, Wtime=%g, norm=%g'%(t1-t0, norm)
+ print_times('wall', walltimes)
+ print_times('user', usertimes)
+ print_times('sys', systimes)
# run again with faster numpy-vectorized inner implementation:
impl['inner'] = 'vectorized'
@@ -188,16 +197,22 @@ def bc(x,y,t):
t0 = time.time()
- ar = view.apply_async(_solve, tstop, dt=0, verbose=True, final_test=final_test)#, user_action=wave_saver)
+ ar = view.apply_async(_solve, tstop, dt=0, verbose=False, final_test=final_test)#, user_action=wave_saver)
+ # this sum is performed element-wise as results finish
+ # the L2 norm (RMS) of the result:
+ s = sum([ r[-1] for r in ar ])
if final_test:
- # this sum is performed element-wise as results finish
- s = sum(ar)
- # the L2 norm (RMS) of the result:
norm = sqrt(s/num_cells)
else:
norm = -1
t1 = time.time()
+ # extract times:
+ times = [ r[:-1] for r in ar ]
+ walltimes, usertimes, systimes = zip(*times)
print 'vector inner-version, Wtime=%g, norm=%g'%(t1-t0, norm)
+ print_times('wall', walltimes)
+ print_times('user', usertimes)
+ print_times('sys', systimes)
# if ns.save is True, then u_hist stores the history of u as a list
# If the partition scheme is Nx1, then u can be reconstructed via 'gather':
@@ -14,6 +14,8 @@
from numpy import exp, zeros, newaxis, sqrt, arange
+from IPython.utils.timing import clock2
+
def iseq(start=0, stop=None, inc=1):
"""
Generate integers from start to (and including!) stop,
@@ -94,7 +96,7 @@ def __init__(self, I, f, c, bc, Lx, Ly, partitioner=None, dt=-1,
dt = (1/float(c))*(1/sqrt(1/dx**2 + 1/dy**2)) # max time step
Cx2 = (c*dt/dx)**2; Cy2 = (c*dt/dy)**2; dt2 = dt**2 # help variables
- u = zeros((nx+1,ny+1)) # solution array
+ u = zeros((nx+1,ny+1), float) # solution array
u_1 = u.copy() # solution at t-dt
u_2 = u.copy() # solution at t-2*dt
@@ -165,7 +167,10 @@ def __init__(self, I, f, c, bc, Lx, Ly, partitioner=None, dt=-1,
def solve(self, tstop, dt=-1, user_action=None, verbose=False, final_test=False):
- t0=time.time()
+ # wall tic
+ wtic=time.time()
+ # user/sys tic
+ utic, stic = clock2()
f=self.f
c=self.c
bc=self.bc
@@ -250,18 +255,24 @@ def solve(self, tstop, dt=-1, user_action=None, verbose=False, final_test=False)
# update data structures for next step
u_2, u_1, u = u_1, u, u_2
- t1 = time.time()
+ # wall toc
+ wtoc=time.time()
+ # user/sys toc
+ utoc, stoc = clock2()
+
print 'my_id=%2d, dt=%g, %s version, slice_copy=%s, net Wtime=%g'\
%(partitioner.my_id,dt,implementation['inner'],\
- partitioner.slice_copy,t1-t0)
+ partitioner.slice_copy,wtoc-wtic)
# save the us
self.us = u,u_1,u_2
# check final results; compute discrete L2-norm of the solution
if final_test:
- loc_res = 0.0
- for i in iseq(start=1, stop=nx-1):
- for j in iseq(start=1, stop=ny-1):
- loc_res += u_1[i,j]**2
- return loc_res
- return dt
+ loc_res = (u_1[1:-1,1:-1]**2).sum()
+ # for i in iseq(start=1, stop=nx-1):
+ # for j in iseq(start=1, stop=ny-1):
+ # loc_res += u_1[i,j]**2
+ else:
+ loc_res = -1
+ # return
+ return wtoc-wtic, utoc-utic, stoc-stic, loc_res

0 comments on commit 60a193b

Please sign in to comment.