In [None]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

# Notebook-wide matplotlib default: use a 22 pt base font for every figure below.
plt.rcParams['font.size'] = 22

Scaling data measured with UNIX's `time`

In [None]:
# Number of cpus: powers of two from 2**0 up to 2**6 (seven float values)
cpus = np.logspace(0, 6, num=7, base=2)
# Timings for the clustered set of 1e6 particles (theta=0.7);
# seven values, the same length as `cpus`.
css = np.array([
    83.74, 63.63, 56.56, 52.52, 49.51, 50.82, 57.39,
])
# Timings for the uniformly distributed set of 1e6 particles (theta=0.4).
# Only six values: one fewer than `cpus`, so there is no entry for the
# last CPU count.
uss = np.array([
    197.2, 115.6, 72.73, 58.50, 48.09, 49.77,
])

In [None]:
# Speedups: each timing series divided into its own first entry,
# so the first element of every speedup array is exactly 1.
c_spdp = css[0] / css   # clustered set
u_spdp = uss[0] / uss   # uniform set

In [None]:
# Problem size (number of particles) for weak scaling; 11 sizes from 10 to 1e6
nparts = np.array([
    10, 30, 100, 300, 1000, 3000, 10000, 30000, 100000, 300000, 1000000,
])

# Total time for weak scaling, one value per entry of `nparts`.
# 4 CPUs
uws4 = np.array([
    1.919, 1.778, 1.779, 1.806, 1.757, 1.887, 2.234, 3.019, 7.426, 20.008, 67.454,
])
# 8 CPUs
uws8 = np.array([
    2.366, 2.082, 2.149, 2.295, 3.175, 2.542, 2.528, 3.357, 6.703, 17.382, 58.501,
])
# 16 CPUs
uws16 = np.array([
    2.163, 2.186, 2.160, 2.158, 2.126, 2.378, 2.517, 3.491, 6.106, 14.622, 48.651,
])

# Time per CPU for weak scaling: total time divided by the CPU count of the run
pt_uws4, pt_uws8, pt_uws16 = uws4 / 4, uws8 / 8, uws16 / 16

In [None]:
# Strong-scaling figure: run time against CPU count for both particle sets,
# log-scaled x axis, saved as strong_time.png.
plt.rcParams['figure.figsize'] = 2*np.array([4, 3])
strong, ax = plt.subplots()

# `cpus` and `css` hold one more entry than `uss`, so their last element is
# sliced off to plot both curves over the same CPU counts.
cpu_axis = cpus[:-1]
ax.semilogx(cpu_axis, css[:-1], label='Clustered')
ax.semilogx(cpu_axis, uss, label='Uniform')

# One tick per CPU count, labelled with the value itself.
ax.set_xticks(cpus)
ax.set_xticklabels(cpus.astype(str))
ax.set_xlim((cpus[0], cpus[-2]))

ax.set_xlabel('CPUs')
ax.set_ylabel('Time (s)')
ax.set_title('1e6 particles, 10 iterations')
ax.grid(True, which='both', axis='both')
ax.legend()

strong.tight_layout()
strong.savefig('strong_time.png', dpi=600, transparent=True)
plt.show()

In [None]:
# Weak-scaling figure: time per CPU against particle count on log-log axes,
# one curve per CPU count, saved as weak_time.png.
plt.rcParams['figure.figsize'] = 2*np.array([4, 3])
# NOTE: the figure handle keeps the name `strong` even though this is the
# weak-scaling plot; the name is reused from the strong-scaling cell.
strong, ax = plt.subplots()

weak_series = (
    (pt_uws4, 'Uniform/4CPUs'),
    (pt_uws8, 'Uniform/8CPUs'),
    (pt_uws16, 'Uniform/16CPUs'),
)
for per_cpu_time, tag in weak_series:
    ax.loglog(nparts, per_cpu_time, label=tag)

ax.set_xlim((1e1, 1e6))
ax.set_xlabel('Particles')
ax.set_ylabel('Time (per CPU)')
ax.set_title('10 iterations')
ax.grid(True, axis='both', which='both')
ax.legend()

strong.tight_layout()
strong.savefig('weak_time.png', dpi=600, transparent=True)
plt.show()

In [None]:
def amdahl(n,p):
    """Speed-up predicted by Amdahl's law, 1 / ((1 - p) + p / n).

    Parameters
    ----------
    n : number of processors; a scalar or a NumPy array (evaluated element-wise).
    p : fraction of the work that is parallelised (1.0 gives the ideal speed-up n).

    Returns
    -------
    The predicted speed-up, with the same shape as `n`.
    """
    serial_share = 1 - p      # part of the run that does not scale with n
    parallel_share = p/n      # parallel part, divided among n processors
    return 1/(serial_share + parallel_share)

# Strong-scaling speed-up figure: measured speed-ups of both particle sets
# against Amdahl's-law curves, log-log axes, saved as strong_speedup.png.
plt.rcParams['figure.figsize'] = 2*np.array([4, 3])
strong, ax = plt.subplots()

# `cpus` and `css` hold one more entry than `uss`; drop their last element
# so every curve is drawn over the same CPU counts.
cpu_axis = cpus[:-1]

# Measured speed-up: first timing of each series divided by the series.
ax.loglog(cpu_axis, css[0]/css[:-1], label='Clusters')
ax.loglog(cpu_axis, uss[0]/uss, label='Uniform')

# Dashed reference curves from amdahl() for three parallel fractions.
for fraction, tag in ((1., 'Ideal'), (.8, '80%'), (.4, '40%')):
    ax.loglog(cpu_axis, amdahl(cpu_axis, fraction), linestyle='--', label=tag)

ax.set_xticks(cpus)
ax.set_xticklabels(cpus.astype(str))
ax.set_xlim((cpus[0], cpus[-2]))
ax.set_ylim((1, 10))

ax.set_xlabel('CPUs')
ax.set_ylabel('Speed Up')
ax.grid(True, axis='both', which='both')
ax.legend()

strong.tight_layout()
strong.savefig('strong_speedup.png', dpi=600, transparent=True)
plt.show()

In [None]:
# Weak-scaling speed-up figure: per-CPU, per-particle time relative to the
# smallest problem size, log-log axes, saved as weak_speedup.png.
plt.rcParams['figure.figsize'] = 2*np.array([4, 3])
weak, ax = plt.subplots()

speedup_series = (
    (pt_uws4, '4CPUS/Uniform'),
    (pt_uws8, '8CPUS/Uniform'),
    (pt_uws16, '16CPUS/Uniform'),
)
for per_cpu_time, tag in speedup_series:
    # Time per CPU and per particle; the first entry (smallest problem size)
    # is the baseline every other size is compared against.
    time_per_particle = per_cpu_time/nparts
    ax.loglog(nparts, time_per_particle[0]/time_per_particle, label=tag)

# Unlabelled dashed reference line growing linearly with the problem size
# (equal to 1 at the smallest size); it does not appear in the legend.
ax.loglog(nparts, nparts/10, linestyle='--')

ax.set_xlim((10, 1e6))
ax.set_ylim((1, 1e4))
ax.set_xlabel('Size (particles)')
ax.set_ylabel('Speed up (per thread, particle)')
ax.grid(True, axis='both', which='major')
ax.legend(loc='lower right')

weak.tight_layout()
weak.savefig('weak_speedup.png', dpi=600, transparent=True)
plt.show()

In [None]:
# Show the per-CPU timings of the 4-, 8- and 16-CPU runs. No combined `pt_uws`
# array is defined in this notebook, so the three series are listed explicitly.
pt_uws4, pt_uws8, pt_uws16