# Computing the mean

In this notebook, we compute the mean of an array of random numbers.  

Rather than importing NumPy explicitly, we will use the `%pylab` magic command, which is essentially equivalent to

    from numpy import *
    from scipy import *
    from matplotlib.pyplot import *

In [None]:
# Select the interactive notebook backend for Matplotlib figures.
%matplotlib notebook
# Populate the namespace with NumPy and Matplotlib names; later cells call
# functions such as random.rand, mean, arange and polyfit unqualified.
%pylab

In [None]:
import time, multiprocessing
import pandas

In [None]:
# Seed the global NumPy generator (`random` is numpy.random here, as the call
# to random.rand shows) so that a fresh Restart & Run All reproduces the same
# array and therefore the same means below.
SEED = 12345
random.seed(SEED)

N = 2**20
x = random.rand(N)
# Report the real buffer size instead of assuming 8 bytes per element.
print("memory = {:.4f} (MB)".format(x.nbytes/1024**2))

In [None]:
def vsum(x,cpipe):
    """Worker task: add up the entries of ``x`` and send the total on ``cpipe``.

    Parameters
    ----------
    x : iterable of numbers
        The chunk of data to be summed.
    cpipe : connection object with a ``send`` method
        The end of the pipe on which the partial sum is delivered.

    Nothing is returned; the result travels through the pipe only.
    """
    # `sum` is whatever the enclosing namespace provides (with %pylab in
    # effect this is presumably NumPy's sum rather than the builtin).
    cpipe.send(sum(x))
    
def compute_mean(x,np):
    """Compute the mean of ``x`` using ``np`` worker processes.

    ``x`` is cut into ``np`` contiguous chunks.  Each chunk is summed in its
    own process by ``vsum``, and the partial sums are returned to the parent
    through one pipe per worker.

    Parameters
    ----------
    x : sequence
        Values to average; must support ``len`` and slicing and be non-empty
        (the result is divided by ``len(x)``).
    np : int
        Number of worker processes, at least 1.  Inside this function the
        name shadows any module-level ``np``.

    Returns
    -------
    The sum of all partial sums divided by ``len(x)``.

    Raises
    ------
    ValueError
        If ``np`` is less than 1 (previously this silently returned 0).
    """
    if np < 1:
        raise ValueError("np must be at least 1, got {}".format(np))

    N = len(x)
    jobs = []
    parent_ends = []
    child_ends = []
    for i in range(np):
        ppipe, cpipe = multiprocessing.Pipe()
        # Integer arithmetic keeps the chunk boundaries exact for any N.
        i1 = i*N//np
        i2 = (i+1)*N//np

        job = multiprocessing.Process(target=vsum,args=(x[i1:i2],cpipe))
        parent_ends.append(ppipe)
        child_ends.append(cpipe)
        jobs.append(job)

    for job in jobs:
        job.start()

    # The workers own the sending ends now; dropping the parent's copies
    # releases the handles and lets recv() see end-of-file instead of
    # blocking forever if a worker exits without sending.
    for cpipe in child_ends:
        cpipe.close()

    s = 0
    try:
        # Receive BEFORE joining: a worker blocked in send() on a full pipe
        # never exits, so joining first can deadlock.
        for ppipe in parent_ends:
            s += ppipe.recv()
    finally:
        for ppipe in parent_ends:
            ppipe.close()
        for job in jobs:
            job.join()

    return s/N

In [None]:
# Parallel version: time compute_mean with 8 worker processes and show the
# result with 16 decimals.
%time a1 = compute_mean(x,8)
print("a1 = {:24.16f}".format(a1))
print("")


# Reference: time the unqualified mean() on the same array for comparison.
%time a2 = mean(x)
print("a2 = {:24.16f}".format(a2))


In [None]:
# Use 'timeit' loop (see 'using_timeit' notebook.)
procs = 2**arange(0,6)
tdata = empty(procs.shape)
for i,np in enumerate(procs):
    tr = %timeit -n 1 -r 5 -q -o pass; compute_mean(x,np)
    tdata[i] = tr.best    

In [None]:
# Processor counts and best timings as double-precision Series; `p` is read
# again further down, so both names stay at cell level.
p, t = (pandas.Series(values,dtype='d') for values in (procs, tdata))

# One row per processor count: column 'p' = processes, column 't' = best time.
df = pandas.DataFrame(dict(p=p, t=t))
display(df)

## Plot results

Use the Pandas front end to Matplotlib.

In [None]:
# Column key -> human-readable label; reused for the axis labels below.
di = dict(p='Processors', t='Time (s)')
# Line/marker style, keyed by the *renamed* timing column.
style = {di['t']: '.-'}

# Log-log plot of time against processor count, drawn from a relabelled copy.
df.rename(columns=di).plot(
    x=di['p'],
    style=style,
    markersize=15,
    logx=True,
    logy=True,
)
xlabel(di['p'], fontsize=16)
ylabel(di['t'], fontsize=16)
title('Computing the mean')

## Add best-fit line to data frame

In [None]:
# Straight-line fit in log-log space through the first three measurements;
# c[0] is the slope, c[1] the intercept.
c = polyfit(log(df['p'].iloc[:3]), log(df['t'].iloc[:3]), 1)

# Register a label and a red solid line style for the fitted column.
di.update({'best-fit': 'Best fit (slope={:.2f})'.format(c[0])})
style.update({di['best-fit']: 'r-'})

# Evaluate the fit at every processor count and map back from log space.
df['best-fit'] = exp(polyval(c, log(p)))

df.rename(columns=di).plot(
    x=di['p'],
    style=style,
    markersize=15,
    logx=True,
    logy=True,
)