In [181]:
import numpy as np
import multiprocessing as mp
from random import random
from itertools import repeat

N = 10**3 * 3
A = [ [random() for i in range(N)] for j in range(N) ]
x = [random() for i in range(N)]

## In Serial

In [182]:
def in_serial(A,x):
    return [sum(a+b for a,b in zip(r,x)) for r in A ]

In [183]:
b_serial = in_serial(A,x)

In [184]:
%%timeit
b = in_serial(A,x)

934 ms ± 20.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## In Parallel by Row

In [185]:
def sub_matrix_mult(i, A1, x1, ret):
    ret[i] = in_serial(A1,x1)

In [186]:
def sub_matrix_mult(i, A1, x1, ret):
    ret[i] = in_serial(A1,x1)

def in_parallel(A,x,p):
    manager = mp.Manager()
    ret = manager.list([0]*p)
    rows_per_proc = len(A)//p
    
    if N%p != 0:
        rows_per_proc += 1
    submatrices = [A[i:i+rows_per_proc] for i in range(0, N, rows_per_proc)]
    if len(submatrices)<p:
        submatrices += [A[rows_per_proc*(p-1):]]

    jobs = [mp.Process(target=sub_matrix_mult, 
            args=(i, submatrix, x, ret))
            for i, submatrix in enumerate(submatrices)]
    for job in jobs: job.start()
    for job in jobs: job.join()
    b = []
    for b1 in ret: b += b1
    return b

In [187]:
b_parallel = in_parallel(A,x,4)

In [188]:
sum([ (s-b)**2 for s,b in zip(b_serial, b_parallel)])

0.0

In [189]:
len(b_parallel)

3000

In [190]:
%%timeit
b_parallel = in_parallel(A,x,1)

1.11 s ± 13.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [191]:
%%timeit
b_parallel = in_parallel(A,x,2)

590 ms ± 5.84 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [192]:
%%timeit
b_parallel = in_parallel(A,x,3)

418 ms ± 11.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [193]:
%%timeit
b_parallel = in_parallel(A,x,4)

385 ms ± 18.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [194]:
%%timeit
b_parallel = in_parallel(A,x,5)

418 ms ± 45.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [195]:
%%timeit
b_parallel = in_parallel(A,x,6)

400 ms ± 34.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## In Parallel by Column

In [196]:
def add_matrices(matrices):
    rows = len(matrices[0])
    columns = len(matrices[0][0])
    return [ [ sum([A[i][j] for A in matrices]) for j in range(columns)] for i in range(rows) ]
def add_vectors(vectors):
    return [sum([v[i] for v in vectors]) for i in range(len(vectors[0]))]

In [197]:
add_vectors( (range(10), range(10)))

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

In [198]:
A[0][0]*3

2.4786498196692612

In [199]:
def in_parallel(A,x,p):
    manager = mp.Manager()
    ret = manager.list([0]*p)
    
    if N%p == 0:
        cols_per_proc = len(A)//p
    else:
        cols_per_proc = len(A)//p + 1
    
    submatrices = [ [A[r][i*cols_per_proc:(i+1)*cols_per_proc]  for r in range(len(A))] for i in range(p-1)]
    subvectors = [x[i*cols_per_proc:(i+1)*cols_per_proc] for i in range(p-1)]
    
    if len(submatrices)<p:
        submatrices.append([ A[r][cols_per_proc*(p-1):] for r in range(len(A))])
        #submatrices += [ A[r][cols_per_proc*(p-1):] for r in range(len(A))]
        subvectors += [x[cols_per_proc*(p-1):]]     
        
    jobs = [mp.Process(target=sub_matrix_mult, 
            args=(i, submatrix, subvector, ret))
            for i, (submatrix, subvector) in enumerate(zip(submatrices, subvectors))]
    for job in jobs: job.start()
    for job in jobs: job.join()
    return add_vectors(ret)

In [200]:
b_parallel = in_parallel(A,x,3)
sum([ (s-b)**2 for s,b in zip(b_serial, b_parallel)])

7.169774074941192e-20

In [201]:
%%timeit
b_parallel = in_parallel(A,x,1)

2.38 s ± 26 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [202]:
%%timeit
b_parallel = in_parallel(A,x,2)

2.69 s ± 41 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [203]:
%%timeit
b_parallel = in_parallel(A,x,3)

3.28 s ± 49.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [204]:
%%timeit
b_parallel = in_parallel(A,x,4)

4.07 s ± 115 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [205]:
%%timeit
b_parallel = in_parallel(A,x,5)

4.82 s ± 111 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [206]:
%%timeit
b_parallel = in_parallel(A,x,6)

5.64 s ± 143 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [207]:
%%timeit
b_parallel = in_parallel(A,x,7)

6.5 s ± 79.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
