# Matrix-vector Multiplication

In [1]:
%matplotlib notebook
%pylab
import numpy as np
import multiprocessing as mp
from operator import itemgetter
import pandas

Using matplotlib backend: nbAgg
Populating the interactive namespace from numpy and matplotlib


In [2]:
NumProcessors = mp.cpu_count()
#create a Random Matrix A (assuming A to be square matrix i.e. m==n) of size 2048x2048 
A = np.matrix(np.random.rand(1024,1024))
#create a Random vector x of size 2048x1 
x = np.matrix(np.random.rand(1024,1))

In [3]:
%%time
test_result = (A*x)

CPU times: user 4.68 ms, sys: 15.5 ms, total: 20.2 ms
Wall time: 6.89 ms


In [4]:
test_result

matrix([[262.0912843 ],
        [260.63419291],
        [241.30988274],
        ...,
        [253.93432478],
        [258.38050117],
        [262.25466482]])

## Distributing rows of the matrix across processors

In [5]:
print("Number of Processors = %i"%(NumProcessors))
rowsplit = np.split(A, NumProcessors, axis = 0)

Number of Processors = 4


In [6]:
def matrix_mult_row_split(MatrixRows,pos,dotProdQueue):
    row_list = MatrixRows.tolist()
    for i in range(len(row_list)):
        mult = row_list[i]*x
        dotProdQueue.put((pos,mult))

In [7]:
%%time
jobs = []
results = []
dotProdQueue = mp.Queue()
for i in range(NumProcessors):
    job = mp.Process(target=matrix_mult_row_split,args=[rowsplit[i], i, dotProdQueue])
    job.start()
    jobs.append(job)
    
for i in range(NumProcessors):
    jobs[i].join

CPU times: user 2.22 ms, sys: 13 ms, total: 15.3 ms
Wall time: 14.2 ms


In [8]:
results = [dotProdQueue.get() for p in range(len(x))]
results = sorted(results,key=itemgetter(0))
results = [r[1] for r in results]
results = np.row_stack(results)

### Checking Results

In [9]:
results #Result from Row Distributed Dot-Product

matrix([[262.0912843 ],
        [260.63419291],
        [241.30988274],
        ...,
        [253.93432478],
        [258.38050117],
        [262.25466482]])

In [10]:
error_count = 0
for i in range(len(test_result)): # assuming the lists are of the same length
    if abs(test_result[i]-results[i]) > (1e-6):
                error_count +=1
                print("Not Matched at Index = %i" %(i))
if(error_count == 0):
    print("Results Matched!!!")

Results Matched!!!


## Distributing dot product computations

In [11]:
def matrix_mult_dotProd(MatrixRows,pos,dotProdQueue):
    row_list = MatrixRows.tolist()
    for i in range(len(row_list)):
        dotProd = 0
        for j in range(np.shape(MatrixRows[0])[1]):
            dotProd += row_list[i][j]*x[j]
        dotProdQueue.put((pos,dotProd))

In [12]:
%%time
jobs = []
dp_results = []
dotProdQueue = mp.Queue()
for i in range(NumProcessors):
    job = mp.Process(target=matrix_mult_dotProd,args=[rowsplit[i], i, dotProdQueue])
    job.start()
    jobs.append(job)
    
for i in range(NumProcessors):
    jobs[i].join

CPU times: user 11.2 ms, sys: 5.17 ms, total: 16.3 ms
Wall time: 37.5 ms


In [13]:
dp_results = [dotProdQueue.get() for p in range(len(x))]
dp_results = sorted(dp_results,key=itemgetter(0))
dp_results = [r[1] for r in dp_results]
dp_results = np.row_stack(dp_results)

### Checking Results

In [14]:
dp_results

matrix([[262.0912843 ],
        [260.63419291],
        [241.30988274],
        ...,
        [253.93432478],
        [258.38050117],
        [262.25466482]])

In [15]:
error_count = 0
for i in range(len(test_result)): # assuming the lists are of the same length
    if abs(test_result[i]-dp_results[i]) > (1e-6):
                error_count +=1
                print("Not Matched at Index = %i" %(i))
if(error_count == 0):
    print("Results Matched!!!")

Results Matched!!!


## Performance on Multiple CPU's

In [16]:
def row_dist_method(processors):
    jobs = []
    dp_results = []
    dotProdQueue = mp.Queue()
    row1 = np.split(A, int(processors), axis = 0)
    for i in range(processors):
        job = mp.Process(target=matrix_mult_row_split,args=[row1[i], i, dotProdQueue])
        job.start()
        jobs.append(job)
    for i in range(processors):
        jobs[i].join
        

def dotproduct_dist_method(processors):
    jobs = []
    dp_results = []
    dotProdQueue = mp.Queue()
    for i in range(NumProcessors):
        job = mp.Process(target=matrix_mult_dotProd,args=[rowsplit[i], i, dotProdQueue])
        job.start()
        jobs.append(job)
    
    for i in range(NumProcessors):
        jobs[i].join

In [26]:
procs = 2**(np.arange(0,4))
tdata_row_dis = np.empty(procs.shape)
tdata_dp_dis = np.empty(procs.shape)
for i,numproc in enumerate(procs):
    tr_row_dis = %timeit -n 2 -r 5 -q -o pass; row_dist_method(numproc)
    tdata_row_dis[i] = tr_row_dis.best

for i,numproc in enumerate(procs):
    tr_dp_dis = %timeit -n 2 -r 5 -q -o pass; dotproduct_dist_method(numproc)
    tdata_dp_dis[i] = tr_dp_dis.best

Sp_row = [tdata_row_dis[0]/items for items in tdata_row_dis]
Ep_row = [Sp_row[i]/procs[i] for i in range(0,len(Sp_row))]

Sp_dp = [tdata_dp_dis[0]/items for items in tdata_dp_dis]
Ep_dp = [Sp_dp[i]/procs[i] for i in range(0,len(Sp_dp))]

In [27]:
p = pandas.Series(procs,dtype='d')
trow = pandas.Series(tdata_row_dis,dtype='d')
tdp = pandas.Series(tdata_dp_dis, dtype='d')

df = pandas.DataFrame({'Processors' : p,
                       'Mult. by Row Dist.(sec)' : trow,
                       'Ep Row Dist.' : Ep_row,
                       'Mult. by DP Dist.(sec)' : tdp,
                       'Ep Dot Prod. Dist.' : Ep_dp,                       
                    })
df = df.reindex(['Processors','Mult. by Row Dist.(sec)','Ep Row Dist.',
                 'Mult. by DP Dist.(sec)', 'Ep Dot Prod. Dist.'], axis=1)
df.style
pandas.io.formats.style.Styler(df, caption ='Timing Report for Matrix-Vector Multiplication Using Multiple CPUs')


Unnamed: 0,Processors,Mult. by Row Dist.(sec),Ep Row Dist.,Mult. by DP Dist.(sec),Ep Dot Prod. Dist.
0,1,0.00609584,1.0,0.0459096,1.0
1,2,0.0289005,0.105462,0.353179,0.0649947
2,4,0.0345528,0.0441053,0.740807,0.0154931
3,8,0.0591378,0.0128848,1.42963,0.0040141


The efficieny plot was skipped because of poor efficiency per processor. The result for this problem are obtained on personal computer and not on the Virtual Machine. 