In [13]:
%%writefile tutorial1.py
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

if rank == 0:
    data = {'a': 7, 'b': 3.14}
    comm.send(data, dest=1, tag=11) # tag may be omitted
elif rank == 1:
    data = comm.recv(source=0, tag=11)
    print(rank, data)


Overwriting tutorial1.py


In [14]:
%%bash
mpirun -n 4 python tutorial1.py

1 {'a': 7, 'b': 3.14}


---

In [None]:
%%writefile tutorial2.py
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

if rank == 0:
    data = {'a': 7, 'b': 3.14}
    req = comm.isend(data, dest=1, tag=11)
    req.wait()
elif rank == 1:
    req = comm.irecv(source=0, tag=11)
    data = req.wait()
    print(rank, data)

In [12]:
%%bash
mpirun -n 4 python tutorial2.py

1 {'a': 7, 'b': 3.14}


---

In [25]:
%%writefile tutorial3.py
from mpi4py import MPI
import numpy

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

# passing MPI datatypes explicitly
if rank == 0:
    data = numpy.arange(20, dtype='i')
    print('a', rank, data, data.dtype)
    comm.Send([data, MPI.INT], dest=1, tag=77)
elif rank == 1:
    data = numpy.empty(20, dtype='i')
    comm.Recv([data, MPI.INT], source=0, tag=77)
    print('b', rank, data, data.dtype)
    

# automatic MPI datatype discovery
if rank == 0:
    data = numpy.arange(10, dtype=numpy.float64)
    print('a', rank, data, data.dtype)
    comm.Send(data, dest=1, tag=13)
elif rank == 1:
    data = numpy.empty(10, dtype=numpy.float64)
    comm.Recv(data, source=0, tag=13)
    print('b', rank, data, data.dtype)

Overwriting tutorial3.py


In [26]:
%%bash
mpirun -n 4 python tutorial3.py

a 0 [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19] int32
a 0 [0. 1. 2. 3. 4. 5. 6. 7. 8. 9.] float64
b 1 [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19] int32
b 1 [0. 1. 2. 3. 4. 5. 6. 7. 8. 9.] float64


---
bcast

In [5]:
%%writefile tutorial4.py
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

if rank == 0:
    data = {'key1' : [7, 2.72, 2+3j],
            'key2' : ( 'abc', 'xyz')}
else:
    data = None
    
data = comm.bcast(data, root=0)

print(rank, data)

Overwriting tutorial4.py


In [4]:
%%bash
mpirun -n 4 python tutorial4.py

0 {'key1': [7, 2.72, (2+3j)], 'key2': ('abc', 'xyz')}
2 {'key1': [7, 2.72, (2+3j)], 'key2': ('abc', 'xyz')}
1 {'key1': [7, 2.72, (2+3j)], 'key2': ('abc', 'xyz')}
3 {'key1': [7, 2.72, (2+3j)], 'key2': ('abc', 'xyz')}


---
scatter

In [30]:
%%writefile tutorial5.py
from mpi4py import MPI

comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

if rank == 0:
    data = [(i+1)**2 for i in range(size)]
else:
    data = None
data = comm.scatter(data, root=0)
assert data == (rank+1)**2

Overwriting tutorial5.py


In [31]:
%%bash
mpirun -n 4 python tutorial5.py

---
gather

In [34]:
%%writefile tutorial6.py
from mpi4py import MPI

comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

data = (rank+1)**2
data = comm.gather(data, root=0)
if rank == 0:
    for i in range(size):
        assert data[i] == (i+1)**2
else:
    assert data is None
    
print(rank, data, type(data))

Overwriting tutorial6.py


In [35]:
%%bash
mpirun -n 4 python tutorial6.py

2 None <class 'NoneType'>
3 None <class 'NoneType'>
1 None <class 'NoneType'>
0 [1, 4, 9, 16] <class 'list'>


---
Bcast

In [50]:
%%writefile tutorial7.py
from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

if rank == 0:
    data = np.arange(10, dtype='int64')
else:
    data = np.empty(10, dtype='int64')
    
# print('a', rank, data)
    
comm.Bcast(data, root=0)
for i in range(10):
    assert data[i] == i

print('b', rank, data, data.dtype)

Overwriting tutorial7.py


In [51]:
%%bash
mpiexec -n 4 python tutorial7.py

b 0 [0 1 2 3 4 5 6 7 8 9] int64
b 1 [0 1 2 3 4 5 6 7 8 9] int64
b 2 [0 1 2 3 4 5 6 7 8 9] int64
b 3 [0 1 2 3 4 5 6 7 8 9] int64


---
Scatter

In [63]:
%%writefile tutorial8.py

from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

sendbuf = None

if rank == 0:
    sendbuf = np.empty([size, 10], dtype='int32')
    sendbuf.T[:,:] = range(size)
    
print(rank, sendbuf)

recvbuf = np.empty(10, dtype='int32')
comm.Scatter(sendbuf, recvbuf, root=0)
assert np.allclose(recvbuf, rank)

print(rank, recvbuf)

Overwriting tutorial8.py


In [64]:
%%bash
mpiexec -n 4 python tutorial8.py

1 None
2 None
0 [[0 0 0 0 0 0 0 0 0 0]
 [1 1 1 1 1 1 1 1 1 1]
 [2 2 2 2 2 2 2 2 2 2]
 [3 3 3 3 3 3 3 3 3 3]]
0 [0 0 0 0 0 0 0 0 0 0]
2 [2 2 2 2 2 2 2 2 2 2]
1 [1 1 1 1 1 1 1 1 1 1]
3 None
3 [3 3 3 3 3 3 3 3 3 3]


---
Gather

In [88]:
%%writefile tutorial9.py

from mpi4py import MPI
import numpy as np

time0 = MPI.Wtime()

comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

sendbuf = np.zeros(10, dtype='int32') + rank
recvbuf = None
if rank == 0:
    recvbuf = np.empty((size, 10), dtype='int32')
    
comm.Gather(sendbuf, recvbuf, root=0)
if rank == 0:
    for i in range(size):
        assert np.allclose(recvbuf[i,:], i)
        
time1 = MPI.Wtime()
        
print(rank, recvbuf)
print()

Overwriting tutorial9.py


In [89]:
%%bash
mpiexec -n 4 python tutorial9.py

2 None
3 None
1 None
0 [[0 0 0 0 0 0 0 0 0 0]
 [1 1 1 1 1 1 1 1 1 1]
 [2 2 2 2 2 2 2 2 2 2]
 [3 3 3 3 3 3 3 3 3 3]]


---
Reduce

In [4]:
import numpy as np

In [10]:
arr = np.array([1,2], dtype='float32')

In [17]:
np.empty(1, 'float32')

array([3.], dtype=float32)

In [22]:
%%writefile tutorial10.py

from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
comm.Barrier()
time0 = MPI.Wtime()

rank = comm.Get_rank()
size = comm.Get_size()

n = 4
dtype = "float32"

if n % size != 0:
    raise ValueError("The number of processes n needs to be a power of 2.")

dim1 = int(n // size)

# Create random a vector A at one thread
if rank == 0:
    A_sendbuf = np.random.rand(n).astype(dtype)
else:
    A_sendbuf = None

# and scatter A to all threads
A_recvbuf = np.empty(dim1, dtype)
comm.Scatter(A_sendbuf, A_recvbuf, root=0)

print(rank, A_recvbuf)

part_sum = np.sum(A_recvbuf).reshape(1)
sum_ = np.empty(1, dtype)
comm.Reduce(part_sum, sum_, root=0)

if rank == 0:
    print(sum_)

Overwriting tutorial10.py


In [23]:
%%bash
mpiexec -n 2 python tutorial10.py

0 [0.05181958 0.09680613]
1 [0.6857301  0.11715732]
[0.9515132]


---
matrix times vector

In [25]:
%%writefile tutorial11.py

from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
comm.Barrier()
time0 = MPI.Wtime()

rank = comm.Get_rank()
size = comm.Get_size()

if rank == 0:
    A_sendbuf = 2 * np.eye(size, dtype='float64')
    x = np.ones(size, dtype='float64')
else:
    A_sendbuf = None
    x = np.empty(size, dtype='float64')
    
A_recvbuf = np.empty(size, dtype='float64')
comm.Scatter(A_sendbuf, A_recvbuf, root=0)
comm.Bcast(x, root=0)

y_sendbuf = A_recvbuf @ x

if rank == 0:
    y_recvbuf = np.empty(size, dtype='float64')
else:
    y_recvbuf = None

comm.Gather(y_sendbuf, y_recvbuf, root=0)

comm.Barrier()
time1 = MPI.Wtime()

print(f"{rank})", "y =", y_recvbuf)
print(f"{rank})", f"Finished in {time1 - time0} sec.")

Overwriting tutorial10.py


In [26]:
%%bash
mpiexec -n 2 python tutorial11.py

1) y = None
1) Finished in 0.00010699099993871641 sec.
0) y = [2. 2.]
0) Finished in 0.00010391599994363787 sec.


---
# zad. 1.
Napisać równoległa implementację operacji dodawania wektora B i wektora C Wynik zapisywany jest w wektorze A. Tzn: A[i] = B[i] + C[i], i=0....n. n=1048576. Waktory B i C należy wypełnić losowymi liczbami z zakresu [0:1]. Jako wynik wypisać A[0] i A[n-1]. Wektory mogą być w dowolny sposób rozmieszczone w pamięci. 

Aplikacje należy uruchomić na 1, 2, 4 procesorach (rdzeniach, wątkach) (może być na laptopie) i podać czasy wykonania.

In [44]:
%%writefile zad1.py
from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
comm.Barrier()
time0 = MPI.Wtime()
rank = comm.Get_rank()
size = comm.Get_size()

n = 1048576 # 2**20
dtype = "float32"

if n % size != 0:
    raise ValueError("The number of processes n needs to be a power of 2.")

dim1 = int(n // size)

# Create random vectors B, C at one thread
if rank == 0:
    B_sendbuf = np.random.rand(n).astype(dtype)    
    C_sendbuf = np.random.rand(n).astype(dtype)
else:
    B_sendbuf = None
    C_sendbuf = None

# print(f"{rank}) B_sendbuf={B_sendbuf}")
# print(f"{rank}) C_sendbuf={C_sendbuf}")    

# Scatter B and C to all threads
B_recvbuf = np.empty(dim1, dtype)
C_recvbuf = np.empty(dim1, dtype)

comm.Scatter(B_sendbuf, B_recvbuf, root=0)
comm.Scatter(C_sendbuf, C_recvbuf, root=0)

# print(f"{rank}) B_recvbuf={B_recvbuf}")
# print(f"{rank}) C_recvbuf={C_recvbuf}")

# Do A = B + C piecewise at each thread
A_sendbuf = B_recvbuf + C_recvbuf

# print(f"{rank}) {A_sendbuf}")

# Gather pieces of A at thread 0
if rank == 0:
    A_recvbuf = np.empty(n, dtype)
else:
    A_recvbuf = None
    
comm.Gather(A_sendbuf, A_recvbuf, root=0)

# Print first and last element of A to screen
# Stop the timer
if rank == 0:
    A = A_recvbuf
    print(f"A[0]={A[0]}, A[-1]={A[-1]}")
    time1 = MPI.Wtime()
    print(f"n={size}. Finished in {round(time1-time0, 4)} sec.")


Overwriting zad1.py


In [40]:
%%bash
mpiexec -n 1 python zad1.py

A[0]=1.3715472221374512, A[-1]=0.8749533891677856
n=1. Finished in 0.0382 sec.


In [41]:
%%bash
mpiexec -n 2 python zad1.py

A[0]=0.7797716856002808, A[-1]=0.9926798939704895
n=2. Finished in 0.037 sec.


In [42]:
%%bash
mpiexec -n 4 python zad1.py

A[0]=1.4049322605133057, A[-1]=0.8619338274002075
n=4. Finished in 0.0508 sec.


---
# zad. 2.
Napisać program równoległy, który wyznaczy iloczyn skalarny wektorów A[i] i B[i] (i=1,1024). Tablice należy wypełnic liczbami losowymi z przedziału [0:1]. Wynik należy zabrać na procesorze 0 i wypisać na standardowe wyjście. Dla przypomnienia, iloczyn skalarny wektorów  to ∑A[i]*B[i].

In [26]:
%%writefile zad2.py
from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
comm.Barrier()
time0 = MPI.Wtime()
size = comm.Get_size()
rank = comm.Get_rank()

n = 1048576
dtype = 'float32'

if n % size != 0:
    raise ValueError()

dim1 = int(n // size)

if rank == 0:
    A_sendbuf = np.random.rand(n).astype(dtype)
    B_sendbuf = np.random.rand(n).astype(dtype)
else:
    A_sendbuf = None
    B_sendbuf = None

A_recvbuf = np.empty(dim1, dtype)
B_recvbuf = np.empty(dim1, dtype)

comm.Scatter(A_sendbuf, A_recvbuf, root=0)
comm.Scatter(B_sendbuf, B_recvbuf, root=0)

part_sum = np.sum(A_recvbuf * B_recvbuf).reshape(1)
sum_ = np.empty(1, dtype)
comm.Reduce(part_sum, sum_, root=0)

if rank == 0:
    print(f"The dot product equals {sum_}")
    time1 = MPI.Wtime()
    print(f"n={size}. Finished in {round(time1-time0, 4)} sec.")

Overwriting zad2.py


In [27]:
%%bash
mpiexec -n 2 python zad2.py

The dot product equals [262208.94]
n=2. Finished in 0.059 sec.


---
---
old

In [73]:
%%writefile playground.py
from mpi4py import MPI
import numpy as np
import sys

time1 = MPI.Wtime()
n = 1048576
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()

if n % size != 0:
    raise ValueError(f"The number of processes n needs to be a power of 2. Sorry!")

m = n // size

B_chunk = np.random.rand(m)
C_chunk = np.random.rand(m)
A_chunk = B_chunk + C_chunk

if rank == 0:
    print(A_chunk[0])
elif rank == size-1:
    print(A_chunk[-1])

sys.stdout.flush()
comm.Barrier()
time2 = MPI.Wtime()
if rank == 0:
    print(f"Finished in {time2 - time1} sec.")


Overwriting playground.py


In [77]:
%%bash
mpirun -n 2 python playground.py

0.3482898489632621
0.7875747398064287
Finished in 0.03380012512207031 sec.


In [76]:
%%bash
mpirun -n 4 python playground.py

1.4266662031540376
1.5166515249076427
Finished in 0.031232118606567383 sec.


In [75]:
%%bash
mpirun -n 8 python playground.py

1.000501044818679
1.1425933173122513
Finished in 0.07453513145446777 sec.


In [74]:
%%bash
mpirun -n 16 python playground.py

1.0410288035406272
1.0512516150574411
Finished in 0.06569218635559082 sec.


In [71]:
%%writefile playground2.py
from mpi4py import MPI
import numpy as np
import time
import sys

comm = MPI.COMM_WORLD
# comm.Barrier()
t_start = MPI.Wtime()

n = 1024 # 1048576
rank = comm.Get_rank()
size = comm.Get_size()

if n % size != 0:
    raise ValueError(f"Vector of length {n} needs to be split evenly!")

m = n // size

B_chunk = np.random.rand(m)
C_chunk = np.random.rand(m)
A_chunk = B_chunk * C_chunk
sum_chunk = np.sum(A_chunk)
sum_ = 0

comm.Reduce(
    [sum_chunk, MPI.DOUBLE],
    [sum_, MPI.DOUBLE],
    op = MPI.SUM,
    root = 0
)



comm.Barrier()
t_end = MPI.Wtime()
if rank == 0:
    print(f"Suma: {sum_}.")
    print(f"Finished in {t_end - t_start} sec.")

Overwriting playground2.py


In [72]:
%%bash
mpirun -n 4 python playground2.py

Suma: 0.
Finished in 0.017780065536499023 sec.
