## Derived datatypes

In this exercise you will pass data around a ring using a derived datatype instead of the single integer or array we have used so far. Your send and receive buffers will each be a struct with one integer and one floating-point number. 

In the exercise you will fill out the blank spaces and modify the call routines to use the new datatype. 

1. Set up the MPI datatype for sending and receiving partial sums using the routines you learned in the previous step. You should use `MPI_Type_create_struct`. The send and receive buffers share the same fixed memory layout, so a single datatype describes both. 

2. Initialize the struct members: the integer with `rank` and the floating-point number with `10*rank`. We will therefore pass around two values and calculate two separate sums: an integer sum of the ranks and a floating-point sum of `10*rank`. 

3. Use the new datatype in the send and receive routine calls. 

In [None]:
?MPI::MPI_Get_address

In [None]:
?MPI::MPI_Type_create_struct

***
#### C program

In [None]:
#include <mpi.h>

In [None]:
%%executable  a.x -- -lmpi

// Ring exercise skeleton (C).
// Every process sends snd_buf to its right neighbour and receives rcv_buf from
// its left neighbour `size` times, accumulating the received values in `sum`.
// The runs of underscores below are placeholders for the reader to fill in;
// the cell does not compile until they are replaced.
int rank, size;
int i, right, left;

// Message layout: one int and one float. The same struct type is used for the
// send buffer, the receive buffer and the running sums.
struct buff {
    int i;
    float f;
} snd_buf, rcv_buf, sum;

// Arguments for MPI_Type_create_struct: one entry per struct member.
int          array_of_blocklengths[2];
MPI_Aint     array_of_displacements[2], first_var_address, second_var_address;
MPI_Datatype array_of_types[2], send_recv_type;

MPI_Status status;
MPI_Request request;

MPI_Init(NULL, NULL);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);

// Ring neighbours; adding `size` before the modulo keeps `left` non-negative
// for rank 0.
right = (rank+1)      % size;
left  = (rank-1+size) % size;

// Set MPI datatypes for sending and receiving partial sums
// number of elements = ? (how many values of each type the struct holds)
array_of_blocklengths[0] = ___;
array_of_blocklengths[1] = ___;

// Absolute addresses of both members; the displacement of the second member
// is to be derived from these two values.
MPI_Get_address(&snd_buf.i, &first_var_address);
MPI_Get_address(&snd_buf.f, &second_var_address);

array_of_displacements[0] = (MPI_Aint) 0;
array_of_displacements[1] = ____________;

// datatype handles used to describe the structure (one per member)
array_of_types[0] = ______;
array_of_types[1] = ______;

// Build the derived datatype, then make it usable for communication.
MPI_Type_create_struct(___...___, &send_recv_type);
MPI_________(&send_recv_type);

// Init: each process starts by sending its own rank and 10*rank.
snd_buf.i = rank;
snd_buf.f = 10*rank;
sum.i = 0;
sum.f = 0;

// Modify the send and receive calls to use the derived datatype.
// As written they transfer a single MPI_INT, so only the integer member
// is communicated.
for(i = 0; i < size; i++) 
{
    // Synchronous nonblocking send combined with a blocking receive.
    MPI_Issend(&snd_buf, 1, MPI_INT, right, 17, MPI_COMM_WORLD, &request);
    MPI_Recv(&rcv_buf, 1, MPI_INT, left, 17, MPI_COMM_WORLD, &status);
    // The send buffer may only be overwritten after the send has completed.
    MPI_Wait(&request, &status);
    // Forward what was just received in the next iteration.
    snd_buf = rcv_buf;
    sum.i += rcv_buf.i;
    sum.f += rcv_buf.f;
}

printf ("PE%i:\tSum = %i\t%f\n", rank, sum.i, sum.f);

MPI_Finalize();

In [None]:
!mpirun -np 4 a.x

***
#### Python program

In [None]:
%%file ring.py
from mpi4py import MPI
import numpy as np

# Ring exercise skeleton (Python / mpi4py).
# Every process passes a record (one int32, one float64) to its right
# neighbour `size` times and sums up what it receives from the left.
# The runs of underscores are placeholders for the reader to fill in.

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
status = MPI.Status()

# Ring neighbours; adding `size` keeps `left` non-negative for rank 0.
right = (rank+1) % size
left = (rank-1+size) % size

# Produce new send-receive datatype
# Set MPI datatypes for sending and receiving partial sums
# datatype handles used to describe the structure
field_names = ['i', 'j']
field_dtypes = [np.int32, _______]
# align=True asks NumPy for a C-struct-like layout (padding included), so the
# field offsets and the record size agree with the extent MPI derives for the
# struct datatype.
struct_dtype = np.dtype(list(zip(field_names, field_dtypes)), align=True)
offsets = [struct_dtype.fields[field][1] for field in field_names]
# np.float64 is an 8-byte float and therefore corresponds to MPI.DOUBLE
# (MPI.FLOAT is the 4-byte single-precision type).
mpitype_dict = {np.int32: MPI.INT, np.float64: MPI.DOUBLE}
field_mpitypes = [mpitype_dict[dtype] for dtype in field_dtypes]
send_recv_type = MPI.Datatype.Create_struct([1]*len(field_names), offsets, field_mpitypes)
send_recv_type.__________

# Init: one record per buffer, laid out exactly as described by send_recv_type.
snd_buf = np.zeros(1, dtype=struct_dtype)
snd_buf['i'] = rank
snd_buf['j'] = 10*rank
rcv_buf = np.zeros(1, dtype=struct_dtype)
total = np.zeros(1, dtype=struct_dtype)

# Modify the send and receive calls to use the derived datatype
for i in range(size):
    request = comm.issend(snd_buf, dest=right)
    rcv_buf = comm.recv(source=left)
    request.wait()
    # Copy the values instead of rebinding the name: send and receive buffer
    # must stay two distinct memory areas once buffer-based calls are used.
    snd_buf[...] = rcv_buf
    total['i'] += rcv_buf['i']
    total['j'] += rcv_buf['j']

print("PE%i:\tSum = %i\t%f" % (rank, total['i'][0], total['j'][0]))

In [None]:
!mpirun -np 4 python ring.py

***
#### Fortran program

In [None]:
%%file ring.f90
! Ring exercise skeleton (Fortran).
! Every process sends snd_buf to its right neighbour and receives rcv_buf from
! its left neighbour `size` times, accumulating the received values in `sum`.
! The runs of underscores below are placeholders for the reader to fill in;
! the program does not compile until they are replaced.
program ring
use mpi

integer ( kind = 4 ) error
integer :: rank, size
integer :: i, right, left

! Message layout: one integer and one real. `sequence` fixes the storage order
! of the components.
type buff
    sequence
    integer :: i
    real :: r
end type buff
! snd_buf is handed to a nonblocking send, hence the asynchronous attribute.
type(buff), asynchronous :: snd_buf
type(buff) :: rcv_buf, sum

integer(kind=MPI_ADDRESS_KIND) :: first_var_address, second_var_address
integer :: send_recv_type

! Arguments for MPI_Type_create_struct: one entry per component of buff.
integer :: array_of_block_length(2)
integer :: array_of_types(2)
integer(kind=MPI_ADDRESS_KIND) :: array_of_displacements(2)

integer :: status(MPI_STATUS_SIZE)
integer :: request

call MPI_Init(error)
call MPI_Comm_rank(MPI_COMM_WORLD, rank, error)
call MPI_Comm_size(MPI_COMM_WORLD, size, error)

! Ring neighbours; adding `size` keeps `left` non-negative for rank 0.
right = mod(rank+1,      size)
left  = mod(rank-1+size, size)

! Create derived datatype
! number of elements = ? (how many values of each type the structure holds)
array_of_block_length(1) = ___
array_of_block_length(2) = ___

! datatype handles used to describe the structure (one per component)
array_of_types(1) = ______
array_of_types(2) = ______

! Absolute addresses of both components; the displacement of the second
! component is to be derived from these two values.
call MPI_Get_address(snd_buf%i, first_var_address, error)
call MPI_Get_address(snd_buf%r, second_var_address, error)

array_of_displacements(1) = 0
array_of_displacements(2) = _________
    
! Build the derived datatype, then make it usable for communication.
call MPI_Type_create_struct(______...______, send_recv_type, error)
call MPI_______(send_recv_type, error)

! Init: each process starts by sending its own rank and 10*rank.
snd_buf%i = rank
snd_buf%r = real(10*rank)
sum%i = 0
sum%r = 0

! Modify the send and receive calls to use the derived datatype.
! As written they transfer a single MPI_INTEGER, so only the integer
! component is communicated.
do i = 1, size
    ! Synchronous nonblocking send combined with a blocking receive.
    call MPI_Issend(snd_buf, 1, MPI_INTEGER, right, 17, MPI_COMM_WORLD, request, error)
    call MPI_Recv(rcv_buf, 1, MPI_INTEGER, left, 17, MPI_COMM_WORLD, status, error)
    ! The send buffer may only be overwritten after the send has completed.
    call MPI_Wait(request, status, error)
    ! Extra register synchronisation, called only when the MPI library reports
    ! that the asynchronous attribute does not protect nonblocking buffers.
    if (.NOT.MPI_ASYNC_PROTECTS_NONBLOCKING) call MPI_F_sync_reg(snd_buf)
    ! Forward what was just received in the next iteration.
    snd_buf = rcv_buf
    sum%i = sum%i + rcv_buf%i
    sum%r = sum%r + rcv_buf%r
end do
print *, 'PE', rank, ': Sum =', sum%i, sum%r
call MPI_Finalize(error)
end

In [None]:
!mpif90 ring.f90 && mpirun -np 4 a.out

***
### You can compare with our solution:

***
#### C solution

In [None]:
%%executable  a.x -- -lmpi

// Ring communication with a derived datatype (C solution).
// Every process sends a struct {int, float} to its right neighbour and
// receives one from its left neighbour `size` times. After a full trip around
// the ring each process has accumulated the integer sum of all ranks and the
// floating-point sum of all 10*rank values.
int rank, size;
int i, right, left;

// Message layout; also used for the running sums.
struct buff {
    int i;
    float f;
} snd_buf, rcv_buf, sum;

// Arguments for MPI_Type_create_struct: one entry per struct member.
int          array_of_blocklengths[2];
MPI_Aint     array_of_displacements[2], first_var_address, second_var_address;
MPI_Datatype array_of_types[2], send_recv_type;

MPI_Status status;
MPI_Request request;

MPI_Init(NULL, NULL);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);

// Ring neighbours; adding `size` before the modulo keeps `left` non-negative
// for rank 0.
right = (rank+1)      % size;
left  = (rank-1+size) % size;

// Set MPI datatypes for sending and receiving partial sums
// number of elements = 1 (the struct holds one int and one float)
array_of_blocklengths[0] = 1;
array_of_blocklengths[1] = 1;

// Displacements are measured relative to the first member, so the datatype
// can be applied to any struct buff (snd_buf and rcv_buf alike).
MPI_Get_address(&snd_buf.i, &first_var_address);
MPI_Get_address(&snd_buf.f, &second_var_address);

array_of_displacements[0] = (MPI_Aint) 0;
array_of_displacements[1] = MPI_Aint_diff(second_var_address, first_var_address);

// datatype handles used to describe the structure
array_of_types[0] = MPI_INT;
array_of_types[1] = MPI_FLOAT;

MPI_Type_create_struct(2, array_of_blocklengths, array_of_displacements, array_of_types, &send_recv_type);
// A derived datatype must be committed before it is used in communication.
MPI_Type_commit(&send_recv_type);

// Init: each process starts by sending its own rank and 10*rank.
snd_buf.i = rank;
snd_buf.f = 10*rank;
sum.i = 0;
sum.f = 0;

for (i = 0; i < size; i++)
{
    // Synchronous nonblocking send plus blocking receive: this cannot
    // deadlock, even though every process sends first.
    MPI_Issend(&snd_buf, 1, send_recv_type, right, 17, MPI_COMM_WORLD, &request);
    MPI_Recv(&rcv_buf, 1, send_recv_type, left,  17, MPI_COMM_WORLD, &status);
    // The send buffer may only be overwritten after the send has completed.
    MPI_Wait(&request, &status);
    // Forward what was just received in the next iteration.
    snd_buf = rcv_buf;
    sum.i += rcv_buf.i;
    sum.f += rcv_buf.f;
}

printf ("PE%i:\tSum = %i\t%f\n", rank, sum.i, sum.f);

// Release the derived datatype before shutting MPI down.
MPI_Type_free(&send_recv_type);
MPI_Finalize();

In [None]:
!mpirun -np 4 a.x

***
#### Python solution

In [None]:
%%file ring.py
from mpi4py import MPI
import numpy as np

# Ring communication with a derived datatype (Python / mpi4py solution).
# Every process passes a record (one int32, one float64) to its right
# neighbour `size` times and sums up what it receives from the left. After a
# full trip around the ring each process holds the integer sum of all ranks
# and the floating-point sum of all 10*rank values.

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()

# Ring neighbours; adding `size` keeps `left` non-negative for rank 0.
right = (rank+1) % size
left = (rank-1+size) % size

# Produce new send-receive datatype
# Set MPI datatypes for sending and receiving partial sums
# datatype handles used to describe the structure
field_names = ['i', 'j']
field_dtypes = [np.int32, np.float64]
# align=True asks NumPy for a C-struct-like layout (padding included), so the
# field offsets and the record size agree with the extent MPI derives for the
# struct datatype.
struct_dtype = np.dtype(list(zip(field_names, field_dtypes)), align=True)
offsets = [struct_dtype.fields[field][1] for field in field_names]
# np.float64 is an 8-byte float and therefore corresponds to MPI.DOUBLE
# (MPI.FLOAT is the 4-byte single-precision type).
mpitype_dict = {np.int32: MPI.INT, np.float64: MPI.DOUBLE}
field_mpitypes = [mpitype_dict[dtype] for dtype in field_dtypes]
send_recv_type = MPI.Datatype.Create_struct([1]*len(field_names), offsets, field_mpitypes)
# A derived datatype must be committed before it is used in communication.
send_recv_type.Commit()

# Init: one record per buffer, laid out exactly as described by send_recv_type.
snd_buf = np.zeros(1, dtype=struct_dtype)
snd_buf['i'] = rank
snd_buf['j'] = 10*rank
rcv_buf = np.zeros(1, dtype=struct_dtype)
total = np.zeros(1, dtype=struct_dtype)

for _ in range(size):
    # Buffer-based (upper-case) calls with an explicit count of one record.
    # Synchronous nonblocking send plus blocking receive cannot deadlock.
    request = comm.Issend([snd_buf, 1, send_recv_type], dest=right)
    comm.Recv([rcv_buf, 1, send_recv_type], source=left)
    # The send buffer may only be overwritten after the send has completed.
    request.Wait()
    # Copy the values instead of rebinding the name: send and receive buffer
    # must stay two distinct memory areas for the next Issend/Recv pair.
    snd_buf[...] = rcv_buf
    total['i'] += rcv_buf['i']
    total['j'] += rcv_buf['j']

print("PE%i:\tSum = %i\t%f" % (rank, total['i'][0], total['j'][0]))

# Release the derived datatype once it is no longer needed.
send_recv_type.Free()

In [None]:
!mpirun -np 4 python ring.py

***
#### Fortran solution

In [None]:
%%file ring.f90
! Ring communication with a derived datatype (Fortran solution).
! Every process sends a structure (one integer, one real) to its right
! neighbour and receives one from its left neighbour `size` times. After a
! full trip around the ring each process has accumulated the integer sum of
! all ranks and the real sum of all 10*rank values.
program ring
    use mpi
    implicit none

    ! Message layout; `sequence` fixes the storage order of the components.
    type buff
        sequence
        integer :: i
        real :: r
    end type buff

    integer :: error
    integer :: rank, size
    integer :: i, right, left

    ! snd_buf is handed to a nonblocking send, hence the asynchronous attribute.
    type(buff), asynchronous :: snd_buf
    type(buff) :: rcv_buf, sum

    integer(kind=MPI_ADDRESS_KIND) :: first_var_address, second_var_address
    integer :: send_recv_type

    ! Arguments for MPI_Type_create_struct: one entry per component of buff.
    integer :: array_of_block_length(2)
    integer :: array_of_types(2)
    integer(kind=MPI_ADDRESS_KIND) :: array_of_displacements(2)

    integer :: status(MPI_STATUS_SIZE)
    integer :: request

    call MPI_Init(error)
    call MPI_Comm_rank(MPI_COMM_WORLD, rank, error)
    call MPI_Comm_size(MPI_COMM_WORLD, size, error)

    ! Ring neighbours; adding `size` keeps `left` non-negative for rank 0.
    right = mod(rank + 1,        size)
    left  = mod(rank - 1 + size, size)

    ! Create derived datatype
    ! number of elements = 1 (the structure holds one integer and one real)
    array_of_block_length(1) = 1
    array_of_block_length(2) = 1

    ! datatype handles used to describe the structure
    array_of_types(1) = MPI_INTEGER
    array_of_types(2) = MPI_REAL

    ! Displacements are measured relative to the first component, so the
    ! datatype can be applied to any variable of type buff.
    call MPI_Get_address(snd_buf%i, first_var_address, error)
    call MPI_Get_address(snd_buf%r, second_var_address, error)

    array_of_displacements(1) = 0
    array_of_displacements(2) = MPI_Aint_diff(second_var_address, first_var_address)

    call MPI_Type_create_struct(2, array_of_block_length, array_of_displacements, &
                                array_of_types, send_recv_type, error)
    ! A derived datatype must be committed before it is used in communication.
    call MPI_Type_commit(send_recv_type, error)

    ! Init: each process starts by sending its own rank and 10*rank.
    snd_buf%i = rank
    snd_buf%r = real(10*rank)
    sum%i = 0
    sum%r = 0.0

    do i = 1, size
        ! Synchronous nonblocking send plus blocking receive: this cannot
        ! deadlock, even though every process sends first.
        call MPI_Issend(snd_buf, 1, send_recv_type, right, 17, MPI_COMM_WORLD, request, error)
        call MPI_Recv(rcv_buf, 1, send_recv_type, left, 17, MPI_COMM_WORLD, status, error)
        ! The send buffer may only be overwritten after the send has completed.
        call MPI_Wait(request, status, error)
        ! Extra register synchronisation, called only when the MPI library
        ! reports that the asynchronous attribute does not protect
        ! nonblocking buffers.
        if (.not. MPI_ASYNC_PROTECTS_NONBLOCKING) call MPI_F_sync_reg(snd_buf)
        ! Forward what was just received in the next iteration.
        snd_buf = rcv_buf
        sum%i = sum%i + rcv_buf%i
        sum%r = sum%r + rcv_buf%r
    end do

    print *, 'PE', rank, ': Sum =', sum%i, sum%r

    ! Release the derived datatype before shutting MPI down.
    call MPI_Type_free(send_recv_type, error)
    call MPI_Finalize(error)
end program ring

In [None]:
!mpif90 ring.f90 && mpirun -np 4 a.out