## One-sided communication in a ring

In this exercise the goal is to replace nonblocking communication with one-sided communication. 

You need to create a window for the receive buffer and replace the send/receive pair with a call to MPI_Put on the process that previously called MPI_Issend. Don't forget to synchronize with MPI_Win_fence before and after the MPI_Put. 

1. Fill out the skeleton to create all `rcv_buf` as windows in their processes. Don't forget to free the window when you are done. 

2. Replace the Issend/Recv/Wait sequence with a Win_fence/Put/Win_fence sequence. 

* There are two ways to replace nonblocking communication with one-sided communication: MPI_Put and MPI_Get. Do you have any idea why we would prefer MPI_Put over MPI_Get? Which one do you prefer, and why?

In [None]:
?MPI::MPI_Win_create

In [None]:
?MPI::MPI_Put

In [None]:
?MPI::MPI_Get

In [None]:
?MPI::MPI_Win_fence

***
#### C skeleton

In [None]:
%%file ring.c
#include <stdio.h>
#include <mpi.h>

/*
 * Exercise skeleton: pass a value around a ring of MPI processes.
 *
 * Each process starts with its own rank in snd_buf, sends it to its right
 * neighbour and receives from its left neighbour, `size` times in total,
 * adding every received value to `sum`. The blanks (_____) mark the parts
 * that must be filled in to create the RMA window; the Issend/Recv/Wait
 * sequence in the loop is what the exercise asks you to replace.
 */
int main()
{
    int rank, size;
    int snd_buf, rcv_buf;
    int right, left;
    int sum, i;
    /* Only needed by the point-to-point calls in the loop below. */
    MPI_Status  status;
    MPI_Request request;

    /* TODO: fill in the type of the window handle. */
    ___________ win;

    MPI_Init(NULL, NULL);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* Ring neighbours; "+size" keeps the left index non-negative for rank 0. */
    right = (rank+1)      % size;
    left  = (rank-1+size) % size;

    /* Create the window. TODO: fill in the arguments that precede &win. */
    MPI_Win_create(____________________________________, &win);

    sum = 0;
    snd_buf = rank;

    /* One step per process in the ring. */
    for(i = 0; i < size; i++) 
    {
        /* Synchronous nonblocking send to the right, blocking receive from
           the left, then wait for the send to complete. */
        MPI_Issend(&snd_buf, 1, MPI_INT, right, 17, MPI_COMM_WORLD, &request);
        MPI_Recv  (&rcv_buf, 1, MPI_INT, left,  17, MPI_COMM_WORLD, &status);
        MPI_Wait(&request, &status);

        /* Forward the received value in the next step and accumulate it. */
        snd_buf = rcv_buf;
        sum += rcv_buf;
    }

    printf("PE%i:\tSum = %i\n", rank, sum);

    /* NOTE: the window is not freed anywhere in this skeleton. */
    MPI_Finalize();
}

In [None]:
!mpicc ring.c && mpirun -np 4 --allow-run-as-root a.out

***
#### Python skeleton

In [None]:
%%file ring.py
# Exercise skeleton: pass a value around a ring of MPI processes.
# Each process sends snd_buf to its right neighbour and receives from its
# left neighbour, `size` times, adding every received value to `sum`.
# The blank (_____) marks the window-creation arguments to be filled in.

# Configure mpi4py before importing MPI: MPI is initialised explicitly below.
from mpi4py import rc
rc.initialize = False
rc.thread_level = 'single'
from mpi4py import MPI
import numpy as np
MPI.Init()

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
status = MPI.Status()

# Ring neighbours; "+size" keeps the left index non-negative for rank 0.
right = (rank+1) % size
left = (rank-1+size) % size
sum = 0
# One-element double-precision buffers; the send buffer starts as our rank.
snd_buf = np.zeros(1, dtype='d')
snd_buf[0] = rank
rcv_buf = np.zeros(1, dtype='d')

# Create the window
# TODO: fill in the arguments.
win = MPI.Win.Create(__________)
    
# One step per process in the ring.
for i in range (0,size):
    # Nonblocking synchronous send to the right, blocking receive from the
    # left, then wait for the send to complete.
    request = comm.issend(snd_buf, dest=right)
    rcv_buf = comm.recv(source=left)
    request.wait()
    
    # NOTE: this rebinds the name snd_buf to the object rcv_buf refers to;
    # no values are copied. Keep that in mind once rcv_buf is window memory.
    snd_buf = rcv_buf
    sum += rcv_buf[0]

print("PE%i:\tSum = %i" % (rank, sum))

# NOTE: the window is not freed anywhere in this skeleton.
MPI.Finalize()

In [None]:
!mpirun -np 4 --allow-run-as-root python ring.py

***
#### Fortran skeleton
Additional hints:

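1. In Fortran, arguments are passed by reference, not by value, so the window size and the displacement unit must be stored in variables of the correct type before they are passed to MPI_Win_create. 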
~~~Fortran
integer :: disp_unit
integer(KIND=MPI_ADDRESS_KIND) :: rcv_buf_size, lb, extent
! get the extent of the integer
call MPI_Type_get_extent(MPI_INTEGER, lb, extent, error)
...
disp_unit = extent
! multiplied by the number of elements
rcv_buf_size = disp_unit * 1
call MPI_Win_create(rcv_buf, rcv_buf_size, disp_unit, MPI_INFO_NULL, ..., error)
~~~

2. Both buffers must be declared `asynchronous`. Also add statements that protect the receive buffer before the first fence and after the second fence, and that protect the send buffer after the second fence. 
~~~Fortran
if (.NOT.MPI_ASYNC_PROTECTS_NONBLOCKING) call MPI_F_sync_reg(snd_buf)
~~~

In [None]:
%%file ring.f90
! Exercise skeleton: pass a value around a ring of MPI processes.
! Each process sends snd_buf to its right neighbour and receives from its
! left neighbour, `size` times, adding every received value to `sum`.
! The blanks (_____) mark the declarations and window-creation arguments
! that must be filled in; the Issend/Recv/Wait sequence in the loop is what
! the exercise asks you to replace.
program ring
use mpi

integer ( kind = 4 ) error
integer :: rank, size
integer :: right, left
integer :: i, sum
integer, asynchronous :: snd_buf
integer :: rcv_buf
! status and request are only needed by the point-to-point calls below
integer :: status(MPI_STATUS_SIZE)
integer :: request
! TODO: fill in the types of the window handle and its size parameters
_____________ :: win 
_____________ :: disp_unit
_____________ :: extent, lb
_____________ :: rcv_buf_size, target_disp

call MPI_Init(error)
call MPI_Comm_rank(MPI_COMM_WORLD, rank, error)
call MPI_Comm_size(MPI_COMM_WORLD, size, error)

! ring neighbours; "+size" keeps the left index non-negative for rank 0
right = mod(rank+1,      size)
left  = mod(rank-1+size, size)

! create the window
! TODO: derive disp_unit and rcv_buf_size from the extent of MPI_INTEGER
call MPI_Type_get_extent(MPI_INTEGER, lb, extent, error)
disp_unit = __________________
rcv_buf_size = _______________
call MPI_Win_create( _________________ , win, error) 

sum = 0
snd_buf = rank

! one step per process in the ring
do i = 1, size
    ! synchronous nonblocking send to the right, blocking receive from the
    ! left, then wait for the send to complete
    call MPI_Issend(snd_buf, 1, MPI_INTEGER, right, 17, MPI_COMM_WORLD, request, error)
    call MPI_Recv(rcv_buf, 1, MPI_INTEGER, left, 17, MPI_COMM_WORLD, status, error)
    call MPI_Wait(request, status, error)
    ! only called when MPI_ASYNC_PROTECTS_NONBLOCKING is .false.
    if (.NOT.MPI_ASYNC_PROTECTS_NONBLOCKING) call MPI_F_sync_reg(snd_buf)
    ! forward the received value in the next step and accumulate it
    snd_buf = rcv_buf
    sum = sum + rcv_buf
end do

print *, 'PE', rank, ': Sum =', sum
call MPI_Win_free(win, error)
call MPI_Finalize(error)
end

In [None]:
!mpif90 ring.f90 && mpirun -np 4 --allow-run-as-root a.out

***
### You can compare with our solution:

***
#### C solution

In [None]:
%%file ring.c
#include <stdio.h>
#include <mpi.h>

/*
 * Ring exercise, one-sided solution.
 *
 * Every process exposes rcv_buf as an RMA window. In each of `size` steps
 * it puts snd_buf into the window of its right neighbour, enclosed by two
 * fences, then forwards the value that arrived in its own window and adds
 * it to the running sum.
 */
int main(void)
{
    int rank, size;
    int right, left;
    int snd_buf, rcv_buf;
    int sum = 0;
    int step;
    MPI_Win win;

    /* Fence assertions used to open and to close each access epoch. */
    const int open_epoch  = MPI_MODE_NOSTORE | MPI_MODE_NOPRECEDE;
    const int close_epoch = MPI_MODE_NOSTORE | MPI_MODE_NOPUT | MPI_MODE_NOSUCCEED;

    MPI_Init(NULL, NULL);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* Ring neighbours; adding size keeps the left index non-negative. */
    right = (rank + 1) % size;
    left  = (rank + size - 1) % size;

    /* Expose rcv_buf (one int, displacement unit = sizeof(int)) to all ranks. */
    MPI_Win_create(&rcv_buf, (MPI_Aint) sizeof(int), (int) sizeof(int),
                   MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    snd_buf = rank;

    for (step = 0; step < size; ++step)
    {
        MPI_Win_fence(open_epoch, win);
        /* Write our value at displacement 0 of the right neighbour's window. */
        MPI_Put(&snd_buf, 1, MPI_INT, right, (MPI_Aint) 0, 1, MPI_INT, win);
        MPI_Win_fence(close_epoch, win);

        /* rcv_buf now holds the left neighbour's value: accumulate, forward. */
        sum += rcv_buf;
        snd_buf = rcv_buf;
    }

    printf("PE%i:\tSum = %i\n", rank, sum);

    MPI_Win_free(&win);
    MPI_Finalize();

    return 0;
}

In [None]:
!mpicc ring.c && mpirun -np 4 --allow-run-as-root a.out

***
#### Python solution

In [None]:
%%file ring.py
# Ring exercise, one-sided solution.
# Every process exposes rcv_buf as an RMA window. In each of `size` steps it
# puts snd_buf into the window of its right neighbour, enclosed by two
# fences, then forwards the received value and adds it to the running total.

# Configure mpi4py before importing MPI: MPI is initialised explicitly below.
from mpi4py import rc
rc.initialize = False
rc.thread_level = 'single'
from mpi4py import MPI
import numpy as np
MPI.Init()

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()

# Ring neighbours; "+size" keeps the left index non-negative for rank 0.
right = (rank+1) % size
left = (rank-1+size) % size
total = 0
# One-element double-precision buffers; the send buffer starts as our rank.
snd_buf = np.zeros(1, dtype='d')
snd_buf[0] = rank
rcv_buf = np.zeros(1, dtype='d')

# Create the window over rcv_buf; the displacement unit is one element.
win = MPI.Win.Create(rcv_buf, rcv_buf.itemsize, MPI.INFO_NULL, comm)

for i in range(size):
    win.Fence()
    # Write snd_buf to displacement 0 of the right neighbour's window.
    win.Put(snd_buf, right)
    win.Fence()

    # Copy the VALUE into the send buffer. Rebinding the name
    # (snd_buf = rcv_buf) would make the next Put read its origin data from
    # the window memory while the left neighbour is writing to it.
    snd_buf[0] = rcv_buf[0]
    total += rcv_buf[0]

print("PE%i:\tSum = %i" % (rank, total))

win.Free()
MPI.Finalize()

In [None]:
!mpirun -np 4 --allow-run-as-root python ring.py

***
#### Fortran solution

In [None]:
%%file ring.f90
! Ring exercise, one-sided solution.
! Every process exposes rcv_buf as an RMA window. In each of `size` steps it
! puts snd_buf into the window of its right neighbour, enclosed by two
! fences, then forwards the received value and adds it to the running sum.
program ring
use mpi
implicit none

! MPI expects default INTEGER for the error code, so no explicit kind here
integer :: error
integer :: rank, size
! left is computed for completeness; this Put-based variant only uses right
integer :: right, left
integer :: i, sum

! both buffers must be asynchronous: they are accessed by MPI between the
! fence calls without appearing in those calls' argument lists
integer, asynchronous :: snd_buf, rcv_buf
integer :: win
integer :: disp_unit
integer(kind=MPI_ADDRESS_KIND) :: extent, lb
integer(kind=MPI_ADDRESS_KIND) :: rcv_buf_size, target_disp

call MPI_Init(error)
call MPI_Comm_rank(MPI_COMM_WORLD, rank, error)
call MPI_Comm_size(MPI_COMM_WORLD, size, error)

! ring neighbours; "+size" keeps the left index non-negative for rank 0
right = mod(rank+1,      size)
left  = mod(rank-1+size, size)

! create the window: one MPI_INTEGER, displacement unit = its extent
call MPI_Type_get_extent(MPI_INTEGER, lb, extent, error)
disp_unit = int(extent)        ! explicit narrowing to default integer
rcv_buf_size = 1 * extent      ! computed in address-sized arithmetic
call MPI_Win_create(rcv_buf, rcv_buf_size, disp_unit, MPI_INFO_NULL, MPI_COMM_WORLD, win, error)

sum = 0
snd_buf = rank
! the value always lands at the start of the target window
target_disp = 0

! The MPI_F_sync_reg calls keep the compiler from moving accesses to the
! buffers across the fences when ASYNCHRONOUS alone does not guarantee it:
! rcv_buf before the first and after the second fence, snd_buf after the
! second fence (before it is overwritten).
do i = 1, size
    if (.not. MPI_ASYNC_PROTECTS_NONBLOCKING) call MPI_F_sync_reg(rcv_buf)

    call MPI_Win_fence(MPI_MODE_NOSTORE + MPI_MODE_NOPRECEDE, win, error)
    call MPI_Put(snd_buf, 1, MPI_INTEGER, right, target_disp, 1, MPI_INTEGER, win, error)
    call MPI_Win_fence(MPI_MODE_NOSTORE + MPI_MODE_NOPUT + MPI_MODE_NOSUCCEED, win, error)

    if (.not. MPI_ASYNC_PROTECTS_NONBLOCKING) call MPI_F_sync_reg(rcv_buf)
    if (.not. MPI_ASYNC_PROTECTS_NONBLOCKING) call MPI_F_sync_reg(snd_buf)

    ! forward the received value in the next step and accumulate it
    snd_buf = rcv_buf
    sum = sum + rcv_buf
end do

print *, 'PE', rank, ': Sum =', sum

call MPI_Win_free(win, error)

call MPI_Finalize(error)
end program ring

In [None]:
!mpif90 ring.f90 && mpirun -np 4 --allow-run-as-root a.out