## Gather

The provided skeleton first gathers the data into an array on process 0 using point-to-point communication; process 0 then prints the whole array. 

In the skeleton below, replace the point-to-point communication with a single call to MPI_Gather; this means removing the whole gathering loop. 

***
#### C skeleton

In [None]:
#include <stdio.h>
#include <stdlib.h>

#include <mpi.h>

In [None]:
%%executable  a.x -- -lmpi

int n; double result;
double *result_array;
int my_rank, num_procs, rank;

MPI_Init(NULL, NULL); 
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &num_procs);

// doing some application work in each process, e.g.:
result = 100.0 + 1.0 * my_rank;
printf("I am process %i out of %i, result = %f \n", my_rank, num_procs, result);

if (my_rank == 0) {
    // memory allocation needed only on root process
    result_array = (double *)malloc(sizeof(double) * num_procs);
}

// start of gathering
if (my_rank != 0) { // sending some results from all processes (except 0) to process 0
    MPI_Send(&result, 1, MPI_DOUBLE, 0, 99, MPI_COMM_WORLD);
} else { // only in "root" process 0
    result_array[0] = result; // process 0's own result
    // receiving all these messages
    for (rank=1; rank<num_procs; rank++)
    { // result of processes 1, 2, ...
        MPI_Recv(&result_array[rank], 1, MPI_DOUBLE, rank, 99, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
}
// end of gathering

if (my_rank == 0) {
    for (rank=0; rank<num_procs; rank++)
        printf("I'm proc 0: result of process %i is %f \n", rank, result_array[rank]); 
}

MPI_Finalize();

Now compile it and run it with 4 processes. 

In [None]:
!mpirun -np 4 a.x

***
#### Python skeleton

In [None]:
%%file gather.py
from mpi4py import MPI

# Gather exercise (skeleton): each process computes one value; process 0
# collects them with point-to-point messages and prints the whole list.
# Exercise: replace everything between the "gathering" markers with one
# call to comm.gather.
comm = MPI.COMM_WORLD
my_rank = comm.Get_rank()
num_procs = comm.Get_size()

# doing some application work in each process, e.g.:
result = 100.0 + 1.0 * my_rank
print("I am process %i out of %i, result = %f" % (my_rank, num_procs, result))

is_root = (my_rank == 0)

if is_root:
    # memory allocation needed only on root process
    result_array = [None for _ in range(num_procs)]

# start of gathering
if is_root:
    # only in "root" process 0: store its own result first ...
    result_array[0] = result
    # ... then receive one message from each of the processes 1, 2, ...
    for source_rank in range(1, num_procs):
        result_array[source_rank] = comm.recv(source=source_rank)
else:
    # all processes except 0 send their result to process 0
    comm.send(result, dest=0)
# end of gathering

if is_root:
    for rank, value in enumerate(result_array):
        print("I am proc 0: result of process %i is %f" % (rank, value))

Now run it with 4 processes (Python needs no separate compile step). 

In [None]:
!mpirun -np 4 python gather.py

***
#### Fortran skeleton

In [None]:
%%file gather.f90
!> Gather exercise (skeleton): every process computes one value; process 0
!> collects all of them with point-to-point messages and prints the array.
!> Exercise: replace everything between the "gathering" markers with a single
!> call to MPI_Gather.
program gather
use mpi

implicit none

! MPI error codes are default integers in the Fortran bindings
integer :: error
! kept as "double precision" so that it matches MPI_DOUBLE_PRECISION exactly
double precision :: result
double precision, allocatable, dimension (:) :: result_array
integer :: my_rank, num_procs, rank

call MPI_Init(error)
call MPI_Comm_rank(MPI_COMM_WORLD, my_rank, error)
call MPI_Comm_size(MPI_COMM_WORLD, num_procs, error)

! doing some application work in each process, e.g.:
result = 100.0d0 + dble(my_rank)
print *, 'I am process', my_rank, 'out of', num_procs, 'result = ', result

if (my_rank == 0) then
    ! memory allocation needed only on root process
    allocate(result_array(0:num_procs-1))
else
    ! zero-size placeholder: an unallocated allocatable must not be passed as
    ! an actual argument (e.g. as the receive buffer of MPI_Gather)
    allocate(result_array(0))
endif

! start of gathering
if (my_rank /= 0) then ! sending some results from all processes (except 0) to process 0
    call MPI_Send(result, 1, MPI_DOUBLE_PRECISION, 0, 99, MPI_COMM_WORLD, error)
else ! only in "root" process 0
    result_array(0) = result ! process 0's own result
    ! receiving all these messages
    do rank = 1, num_procs-1 ! result of processes 1, 2, ...
        call MPI_Recv(result_array(rank), 1, MPI_DOUBLE_PRECISION, rank, 99, MPI_COMM_WORLD, MPI_STATUS_IGNORE, error)
    end do
endif
! end of gathering

if (my_rank == 0) then
    do rank = 0, num_procs-1
        print *, "I'm proc 0: result of process", rank, 'is', result_array(rank)
    end do
endif

if (allocated(result_array)) deallocate(result_array)

call MPI_Finalize(error)
end program gather

Now compile it and run it with 4 processes. 

In [None]:
!mpif90 gather.f90 && mpirun -np 4 a.out

***

### You can compare with our solution:

***
#### C solution

In [None]:
%%executable  a.x -- -lmpi

int n; double result;
double *result_array;
int my_rank, num_procs, rank;

MPI_Init(NULL, NULL); 
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &num_procs);

// doing some application work in each process, e.g.:
result = 100.0 + 1.0 * my_rank;
printf("I am process %i out of %i, result = %f \n", my_rank, num_procs, result);

if (my_rank == 0) {
    // memory allocation needed only on root process
    result_array = (double *)malloc(sizeof(double) * num_procs);
}

// start of gathering
MPI_Gather(&result, 1, MPI_DOUBLE, result_array, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
// end of gathering

if (my_rank == 0) {
    for (rank=0; rank<num_procs; rank++)
        printf("I'm proc 0: result of process %i is %f \n", rank, result_array[rank]); 
}

MPI_Finalize();

In [None]:
!mpirun -np 4 a.x

***
#### Python solution

In [None]:
%%file gather.py
from mpi4py import MPI

# Gather exercise (solution): each process computes one value and a single
# comm.gather call collects all of them on process 0, which prints the list.
comm = MPI.COMM_WORLD
my_rank = comm.Get_rank()
num_procs = comm.Get_size()

# doing some application work in each process, e.g.:
result = 100.0 + 1.0 * my_rank
print("I am process %i out of %i, result = %f" % (my_rank, num_procs, result))

# No preallocation is needed: the lowercase (pickle-based) comm.gather builds
# and returns a new list on the root process and returns None everywhere else.

# start of gathering
result_array = comm.gather(result, root=0)
# end of gathering

if my_rank == 0:
    for rank in range(0,num_procs):
        print("I am proc 0: result of process %i is %f" % (rank, result_array[rank]))

In [None]:
!mpirun -np 4 python gather.py

***
#### Fortran solution

In [None]:
%%file gather.f90
!> Gather exercise (solution): every process computes one value and a single
!> MPI_Gather call collects all of them, in rank order, on process 0, which
!> then prints the array.
program gather
use mpi

implicit none

! MPI error codes are default integers in the Fortran bindings
integer :: error
! kept as "double precision" so that it matches MPI_DOUBLE_PRECISION exactly
double precision :: result
double precision, allocatable, dimension (:) :: result_array
integer :: my_rank, num_procs, rank

call MPI_Init(error)
call MPI_Comm_rank(MPI_COMM_WORLD, my_rank, error)
call MPI_Comm_size(MPI_COMM_WORLD, num_procs, error)

! doing some application work in each process, e.g.:
result = 100.0d0 + dble(my_rank)
print *, 'I am process', my_rank, 'out of', num_procs, 'result = ', result

if (my_rank == 0) then
    ! memory allocation needed only on root process
    allocate(result_array(0:num_procs-1))
else
    ! MPI_Gather ignores the receive buffer on non-root ranks, but passing an
    ! unallocated allocatable as an actual argument is not allowed in Fortran,
    ! so a zero-size placeholder is allocated here
    allocate(result_array(0))
endif

! start of gathering
! every rank contributes 1 value; the receive count (1) is per sending rank
call MPI_Gather(result, 1, MPI_DOUBLE_PRECISION, &
                result_array, 1, MPI_DOUBLE_PRECISION, &
                0, MPI_COMM_WORLD, error)
! end of gathering

if (my_rank == 0) then
    do rank = 0, num_procs-1
        print *, "I'm proc 0: result of process", rank, 'is', result_array(rank)
    end do
endif

if (allocated(result_array)) deallocate(result_array)

call MPI_Finalize(error)
end program gather

In [None]:
!mpif90 gather.f90 && mpirun -np 4 a.out