diff --git a/cscs-checks/mch/src/Makefile b/cscs-checks/mch/src/Makefile index 5de7f6f0be..e8bd695e03 100644 --- a/cscs-checks/mch/src/Makefile +++ b/cscs-checks/mch/src/Makefile @@ -6,7 +6,7 @@ LD = $(FC) OBJS = compute_cuda.o openacc_cuda_mpi_cppstd.o std_cpp_call.o # OBJ2 = $(subst _,$(PE_ENV)_,$(OBJ)) -LIB = +LIB = -lcuda -lcudart -lstdc++ .SUFFIXES: .o .cu .cpp .F90 diff --git a/cscs-checks/mch/src/compute_cuda.cu b/cscs-checks/mch/src/compute_cuda.cu index bd77a2d23c..74a58a7fe9 100644 --- a/cscs-checks/mch/src/compute_cuda.cu +++ b/cscs-checks/mch/src/compute_cuda.cu @@ -1,5 +1,6 @@ #include #include "cuda.h" +#include "mpi.h" #define cudaCheckErrors(msg) \ do { \ @@ -35,10 +36,50 @@ void cuda_kernel_no_copy(float* a, float* b, int n) cudaCheckErrors("cuda error"); }
+// Print one greeting line from the calling process: processor name, rank and
+// size as seen on a private duplicate of the given communicator.
+//
+// comm: Fortran communicator handle; converted with MPI_Comm_f2c.
+//
+// MPI is initialised here only if nobody has done so yet, because calling
+// MPI_Init a second time is erroneous. The duplicate is released before
+// returning so repeated calls do not leak communicators.
+void mpi_hello_world(int comm)
+{
+    int mpi_ready = 0;
+    MPI_Initialized(&mpi_ready);
+    if (!mpi_ready) {
+        MPI_Init(NULL, NULL);
+    }
+
+    MPI_Comm comm2;
+    MPI_Comm_dup(MPI_Comm_f2c(comm), &comm2);
+
+    // Get the number of processes in the communicator
+    int world_size;
+    MPI_Comm_size(comm2, &world_size);
+
+    // Get the rank of the calling process
+    int world_rank;
+    MPI_Comm_rank(comm2, &world_rank);
+
+    // Get the name of the processor
+    char processor_name[MPI_MAX_PROCESSOR_NAME];
+    int name_len;
+    MPI_Get_processor_name(processor_name, &name_len);
+
+    // Print off a hello world message
+    printf("Hallo world from processor %s, rank %d out of %d processors\n",
+           processor_name, world_rank, world_size);
+
+    // Release the duplicate created above
+    MPI_Comm_free(&comm2);
+}
 void cuda_kernel_with_copy(float* a, float* b, int n) { const int THREADS_PER_BLOCK = 1; const int NUMBER_OF_BLOCKS = 10; + float* d_a; float* d_b; diff --git a/cscs-checks/mch/src/openacc_cuda_mpi_cppstd.F90 b/cscs-checks/mch/src/openacc_cuda_mpi_cppstd.F90 index 4568b9f4f5..2e81680440 100644 --- a/cscs-checks/mch/src/openacc_cuda_mpi_cppstd.F90 +++ b/cscs-checks/mch/src/openacc_cuda_mpi_cppstd.F90 @@ -35,7 +35,6 @@ program openacc_cuda_mpi_cppstd !$acc end host_data !$acc end data - if(mpi_rank == 0) then ! 
Allocate and initialize arrays on the GPU @@ -75,9 +74,9 @@ program openacc_cuda_mpi_cppstd if (sum(f1) /= EXPECTED_CUDA_SUM) then write (*,*) "Result : FAIL" write (*,*) "Expected value sum(f1): ", EXPECTED_CUDA_SUM, "actual value:", sum(f1) - else if (sum(f3) /= EXPECTED_CUDA_SUM) then + else if (sum(f3) /= EXPECTED_CPP_STD_SUM) then write (*,*) "Result : FAIL" - write (*,*) "Expected value sum(f3): ", EXPECTED_CUDA_SUM, "actual value:", sum(f3) + write (*,*) "Expected value sum(f3): ", EXPECTED_CPP_STD_SUM, "actual value:", sum(f3) else if (data_sum(1) /= ref_val) then write (*,*) "Result : FAIL" write (*,*) "Expected value data_sum: ", ref_val, "actual value:", data_sum(1) @@ -93,7 +92,6 @@ program openacc_cuda_mpi_cppstd deallocate(f1) deallocate(f2) deallocate(f3) - write (*,*) "Result: OK" end if call MPI_Finalize(ierr);