// Forked from mrnorman/miniWeather
// File: simple_yakl_tests.cpp (67 lines, 53 loc, 2.04 KB)
//////////////////////////////////////////////////////////////////////////////////////////
// simple yakl tests
// Author: Mark Petersen
//////////////////////////////////////////////////////////////////////////////////////////
#include <stdlib.h>
#include <stdio.h>
#include <mpi.h>
#include <iostream>
#include "const.h" // This contains the yakl definitions
// Rank-3 YAKL arrays of `real`: one placed in device memory, one in host memory.
using real3d     = yakl::Array<real, 3, yakl::memDevice>;
using real3dHost = yakl::Array<real, 3, yakl::memHost>;
// Round-trip demonstration of moving a YAKL array between device and host:
//   1. fill a 3x3x3 array on the device with a parallel_for,
//   2. deep-copy it to a host array and print it,
//   3. modify the host copy in an ordinary CPU loop,
//   4. deep-copy it back to the device and print both arrays.
// Returns 0 (implicit) on completion.
int main() {
  yakl::init();
  // The arrays live in their own scope so that they are destroyed (and their
  // memory released) before yakl::finalize() shuts the runtime down.
  {
    int nx = 3;
    int ny = 3;
    int nz = 3;
    real3d     workArray    ( "workArray"     , nx,ny,nz ); // work array on the device (gpu)
    // Distinct label so the host array can be told apart from the device array in diagnostics
    real3dHost workArray_cpu( "workArray_cpu" , nx,ny,nz ); // work array on the host (cpu)

    // initialize array on device
    // Note that the first argument is optional; it may be a string or
    //   parallel_for( YAKL_AUTO_LABEL(), ...
    // Then the loops are labeled in the nvidia diagnostics output here with:
    //   srun -n 1 -G4 nsys nvprof ./simple_yakl_tests // on perlmutter
    //   jsrun -n 1 -a 1 -c 1 -g 1 nvprof ./simple_yakl_tests // on summit
    parallel_for( "init array 1", SimpleBounds<3>(nx,ny,nz) , YAKL_LAMBDA (int i, int j, int k) {
      // Encode the indices in the value (i -> hundreds, j -> tens, k -> ones)
      // so each entry is recognisable in the printed output.
      workArray(i,j,k) = i*100.0 + j*10.0 + k;
    });
    // Wait for the kernel to finish before the host reads the result
    yakl::fence();

    std::cout << "workArray before copy" << std::endl;
    std::cout << workArray << std::endl;
    // NOTE: nothing in this program has written to workArray_cpu yet, so this
    // print shows whatever the fresh allocation happens to contain.
    std::cout << "workArray_cpu before copy" << std::endl;
    std::cout << workArray_cpu << std::endl;

    // Copy from GPU to host
    workArray.deep_copy_to(workArray_cpu);
    // Make sure the copy has completed before the host touches the data
    yakl::fence();
    std::cout << "workArray_cpu after copy" << std::endl;
    std::cout << workArray_cpu << std::endl;

    // alter array on cpu
    for (int i=0; i<nx; i++) {
      for (int j=0; j<ny; j++) {
        for (int k=0; k<nz; k++) {
          workArray_cpu(i,j,k) += 0.4;
        }
      }
    }

    // Copy from host to GPU
    workArray_cpu.deep_copy_to(workArray);
    // Make sure the copy has completed before the arrays are printed
    yakl::fence();
    std::cout << "workArray after copy back" << std::endl;
    std::cout << workArray << std::endl;
    std::cout << "workArray_cpu after copy back" << std::endl;
    std::cout << workArray_cpu << std::endl;
  }
  // Pair the yakl::init() above with a shutdown of the YAKL runtime
  yakl::finalize();
}