Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preparation of test case, flag, and consistency for merge openacc #12

Merged
merged 27 commits into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
d9e86e4
benchmarks-cores for OMP consistency
chaseshyu Oct 20, 2023
673a3af
Add .cfg for benchmarks-cores.
chaseshyu Oct 20, 2023
442e73a
add test cases of openmp
chaseshyu Oct 20, 2023
caa922d
fix bug in omp compile
chaseshyu Oct 20, 2023
5e08e0d
add vscode in gitignore
chaseshyu Oct 20, 2023
939af35
Formal test case for following openacc merge
chaseshyu Oct 20, 2023
aa11887
fix omp lib error when load nvhpc module
chaseshyu Oct 20, 2023
beaf305
add flags for openacc
chaseshyu Oct 20, 2023
67ee5aa
fix bug when switch compiler g++ and nvc++
chaseshyu Oct 20, 2023
34ab9b0
include two versions of matprops for openmp and openacc
chaseshyu Oct 21, 2023
03e17f5
two versions of matprops for openmp and openacc
chaseshyu Oct 21, 2023
0a1da1d
remove user boost path
chaseshyu Oct 21, 2023
47384ec
declare tmp arrary for two stage element calculation
chaseshyu Oct 21, 2023
f84754a
two stage for compute_metric_field
chaseshyu Oct 21, 2023
f9a265d
two stage for update_force
chaseshyu Oct 21, 2023
70b14b3
two stage for compute_mass
chaseshyu Oct 21, 2023
c57dccb
two stage for compute_dvoldt
chaseshyu Oct 21, 2023
975765d
two stage for NMD_stress
chaseshyu Oct 21, 2023
5ff15d6
two stage for update_temperature
chaseshyu Oct 21, 2023
cd7a10f
free compute_shape_fn from egroup
chaseshyu Oct 21, 2023
f2ec6de
remove egroup
chaseshyu Oct 21, 2023
4c14b11
Two stage element-node calculation for multicore consistency
chaseshyu Oct 21, 2023
6e99133
fix bug of omp share variable
chaseshyu Oct 21, 2023
1691ffa
sum and print total remesh, output, execute time
chaseshyu Oct 21, 2023
df5a698
remove const signs of int and double for omp loop
chaseshyu Oct 21, 2023
24eb501
Fix bug of mmg
chaseshyu Oct 21, 2023
b071cc6
fix bug in mmg remeshing omp shared const
chaseshyu Oct 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,7 @@ libadaptivity/Makefile
libadaptivity/*/Makefile
confdefs.h
libadaptivity/include
tetgen/tetgen
triangle/triangle
.vscode
*.code-workspace
53 changes: 46 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
##
## ndims = 3: 3D code; 2: 2D code
## opt = 1 ~ 3: optimized build; others: debugging build
## openacc = 1: enable OpenACC
## openmp = 1: enable OpenMP
## useadapt = 1: use libadaptivity for mesh optimization during remeshing
## adaptive_time_step = 1: use adaptive time stepping technique
Expand All @@ -17,6 +18,7 @@

ndims = 3
opt = 2
openacc = 0
openmp = 1
useadapt = 0
usemmg = 0
Expand Down Expand Up @@ -48,10 +50,22 @@ ifeq ($(useadapt), 1)
#LIB_MPIFORTRAN = -lmpi_mpifh # OpenMPI 1.10.2. Other possibilities: -lmpifort, -lfmpich, -lmpi_f77
LIB_MPIFORTRAN = -lfmpich # OpenMPI 1.10.2. Other possibilities: -lmpifort, -lfmpich, -lmpi_f77
else
CXX = g++
ifeq ($(openacc), 1)
    # OpenACC builds use the NVIDIA compiler.
    CXX = nvc++
    # Force OpenMP off when OpenACC is enabled.
    # - The comment sits on its own line: an inline "# ..." after the value
    #   would leave the blanks before it inside the value.
    # - "override" makes the setting win even if openmp=1 was given on the
    #   make command line, which a plain assignment cannot do.
    override openmp = 0
else
    # nprof = 1 also selects nvc++ (the nvc++ flag section adds -DUSE_NPROF).
    ifeq ($(nprof), 1)
        CXX = nvc++
    else
        CXX = g++
    endif
endif
CXX_BACKEND = ${CXX}
endif

## path to cuda's base directory
CUDA_DIR = # /cluster/nvidia/hpc_sdk/Linux_x86_64/21.2/cuda

## path to Boost's base directory, if not in standard system location
BOOST_ROOT_DIR =

Expand Down Expand Up @@ -117,6 +131,7 @@ endif
ifneq (, $(findstring g++, $(CXX_BACKEND))) # if using any version of g++
CXXFLAGS = -g -std=c++0x
LDFLAGS = -lm
TETGENFLAG = -Wno-unused-but-set-variable -Wno-int-to-pointer-cast

ifeq ($(opt), 1)
CXXFLAGS += -O1
Expand All @@ -129,8 +144,8 @@ ifneq (, $(findstring g++, $(CXX_BACKEND))) # if using any version of g++
endif

ifeq ($(openmp), 1)
CXXFLAGS += -fopenmp -DUSE_OMP
LDFLAGS += -fopenmp
CXXFLAGS += -fopenmp
LDFLAGS += -fopenmp -Wl,-rpath=/lib64
endif

ifeq ($(useadapt), 1)
Expand All @@ -154,7 +169,7 @@ else ifneq (, $(findstring icpc, $(CXX_BACKEND))) # if using intel compiler, tes
endif

ifeq ($(openmp), 1)
CXXFLAGS += -fopenmp -DUSE_OMP
CXXFLAGS += -fopenmp
LDFLAGS += -fopenmp
endif

Expand All @@ -163,7 +178,31 @@ else ifneq (, $(findstring icpc, $(CXX_BACKEND))) # if using intel compiler, tes
CXXFLAGS += -I$(VTK_INCLUDE)
endif
endif
else ifneq (, $(findstring nvc++, $(CXX)))
    # Flags for nvc++.
    # NOTE(review): this branch tests $(CXX) while the g++/icpc branches test
    # $(CXX_BACKEND) -- confirm that the difference is intended.
    CXXFLAGS = -mno-fma -DNVCPP
    LDFLAGS =
    # No extra tetgen flags for nvc++.  The name must be TETGENFLAG (singular):
    # that is what the g++ branch sets and what the tetgen rules expand.
    TETGENFLAG =

    ifeq ($(opt), 1)
        CXXFLAGS += -O1
    else ifeq ($(opt), 2)
        CXXFLAGS += -O2
    endif

    # OpenACC build: same offload options at compile and link time
    # (nofma is only given to the compile step).
    ifeq ($(openacc), 1)
        CXXFLAGS += -acc=gpu -gpu=managed,nofma -Mcuda -DACC
        LDFLAGS += -acc=gpu -gpu=managed -Mcuda
    endif

    ifeq ($(openmp), 1)
        CXXFLAGS += -fopenmp
        LDFLAGS += -fopenmp
    endif

    # Profiling build: needs headers and libraries from $(CUDA_DIR).
    ifeq ($(nprof), 1)
        CXXFLAGS += -Minfo=mp,accel -I$(CUDA_DIR)/include -DUSE_NPROF
        LDFLAGS += -L$(CUDA_DIR)/lib64 -Wl,-rpath,$(CUDA_DIR)/lib64 -lnvToolsExt -g
    endif
else
# the only way to display the error message in Makefile ...
all:
Expand Down Expand Up @@ -377,13 +416,13 @@ tetgen/predicates.o: tetgen/predicates.cxx $(TET_INCS)
$(CXX) $(CXXFLAGS) -DTETLIBRARY -O0 -c $< -o $@

tetgen/tetgen.o: tetgen/tetgen.cxx $(TET_INCS)
$(CXX) $(CXXFLAGS) -DNDEBUG -DTETLIBRARY -Wno-unused-but-set-variable -Wno-int-to-pointer-cast -c $< -o $@
$(CXX) $(CXXFLAGS) -DNDEBUG -DTETLIBRARY $(TETGENFLAG) -c $< -o $@

tetgen/tetgen: tetgen/predicates.cxx tetgen/tetgen.cxx
$(CXX) $(CXXFLAGS) -O0 -DNDEBUG -Wno-unused-but-set-variable -Wno-int-to-pointer-cast tetgen/predicates.cxx tetgen/tetgen.cxx -o $@
$(CXX) $(CXXFLAGS) -O0 -DNDEBUG $(TETGENFLAG) tetgen/predicates.cxx tetgen/tetgen.cxx -o $@

$(C3X3_DIR)/lib$(C3X3_LIBNAME).a:
@+$(MAKE) -C $(C3X3_DIR)
@+$(MAKE) -C $(C3X3_DIR) openacc=$(openacc) CUDA_DIR=$(CUDA_DIR)

$(ANN_DIR)/lib/lib$(ANN_LIBNAME).a:
@+$(MAKE) -C $(ANN_DIR) linux-g++
Expand Down
79 changes: 79 additions & 0 deletions benchmarks-cores/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@

## User-tunable settings (override on the command line, e.g. `make OMP=1`)

# Configuration file of the case to run
CASE := test-3d-tiny.cfg
#CASE := test-tiny-long.cfg
#CASE := test-big.cfg
# Output frame passed to compare.py
FRAME := 4
# Value exported as OMP_NUM_THREADS for the run
OMP := 4
# Optional command prefix, e.g. `time -v`
TFLAG = # time -v
# DEBUG = 1: run under valgrind/helgrind
DEBUG := 0
# NSYS = 1: run under `nsys profile`
NSYS := 0
# VTK = 1: run the `vtk` target after the test
VTK := 0
NDIMS := 3
EXE := ../dynearthsol$(NDIMS)d
FUNC := compute_mass

# Short commit id used in output file names.  Simple (:=) assignment so that
# git is run once at parse time instead of on every expansion of GITID.
GITID := $(shell git rev-parse HEAD | cut -c1-7)

ifeq ($(DEBUG), 1)
    DEBUGFLAG = valgrind --tool=helgrind --log-file=valgrind-out.txt -v
endif
# When both DEBUG and NSYS are 1, NSYS wins because it is assigned last.
ifeq ($(NSYS), 1)
    DEBUGFLAG = nsys profile --trace=cuda,nvtx,osrt,openmp --output=benchmark.${GITID}.${CASE}.${OMP}
    # NSYSSUM = nsys stats -o benchmark.${GITID}.${CASE}.${OMP}.csv benchmark.${GITID}.${CASE}.${OMP}.qdrep
endif
ifeq ($(VTK), 1)
    VTKFLAG = ${MAKE} vtk
endif


# The run uses OMP_NUM_THREADS=${OMP}.  The original note here recommends a
# single CPU (OMP=1) to avoid round-off errors.
# RUN_TEST removes earlier benchmark.* output first; RESTART_TEST keeps it.
RUN_TEST = rm -f benchmark.*; OMP_NUM_THREADS=${OMP} ${TFLAG} ${DEBUGFLAG} ${EXE} ${CASE}
RESTART_TEST = OMP_NUM_THREADS=${OMP} ${TFLAG} ${DEBUGFLAG} ${EXE} ${CASE}

# Directory holding the stored reference output of this case
ORIG = orig-${CASE}
# Job directory compared by the `cmpjob` target
CURR = ~/data/jobs/${CASE}

# Python interpreter used by every target; override with `make PYTHON=...`.
PYTHON ?= python3

all: cmp

# Run the comparison for a fixed list of cases.
auto:
	${MAKE} CASE=test-big.cfg OMP=4
	${MAKE} CASE=test-topo.cfg OMP=4
	${MAKE} CASE=test-model.cfg OMP=4

# Run the test and store its output as the reference in ${ORIG}.
set:
	${RUN_TEST}
	${MAKE} store

# Run the test from scratch and compare it with the stored reference.
cmp:
	${RUN_TEST}
	${NSYSSUM} > /dev/null
	${VTKFLAG}
	$(PYTHON) compare.py ${ORIG} ${FRAME}
	cp benchmark.info benchmark.${GITID}.${CASE}.${OMP}.info

# Same as `cmp`, but without removing existing benchmark.* files first.
restart:
	${RESTART_TEST}
	${NSYSSUM} > /dev/null
	${VTKFLAG}
	$(PYTHON) compare.py ${ORIG} ${FRAME}
	cp benchmark.info benchmark.${GITID}.${CASE}.${OMP}.info

# Move the current benchmark.* output into a fresh ${ORIG} directory.
store:
	rm -rf ${ORIG}
	mkdir -p ${ORIG}
	mv benchmark.* ${ORIG}
	cp ../snapshot.diff ${ORIG}

# Set the job to compare against: link it as orig-job.
setjob:
	rm -rf orig-job
	ln -s ~/data/jobs/${CASE} orig-job

# Compare the linked job with ${CURR}.
cmpjob:
	$(PYTHON) compare.py orig-job ${CURR} ${FRAME}

# Convert the benchmark output with 2vtk.py.
vtk:
	$(PYTHON) ../2vtk.py -m benchmark

# None of these targets is a file, so all of them are declared phony.
.PHONY: all auto set cmp restart store setjob cmpjob vtk
188 changes: 188 additions & 0 deletions benchmarks-cores/compare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
#!/usr/bin/python3
from __future__ import print_function
import sys, os
import numpy as np
sys.path.append('../')
from Dynearthsol import Dynearthsol

def first_invariant(t):
    '''Mean of the leading (diagonal) components of each row of t.

    A row with 3 components is treated as 2D (2 diagonal terms); any other
    width is treated as 3D (3 diagonal terms).'''
    ncomp = t.shape[1]
    if ncomp == 3:
        ndims = 2
    else:
        ndims = 3
    diagonal = t[:, :ndims]
    return np.sum(diagonal, axis=1) / ndims


def second_invariant(t):
    '''Second invariant of the deviatoric part of a symmetric tensor t.

    t[:,0:ndims] holds the diagonal components and t[:,ndims:] the
    off-diagonal ones.  Rows with 3 components are 2D, otherwise 3D.
    The invariant is sqrt(0.5 * t_ij**2) of the deviator.'''
    is_2d = (t.shape[1] == 3)

    if is_2d:
        half_diff_sq = 0.25 * (t[:,0] - t[:,1])**2
        return np.sqrt(half_diff_sq + t[:,2]**2)

    # 3D: subtract the mean of the diagonal before squaring
    d0, d1, d2 = t[:,0], t[:,1], t[:,2]
    mean = (d0 + d1 + d2) / 3
    dev_sq = (d0 - mean)**2 + (d1 - mean)**2 + (d2 - mean)**2
    return np.sqrt(0.5 * dev_sq + t[:,3]**2 + t[:,4]**2 + t[:,5]**2)


class Stuff(object):
    '''Empty record; read_data() attaches the fields of one frame to it.'''


def read_data(des, frame):
    '''Read the fields of one output frame into a Stuff record.

    des:   reader object providing read_field(frame, name) and
           read_markers(frame, markersetname)
    frame: frame index to read

    Returns a Stuff with node data (T, x, z, vx, vz), plastic strain (pls),
    first/second invariants of stress (tI, tII), strain (sI, sII) and
    strain-rate (srI, srII), viscosity (visc) and marker data
    (m_x, m_z, m_id, m_mat).  The marker set is chosen by the module-level
    name `markersetname`.'''
    stuff = Stuff()

    stuff.T = des.read_field(frame,'temperature')
    coordinate = des.read_field(frame, 'coordinate')
    # x is the first column and z the last one, whatever the dimension
    stuff.x = np.array(coordinate[:,0])
    stuff.z = np.array(coordinate[:,-1])
    velocity = des.read_field(frame, 'velocity')
    stuff.vx = np.array(velocity[:,0])
    stuff.vz = np.array(velocity[:,-1])
    stuff.pls = des.read_field(frame,'plastic strain')

    stress = des.read_field(frame, 'stress')
    stuff.tI = first_invariant(stress)
    stuff.tII = second_invariant(stress)
    strain = des.read_field(frame, 'strain')
    stuff.sI = first_invariant(strain)
    stuff.sII = second_invariant(strain)
    strain_rate = des.read_field(frame, 'strain-rate')
    stuff.srI = first_invariant(strain_rate)
    stuff.srII = second_invariant(strain_rate)

    stuff.visc = des.read_field(frame, 'viscosity')

    marker_data = des.read_markers(frame, markersetname)
    field = marker_data[markersetname + '.coord']
    stuff.m_x = field[:,0]
    # Use the last column for z, as done for the node coordinate above.
    # The previous index 1 only matched z in 2D.
    # Assumes marker coordinates have one column per dimension -- TODO confirm.
    stuff.m_z = field[:,-1]
    stuff.m_id = marker_data[markersetname + '.id']
    stuff.m_mat = marker_data[markersetname + '.mattype']
    # marker time is not read:
    # stuff.m_time = marker_data[markersetname + '.time']
    return stuff


def reldiff(oldf, newf):
    '''Return (max, stddev) of |newf - oldf|, scaled by max|oldf|.

    When max|oldf| is exactly zero the unscaled values are returned.'''
    scale = np.abs(oldf).max()
    absdiff = np.abs(newf - oldf)
    worst, spread = absdiff.max(), absdiff.std()
    if scale != 0.:
        return worst/scale, spread/scale
    return worst, spread


def show_msg(kind,max,sigma):
    '''Print one report line for a field and tell whether it is out of tolerance.

    kind:  label printed at the start of the line
    max:   maximum (relative) difference
    sigma: standard deviation of the (relative) difference

    Returns 1 when max+sigma is above 1.e-8 or is not a number, else 0.'''
    # Written as "not (<=)" instead of ">" so that NaN, which fails every
    # comparison, is flagged rather than silently accepted as round-off.
    within_tolerance = (max+sigma <= 1.e-8)
    if not within_tolerance:
        print(' %s:\t\t%.3e %.3e (> 1.e-8)'%(kind,max, sigma))
        inc = 1
    else:
        print(' %s:\t\t%.3e %.3e'%(kind,max, sigma))
        inc = 0
    return inc


def compare(old, new):
    '''Report the relative difference of every field of two records.

    old, new: records as returned by read_data()

    Prints one line per field via show_msg() and returns the number of
    fields that show_msg() flagged.'''
    # (label, attribute name) in report order.
    # Marker time ('m_time') is left out; read_data() does not read it.
    fields = (
        ('Temperature', 'T'),
        ('X coordinate', 'x'),
        ('Z coordinate', 'z'),
        ('X velocity', 'vx'),
        ('Z velocity', 'vz'),
        ('Pl. strain', 'pls'),
        ('Stress I', 'tI'),
        ('Stress II', 'tII'),
        ('Strain I', 'sI'),
        ('Strain II', 'sII'),
        ('S. rate I', 'srI'),
        ('S. rate II', 'srII'),
        ('Viscosity', 'visc'),
        ('Marker X', 'm_x'),
        ('Marker Z', 'm_z'),
        ('Marker Mat', 'm_mat'),
    )

    flagged = 0
    for label, attr in fields:
        worst, spread = reldiff(getattr(old, attr), getattr(new, attr))
        flagged += show_msg(label, worst, spread)

    return flagged


# Usage:
#   compare.py OLDDIR FRAME          compare OLDDIR with the current directory
#                                    (model name 'benchmark')
#   compare.py OLDDIR NEWDIR FRAME   compare OLDDIR with NEWDIR
#                                    (model name 'result')
if len(sys.argv) < 3:
    sys.exit('Usage: compare.py olddir [newdir] frame')

curdir = os.getcwd()

# Make the directories absolute before any chdir.  Otherwise a relative
# NEWDIR would be looked up from inside OLDDIR after the first chdir.
olddir = os.path.abspath(sys.argv[1])

if len(sys.argv) > 3:
    frame = int(sys.argv[3])
    newdir = os.path.abspath(sys.argv[2])
    modelname = 'result'
else:
    frame = int(sys.argv[2])
    newdir = curdir
    modelname = 'benchmark'

# marker set read by read_data()
markersetname = 'markerset'

try:
    # read old and new results

    os.chdir(olddir)
    des = Dynearthsol(modelname)
    old = read_data(des, frame)

    os.chdir(newdir)
    des = Dynearthsol(modelname)
    new = read_data(des, frame)

    # compare results
    print()
    print('Relative difference (max, stddev) of frame =', frame,
          ' step =', int(des.steps[frame]))
    print(' ---')
    inc = compare(old, new)
    print('')
    if inc == 0:
        print(' Status: Normal round-off error~')
    else:
        print(' Status: !!!!!!!!!! SOMETHING WRONG !!!!!!!!!!')
    print(' ---')


finally:
    # restore the original working directory
    os.chdir(curdir)