tan2 · echoi · Oct 23, 2023 · Oct 20, 2023 · Oct 20, 2023 · Oct 20, 2023
diff --git a/.gitignore b/.gitignore
@@ -18,3 +18,7 @@ libadaptivity/Makefile
 libadaptivity/*/Makefile
 confdefs.h
 libadaptivity/include
+tetgen/tetgen
+triangle/triangle
+.vscode
+*.code-workspace
diff --git a/Makefile b/Makefile
@@ -9,6 +9,7 @@
 ##
 ## ndims = 3: 3D code; 2: 2D code
 ## opt = 1 ~ 3: optimized build; others: debugging build
+## openacc = 1: enable OpenACC
 ## openmp = 1: enable OpenMP
 ## useadapt = 1: use libadaptivity for mesh optimization during remeshing
 ## adaptive_time_step = 1: use adaptive time stepping technique
@@ -17,6 +18,7 @@
 
 ndims = 3
 opt = 2
+openacc = 0
 openmp = 1
 useadapt = 0
 usemmg = 0
@@ -48,10 +50,22 @@ ifeq ($(useadapt), 1)
 	#LIB_MPIFORTRAN = -lmpi_mpifh # OpenMPI 1.10.2. Other possibilities: -lmpifort, -lfmpich, -lmpi_f77
 	LIB_MPIFORTRAN = -lfmpich # OpenMPI 1.10.2. Other possibilities: -lmpifort, -lfmpich, -lmpi_f77
 else
-	CXX = g++
+	# nvc++ is used for OpenACC builds and for nprof builds; every other
+	# configuration of this branch uses g++.
+	ifeq ($(openacc), 1)
+		CXX = nvc++
+		# Force no openmp when using openacc. The comment sits on its own
+		# line so the value is exactly "0" (an inline comment would leave
+		# trailing blanks in it), and "override" makes the setting win
+		# even over a command-line "openmp=1".
+		override openmp = 0
+	else ifeq ($(nprof), 1)
+		CXX = nvc++
+	else
+		CXX = g++
+	endif
 	CXX_BACKEND = ${CXX}
 endif
 
+## path to CUDA's base directory; its include/ and lib64/ subdirectories are
+## added to the compile and link flags when nprof = 1, and the value is passed
+## on to the sub-make in $(C3X3_DIR). Empty by default -- set it to your
+## installation, e.g. /cluster/nvidia/hpc_sdk/Linux_x86_64/21.2/cuda
+CUDA_DIR = # /cluster/nvidia/hpc_sdk/Linux_x86_64/21.2/cuda
+
 ## path to Boost's base directory, if not in standard system location
 BOOST_ROOT_DIR =
 
@@ -117,6 +131,7 @@ endif
 ifneq (, $(findstring g++, $(CXX_BACKEND))) # if using any version of g++
 	CXXFLAGS = -g -std=c++0x
 	LDFLAGS = -lm
+	TETGENFLAG = -Wno-unused-but-set-variable -Wno-int-to-pointer-cast
 
 	ifeq ($(opt), 1)
 		CXXFLAGS += -O1
@@ -129,8 +144,8 @@ ifneq (, $(findstring g++, $(CXX_BACKEND))) # if using any version of g++
 	endif
 
 	ifeq ($(openmp), 1)
-		CXXFLAGS += -fopenmp -DUSE_OMP
-		LDFLAGS += -fopenmp
+		CXXFLAGS += -fopenmp
+		LDFLAGS += -fopenmp -Wl,-rpath=/lib64
 	endif
 
 	ifeq ($(useadapt), 1)
@@ -154,7 +169,7 @@ else ifneq (, $(findstring icpc, $(CXX_BACKEND))) # if using intel compiler, tes
 	endif
 
 	ifeq ($(openmp), 1)
-		CXXFLAGS += -fopenmp -DUSE_OMP
+		CXXFLAGS += -fopenmp
 		LDFLAGS += -fopenmp
 	endif
 
@@ -163,7 +178,31 @@ else ifneq (, $(findstring icpc, $(CXX_BACKEND))) # if using intel compiler, tes
 			CXXFLAGS += -I$(VTK_INCLUDE)
 		endif
 	endif
+else ifneq (, $(findstring nvc++, $(CXX_BACKEND))) # if using NVIDIA nvc++ (tested on CXX_BACKEND like the g++/icpc branches)
+	CXXFLAGS = -mno-fma -DNVCPP
+	LDFLAGS =
+	# No extra tetgen options for nvc++. This must be the variable the tetgen
+	# rules expand, i.e. TETGENFLAG (the name also set in the g++ branch).
+	TETGENFLAG =
+
+	# opt = 1 or 2 selects the optimization level; other values add no -O flag here
+	ifeq ($(opt), 1)
+		CXXFLAGS += -O1
+	else ifeq ($(opt), 2)
+		CXXFLAGS += -O2
+	endif
 
+	# OpenACC: same offload options at compile and link time, plus -DACC for the sources
+	ifeq ($(openacc), 1)
+		CXXFLAGS += -acc=gpu -gpu=managed,nofma -Mcuda -DACC
+		LDFLAGS += -acc=gpu -gpu=managed -Mcuda
+	endif
+
+	ifeq ($(openmp), 1)
+		CXXFLAGS += -fopenmp
+		LDFLAGS += -fopenmp
+	endif
+
+	# nprof = 1: compiler feedback, -DUSE_NPROF, and linking against
+	# libnvToolsExt found under $(CUDA_DIR)
+	ifeq ($(nprof), 1)
+		CXXFLAGS += -Minfo=mp,accel -I$(CUDA_DIR)/include -DUSE_NPROF
+		LDFLAGS += -L$(CUDA_DIR)/lib64 -Wl,-rpath,$(CUDA_DIR)/lib64 -lnvToolsExt -g
+	endif
 else
 # the only way to display the error message in Makefile ...
 all:
@@ -377,13 +416,13 @@ tetgen/predicates.o: tetgen/predicates.cxx $(TET_INCS)
 	$(CXX) $(CXXFLAGS) -DTETLIBRARY -O0 -c $< -o $@
 
 tetgen/tetgen.o: tetgen/tetgen.cxx $(TET_INCS)
-	$(CXX) $(CXXFLAGS) -DNDEBUG -DTETLIBRARY -Wno-unused-but-set-variable -Wno-int-to-pointer-cast -c $< -o $@
+	$(CXX) $(CXXFLAGS) -DNDEBUG -DTETLIBRARY $(TETGENFLAG) -c $< -o $@
 
 tetgen/tetgen: tetgen/predicates.cxx tetgen/tetgen.cxx
-	$(CXX) $(CXXFLAGS) -O0 -DNDEBUG -Wno-unused-but-set-variable -Wno-int-to-pointer-cast tetgen/predicates.cxx tetgen/tetgen.cxx -o $@
+	$(CXX) $(CXXFLAGS) -O0 -DNDEBUG $(TETGENFLAG) tetgen/predicates.cxx tetgen/tetgen.cxx -o $@
 
 $(C3X3_DIR)/lib$(C3X3_LIBNAME).a:
-	@+$(MAKE) -C $(C3X3_DIR)
+	@+$(MAKE) -C $(C3X3_DIR) openacc=$(openacc) CUDA_DIR=$(CUDA_DIR)
 
 $(ANN_DIR)/lib/lib$(ANN_LIBNAME).a:
 	@+$(MAKE) -C $(ANN_DIR) linux-g++

diff --git a/benchmarks-cores/Makefile b/benchmarks-cores/Makefile
@@ -0,0 +1,79 @@
+
+## Benchmark settings. Override on the command line, e.g.
+##   make CASE=test-big.cfg OMP=1
+##
+## CASE  : config file handed to the executable
+## FRAME : frame number handed to compare.py
+## OMP   : value of OMP_NUM_THREADS for the run
+## TFLAG : optional command prefix for the run (e.g. time -v); empty by default
+## DEBUG : 1 = run under valgrind/helgrind
+## NSYS  : 1 = run under "nsys profile"
+## VTK   : 1 = also run the vtk target after the run
+## NDIMS : selects the executable ../dynearthsol$(NDIMS)d
+## FUNC  : not referenced by any rule in this Makefile
+CASE := test-3d-tiny.cfg
+#CASE := test-tiny-long.cfg
+#CASE := test-big.cfg
+FRAME := 4
+OMP := 4
+TFLAG = # time -v
+DEBUG := 0
+NSYS := 0
+VTK := 0
+NDIMS := 3
+EXE := ../dynearthsol$(NDIMS)d
+FUNC := compute_mass
+
+# Short (7 character) id of the current commit. Simply expanded (:=) so that
+# git runs once per make invocation instead of on every use of GITID.
+GITID := $(shell git rev-parse HEAD | cut -c1-7)
+
+# Optional wrapper command placed in front of the executable.
+# NSYS is tested after DEBUG, so NSYS wins when both are 1.
+ifeq ($(DEBUG), 1)
+  DEBUGFLAG = valgrind --tool=helgrind --log-file=valgrind-out.txt -v
+endif
+ifeq ($(NSYS), 1)
+  DEBUGFLAG = nsys profile --trace=cuda,nvtx,osrt,openmp --output=benchmark.$(GITID).$(CASE).$(OMP)
+# NSYSSUM = nsys stats -o benchmark.${GITID}.${CASE}.${OMP}.csv benchmark.${GITID}.${CASE}.${OMP}.qdrep
+endif
+# VTK = 1: the cmp/restart recipes additionally run "make vtk".
+ifeq ($(VTK), 1)
+  VTKFLAG = $(MAKE) vtk
+endif
+
+
+# OMP is passed to the run as OMP_NUM_THREADS. Use a single cpu (OMP=1)
+# to avoid round-off errors.
+# RUN_TEST deletes earlier benchmark.* output before running;
+# RESTART_TEST runs the same command without deleting anything.
+RUN_TEST = rm -f benchmark.*; OMP_NUM_THREADS=${OMP} ${TFLAG} ${DEBUGFLAG} ${EXE} ${CASE}
+RESTART_TEST = OMP_NUM_THREADS=${OMP} ${TFLAG} ${DEBUGFLAG} ${EXE} ${CASE}
+
+# ORIG: directory of the stored reference output for CASE (filled by "store").
+# CURR: job directory of CASE, used as the new result by "cmpjob".
+ORIG = orig-${CASE}
+CURR = ~/data/jobs/${CASE}
+
+# Default goal: run the benchmark and compare it with the stored reference.
+all: cmp
+
+# Run the default goal for three further cases, each with OMP=4.
+auto:
+	${MAKE} CASE=test-big.cfg OMP=4
+	${MAKE} CASE=test-topo.cfg OMP=4
+	${MAKE} CASE=test-model.cfg OMP=4
+
+# Run the test and store its output in ${ORIG}/ as the new reference.
+# (Kept as a make comment: a "#" line inside the recipe would be echoed and
+# handed to the shell on every run.)
+set:
+	${RUN_TEST}
+	${MAKE} store
+
+# Fresh run (RUN_TEST removes old benchmark.* first), optional VTK output,
+# then compare frame FRAME with the reference in ORIG and keep a copy of
+# benchmark.info tagged with commit id, case and thread count.
+cmp:
+	$(RUN_TEST)
+	$(NSYSSUM) > /dev/null
+	$(VTKFLAG)
+	python3 compare.py $(ORIG) $(FRAME)
+	cp benchmark.info benchmark.$(GITID).$(CASE).$(OMP).info
+
+# Same as cmp, but runs RESTART_TEST, which leaves existing benchmark.* in place.
+restart:
+	$(RESTART_TEST)
+	$(NSYSSUM) > /dev/null
+	$(VTKFLAG)
+	python3 compare.py $(ORIG) $(FRAME)
+	cp benchmark.info benchmark.$(GITID).$(CASE).$(OMP).info
+
+# Replace the stored reference: recreate ORIG, move the benchmark.* files
+# into it and add a copy of ../snapshot.diff.
+store:
+	rm -rf $(ORIG)
+	mkdir -p $(ORIG)
+	mv benchmark.* $(ORIG)
+	cp ../snapshot.diff $(ORIG)
+
+# Set the job to compare against: orig-job becomes a symlink to the job
+# directory of CASE (CURR).
+setjob:
+	rm -rf orig-job
+	ln -s ${CURR} orig-job
+
+# Compare frame FRAME of orig-job with the job directory of the current CASE.
+cmpjob:
+	python3 compare.py orig-job ${CURR} ${FRAME}
+
+# Run ../2vtk.py on the benchmark output (presumably converts it to VTK
+# files -- confirm against 2vtk.py).
+vtk:
+	python ../2vtk.py -m  benchmark
+
+# Every target in this Makefile is a command, not a file; list them all so a
+# stray file named e.g. "vtk" or "restart" cannot make a target look up to date.
+.PHONY: all auto set cmp restart store setjob cmpjob vtk
diff --git a/benchmarks-cores/compare.py b/benchmarks-cores/compare.py
@@ -0,0 +1,188 @@
+#!/usr/bin/python3
+from __future__ import print_function
+import sys, os
+import numpy as np
+sys.path.append('../')
+from Dynearthsol import Dynearthsol
+
+def first_invariant(t):
+    nstr = t.shape[1]
+    ndims = 2 if (nstr == 3) else 3
+    return np.sum(t[:,:ndims], axis=1) / ndims
+
+
+def second_invariant(t):
+    '''The second invariant of the deviatoric part of a symmetric tensor t,
+    where t[:,0:ndims] are the diagonal components;
+      and t[:,ndims:] are the off-diagonal components.'''
+    nstr = t.shape[1]
+
+    # second invariant: sqrt(0.5 * t_ij**2)
+    if nstr == 3:  # 2D
+        return np.sqrt(0.25 * (t[:,0] - t[:,1])**2 + t[:,2]**2)
+    else:  # 3D
+        a = (t[:,0] + t[:,1] + t[:,2]) / 3
+        return np.sqrt( 0.5 * ((t[:,0] - a)**2 + (t[:,1] - a)**2 + (t[:,2] - a)**2) +
+                        t[:,3]**2 + t[:,4]**2 + t[:,5]**2)
+
+
+class Stuff():
+    '''Empty attribute container; read_data() attaches the fields to compare.'''
+    pass
+
+
+def read_data(des, frame):
+    stuff = Stuff()
+
+    stuff.T = des.read_field(frame,'temperature')
+    coordinate = des.read_field(frame, 'coordinate')
+    stuff.x = np.array(coordinate[:,0])
+    stuff.z = np.array(coordinate[:,-1])
+    velocity = des.read_field(frame, 'velocity')
+    stuff.vx = np.array(velocity[:,0])
+    stuff.vz = np.array(velocity[:,-1])
+    stuff.pls = des.read_field(frame,'plastic strain')
+
+    stress = des.read_field(frame, 'stress')
+    stuff.tI = first_invariant(stress)
+    stuff.tII = second_invariant(stress)
+    strain = des.read_field(frame, 'strain')
+    stuff.sI = first_invariant(strain)
+    stuff.sII = second_invariant(strain)
+    strain_rate = des.read_field(frame, 'strain-rate')
+    stuff.srI = first_invariant(strain_rate)
+    stuff.srII = second_invariant(strain_rate)
+
+    stuff.visc = des.read_field(frame, 'viscosity')
+
+    marker_data = des.read_markers(frame, markersetname)
+    field = marker_data[markersetname + '.coord']
+    stuff.m_x = field[:,0]
+    stuff.m_z = field[:,1]
+    stuff.m_id = marker_data[markersetname + '.id']
+    stuff.m_mat = marker_data[markersetname + '.mattype']
+    # stuff.m_time = marker_data[markersetname + '.time']
+    return stuff
+
+
+def reldiff(oldf, newf):
+    m = np.abs(oldf).max()
+    diff = np.abs(newf - oldf)
+    if m == 0.:
+        return diff.max(), diff.std()
+    else:
+        return diff.max()/m, diff.std()/m
+
+
+def show_msg(kind,max,sigma):
+    if max+sigma > 1.e-8:
+        print('  %s:\t\t%.3e %.3e (> 1.e-8)'%(kind,max, sigma))
+        inc = 1
+    else:
+        print('  %s:\t\t%.3e %.3e'%(kind,max, sigma))
+        inc = 0
+    return inc
+
+
+def compare(old, new):
+    inc = 0
+
+    max, sigma = reldiff(old.T, new.T)
+    inc += show_msg('Temperature',max,sigma)
+
+    max, sigma = reldiff(old.x, new.x)
+    inc += show_msg('X coordinate',max,sigma)
+
+    max, sigma = reldiff(old.z, new.z)
+    inc += show_msg('Z coordinate',max,sigma)
+
+    max, sigma = reldiff(old.vx, new.vx)
+    inc += show_msg('X velocity',max,sigma)
+
+    max, sigma = reldiff(old.vz, new.vz)
+    inc += show_msg('Z velocity',max,sigma)
+
+    max, sigma = reldiff(old.pls, new.pls)
+    inc += show_msg('Pl. strain',max,sigma)
+
+    max, sigma = reldiff(old.tI, new.tI)
+    inc += show_msg('Stress I',max,sigma)
+
+    max, sigma = reldiff(old.tII, new.tII)
+    inc += show_msg('Stress II',max,sigma)
+
+    max, sigma = reldiff(old.sI, new.sI)
+    inc += show_msg('Strain I',max,sigma)
+
+    max, sigma = reldiff(old.sII, new.sII)
+    inc += show_msg('Strain II',max,sigma)
+
+    max, sigma = reldiff(old.srI, new.srI)
+    inc += show_msg('S. rate I',max,sigma)
+
+    max, sigma = reldiff(old.srII, new.srII)
+    inc += show_msg('S. rate II',max,sigma)
+
+    max, sigma = reldiff(old.visc, new.visc)
+    inc += show_msg('Viscosity',max,sigma)
+
+    max, sigma = reldiff(old.m_x, new.m_x)
+    inc += show_msg('Marker X',max,sigma)
+
+    max, sigma = reldiff(old.m_z, new.m_z)
+    inc += show_msg('Marker Z',max,sigma)
+
+    max, sigma = reldiff(old.m_mat, new.m_mat)
+    inc += show_msg('Marker Mat',max,sigma)
+
+    # max, sigma = reldiff(old.m_time, new.m_time)
+    # inc += show_msg('Marker Time',max,sigma)
+
+    return inc
+
+
+olddir = sys.argv[1]
+curdir = os.getcwd()
+
+if len(sys.argv) > 3:
+    frame = int(sys.argv[3])
+    newdir = sys.argv[2]
+    modelname = 'result'
+else:
+    frame = int(sys.argv[2])
+    newdir = curdir
+    modelname = 'benchmark'
+
+# name holder
+old = 0
+new = 0
+
+markersetname = 'markerset'
+
+try:
+    # read old and new results
+
+    os.chdir(olddir)
+    des = Dynearthsol(modelname)
+    old = read_data(des, frame)
+
+    os.chdir(newdir)
+    des = Dynearthsol(modelname)
+    new = read_data(des, frame)
+
+    # compare results
+    print()
+    print('Relative difference (max, stddev) of frame =', frame,
+          ' step =', int(des.steps[frame]))
+    print('  ---')
+    inc = compare(old, new)
+    print('')
+    if inc == 0:
+        print('  Status: Normal round-off error~')
+    else:
+        print('  Status: !!!!!!!!!! SOMETHING WRONG !!!!!!!!!!')
+    print('  ---')
+
+
+finally:
+    # restort to original directory
+    os.chdir(curdir)