Skip to content

Commit

Permalink
Merge branch 'master' into build-updates-intel-oneapi.
Browse files: browse the repository at this point in the history
  • Loading branch information
ohearnk committed Mar 21, 2024
2 parents ea2bc93 + 45db608 commit 38bbe8d
Show file tree
Hide file tree
Showing 125 changed files with 24,888 additions and 24,818 deletions.
6 changes: 3 additions & 3 deletions src/cuda/gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ __device__ __forceinline__ void hrrwholegrad2(QUICKDouble* Yaax, QUICKDouble* Ya
QUICKDouble RAx,QUICKDouble RAy,QUICKDouble RAz, \
QUICKDouble RBx,QUICKDouble RBy,QUICKDouble RBz, \
QUICKDouble RCx,QUICKDouble RCy,QUICKDouble RCz, \
QUICKDouble RDx,QUICKDouble RDy,QUICKDouble RDz, bool bprint);
QUICKDouble RDx,QUICKDouble RDy,QUICKDouble RDz);

__device__ __forceinline__ void hrrwholegrad2_1(QUICKDouble* Yaax, QUICKDouble* Yaay, QUICKDouble* Yaaz, \
QUICKDouble* Ybbx, QUICKDouble* Ybby, QUICKDouble* Ybbz, \
Expand All @@ -427,7 +427,7 @@ __device__ __forceinline__ void hrrwholegrad2_1(QUICKDouble* Yaax, QUICKDouble*
QUICKDouble RAx,QUICKDouble RAy,QUICKDouble RAz, \
QUICKDouble RBx,QUICKDouble RBy,QUICKDouble RBz, \
QUICKDouble RCx,QUICKDouble RCy,QUICKDouble RCz, \
QUICKDouble RDx,QUICKDouble RDy,QUICKDouble RDz, bool bprint);
QUICKDouble RDx,QUICKDouble RDy,QUICKDouble RDz);

__device__ __forceinline__ void hrrwholegrad2_2(QUICKDouble* Yaax, QUICKDouble* Yaay, QUICKDouble* Yaaz, \
QUICKDouble* Ybbx, QUICKDouble* Ybby, QUICKDouble* Ybbz, \
Expand All @@ -438,7 +438,7 @@ __device__ __forceinline__ void hrrwholegrad2_2(QUICKDouble* Yaax, QUICKDouble*
QUICKDouble RAx,QUICKDouble RAy,QUICKDouble RAz, \
QUICKDouble RBx,QUICKDouble RBy,QUICKDouble RBz, \
QUICKDouble RCx,QUICKDouble RCy,QUICKDouble RCz, \
QUICKDouble RDx,QUICKDouble RDy,QUICKDouble RDz, bool bprint);
QUICKDouble RDx,QUICKDouble RDy,QUICKDouble RDz);


__device__ __forceinline__ QUICKDouble quick_dsqr(QUICKDouble a);
Expand Down
5 changes: 4 additions & 1 deletion src/cuda/gpu_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,15 @@ fflush(stdout);\
#ifdef CUDA_SPDF
#define STOREDIM_L 84
#define STOREDIM_XL 120
#define MAXPRIM 20
#else
#define MAXPRIM 14
#define STOREDIM_L 84
#define STOREDIM_XL 84
#endif

#define MAXPRIM 20
#define STORE_OPERATOR +=

#define TRANSDIM 8
#define MCALDIM 120

Expand Down
4 changes: 4 additions & 0 deletions src/cuda/gpu_eri_assembler_spdf_1_2.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#undef VDIM3
#undef VY
#undef LOCSTORE
#undef STORE_OPERATOR
#define STOREDIM STOREDIM_XL
#define VDIM3 VDIM3_L
#define LOCSTORE(A,i1,i2,d1,d2) A[(i1+(i2)*(d1))*gridDim.x*blockDim.x]
#define VY(a,b,c) LOCVY(YVerticalTemp, a, b, c, VDIM1, VDIM2, VDIM3)
#define STORE_OPERATOR =

__device__ __inline__ void ERint_vertical_spdf_1_2(const int I, const int J, const int K, const int L, const int II, const int JJ, const int KK, const int LL,
const QUICKDouble Ptempx, const QUICKDouble Ptempy, const QUICKDouble Ptempz, const QUICKDouble WPtempx, const QUICKDouble WPtempy, const QUICKDouble WPtempz,
Expand Down Expand Up @@ -63,3 +65,5 @@ __device__ __inline__ void ERint_vertical_spdf_1_2(const int I, const int J, con
}

}
#undef STORE_OPERATOR
#define STORE_OPERATOR +=
5 changes: 5 additions & 0 deletions src/cuda/gpu_eri_assembler_spdf_2_2.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#undef VDIM3
#undef VY
#undef LOCSTORE
#undef STORE_OPERATOR
#define STOREDIM STOREDIM_XL
#define VDIM3 VDIM3_L
#define LOCSTORE(A,i1,i2,d1,d2) A[(i1+(i2)*(d1))*gridDim.x*blockDim.x]
#define VY(a,b,c) LOCVY(YVerticalTemp, a, b, c, VDIM1, VDIM2, VDIM3)
#define STORE_OPERATOR =

__device__ __inline__ void ERint_vertical_spdf_2_2(const int I, const int J, const int K, const int L, const int II, const int JJ, const int KK, const int LL,
const QUICKDouble Ptempx, const QUICKDouble Ptempy, const QUICKDouble Ptempz, const QUICKDouble WPtempx, const QUICKDouble WPtempy, const QUICKDouble WPtempz,
Expand Down Expand Up @@ -63,3 +65,6 @@ __device__ __inline__ void ERint_vertical_spdf_2_2(const int I, const int J, con
}

}

#undef STORE_OPERATOR
#define STORE_OPERATOR +=
5 changes: 5 additions & 0 deletions src/cuda/gpu_eri_assembler_spdf_3_2.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#undef VDIM3
#undef VY
#undef LOCSTORE
#undef STORE_OPERATOR
#define STOREDIM STOREDIM_XL
#define VDIM3 VDIM3_L
#define LOCSTORE(A,i1,i2,d1,d2) A[(i1+(i2)*(d1))*gridDim.x*blockDim.x]
#define VY(a,b,c) LOCVY(YVerticalTemp, a, b, c, VDIM1, VDIM2, VDIM3)
#define STORE_OPERATOR =

__device__ __inline__ void ERint_vertical_spdf_3_2(const int I, const int J, const int K, const int L, const int II, const int JJ, const int KK, const int LL,
const QUICKDouble Ptempx, const QUICKDouble Ptempy, const QUICKDouble Ptempz, const QUICKDouble WPtempx, const QUICKDouble WPtempy, const QUICKDouble WPtempz,
Expand All @@ -31,3 +33,6 @@ __device__ __inline__ void ERint_vertical_spdf_3_2(const int I, const int J, con
}

}

#undef STORE_OPERATOR
#define STORE_OPERATOR +=
5 changes: 5 additions & 0 deletions src/cuda/gpu_eri_assembler_spdf_4_2.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#undef VDIM3
#undef VY
#undef LOCSTORE
#undef STORE_OPERATOR
#define STOREDIM STOREDIM_XL
#define VDIM3 VDIM3_L
#define LOCSTORE(A,i1,i2,d1,d2) A[(i1+(i2)*(d1))*gridDim.x*blockDim.x]
#define VY(a,b,c) LOCVY(YVerticalTemp, a, b, c, VDIM1, VDIM2, VDIM3)
#define STORE_OPERATOR =

__device__ __inline__ void ERint_vertical_spdf_4_2(const int I, const int J, const int K, const int L, const int II, const int JJ, const int KK, const int LL,
const QUICKDouble Ptempx, const QUICKDouble Ptempy, const QUICKDouble Ptempz, const QUICKDouble WPtempx, const QUICKDouble WPtempy, const QUICKDouble WPtempz,
Expand All @@ -31,3 +33,6 @@ __device__ __inline__ void ERint_vertical_spdf_4_2(const int I, const int J, con
}

}

#undef STORE_OPERATOR
#define STORE_OPERATOR +=
5 changes: 5 additions & 0 deletions src/cuda/gpu_eri_assembler_spdf_5_2.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#undef VDIM3
#undef VY
#undef LOCSTORE
#undef STORE_OPERATOR
#define STOREDIM STOREDIM_XL
#define VDIM3 VDIM3_L
#define LOCSTORE(A,i1,i2,d1,d2) A[(i1+(i2)*(d1))*gridDim.x*blockDim.x]
#define VY(a,b,c) LOCVY(YVerticalTemp, a, b, c, VDIM1, VDIM2, VDIM3)
#define STORE_OPERATOR =

__device__ __inline__ void ERint_vertical_spdf_5_2(const int I, const int J, const int K, const int L, const int II, const int JJ, const int KK, const int LL,
const QUICKDouble Ptempx, const QUICKDouble Ptempy, const QUICKDouble Ptempz, const QUICKDouble WPtempx, const QUICKDouble WPtempy, const QUICKDouble WPtempz,
Expand All @@ -31,3 +33,6 @@ __device__ __inline__ void ERint_vertical_spdf_5_2(const int I, const int J, con
}

}

#undef STORE_OPERATOR
#define STORE_OPERATOR +=
5 changes: 5 additions & 0 deletions src/cuda/gpu_eri_assembler_spdf_6_2.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#undef VDIM3
#undef VY
#undef LOCSTORE
#undef STORE_OPERATOR
#define STOREDIM STOREDIM_XL
#define VDIM3 VDIM3_L
#define LOCSTORE(A,i1,i2,d1,d2) A[(i1+(i2)*(d1))*gridDim.x*blockDim.x]
#define VY(a,b,c) LOCVY(YVerticalTemp, a, b, c, VDIM1, VDIM2, VDIM3)
#define STORE_OPERATOR =

__device__ __inline__ void ERint_vertical_spdf_6_2(const int I, const int J, const int K, const int L, const int II, const int JJ, const int KK, const int LL,
const QUICKDouble Ptempx, const QUICKDouble Ptempy, const QUICKDouble Ptempz, const QUICKDouble WPtempx, const QUICKDouble WPtempy, const QUICKDouble WPtempz,
Expand All @@ -31,3 +33,6 @@ __device__ __inline__ void ERint_vertical_spdf_6_2(const int I, const int J, con
}

}

#undef STORE_OPERATOR
#define STORE_OPERATOR +=
5 changes: 5 additions & 0 deletions src/cuda/gpu_eri_assembler_spdf_7_2.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#undef VDIM3
#undef VY
#undef LOCSTORE
#undef STORE_OPERATOR
#define STOREDIM STOREDIM_XL
#define VDIM3 VDIM3_L
#define LOCSTORE(A,i1,i2,d1,d2) A[(i1+(i2)*(d1))*gridDim.x*blockDim.x]
#define VY(a,b,c) LOCVY(YVerticalTemp, a, b, c, VDIM1, VDIM2, VDIM3)
#define STORE_OPERATOR =

__device__ __inline__ void ERint_vertical_spdf_7_2(const int I, const int J, const int K, const int L, const int II, const int JJ, const int KK, const int LL,
const QUICKDouble Ptempx, const QUICKDouble Ptempy, const QUICKDouble Ptempz, const QUICKDouble WPtempx, const QUICKDouble WPtempy, const QUICKDouble WPtempz,
Expand All @@ -31,3 +33,6 @@ __device__ __inline__ void ERint_vertical_spdf_7_2(const int I, const int J, con
}

}

#undef STORE_OPERATOR
#define STORE_OPERATOR +=
5 changes: 5 additions & 0 deletions src/cuda/gpu_eri_assembler_spdf_8_2.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#undef VDIM3
#undef VY
#undef LOCSTORE
#undef STORE_OPERATOR
#define STOREDIM STOREDIM_XL
#define VDIM3 VDIM3_L
#define LOCSTORE(A,i1,i2,d1,d2) A[(i1+(i2)*(d1))*gridDim.x*blockDim.x]
#define VY(a,b,c) LOCVY(YVerticalTemp, a, b, c, VDIM1, VDIM2, VDIM3)
#define STORE_OPERATOR =

__device__ __inline__ void ERint_vertical_spdf_8_2(const int I, const int J, const int K, const int L, const int II, const int JJ, const int KK, const int LL,
const QUICKDouble Ptempx, const QUICKDouble Ptempy, const QUICKDouble Ptempz, const QUICKDouble WPtempx, const QUICKDouble WPtempy, const QUICKDouble WPtempz,
Expand All @@ -31,3 +33,6 @@ __device__ __inline__ void ERint_vertical_spdf_8_2(const int I, const int J, con
}

}

#undef STORE_OPERATOR
#define STORE_OPERATOR +=
5 changes: 5 additions & 0 deletions src/cuda/gpu_eri_grad_assembler_sp.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#undef VDIM3
#undef VY
#undef LOCSTORE
#undef STORE_OPERATOR
#define STOREDIM STOREDIM_GRAD_T
#define VDIM3 VDIM3_GRAD_T
#define LOCSTORE(A,i1,i2,d1,d2) A[(i1+(i2)*(d1))*gridDim.x*blockDim.x]
#define VY(a,b,c) LOCVY(YVerticalTemp, a, b, c, VDIM1, VDIM2, VDIM3)
#define STORE_OPERATOR =

__device__ __inline__ void ERint_grad_vertical_sp(const int I, const int J, const int K, const int L, const int II, const int JJ, const int KK, const int LL,
const QUICKDouble Ptempx, const QUICKDouble Ptempy, const QUICKDouble Ptempz, const QUICKDouble WPtempx, const QUICKDouble WPtempy, const QUICKDouble WPtempz,
Expand Down Expand Up @@ -84,3 +86,6 @@ __device__ __inline__ void ERint_grad_vertical_sp(const int I, const int J, cons
}

}

#undef STORE_OPERATOR
#define STORE_OPERATOR +=
5 changes: 5 additions & 0 deletions src/cuda/gpu_eri_grad_assembler_spd.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#undef VDIM3
#undef VY
#undef LOCSTORE
#undef STORE_OPERATOR
#define STOREDIM STOREDIM_S
#define VDIM3 VDIM3_S
#define LOCSTORE(A,i1,i2,d1,d2) A[(i1+(i2)*(d1))*gridDim.x*blockDim.x]
#define VY(a,b,c) LOCVY(YVerticalTemp, a, b, c, VDIM1, VDIM2, VDIM3)
#define STORE_OPERATOR =

__device__ __inline__ void ERint_grad_vertical_spd(const int I, const int J, const int K, const int L, const int II, const int JJ, const int KK, const int LL,
const QUICKDouble Ptempx, const QUICKDouble Ptempy, const QUICKDouble Ptempz, const QUICKDouble WPtempx, const QUICKDouble WPtempy, const QUICKDouble WPtempz,
Expand Down Expand Up @@ -124,3 +126,6 @@ __device__ __inline__ void ERint_grad_vertical_spd(const int I, const int J, con
}

}

#undef STORE_OPERATOR
#define STORE_OPERATOR +=
5 changes: 5 additions & 0 deletions src/cuda/gpu_eri_grad_assembler_spd_2.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#undef VDIM3
#undef VY
#undef LOCSTORE
#undef STORE_OPERATOR
#define STOREDIM STOREDIM_XL
#define VDIM3 VDIM3_L
#define LOCSTORE(A,i1,i2,d1,d2) A[(i1+(i2)*(d1))*gridDim.x*blockDim.x]
#define VY(a,b,c) LOCVY(YVerticalTemp, a, b, c, VDIM1, VDIM2, VDIM3)
#define STORE_OPERATOR =

__device__ __inline__ void ERint_grad_vertical_spd_2(const int I, const int J, const int K, const int L, const int II, const int JJ, const int KK, const int LL,
const QUICKDouble Ptempx, const QUICKDouble Ptempy, const QUICKDouble Ptempz, const QUICKDouble WPtempx, const QUICKDouble WPtempy, const QUICKDouble WPtempz,
Expand Down Expand Up @@ -6017,3 +6019,6 @@ __device__ __inline__ void ERint_grad_vertical_spd_2(const int I, const int J, c
}

}

#undef STORE_OPERATOR
#define STORE_OPERATOR +=
5 changes: 5 additions & 0 deletions src/cuda/gpu_eri_grad_assembler_spdf_1.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#undef VDIM3
#undef VY
#undef LOCSTORE
#undef STORE_OPERATOR
#define STOREDIM STOREDIM_XL
#define VDIM3 VDIM3_L
#define LOCSTORE(A,i1,i2,d1,d2) A[(i1+(i2)*(d1))*gridDim.x*blockDim.x]
#define VY(a,b,c) LOCVY(YVerticalTemp, a, b, c, VDIM1, VDIM2, VDIM3)
#define STORE_OPERATOR =

__device__ __inline__ void ERint_grad_vertical_spdf_1(const int I, const int J, const int K, const int L, const int II, const int JJ, const int KK, const int LL,
const QUICKDouble Ptempx, const QUICKDouble Ptempy, const QUICKDouble Ptempz, const QUICKDouble WPtempx, const QUICKDouble WPtempy, const QUICKDouble WPtempz,
Expand Down Expand Up @@ -47,3 +49,6 @@ __device__ __inline__ void ERint_grad_vertical_spdf_1(const int I, const int J,
}

}

#undef STORE_OPERATOR
#define STORE_OPERATOR +=
5 changes: 5 additions & 0 deletions src/cuda/gpu_eri_grad_assembler_spdf_2.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#undef VDIM3
#undef VY
#undef LOCSTORE
#undef STORE_OPERATOR
#define STOREDIM STOREDIM_XL
#define VDIM3 VDIM3_L
#define LOCSTORE(A,i1,i2,d1,d2) A[(i1+(i2)*(d1))*gridDim.x*blockDim.x]
#define VY(a,b,c) LOCVY(YVerticalTemp, a, b, c, VDIM1, VDIM2, VDIM3)
#define STORE_OPERATOR =

__device__ __inline__ void ERint_grad_vertical_spdf_2(const int I, const int J, const int K, const int L, const int II, const int JJ, const int KK, const int LL,
const QUICKDouble Ptempx, const QUICKDouble Ptempy, const QUICKDouble Ptempz, const QUICKDouble WPtempx, const QUICKDouble WPtempy, const QUICKDouble WPtempz,
Expand Down Expand Up @@ -47,3 +49,6 @@ __device__ __inline__ void ERint_grad_vertical_spdf_2(const int I, const int J,
}

}

#undef STORE_OPERATOR
#define STORE_OPERATOR +=
5 changes: 5 additions & 0 deletions src/cuda/gpu_eri_grad_assembler_spdf_3.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#undef VDIM3
#undef VY
#undef LOCSTORE
#undef STORE_OPERATOR
#define STOREDIM STOREDIM_XL
#define VDIM3 VDIM3_L
#define LOCSTORE(A,i1,i2,d1,d2) A[(i1+(i2)*(d1))*gridDim.x*blockDim.x]
#define VY(a,b,c) LOCVY(YVerticalTemp, a, b, c, VDIM1, VDIM2, VDIM3)
#define STORE_OPERATOR =

__device__ __inline__ void ERint_grad_vertical_spdf_3(const int I, const int J, const int K, const int L, const int II, const int JJ, const int KK, const int LL,
const QUICKDouble Ptempx, const QUICKDouble Ptempy, const QUICKDouble Ptempz, const QUICKDouble WPtempx, const QUICKDouble WPtempy, const QUICKDouble WPtempz,
Expand All @@ -31,3 +33,6 @@ __device__ __inline__ void ERint_grad_vertical_spdf_3(const int I, const int J,
}

}

#undef STORE_OPERATOR
#define STORE_OPERATOR +=
5 changes: 5 additions & 0 deletions src/cuda/gpu_eri_grad_assembler_spdf_4.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#undef VDIM3
#undef VY
#undef LOCSTORE
#undef STORE_OPERATOR
#define STOREDIM STOREDIM_XL
#define VDIM3 VDIM3_L
#define LOCSTORE(A,i1,i2,d1,d2) A[(i1+(i2)*(d1))*gridDim.x*blockDim.x]
#define VY(a,b,c) LOCVY(YVerticalTemp, a, b, c, VDIM1, VDIM2, VDIM3)
#define STORE_OPERATOR =

__device__ __inline__ void ERint_grad_vertical_spdf_4(const int I, const int J, const int K, const int L, const int II, const int JJ, const int KK, const int LL,
const QUICKDouble Ptempx, const QUICKDouble Ptempy, const QUICKDouble Ptempz, const QUICKDouble WPtempx, const QUICKDouble WPtempy, const QUICKDouble WPtempz,
Expand All @@ -31,3 +33,6 @@ __device__ __inline__ void ERint_grad_vertical_spdf_4(const int I, const int J,
}

}

#undef STORE_OPERATOR
#define STORE_OPERATOR +=
5 changes: 5 additions & 0 deletions src/cuda/gpu_eri_grad_assembler_spdf_5.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#undef VDIM3
#undef VY
#undef LOCSTORE
#undef STORE_OPERATOR
#define STOREDIM STOREDIM_XL
#define VDIM3 VDIM3_L
#define LOCSTORE(A,i1,i2,d1,d2) A[(i1+(i2)*(d1))*gridDim.x*blockDim.x]
#define VY(a,b,c) LOCVY(YVerticalTemp, a, b, c, VDIM1, VDIM2, VDIM3)
#define STORE_OPERATOR =

__device__ __inline__ void ERint_grad_vertical_spdf_5(const int I, const int J, const int K, const int L, const int II, const int JJ, const int KK, const int LL,
const QUICKDouble Ptempx, const QUICKDouble Ptempy, const QUICKDouble Ptempz, const QUICKDouble WPtempx, const QUICKDouble WPtempy, const QUICKDouble WPtempz,
Expand All @@ -31,3 +33,6 @@ __device__ __inline__ void ERint_grad_vertical_spdf_5(const int I, const int J,
}

}

#undef STORE_OPERATOR
#define STORE_OPERATOR +=
5 changes: 5 additions & 0 deletions src/cuda/gpu_eri_grad_assembler_spdf_6.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#undef VDIM3
#undef VY
#undef LOCSTORE
#undef STORE_OPERATOR
#define STOREDIM STOREDIM_XL
#define VDIM3 VDIM3_L
#define LOCSTORE(A,i1,i2,d1,d2) A[(i1+(i2)*(d1))*gridDim.x*blockDim.x]
#define VY(a,b,c) LOCVY(YVerticalTemp, a, b, c, VDIM1, VDIM2, VDIM3)
#define STORE_OPERATOR =

__device__ __inline__ void ERint_grad_vertical_spdf_6(const int I, const int J, const int K, const int L, const int II, const int JJ, const int KK, const int LL,
const QUICKDouble Ptempx, const QUICKDouble Ptempy, const QUICKDouble Ptempz, const QUICKDouble WPtempx, const QUICKDouble WPtempy, const QUICKDouble WPtempz,
Expand All @@ -31,3 +33,6 @@ __device__ __inline__ void ERint_grad_vertical_spdf_6(const int I, const int J,
}

}

#undef STORE_OPERATOR
#define STORE_OPERATOR +=
Loading

0 comments on commit 38bbe8d

Please sign in to comment.