Skip to content

Commit

Permalink
WIP: warning adding BK3 capabilities to elliptic for hex3D. Switch us…
Browse files Browse the repository at this point in the history
…ing ELLIPTIC INTEGRATION option in setup. Currently this does not work, however default GLL integration works
  • Loading branch information
tcew committed Dec 20, 2018
1 parent c7a34b2 commit 8627ab9
Show file tree
Hide file tree
Showing 10 changed files with 205 additions and 88 deletions.
16 changes: 12 additions & 4 deletions matlab/writeNodeDataHex3D.m
Expand Up @@ -219,18 +219,26 @@ function writeNodeDataHex3D(N)


%% 1D quadrature
Nc = ceil(3*N/2)-1;
[z,w] = JacobiGQ(0,0,Nc+1);
%%Nc = ceil(3*N/2)-1;
Nc = N+1;
%[z,w] = JacobiGQ(0,0,Nc);
[z] = JacobiGL(0,0,Nc);
w = sum(inv(Vandermonde1D(Nc, z)*Vandermonde1D(Nc,z)'))'


[N, Nc, length(w)]
cInterp = Vandermonde1D(N, z)/Vandermonde1D(N, r1d);
cubProject = (cInterp)';
cubDT = (Dmatrix1D(Nc+1, z, Vandermonde1D(Nc+1,z)))';
Nqc = length(z);
cubD = Dmatrix1D(Nc, z, Vandermonde1D(Nc,z));
cubDT = (Dmatrix1D(Nc, z, Vandermonde1D(Nc,z)))';
Nqc = length(z)

writeFloatMatrix(fid, z, 'Quadrature r-coordinates');
writeFloatMatrix(fid, w, 'Quadrature weights');

writeFloatMatrix(fid, cInterp, 'Quadrature Interpolation Matrix');
writeFloatMatrix(fid, cubDT, 'Quadrature Weak D Differentiation Matrix');
writeFloatMatrix(fid, cubD, 'Quadrature Differentiation Matrix');
writeFloatMatrix(fid, cubProject, 'Quadrature Projection Matrix');


Expand Down
2 changes: 2 additions & 0 deletions solvers/elliptic/elliptic.h
Expand Up @@ -114,6 +114,8 @@ typedef struct {
occa::kernel AxKernel;
occa::kernel partialAxKernel;
occa::kernel partialFloatAxKernel;
occa::kernel partialCubatureAxKernel;

occa::kernel rhsBCKernel;
occa::kernel addBCKernel;
occa::kernel innerProductKernel;
Expand Down
111 changes: 65 additions & 46 deletions solvers/elliptic/okl/ellipticCubatureAxHex3D.okl
Expand Up @@ -24,12 +24,11 @@ SOFTWARE.

*/

#define p_halfD ((int)((int)p_gjNq+1)/(int)2)

#define p_cubNp (p_cubNq*p_cubNq*p_cubNq)
#define p_cubNq2 (p_cubNq*p_cubNq)
#define p_Nq2 (p_Nq*p_Nq)

#if 0
#if p_cubNq==8 || p_cubNq==16
#define p_cubPad 1
#else
Expand All @@ -41,40 +40,56 @@ SOFTWARE.
#else
#define p_gllPad 0
#endif
#endif

#define p_cubPad 0
#define p_gllPad 0

@kernel void ellipticCubatureAxHex3D(const int Nelements,
const dfloat * restrict cubggeo,
const dfloat * restrict gllD,
const dfloat * restrict cubInterp,
const dfloat lambda,
const dfloat * restrict q,
dfloat * restrict Aq){
@kernel void ellipticCubaturePartialAxHex3D(const dlong Nelements,
@restrict const dlong * elementList,
@restrict const dfloat * cubggeo,
@restrict const dfloat * cubD,
@restrict const dfloat * cubInterpT,
const dfloat lambda,
@restrict const dfloat * q,
@restrict dfloat * Aq){

for(int e=0; e<Nelements; ++e; @outer(0)) {

@shared dfloat s_q[p_cubNq][p_cubNq][p_cubNq];
@shared dfloat s_gllD[p_cubNq][p_cubNq+p_cubPad];
@shared dfloat s_cubD[p_cubNq][p_cubNq+p_cubPad];
@shared dfloat s_qr[p_cubNq][p_cubNq+p_cubPad];
@shared dfloat s_I[p_cubNq][p_Nq+p_gllPad];

@exclusive dfloat r_q[p_cubNq];
@exclusive dfloat r_G00, r_G01, r_G02, r_G11, r_G12, r_G22, r_GwJ, r_qt, r_qs;

@exclusive dlong r_element;

for(int b=0;b<p_cubNq;++b;@inner(1)){
for(int a=0;a<p_cubNq;++a;@inner(0)){

if(a<p_Nq){
s_I[b][a] = cubInterp[a+p_Nq*b];
r_element = elementList[e];

int id = a + b*p_cubNq;
if(id<p_cubNq*p_Nq){
s_I[a][b] = cubInterpT[id];
}

s_gllD[b][a] = gllD[b*p_cubNq+a];
s_cubD[b][a] = cubD[id];

if(e==0 && a==0 && b==0)
printf("cD: ");
if(e==0)
printf("s_cubD[%d][%d]=%lf\n ", b, a, s_cubD[b][a]);



if(a<p_Nq && b<p_Nq){

#pragma unroll p_Nq
for(int c=0;c<p_Nq;++c){
s_q[c][b][a] = q[e*p_Np + c*p_Nq*p_Nq + b*p_Nq + a];
s_q[c][b][a] = q[r_element*p_Np + c*p_Nq*p_Nq + b*p_Nq + a];
}
}
}
Expand All @@ -92,16 +107,17 @@ SOFTWARE.
for(int b=0;b<p_Nq;++b)
r_q[b] = s_q[c][b][a];

#pragma unroll p_halfD
for(int j=0;j<p_halfD;++j){
#pragma unroll p_halfC
for(int j=0;j<p_halfC;++j){

dfloat tmp = 0;
dfloat tmp = 0;
dfloat tmp2 = 0;

#pragma unroll p_Nq
for(int b=0;b<p_Nq;++b){

const dfloat sIjb= s_I[j][b];

tmp += sIjb*r_q[b];
tmp2 += sIjb*r_q[p_Nq-1-b];
}
Expand All @@ -114,19 +130,18 @@ SOFTWARE.
}//for c


//transform in c
//transform in a
for(int c=0;c<p_cubNq;++c;@inner(1)){
for(int j=0;j<p_cubNq;++j;@inner(0)){

if(c<p_Nq){

#pragma unroll p_Nq
for(int a=0;a<p_Nq;++a)

for(int a=0;a<p_Nq;++a)
r_q[a] = s_q[c][j][a];

#pragma unroll p_halfD
for(int i=0;i<p_halfD;++i){
#pragma unroll p_halfC
for(int i=0;i<p_halfC;++i){

dfloat tmp = 0;
dfloat tmp2 = 0;
Expand Down Expand Up @@ -154,8 +169,8 @@ SOFTWARE.
for(int c=0;c<p_Nq;++c)
r_q[c] = s_q[c][j][i];

#pragma unroll p_halfD
for(int k=0;k<p_halfD;++k){
#pragma unroll p_halfC
for(int k=0;k<p_halfC;++k){

dfloat tmp = 0;
dfloat tmp2= 0;
Expand All @@ -175,22 +190,26 @@ SOFTWARE.
}
//===============================================now differentiate once interpolated

#pragma unroll p_cubNq
for(int k=0; k<p_cubNq; ++k)
for(int j=0;j<p_cubNq;++j;@inner(1))
for(int i=0;i<p_cubNq;++i;@inner(0))
r_q[k] =0.0f;
for(int j=0;j<p_cubNq;++j;@inner(1)){
for(int i=0;i<p_cubNq;++i;@inner(0)){
#pragma unroll p_cubNq
for(int k=0; k<p_cubNq; ++k){
r_q[k] =0.0f;
}
}
}

#pragma unroll p_cubNq
for(int k=0; k<p_cubNq; ++k) {

for(int j=0; j<p_cubNq; ++j; @inner(1)) {
for(int i=0; i<p_cubNq; ++i; @inner(0)) {

const int base = e*p_Ngeo*p_cubNp + k*p_cubNq*p_cubNq + j*p_cubNq + i;
const int base = r_element*p_Nggeo*p_cubNp + k*p_cubNq*p_cubNq + j*p_cubNq + i;

//geofactors for k j i thread
r_GwJ = cubggeo[base+p_GWJID*p_cubNp];

r_G00 = cubggeo[base+p_G00ID*p_cubNp];
r_G01 = cubggeo[base+p_G01ID*p_cubNp];
r_G02 = cubggeo[base+p_G02ID*p_cubNp];
Expand All @@ -206,9 +225,9 @@ SOFTWARE.

#pragma unroll p_cubNq
for (int n = 0; n<p_cubNq; ++n) {
dr += s_gllD[i][n]*s_q[k][j][n];
ds += s_gllD[j][n]*s_q[k][n][i];
dt += s_gllD[k][n]*s_q[n][j][i];
dr += s_cubD[i][n]*s_q[k][j][n];
ds += s_cubD[j][n]*s_q[k][n][i];
dt += s_cubD[k][n]*s_q[n][j][i];
}

s_qr[j][i] = r_G00*dr + r_G01*ds + r_G02*dt;
Expand All @@ -228,8 +247,8 @@ SOFTWARE.

#pragma unroll p_cubNq
for(int n=0;n<p_cubNq;++n){
lapqr += s_gllD[n][i]*s_qr[j][n];
r_q[n] += s_gllD[k][n]*r_qt;
lapqr += s_cubD[n][i]*s_qr[j][n];
r_q[n] += s_cubD[k][n]*r_qt;
}

r_q[k] += lapqr;
Expand All @@ -249,14 +268,14 @@ SOFTWARE.

#pragma unroll p_cubNq
for(int n=0;n<p_cubNq;++n){
lapqs += s_gllD[n][j]*s_qr[n][i];
lapqs += s_cubD[n][j]*s_qr[n][i];
}

r_q[k] += lapqs;
}
}
}//k

//Loop 7
for(int j=0;j<p_cubNq;++j;@inner(1)){
for(int i=0;i<p_cubNq;++i;@inner(0)){
Expand All @@ -280,8 +299,8 @@ SOFTWARE.
r_q[j] = s_q[k][j][i];
}

#pragma unroll p_halfD
for(int b=0;b<p_halfD;++b){
#pragma unroll p_halfN
for(int b=0;b<p_halfN;++b){

dfloat tmp = 0.0f;
dfloat tmp2 = 0.0f;
Expand Down Expand Up @@ -309,8 +328,8 @@ SOFTWARE.
for(int i=0;i<p_cubNq;++i)
r_q[i] = s_q[k][b][i];

#pragma unroll p_halfD
for(int a=0;a<p_halfD;++a){
#pragma unroll p_halfN
for(int a=0;a<p_halfN;++a){

dfloat tmp = 0.0f;
dfloat tmp2 = 0.0f;
Expand Down Expand Up @@ -340,8 +359,8 @@ SOFTWARE.
r_q[k] = s_q[k][b][a];
}

#pragma unroll p_halfD
for(int c=0;c<p_halfD;++c){
#pragma unroll p_halfN
for(int c=0;c<p_halfN;++c){

dfloat tmp = 0.0f;
dfloat tmp2 = 0.0f;
Expand All @@ -354,8 +373,8 @@ SOFTWARE.
tmp2 += sIkc*r_q[p_cubNq-1-k];
}

Aq[e*p_Np + c*p_Nq*p_Nq + b*p_Nq + a] = tmp;
Aq[e*p_Np+(p_Nq-1-c)*p_Nq*p_Nq+b*p_Nq+a] = tmp2;
Aq[r_element*p_Np + c*p_Nq*p_Nq + b*p_Nq + a] = tmp;
Aq[r_element*p_Np+(p_Nq-1-c)*p_Nq*p_Nq+b*p_Nq+a] = tmp2;
}//c
}//if
}//a
Expand Down
26 changes: 20 additions & 6 deletions solvers/elliptic/setups/setupHex3D.rc
Expand Up @@ -12,8 +12,18 @@ data/ellipticSineTest3D.h

[MESH FILE]
#../../meshes/cubeHexE8Thilina.msh
#../../meshes/cavityHexH01.msh
../../meshes/cavityHexH0075.msh
#../../meshes/cavityHexH025.msh
#../../meshes/cavityHexH0125.msh
#../../meshes/cavityHexH0075.msh
#../../meshes/cubeHexE00008.msh
#../../meshes/cubeHexE00064.msh
#../../meshes/cubeHexE00216.msh
#../../meshes/cubeHexE00512.msh
../../meshes/cubeHexE01000.msh
#../../meshes/cubeHexE01728.msh
#../../meshes/cubeHexE04096.msh
#../../meshes/cubeHexE05832.msh
#../../meshes/cubeHexE08000.msh

[MESH DIMENSION]
3
Expand All @@ -22,12 +32,16 @@ data/ellipticSineTest3D.h
12

[POLYNOMIAL DEGREE]
8
7

[ELEMENT MAP]
ISOPARAMETRIC
#TRILINEAR

[ELLIPTIC INTEGRATION]
NODAL
# CUBATURE - NOT WORKING YET

[THREAD MODEL]
CUDA

Expand All @@ -38,7 +52,7 @@ CUDA
0

[LAMBDA]
0
10000

# can add FLEXIBLE to PCG
[KRYLOV SOLVER]
Expand All @@ -55,8 +69,8 @@ NODAL

# can be NONE, JACOBI, MASSMATRIX, FULLALMOND, SEMFEM, or MULTIGRID
[PRECONDITIONER]
#JACOBI
MULTIGRID
JACOBI
#MULTIGRID



Expand Down

0 comments on commit 8627ab9

Please sign in to comment.