Store the min/max extent when building the CN FIBGM
However, this approach needs to be done with MPI_FETCH_AND_OP since we are accessing overlapping shared memory
kopperp committed Jul 10, 2023
1 parent 650d9b3 commit 6350cc2
Showing 2 changed files with 42 additions and 17 deletions.
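
The commit message is terse, so here is a minimal, self-contained Fortran sketch of the pattern it refers to: every rank of a node-local communicator atomically folds its own value into a min/max pair that lives in a shared-memory window on the node root, using MPI_FETCH_AND_OP so that concurrent updates of the same overlapping memory stay well defined. The sketch is illustrative only and not part of the commit -- the program name, variable names and the bare 2-integer window are assumptions (and it presumes an MPI-3 library providing the TYPE(C_PTR) interface), whereas the actual change below goes through the project's Allocate_Shared / BARRIER_AND_SYNC wrappers and the new FIBGMToProcExtent array.

PROGRAM SharedExtentSketch
! Illustrative sketch only -- not part of commit 6350cc2.
! Atomic min/max reduction into a node-shared MPI window via MPI_FETCH_AND_OP.
USE MPI
USE,INTRINSIC :: ISO_C_BINDING
IMPLICIT NONE
INTEGER                        :: iError,myRank,sharedComm,win
INTEGER                        :: dummyInt,myValue,intBytes
INTEGER(KIND=MPI_ADDRESS_KIND) :: winSize
TYPE(C_PTR)                    :: baseptr
INTEGER,POINTER                :: extent(:)   ! extent(1) = running min, extent(2) = running max

CALL MPI_INIT(iError)
! Node-local communicator, analogous to MPI_COMM_SHARED in the code below
CALL MPI_COMM_SPLIT_TYPE(MPI_COMM_WORLD,MPI_COMM_TYPE_SHARED,0,MPI_INFO_NULL,sharedComm,iError)
CALL MPI_COMM_RANK(sharedComm,myRank,iError)

! The node root hosts two default integers; every other rank contributes a zero-size segment
intBytes = STORAGE_SIZE(0)/8
winSize  = 0_MPI_ADDRESS_KIND
IF (myRank.EQ.0) winSize = INT(2*intBytes,MPI_ADDRESS_KIND)
CALL MPI_WIN_ALLOCATE_SHARED(winSize,1,MPI_INFO_NULL,sharedComm,baseptr,win,iError)
CALL MPI_WIN_LOCK_ALL(0,win,iError)

! Root initializes min to +HUGE and max to -HUGE, as the commit does for FIBGMToProcExtent
IF (myRank.EQ.0) THEN
  CALL C_F_POINTER(baseptr,extent,(/2/))
  extent(1) =  HUGE(1)
  extent(2) = -HUGE(1)
END IF
CALL MPI_WIN_SYNC(win,iError)
CALL MPI_BARRIER(sharedComm,iError)

! Every rank folds its value into both slots atomically. A plain read-compare-write on the
! overlapping shared memory would race; MPI_FETCH_AND_OP with MPI_MIN/MPI_MAX does not.
myValue = 10 + myRank
CALL MPI_FETCH_AND_OP(myValue,dummyInt,MPI_INTEGER,0,INT(0       ,MPI_ADDRESS_KIND),MPI_MIN,win,iError)
CALL MPI_FETCH_AND_OP(myValue,dummyInt,MPI_INTEGER,0,INT(intBytes,MPI_ADDRESS_KIND),MPI_MAX,win,iError)

CALL MPI_WIN_FLUSH(0,win,iError)      ! complete this rank's RMA operations at the target
CALL MPI_WIN_SYNC(win,iError)
CALL MPI_BARRIER(sharedComm,iError)

IF (myRank.EQ.0) WRITE(*,'(A,I0,A,I0)') 'min = ',extent(1),'   max = ',extent(2)

CALL MPI_WIN_UNLOCK_ALL(win,iError)
CALL MPI_WIN_FREE(win,iError)
CALL MPI_FINALIZE(iError)
END PROGRAM SharedExtentSketch
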
54 changes: 38 additions & 16 deletions src/particles/particle_mesh/particle_bgm.f90
@@ -123,6 +123,7 @@ SUBROUTINE BuildBGMAndIdentifyHaloRegion()
 USE MOD_Particle_Mesh_Vars ,ONLY: FIBGM_offsetElem_Shared_Win,FIBGMToProc_Shared_Win,FIBGM_Element_Shared_Win
 USE MOD_Particle_Mesh_Vars ,ONLY: FIBGM_nTotalElems_Shared_Win,BoundsOfElem_Shared_Win,ElemToBGM_Shared_Win
 USE MOD_Particle_Mesh_Vars ,ONLY: FIBGM_nTotalElems,FIBGM_nTotalElems_Shared
+USE MOD_Particle_Mesh_Vars ,ONLY: FIBGMToProcExtent,FIBGMToProcExtent_Shared,FIBGMToProcExtent_Shared_Win
 USE MOD_Particle_Mesh_Vars ,ONLY: GlobalSide2CNTotalSide_Shared,GlobalSide2CNTotalSide_Shared_Win
 USE MOD_Particle_Mesh_Vars ,ONLY: CNTotalSide2GlobalSide_Shared,CNTotalSide2GlobalSide_Shared_Win
 USE MOD_Particle_Mesh_Vars ,ONLY: GlobalElem2CNTotalElem_Shared,GlobalElem2CNTotalElem_Shared_Win
@@ -144,7 +145,7 @@ SUBROUTINE BuildBGMAndIdentifyHaloRegion()
 ! OUTPUT VARIABLES
 !-----------------------------------------------------------------------------------------------------------------------------------
 ! LOCAL VARIABLES
-INTEGER :: iElem,iHaloElem,iLocSide,SideID
+INTEGER :: iElem,iLocSide,SideID
 INTEGER :: FirstElem,LastElem
 INTEGER :: firstNodeID,lastNodeID
 INTEGER :: offsetNodeID,nNodeIDs,currentOffset
@@ -155,7 +156,7 @@ SUBROUTINE BuildBGMAndIdentifyHaloRegion()
 INTEGER :: BGMCellXmax,BGMCellXmin,BGMCellYmax,BGMCellYmin,BGMCellZmax,BGMCellZmin
 INTEGER :: BGMiminglob,BGMimaxglob,BGMjminglob,BGMjmaxglob,BGMkminglob,BGMkmaxglob
 #if USE_MPI
-INTEGER :: iSide
+INTEGER :: iSide,iHaloElem
 INTEGER :: ElemID,ElemDone
 REAL :: deltaT
 REAL :: globalDiag,maxCellRadius
@@ -191,6 +192,7 @@ SUBROUTINE BuildBGMAndIdentifyHaloRegion()
 LOGICAL :: EnlargeBGM ! Flag used for enlarging the BGM if RefMapping and/or shape function is used
 INTEGER :: offsetElemCNProc
 REAL :: BoundingBoxVolume
+CHARACTER(LEN=255) :: hilf
 ! Mortar
 INTEGER :: iMortar,NbElemID,NbSideID,nMortarElems!,nFoundSides,nlocSides,i
 #else
@@ -200,7 +202,6 @@ SUBROUTINE BuildBGMAndIdentifyHaloRegion()
 INTEGER,ALLOCATABLE :: NumberOfElements(:)
 #endif /*CODE_ANALYZE*/
 REAL :: StartT,EndT ! Timer
-CHARACTER(LEN=255) :: hilf
 !===================================================================================================================================
 
 ! Read parameter for FastInitBackgroundMesh (FIBGM)
@@ -227,9 +228,9 @@ SUBROUTINE BuildBGMAndIdentifyHaloRegion()
 GEO%FIBGMkmaxglob = BGMkmaxglob
 
 LBWRITE(UNIT_stdOut,'(A,I18,A,I18,A,I18)') ' | Total FIBGM Cells(x,y,z): ' &
-    , BGMimaxglob - BGMiminglob ,', '&
-    , BGMjmaxglob - BGMjminglob ,', '&
-    , BGMkmaxglob - BGMkminglob
+    , BGMimaxglob - BGMiminglob + 1 ,', '&
+    , BGMjmaxglob - BGMjminglob + 1 ,', '&
+    , BGMkmaxglob - BGMkminglob + 1
 
 ! Read periodic vectors from parameter file
 CALL InitPeriodicBC()
@@ -1206,17 +1207,22 @@ SUBROUTINE BuildBGMAndIdentifyHaloRegion()
 
 ! Allocate flags which procs belong to which FIGBM cell
 CALL Allocate_Shared((/(BGMiglobDelta+1)*(BGMjglobDelta+1)*(BGMkglobDelta+1)*nComputeNodeProcessors/),FIBGMToProcFlag_Shared_Win,FIBGMToProcFlag_Shared)
-CALL MPI_WIN_LOCK_ALL(0,FIBGMToProcFlag_Shared_Win,IERROR)
+CALL Allocate_Shared((/2*3*nComputeNodeProcessors/),FIBGMToProcExtent_Shared_Win,FIBGMToProcExtent_Shared)
+CALL MPI_WIN_LOCK_ALL(0,FIBGMToProcFlag_Shared_Win ,iError)
+CALL MPI_WIN_LOCK_ALL(0,FIBGMToProcExtent_Shared_Win,iError)
 FIBGM_nTotalElems(BGMiminglob:BGMimaxglob,BGMjminglob:BGMjmaxglob,BGMkminglob:BGMkmaxglob) => FIBGM_nTotalElems_Shared
-FIBGMToProcFlag (BGMiminglob:BGMimaxglob,BGMjminglob:BGMjmaxglob,BGMkminglob:BGMkmaxglob,0:nComputeNodeProcessors-1) => FIBGMToProcFlag_Shared
-
+FIBGMToProcFlag( BGMiminglob:BGMimaxglob,BGMjminglob:BGMjmaxglob,BGMkminglob:BGMkmaxglob,0:nComputeNodeProcessors-1) => FIBGMToProcFlag_Shared
+FIBGMToProcExtent(1:2 ,1:3 , 0:nComputeNodeProcessors-1) => FIBGMToProcExtent_Shared
 IF (myComputeNodeRank.EQ.0) THEN
   FIBGMToProcFlag = .FALSE.
   FIBGM_nTotalElems = 0
+  FIBGMToProcExtent(1,:,:) = HUGE(1)
+  FIBGMToProcExtent(2,:,:) = -HUGE(1)
 END IF
 
 CALL BARRIER_AND_SYNC(FIBGM_nTotalElems_Shared_Win,MPI_COMM_SHARED)
 CALL BARRIER_AND_SYNC(FIBGMToProcFlag_Shared_Win ,MPI_COMM_SHARED)
+CALL BARRIER_AND_SYNC(FIBGMToProcExtent_Shared_Win,MPI_COMM_SHARED)
 
 ! 1.1) Count number of elements on compute node
 DO iElem = offsetElem+1,offsetElem+nElems
@@ -1233,14 +1239,24 @@ SUBROUTINE BuildBGMAndIdentifyHaloRegion()
         ! Perform logical OR and place data on CN root
         CALL MPI_FETCH_AND_OP(.TRUE. ,dummyLog,MPI_LOGICAL,0,INT(posRank*SIZE_INT,MPI_ADDRESS_KIND),MPI_LOR,FIBGMToProcFlag_Shared_Win ,IERROR)
         END ASSOCIATE
+
+        ! Store the min/max extent
+        CALL MPI_FETCH_AND_OP(iBGM,dummyInt,MPI_INTEGER,0,INT(((ProcRank)*(3)*(2) + (1-1)*(2) + (1-1))*SIZE_INT,MPI_ADDRESS_KIND),MPI_MIN,FIBGMToProcExtent_Shared_Win,IERROR)
+        CALL MPI_FETCH_AND_OP(jBGM,dummyInt,MPI_INTEGER,0,INT(((ProcRank)*(3)*(2) + (2-1)*(2) + (1-1))*SIZE_INT,MPI_ADDRESS_KIND),MPI_MIN,FIBGMToProcExtent_Shared_Win,IERROR)
+        CALL MPI_FETCH_AND_OP(kBGM,dummyInt,MPI_INTEGER,0,INT(((ProcRank)*(3)*(2) + (3-1)*(2) + (1-1))*SIZE_INT,MPI_ADDRESS_KIND),MPI_MIN,FIBGMToProcExtent_Shared_Win,IERROR)
+        CALL MPI_FETCH_AND_OP(iBGM,dummyInt,MPI_INTEGER,0,INT(((ProcRank)*(3)*(2) + (1-1)*(2) + (2-1))*SIZE_INT,MPI_ADDRESS_KIND),MPI_MAX,FIBGMToProcExtent_Shared_Win,IERROR)
+        CALL MPI_FETCH_AND_OP(jBGM,dummyInt,MPI_INTEGER,0,INT(((ProcRank)*(3)*(2) + (2-1)*(2) + (2-1))*SIZE_INT,MPI_ADDRESS_KIND),MPI_MAX,FIBGMToProcExtent_Shared_Win,IERROR)
+        CALL MPI_FETCH_AND_OP(kBGM,dummyInt,MPI_INTEGER,0,INT(((ProcRank)*(3)*(2) + (3-1)*(2) + (2-1))*SIZE_INT,MPI_ADDRESS_KIND),MPI_MAX,FIBGMToProcExtent_Shared_Win,IERROR)
       END DO
     END DO
   END DO
 END DO
 
 CALL MPI_WIN_FLUSH(0,FIBGM_nTotalElems_Shared_Win,iError)
 CALL MPI_WIN_FLUSH(0,FIBGMToProcFlag_Shared_Win ,iError)
+CALL MPI_WIN_FLUSH(0,FIBGMToProcExtent_Shared_Win,iError)
 CALL BARRIER_AND_SYNC(FIBGMToProcFlag_Shared_Win ,MPI_COMM_SHARED)
+CALL BARRIER_AND_SYNC(FIBGMToProcExtent_Shared_Win,MPI_COMM_SHARED)
 CALL BARRIER_AND_SYNC(FIBGM_nTotalElems_Shared_Win,MPI_COMM_SHARED)
 
 ! 1.2) FIBGM_nTotalElems can just be added up
@@ -1265,11 +1281,12 @@ SUBROUTINE BuildBGMAndIdentifyHaloRegion()
 FIBGM_LocalProcs = 0
 
 ! 2.1) Count the number of procs on the current root
-DO kBGM = BGMkminglob,BGMkmaxglob
-  DO jBGM = BGMjminglob,BGMjmaxglob
-    DO iBGM = BGMiminglob,BGMimaxglob
-      ! Save number of procs per FIBGM element
-      DO iProc = 0,nComputeNodeProcessors-1
+DO iProc = 0,nComputeNodeProcessors-1
+  ! Save number of procs per FIBGM element
+  DO kBGM = FIBGMToProcExtent(1,3,iProc),FIBGMToProcExtent(2,3,iProc)
+    DO jBGM = FIBGMToProcExtent(1,2,iProc),FIBGMToProcExtent(2,2,iProc)
+      DO iBGM = FIBGMToProcExtent(1,1,iProc),FIBGMToProcExtent(2,1,iProc)
+
         ! Proc belongs to current FIBGM cell
         IF (FIBGMToProcFlag(iBGM,jBGM,kBGM,iProc)) THEN
           FIBGM_LocalProcs(FIBGM_NLOCALPROCS,iBGM,jBGM,kBGM) = FIBGM_LocalProcs(FIBGM_NLOCALPROCS,iBGM,jBGM,kBGM) + 1
@@ -1356,11 +1373,14 @@ SUBROUTINE BuildBGMAndIdentifyHaloRegion()
 ! De-allocate FLAG array
 CALL MPI_BARRIER(MPI_COMM_SHARED,iERROR)
 CALL UNLOCK_AND_FREE(FIBGMToProcFlag_Shared_Win)
+CALL UNLOCK_AND_FREE(FIBGMToProcExtent_Shared_Win)
 CALL MPI_BARRIER(MPI_COMM_SHARED,iERROR)
 
 ! Then, free the pointers or arrays
 ADEALLOCATE(FIBGMToProcFlag_Shared)
+ADEALLOCATE(FIBGMToProcExtent_Shared)
 ADEALLOCATE(FIBGMToProcFlag)
+ADEALLOCATE(FIBGMToProcExtent)
 
 CALL BARRIER_AND_SYNC(FIBGMProcs_Shared_Win ,MPI_COMM_SHARED)
 CALL BARRIER_AND_SYNC(FIBGMToProc_Shared_Win,MPI_COMM_SHARED)
@@ -1547,13 +1567,15 @@ SUBROUTINE FinalizeBGM()
 ADEALLOCATE(FIBGMToProc_Shared)
 ADEALLOCATE(FIBGMProcs)
 ADEALLOCATE(FIBGMProcs_Shared)
+#if USE_MPI
 ! Mapping arrays are only allocated if not running on one node
 IF (nComputeNodeProcessors.NE.nProcessors_Global) THEN
   ADEALLOCATE(CNTotalElem2GlobalElem)
   ADEALLOCATE(CNTotalElem2GlobalElem_Shared)
 END IF ! nComputeNodeProcessors.NE.nProcessors_Global
 ADEALLOCATE(CNTotalSide2GlobalSide)
 ADEALLOCATE(CNTotalSide2GlobalSide_Shared)
+#endif /*USE_MPI*/
 
 #if USE_MPI
 CALL FinalizeHaloInfo()
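
A note on the byte displacements in the six MPI_FETCH_AND_OP calls above: they are the column-major linear index of FIBGMToProcExtent(minmax,dim,ProcRank) inside the flat 2*3*nComputeNodeProcessors shared window, scaled by SIZE_INT, i.e. element (minmax,dim,ProcRank) sits at byte (ProcRank*3*2 + (dim-1)*2 + (minmax-1))*SIZE_INT. The helper below merely restates that arithmetic as a sketch; the function name and interface are illustrative and not part of the commit, and it assumes the shared window uses a displacement unit of one byte.

! Illustrative only -- not part of commit 6350cc2.
! Byte displacement of FIBGMToProcExtent(minmax,dim,ProcRank) in the flat
! shared window, assuming Fortran column-major storage and disp_unit = 1.
PURE FUNCTION ExtentDisp(minmax,dim,ProcRank,SIZE_INT) RESULT(disp)
  USE mpi, ONLY: MPI_ADDRESS_KIND
  IMPLICIT NONE
  INTEGER,INTENT(IN)             :: minmax   ! 1 = minimum slot, 2 = maximum slot
  INTEGER,INTENT(IN)             :: dim      ! 1 = i, 2 = j, 3 = k direction
  INTEGER,INTENT(IN)             :: ProcRank ! 0-based compute-node rank
  INTEGER,INTENT(IN)             :: SIZE_INT ! bytes per default integer
  INTEGER(KIND=MPI_ADDRESS_KIND) :: disp
  disp = INT((ProcRank*3*2 + (dim-1)*2 + (minmax-1))*SIZE_INT,MPI_ADDRESS_KIND)
END FUNCTION ExtentDisp
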
5 changes: 4 additions & 1 deletion src/particles/particle_mesh/particle_mesh_vars.f90
@@ -95,6 +95,7 @@ MODULE MOD_Particle_Mesh_Vars
 ! FIBGM to proc mapping
 INTEGER,ALLOCPOINT,DIMENSION(:,:,:,:) :: FIBGMToProc
 LOGICAL,ALLOCPOINT,DIMENSION(:,:,:,:) :: FIBGMToProcFlag
+INTEGER,ALLOCPOINT,DIMENSION(:,:,:) :: FIBGMToProcExtent
 INTEGER,ALLOCPOINT,DIMENSION(:) :: FIBGMProcs
 
 ! Shared arrays containing information for complete mesh
@@ -126,6 +127,7 @@ MODULE MOD_Particle_Mesh_Vars
 
 INTEGER,ALLOCPOINT :: FIBGMToProc_Shared(:,:,:,:)
 LOGICAL,ALLOCPOINT :: FIBGMToProcFlag_Shared(:)
+INTEGER,ALLOCPOINT :: FIBGMToProcExtent_Shared(:)
 INTEGER,ALLOCPOINT :: FIBGMProcs_Shared(:)
 
 INTEGER,ALLOCPOINT :: CNTotalElem2GlobalElem_Shared(:) !> Compute Nodes mapping 1:nTotal -> 1:nGlobal
@@ -208,6 +210,7 @@ MODULE MOD_Particle_Mesh_Vars
 
 INTEGER :: FIBGMToProc_Shared_Win
 INTEGER :: FIBGMToProcFlag_Shared_Win
+INTEGER :: FIBGMToProcExtent_Shared_Win
 INTEGER :: FIBGMProcs_Shared_Win
 
 INTEGER :: CNTotalElem2GlobalElem_Shared_Win
@@ -380,4 +383,4 @@ MODULE MOD_Particle_Mesh_Vars
 
 !===================================================================================================================================
 
 END MODULE MOD_Particle_Mesh_Vars
