Permalink
Find file
Fetching contributors…
Cannot retrieve contributors at this time
346 lines (337 sloc) 13.6 KB
!--------------------------------------------------------------------
!
!
! PURPOSE
!
! This program solves nonlinear sine-Gordon equation in 2 dimensions
! u_{tt}-u_{xx}-u_{yy}=-sin(u)
! using a second order implicit-explicit time stepping scheme.
!
! The boundary conditions are u(x=0,y)=u(2*Lx*\pi,y),
! u(x,y=0)=u(x,y=2*Ly*\pi)
! The initial condition is set in initialdata.f90
!
! AUTHORS
!
! B. Cloutier, B.K. Muite, P. Rigge
! 4 June 2012
!
! .. Parameters ..
! Nx = number of modes in x - power of 2 for FFT
! Ny = number of modes in y - power of 2 for FFT
! Nt = number of timesteps to take
! plotgap = number of timesteps between plots
! FFTW_IN_PLACE = value for FFTW input
! FFTW_MEASURE = value for FFTW input
! FFTW_EXHAUSTIVE = value for FFTW input
! FFTW_PATIENT = value for FFTW input
! FFTW_ESTIMATE = value for FFTW input
! FFTW_FORWARD = value for FFTW input
! FFTW_BACKWARD = value for FFTW input
! pi = 3.1415926535...
! Lx = width of box in x direction
! Ly = width of box in y direction
! .. Scalars ..
! i = loop counter in x direction
! j = loop counter in y direction
! n = loop counter for timesteps direction
! allocatestatus = error indicator during allocation
! start = variable to record start time of program
! finish = variable to record end time of program
! count_rate = variable for clock count rate
! planfxy = Forward 2d fft plan (FFTW)
! planbxy = Backward 2d fft plan (FFTW)
! planf = Forward 2d fft plan (CUFFT)
! planb = Backward 2d fft plan (CUFFT)
! dt = timestep
! ierr = error code
! plotnum = number of plot
! .. Arrays ..
! u = approximate solution
! uold = approximate solution
! u_d = approximate solution (on GPU)
! v_d = Fourier transform of approximate solution (on GPU)
! uold_d = approximate solution (on GPU)
! vold_d = Fourier transform of approximate solution (on GPU)
! nonlinhat_d = Fourier transform of nonlinear term, sin(u) (on GPU)
! temp1 = extra space for energy computation
! temp2 = extra space for energy computation
! savearray = temp array to save out to disk
! .. Vectors ..
! kx = Fourier frequencies in x direction
! ky = Fourier frequencies in y direction
! kx_d = Fourier frequencies in x direction (on GPU)
! ky_d = Fourier frequencies in y direction (on GPU)
! x = x locations
! y = y locations
! time = times at which save data
! en = total energy
! enstr = strain energy
! enpot = potential energy
! enkin = kinetic energy
! name_config = array to store filename for data to be saved
!
! REFERENCES
!
! ACKNOWLEDGEMENTS
!
! This program is based on example code to demonstrate usage of Fortran and
! CUDA FFT routines taken from
! http://cudamusing.blogspot.com/2010/05/CALLing-cufft-from-cuda-fortran.html
!
! and
!
! http://cudamusing.blogspot.com/search?q=cublas
!
! ACCURACY
!
! ERROR INDICATORS AND WARNINGS
!
! FURTHER COMMENTS
! Check that the initial iterate is consistent with the
! boundary conditions for the domain specified
!--------------------------------------------------------------------
! External routines required
! getgrid.f90 -- Get initial grid of points
! initialdata.f90 -- Get initial data
! enercalc.f90 -- Subroutine to calculate the energy
! savedata.f90 -- Save initial data
! External libraries required
! Cuda FFT -- http://developer.nvidia.com/cufft
! FFTW3 -- Fastest Fourier Transform in the West
! (http://www.fftw.org/)
module precision
! Precision control
integer, parameter, public :: Single = kind(0.0) ! Single precision
integer, parameter, public :: Double = kind(0.0d0) ! Double precision
!
integer, parameter, public :: fp_kind = Double
!integer, parameter, public :: fp_kind = Single
end module precision
module cufft
integer, public :: CUFFT_FORWARD = -1
integer, public :: CUFFT_INVERSE = 1
integer, public :: CUFFT_R2C = Z'2a' ! Real to Complex (interleaved)
integer, public :: CUFFT_C2R = Z'2c' ! Complex (interleaved) to Real
integer, public :: CUFFT_C2C = Z'29' ! Complex to Complex, interleaved
integer, public :: CUFFT_D2Z = Z'6a' ! Double to Double-Complex
integer, public :: CUFFT_Z2D = Z'6c' ! Double-Complex to Double
integer, public :: CUFFT_Z2Z = Z'69' ! Double-Complex to Double-Complex
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
! cufftPlan2d(cufftHandle *plan, int nx,int ny, cufftType type,int batch)
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
interface cufftPlan2d
subroutine cufftPlan2d(plan, nx, ny, type) bind(C,name='cufftPlan2d')
use iso_c_binding
integer(c_int):: plan
integer(c_int),value:: nx, ny, type
end subroutine cufftPlan2d
end interface cufftPlan2d
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
! cufftDestroy(cufftHandle plan)
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
interface cufftDestroy
subroutine cufftDestroy(plan) bind(C,name='cufftDestroy')
use iso_c_binding
integer(c_int),value:: plan
end subroutine cufftDestroy
end interface cufftDestroy
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
! cufftExecD2Z(cufftHandle plan,
! cufftDoubleReal *idata,
! cufftDoubleComplex *odata)
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
interface cufftExecD2Z
subroutine cufftExecD2Z(plan, idata, odata) &
& bind(C,name='cufftExecD2Z')
use iso_c_binding
use precision
integer(c_int), value :: plan
real(fp_kind), device :: idata(1:nx,1:ny)
complex(fp_kind),device :: odata(1:nx,1:ny)
end subroutine cufftExecD2Z
end interface cufftExecD2Z
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
! cufftExecD2Z(cufftHandle plan,
! cufftDoubleComplex *idata,
! cufftDoubleReal *odata)
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
interface cufftExecZ2D
subroutine cufftExecZ2D(plan, idata, odata) &
& bind(C,name='cufftExecZ2D')
use iso_c_binding
use precision
integer(c_int),value :: plan
complex(fp_kind),device:: idata(1:nx,1:ny)
real(fp_kind),device :: odata(1:nx,1:ny)
end subroutine cufftExecZ2D
end interface cufftExecZ2D
end module cufft
PROGRAM sg2d
USE precision
USE cudafor
USE cufft
! Declare variables
IMPLICIT NONE
INTEGER(kind=4), PARAMETER :: Nx=1024
INTEGER(kind=4), PARAMETER :: Ny=Nx
INTEGER(kind=4), PARAMETER :: Nt=500
INTEGER(kind=4), PARAMETER :: plotgap=Nt+1
REAL(kind=8), PARAMETER :: &
pi=3.14159265358979323846264338327950288419716939937510d0
REAL(kind=8), PARAMETER :: Lx=5.0d0
REAL(kind=8), PARAMETER :: Ly=5.0d0
REAL(kind=8) :: dt=0.001d0
COMPLEX(kind=8), DIMENSION(:), ALLOCATABLE :: kx,ky
REAL(kind=8), DIMENSION(:), ALLOCATABLE :: x,y
REAL (kind=8), DIMENSION(:,:), ALLOCATABLE :: u,uold
COMPLEX(kind=8), DIMENSION(:,:), ALLOCATABLE :: temp1,temp2
REAL(kind=8), DIMENSION(:,:), ALLOCATABLE :: savearray
REAL(kind=8), DIMENSION(:), ALLOCATABLE :: time,enkin,enstr,enpot,en
REAL(kind=8) :: scalemodes
INTEGER(kind=4) :: ierr,i,j,n,allocatestatus
INTEGER(kind=4) :: start, finish, count_rate, plotnum
INTEGER(kind=4), PARAMETER :: FFTW_IN_PLACE = 8, FFTW_MEASURE = 0, &
FFTW_EXHAUSTIVE = 8, FFTW_PATIENT = 32, FFTW_ESTIMATE = 64
INTEGER(kind=4),PARAMETER :: FFTW_FORWARD = -1, FFTW_BACKWARD=1
INTEGER(kind=8) :: planfxy,planbxy
CHARACTER*100 :: name_config
INTEGER(kind=4) :: planf,planb,kersize
! GPU variables
COMPLEX(fp_kind),DEVICE,DIMENSION(:), ALLOCATABLE :: kx_d,ky_d
COMPLEX(fp_kind),DEVICE,DIMENSION(:,:), ALLOCATABLE :: vold_d,v_d,nonlinhat_d
REAL (fp_kind),DEVICE,DIMENSION(:,:), ALLOCATABLE :: uold_d,u_d
! print run information
PRINT *,"Nx=", Nx
PRINT *,"Ny=", Ny
PRINT *,"Nt=", Nt
PRINT *,"Lx=", Lx
PRINT *,"Ly=", Ly
PRINT *,"dt=", dt
kersize=min(Nx,256)
ALLOCATE(kx(1:Nx),ky(1:Ny),kx_d(1:Nx),ky_d(1:Ny),x(1:Nx),y(1:Ny),&
u(1:Nx,1:Ny),uold(1:Nx,1:Ny),u_d(1:Nx,1:Ny),uold_d(1:Nx,1:Ny),&
v_d(1:Nx/2+1,1:Ny),vold_d(1:Nx/2+1,1:Ny),&
savearray(1:Nx,1:Ny),time(1:1+Nt/plotgap),enkin(1:1+Nt/plotgap+1),&
enstr(1:1+Nt/plotgap+1),enpot(1:1+Nt/plotgap+1),en(1:1+Nt/plotgap),&
nonlinhat_d(1:Nx/2+1,1:Ny),&
temp1(1:Nx,1:Ny),temp2(1:Nx,1:Ny),&
stat=allocatestatus)
IF (allocatestatus .ne. 0) stop
PRINT *,'allocated arrays'
scalemodes=1.0d0/REAL(Nx*Ny,kind(0d0))
! set up cuda ffts
call cufftPlan2D(planf,nx,ny,CUFFT_D2Z)
call cufftPlan2D(planb,nx,ny,CUFFT_Z2D)
! set up fftw ffts
CALL dfftw_plan_dft_2d_(planfxy,Nx,Ny,u,temp2,FFTW_FORWARD,FFTW_ESTIMATE)
CALL dfftw_plan_dft_2d_(planbxy,Nx,Ny,temp2,u,FFTW_BACKWARD,FFTW_ESTIMATE)
PRINT *,'Setup FFTs'
! setup grid, wave numbers
CALL getgrid(Nx,Ny,Lx,Ly,pi,name_config,x,y,kx,ky)
kx_d=kx
ky_d=ky
PRINT *,'Got grid and fourier frequencies'
CALL initialdata(Nx,Ny,x,y,u,uold)
u_d=u
uold_d=uold
plotnum=1
name_config = 'data/u'
savearray=REAL(u)
! CALL savedata(Nx,Ny,plotnum,name_config,savearray) ! disabled for benchmarking
PRINT *,'data saved'
CALL enercalc(Nx,Ny,planfxy,planbxy,dt,enkin(plotnum),enstr(plotnum),&
enpot(plotnum),en(plotnum),kx,ky,temp1,temp2,u,uold)
call cufftExecD2Z(planf,u_d,v_d)
call cufftExecD2Z(planf,uold_d,vold_d)
PRINT *,'Got initial data, starting timestepping'
time(plotnum)=0.0d0
CALL system_clock(start,count_rate)
DO n=1,Nt
!$cuf kernel do(2) <<< (1,*), (kersize,1) >>>
DO j=1,Ny
DO i=1,Nx
uold_d(i,j)=u_d(i,j)
END DO
END DO
!$cuf kernel do(2) <<< (1,*), (kersize,1) >>>
DO j=1,Ny
DO i=1,Nx
u_d(i,j)=sin(u_d(i,j))
END DO
END DO
call cufftExecD2Z(planf,u_d,nonlinhat_d)
!$cuf kernel do(2) <<< (1,*), (kersize,1) >>>
DO j=1,Ny
DO i=1,Nx/2+1
nonlinhat_d(i,j)=scalemodes*( 0.25*(kx_d(i)*kx_d(i) + ky_d(j)*ky_d(j))&
*(2.0d0*v_d(i,j)+vold_d(i,j))&
+(2.0d0*v_d(i,j)-vold_d(i,j))/(dt*dt)&
-nonlinhat_d(i,j) )&
/(1/(dt*dt)-0.25*(kx_d(i)*kx_d(i) + ky_d(j)*ky_d(j)))
END DO
END DO
!$cuf kernel do(2) <<< (1,*), (kersize,1) >>>
DO j=1,Ny
DO i=1,Nx/2+1
vold_d(i,j)=v_d(i,j)
END DO
END DO
!$cuf kernel do(2) <<< (1,*), (kersize,1) >>>
DO j=1,Ny
DO i=1,Nx/2+1
v_d(i,j)=nonlinhat_d(i,j)/scalemodes
END DO
END DO
call cufftExecZ2D(planb,nonlinhat_d,u_d)
IF (mod(n,plotgap)==0) then
plotnum=plotnum+1
time(plotnum)=n*dt
PRINT *,'time',n*dt
u=u_d
uold=uold_d
! savearray=REAL(u,kind(0d0)) ! disabled for benchmarking
! CALL savedata(Nx,Ny,plotnum,name_config,savearray)
CALL enercalc(Nx,Ny,planfxy,planbxy,dt,enkin(plotnum),enstr(plotnum),&
enpot(plotnum),en(plotnum),kx,ky,temp1,temp2,u,uold)
END IF
END DO
CALL system_clock(finish,count_rate)
PRINT *,'Finished time stepping'
u=u_d
uold=uold_d
! compute energy at the end
CALL enercalc(Nx,Ny,planfxy,planbxy,dt,enkin(plotnum+1),enstr(plotnum+1),&
enpot(plotnum+1),en(plotnum+1),kx,ky,temp1,temp2,u,uold)
PRINT*,'Program took ',&
REAL(finish-start,kind(0d0))/REAL(count_rate,kind(0d0)),&
'for Time stepping'
CALL saveresults(Nt,plotgap,time(1:1+n/plotgap),en(1:1+n/plotgap+1),&
enstr(1:1+n/plotgap+1),enkin(1:1+n/plotgap+1),enpot(1:1+n/plotgap+1))
! Save times at which output was made in text format
PRINT *,'Saved data'
call cufftDestroy(planf)
call cufftDestroy(planb)
PRINT *,'Destroy CUFFT Plans'
call dfftw_destroy_plan_(planbxy)
call dfftw_destroy_plan_(planfxy)
PRINT *,'Destroy FFTW Plans'
DEALLOCATE(kx,ky,x,y,u,uold,time,enkin,enstr,enpot,en,savearray,temp1,temp2,&
stat=allocatestatus)
IF (allocatestatus .ne. 0) STOP
PRINT *,'Deallocated host arrays'
DEALLOCATE(uold_d,vold_d,u_d,v_d,nonlinhat_d,&
kx_d,ky_d,&
stat=allocatestatus)
IF (allocatestatus .ne. 0) STOP
PRINT *,'Deallocated gpu arrays'
PRINT *,'Program execution complete'
END PROGRAM sg2d