diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index 363b1d5844d1b..b15dd29d68f65 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -282,7 +282,6 @@ struct IntrinsicLibrary { llvm::ArrayRef args); mlir::Value genGetUID(mlir::Type resultType, llvm::ArrayRef args); - mlir::Value genGlobalTimer(mlir::Type, llvm::ArrayRef); fir::ExtendedValue genHostnm(std::optional resultType, llvm::ArrayRef args); fir::ExtendedValue genIall(mlir::Type, llvm::ArrayRef); @@ -377,6 +376,8 @@ struct IntrinsicLibrary { fir::ExtendedValue genNorm2(mlir::Type, llvm::ArrayRef); mlir::Value genNot(mlir::Type, llvm::ArrayRef); fir::ExtendedValue genNull(mlir::Type, llvm::ArrayRef); + template + mlir::Value genNVVMTime(mlir::Type, llvm::ArrayRef); fir::ExtendedValue genPack(mlir::Type, llvm::ArrayRef); fir::ExtendedValue genParity(mlir::Type, llvm::ArrayRef); void genPerror(llvm::ArrayRef); diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index ddfa27475fa7a..bfbc26e5e6c19 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -385,6 +385,7 @@ static constexpr IntrinsicHandler handlers[]{ &I::genChdir, {{{"name", asAddr}, {"status", asAddr, handleDynamicOptional}}}, /*isElemental=*/false}, + {"clock", &I::genNVVMTime, {}, /*isElemental=*/false}, {"clock64", &I::genClock64, {}, /*isElemental=*/false}, {"cmplx", &I::genCmplx, @@ -503,7 +504,10 @@ static constexpr IntrinsicHandler handlers[]{ {"getgid", &I::genGetGID}, {"getpid", &I::genGetPID}, {"getuid", &I::genGetUID}, - {"globaltimer", &I::genGlobalTimer, {}, /*isElemental=*/false}, + {"globaltimer", + &I::genNVVMTime, + {}, + /*isElemental=*/false}, {"hostnm", &I::genHostnm, {{{"c", asBox}, {"status", asAddr, handleDynamicOptional}}}, @@ -4320,13 +4324,6 @@ mlir::Value IntrinsicLibrary::genGetUID(mlir::Type resultType, fir::runtime::genGetUID(builder, loc)); } -// GLOBALTIMER -mlir::Value IntrinsicLibrary::genGlobalTimer(mlir::Type resultType, - llvm::ArrayRef args) { - assert(args.size() == 0 && "globalTimer takes no args"); - return builder.create(loc, resultType).getResult(); -} - // GET_COMMAND_ARGUMENT void IntrinsicLibrary::genGetCommandArgument( llvm::ArrayRef args) { @@ -7207,6 +7204,14 @@ IntrinsicLibrary::genNull(mlir::Type, llvm::ArrayRef args) { return fir::MutableBoxValue(boxStorage, mold->nonDeferredLenParams(), {}); } +// CLOCK, GLOBALTIMER +template +mlir::Value IntrinsicLibrary::genNVVMTime(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 0 && "expect no arguments"); + return builder.create(loc, resultType).getResult(); +} + // PACK fir::ExtendedValue IntrinsicLibrary::genPack(mlir::Type resultType, diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90 index 52a619e07165c..d0c312c09353f 100644 --- a/flang/module/cudadevice.f90 +++ b/flang/module/cudadevice.f90 @@ -957,11 +957,21 @@ attributes(device) pure integer function atomicxori(address, val) ! Time function + interface + attributes(device) integer function clock() + end function + end interface + interface attributes(device) integer(8) function clock64() end function end interface + interface + attributes(device) integer(8) function globalTimer() + end function + end interface + ! Warp Match Functions interface match_all_sync @@ -1613,11 +1623,6 @@ attributes(device,host) logical function on_device() bind(c) end function end interface - interface - attributes(device) integer(8) function globalTimer() - end function - end interface - contains attributes(device) subroutine syncthreads() diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 888c7961ee2b4..2d6f734670740 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -10,6 +10,7 @@ attributes(global) subroutine devsub() integer(4) :: ai integer(8) :: al integer(8) :: time + integer :: smalltime call syncthreads() call syncwarp(1) @@ -45,6 +46,7 @@ attributes(global) subroutine devsub() ai = atomicinc(ai, 1_4) ai = atomicdec(ai, 1_4) + smalltime = clock() time = clock64() time = globalTimer() @@ -84,6 +86,7 @@ end ! CHECK: %{{.*}} = llvm.atomicrmw uinc_wrap %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32 ! CHECK: %{{.*}} = llvm.atomicrmw udec_wrap %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32 +! CHECK: %{{.*}} = nvvm.read.ptx.sreg.clock : i32 ! CHECK: fir.call @llvm.nvvm.read.ptx.sreg.clock64() ! CHECK: %{{.*}} = nvvm.read.ptx.sreg.globaltimer : i64