diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 6b02fefb92196..39bac818fe5d0 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -3106,7 +3106,9 @@ IntrinsicLibrary::genAtomicCas(mlir::Type resultType, .getResult(0); auto cmpxchg = mlir::LLVM::AtomicCmpXchgOp::create( builder, loc, address, arg1, arg2, successOrdering, failureOrdering); - return mlir::LLVM::ExtractValueOp::create(builder, loc, cmpxchg, 1); + mlir::Value boolResult = + mlir::LLVM::ExtractValueOp::create(builder, loc, cmpxchg, 1); + return builder.createConvert(loc, resultType, boolResult); } mlir::Value IntrinsicLibrary::genAtomicDec(mlir::Type resultType, diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 7d6caf58d71b3..5c4c3c6d39820 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -479,3 +479,16 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_bulk_s2g ! CHECL: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> + +attributes(device) subroutine testAtomicCasLoop(aa, n) + integer :: a + do while (atomiccas(a, 0, 1) == 1) + end do +end subroutine + +! CHECK-LABEL: func.func @_QPtestatomiccasloop +! CHECK: %[[CMP_XCHG:.*]] = llvm.cmpxchg %15, %c0_i32, %c1_i32 acq_rel monotonic : !llvm.ptr, i32 +! CHECK: %[[CMP_XCHG_EV:.*]] = llvm.extractvalue %[[CMP_XCHG]][1] : !llvm.struct<(i32, i1)> +! CHECK: %[[CASTED_CMP_XCHG_EV:.*]] = fir.convert %[[CMP_XCHG_EV]] : (i1) -> i32 +! CHECK: %{{.*}} = arith.constant 1 : i32 +! CHECK: %19 = arith.cmpi eq, %[[CASTED_CMP_XCHG_EV]], %{{.*}} : i32