diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 4b0dd9f1e6485..4818514191d5b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4814,8 +4814,15 @@ AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { case AtomicRMWInst::FMax: case AtomicRMWInst::FMin: return AtomicExpansionKind::CmpXChg; - default: - return AtomicExpansionKind::None; + default: { + if (auto *IntTy = dyn_cast(RMW->getType())) { + unsigned Size = IntTy->getBitWidth(); + if (Size == 32 || Size == 64) + return AtomicExpansionKind::None; + } + + return AtomicExpansionKind::CmpXChg; + } } } diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll index b5b7661298f5e..b76886f3ff8b9 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll @@ -4,8 +4,30 @@ define i16 @test_atomicrmw_xchg_i16_global(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_xchg_i16_global( -; CHECK-NEXT: [[RES:%.*]] = atomicrmw xchg i16 addrspace(1)* [[PTR:%.*]], i16 [[VALUE:%.*]] seq_cst, align 2 -; CHECK-NEXT: ret i16 [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED]] ; %res = atomicrmw xchg i16 addrspace(1)* %ptr, i16 %value seq_cst ret i16 %res @@ -13,8 +35,30 @@ define i16 @test_atomicrmw_xchg_i16_global(i16 addrspace(1)* %ptr, i16 %value) { define i16 @test_atomicrmw_xchg_i16_global_align4(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_xchg_i16_global_align4( -; CHECK-NEXT: [[RES:%.*]] = atomicrmw xchg i16 addrspace(1)* [[PTR:%.*]], i16 [[VALUE:%.*]] seq_cst, align 4 -; CHECK-NEXT: ret i16 [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED]] ; %res = atomicrmw xchg i16 addrspace(1)* %ptr, i16 %value seq_cst, align 4 ret i16 %res @@ -22,8 +66,32 @@ define i16 @test_atomicrmw_xchg_i16_global_align4(i16 addrspace(1)* %ptr, i16 %v define i16 @test_atomicrmw_add_i16_global(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_add_i16_global( -; CHECK-NEXT: [[RES:%.*]] = atomicrmw add i16 addrspace(1)* [[PTR:%.*]], i16 [[VALUE:%.*]] seq_cst, align 2 -; CHECK-NEXT: ret i16 [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]] +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[NEW]], [[MASK]] +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP8]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP9]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED]] ; %res = atomicrmw add i16 addrspace(1)* %ptr, i16 %value seq_cst ret i16 %res @@ -31,8 +99,32 @@ define i16 @test_atomicrmw_add_i16_global(i16 addrspace(1)* %ptr, i16 %value) { define i16 @test_atomicrmw_add_i16_global_align4(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_add_i16_global_align4( -; CHECK-NEXT: [[RES:%.*]] = atomicrmw add i16 addrspace(1)* [[PTR:%.*]], i16 [[VALUE:%.*]] seq_cst, align 4 -; CHECK-NEXT: ret i16 [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]] +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[NEW]], [[MASK]] +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP8]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP9]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED]] ; %res = atomicrmw add i16 addrspace(1)* %ptr, i16 %value seq_cst, align 4 ret i16 %res @@ -40,8 +132,32 @@ define i16 @test_atomicrmw_add_i16_global_align4(i16 addrspace(1)* %ptr, i16 %va define i16 @test_atomicrmw_sub_i16_global(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_sub_i16_global( -; CHECK-NEXT: [[RES:%.*]] = atomicrmw sub i16 addrspace(1)* [[PTR:%.*]], i16 [[VALUE:%.*]] seq_cst, align 2 -; CHECK-NEXT: ret i16 [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = sub i32 [[LOADED]], [[VALOPERAND_SHIFTED]] +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[NEW]], [[MASK]] +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP8]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP9]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED]] ; %res = atomicrmw sub i16 addrspace(1)* %ptr, i16 %value seq_cst ret i16 %res @@ -147,8 +263,36 @@ define i16 @test_atomicrmw_xor_i16_global(i16 addrspace(1)* %ptr, i16 %value) { define i16 @test_atomicrmw_max_i16_global(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_max_i16_global( -; CHECK-NEXT: [[RES:%.*]] = atomicrmw max i16 addrspace(1)* [[PTR:%.*]], i16 [[VALUE:%.*]] seq_cst, align 2 -; CHECK-NEXT: ret i16 [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i16 [[EXTRACTED]], [[VALUE]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP6]], i16 [[EXTRACTED]], i16 [[VALUE]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED3]] ; %res = atomicrmw max i16 addrspace(1)* %ptr, i16 %value seq_cst ret i16 %res @@ -156,8 +300,36 @@ define i16 @test_atomicrmw_max_i16_global(i16 addrspace(1)* %ptr, i16 %value) { define i16 @test_atomicrmw_min_i16_global(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_min_i16_global( -; CHECK-NEXT: [[RES:%.*]] = atomicrmw min i16 addrspace(1)* [[PTR:%.*]], i16 [[VALUE:%.*]] seq_cst, align 2 -; CHECK-NEXT: ret i16 [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: [[TMP6:%.*]] = icmp sle i16 [[EXTRACTED]], [[VALUE]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP6]], i16 [[EXTRACTED]], i16 [[VALUE]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED3]] ; %res = atomicrmw min i16 addrspace(1)* %ptr, i16 %value seq_cst ret i16 %res @@ -165,8 +337,36 @@ define i16 @test_atomicrmw_min_i16_global(i16 addrspace(1)* %ptr, i16 %value) { define i16 @test_atomicrmw_umax_i16_global(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_umax_i16_global( -; CHECK-NEXT: [[RES:%.*]] = atomicrmw umax i16 addrspace(1)* [[PTR:%.*]], i16 [[VALUE:%.*]] seq_cst, align 2 -; CHECK-NEXT: ret i16 [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP6]], i16 [[EXTRACTED]], i16 [[VALUE]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED3]] ; %res = atomicrmw umax i16 addrspace(1)* %ptr, i16 %value seq_cst ret i16 %res @@ -174,8 +374,36 @@ define i16 @test_atomicrmw_umax_i16_global(i16 addrspace(1)* %ptr, i16 %value) { define i16 @test_atomicrmw_umin_i16_global(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_umin_i16_global( -; CHECK-NEXT: [[RES:%.*]] = atomicrmw umin i16 addrspace(1)* [[PTR:%.*]], i16 [[VALUE:%.*]] seq_cst, align 2 -; CHECK-NEXT: ret i16 [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ule i16 [[EXTRACTED]], [[VALUE]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP6]], i16 [[EXTRACTED]], i16 [[VALUE]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED3]] ; %res = atomicrmw umin i16 addrspace(1)* %ptr, i16 %value seq_cst ret i16 %res diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i8.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i8.ll index ae8656916b60d..d968c4876f61b 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i8.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i8.ll @@ -4,8 +4,30 @@ define i8 @test_atomicrmw_xchg_i8_global(i8 addrspace(1)* %ptr, i8 %value) { ; CHECK-LABEL: @test_atomicrmw_xchg_i8_global( -; CHECK-NEXT: [[RES:%.*]] = atomicrmw xchg i8 addrspace(1)* [[PTR:%.*]], i8 [[VALUE:%.*]] seq_cst, align 1 -; CHECK-NEXT: ret i8 [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED]] ; %res = atomicrmw xchg i8 addrspace(1)* %ptr, i8 %value seq_cst ret i8 %res @@ -13,8 +35,32 @@ define i8 @test_atomicrmw_xchg_i8_global(i8 addrspace(1)* %ptr, i8 %value) { define i8 @test_atomicrmw_add_i8_global(i8 addrspace(1)* %ptr, i8 %value) { ; CHECK-LABEL: @test_atomicrmw_add_i8_global( -; CHECK-NEXT: [[RES:%.*]] = atomicrmw add i8 addrspace(1)* [[PTR:%.*]], i8 [[VALUE:%.*]] seq_cst, align 1 -; CHECK-NEXT: ret i8 [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]] +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[NEW]], [[MASK]] +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP8]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP9]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED]] ; %res = atomicrmw add i8 addrspace(1)* %ptr, i8 %value seq_cst ret i8 %res @@ -22,8 +68,32 @@ define i8 @test_atomicrmw_add_i8_global(i8 addrspace(1)* %ptr, i8 %value) { define i8 @test_atomicrmw_sub_i8_global(i8 addrspace(1)* %ptr, i8 %value) { ; CHECK-LABEL: @test_atomicrmw_sub_i8_global( -; CHECK-NEXT: [[RES:%.*]] = atomicrmw sub i8 addrspace(1)* [[PTR:%.*]], i8 [[VALUE:%.*]] seq_cst, align 1 -; CHECK-NEXT: ret i8 [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = sub i32 [[LOADED]], [[VALOPERAND_SHIFTED]] +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[NEW]], [[MASK]] +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP8]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP9]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED]] ; %res = atomicrmw sub i8 addrspace(1)* %ptr, i8 %value seq_cst ret i8 %res @@ -129,8 +199,36 @@ define i8 @test_atomicrmw_xor_i8_global(i8 addrspace(1)* %ptr, i8 %value) { define i8 @test_atomicrmw_max_i8_global(i8 addrspace(1)* %ptr, i8 %value) { ; CHECK-LABEL: @test_atomicrmw_max_i8_global( -; CHECK-NEXT: [[RES:%.*]] = atomicrmw max i8 addrspace(1)* [[PTR:%.*]], i8 [[VALUE:%.*]] seq_cst, align 1 -; CHECK-NEXT: ret i8 [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i8 [[EXTRACTED]], [[VALUE]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP6]], i8 [[EXTRACTED]], i8 [[VALUE]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED3]] ; %res = atomicrmw max i8 addrspace(1)* %ptr, i8 %value seq_cst ret i8 %res @@ -138,8 +236,36 @@ define i8 @test_atomicrmw_max_i8_global(i8 addrspace(1)* %ptr, i8 %value) { define i8 @test_atomicrmw_min_i8_global(i8 addrspace(1)* %ptr, i8 %value) { ; CHECK-LABEL: @test_atomicrmw_min_i8_global( -; CHECK-NEXT: [[RES:%.*]] = atomicrmw min i8 addrspace(1)* [[PTR:%.*]], i8 [[VALUE:%.*]] seq_cst, align 1 -; CHECK-NEXT: ret i8 [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP6:%.*]] = icmp sle i8 [[EXTRACTED]], [[VALUE]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP6]], i8 [[EXTRACTED]], i8 [[VALUE]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED3]] ; %res = atomicrmw min i8 addrspace(1)* %ptr, i8 %value seq_cst ret i8 %res @@ -147,8 +273,36 @@ define i8 @test_atomicrmw_min_i8_global(i8 addrspace(1)* %ptr, i8 %value) { define i8 @test_atomicrmw_umax_i8_global(i8 addrspace(1)* %ptr, i8 %value) { ; CHECK-LABEL: @test_atomicrmw_umax_i8_global( -; CHECK-NEXT: [[RES:%.*]] = atomicrmw umax i8 addrspace(1)* [[PTR:%.*]], i8 [[VALUE:%.*]] seq_cst, align 1 -; CHECK-NEXT: ret i8 [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP6]], i8 [[EXTRACTED]], i8 [[VALUE]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED3]] ; %res = atomicrmw umax i8 addrspace(1)* %ptr, i8 %value seq_cst ret i8 %res @@ -156,8 +310,36 @@ define i8 @test_atomicrmw_umax_i8_global(i8 addrspace(1)* %ptr, i8 %value) { define i8 @test_atomicrmw_umin_i8_global(i8 addrspace(1)* %ptr, i8 %value) { ; CHECK-LABEL: @test_atomicrmw_umin_i8_global( -; CHECK-NEXT: [[RES:%.*]] = atomicrmw umin i8 addrspace(1)* [[PTR:%.*]], i8 [[VALUE:%.*]] seq_cst, align 1 -; CHECK-NEXT: ret i8 [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ule i8 [[EXTRACTED]], [[VALUE]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP6]], i8 [[EXTRACTED]], i8 [[VALUE]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED3]] ; %res = atomicrmw umin i8 addrspace(1)* %ptr, i8 %value seq_cst ret i8 %res