diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h index 490d1a34cc846..3bf9d694b1b21 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h @@ -37,8 +37,6 @@ class LLVM_ABI GISelValueTracking : public GISelChangeObserver { const TargetLowering &TL; const DataLayout &DL; unsigned MaxDepth; - /// Cache maintained during a computeKnownBits request. - SmallDenseMap ComputeKnownBitsCache; void computeKnownBitsMin(Register Src0, Register Src1, KnownBits &Known, const APInt &DemandedElts, unsigned Depth = 0); diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp index 974fc40de6222..f2ad56e0efcde 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp @@ -93,12 +93,8 @@ KnownBits GISelValueTracking::getKnownBits(Register R) { KnownBits GISelValueTracking::getKnownBits(Register R, const APInt &DemandedElts, unsigned Depth) { - // For now, we only maintain the cache during one request. - assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared"); - KnownBits Known; computeKnownBitsImpl(R, Known, DemandedElts, Depth); - ComputeKnownBitsCache.clear(); return Known; } @@ -187,14 +183,6 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, #endif unsigned BitWidth = DstTy.getScalarSizeInBits(); - auto CacheEntry = ComputeKnownBitsCache.find(R); - if (CacheEntry != ComputeKnownBitsCache.end()) { - Known = CacheEntry->second; - LLVM_DEBUG(dbgs() << "Cache hit at "); - LLVM_DEBUG(dumpResult(MI, Known, Depth)); - assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match"); - return; - } Known = KnownBits(BitWidth); // Don't know anything // Depth may get bigger than max depth if it gets passed to a different @@ -254,16 +242,6 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, // point of the pipeline, otherwise the main live-range will be // defined more than once, which is against SSA. assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?"); - // Record in the cache that we know nothing for MI. - // This will get updated later and in the meantime, if we reach that - // phi again, because of a loop, we will cut the search thanks to this - // cache entry. - // We could actually build up more information on the phi by not cutting - // the search, but that additional information is more a side effect - // than an intended choice. - // Therefore, for now, save on compile time until we derive a proper way - // to derive known bits for PHIs within loops. - ComputeKnownBitsCache[R] = KnownBits(BitWidth); // PHI's operand are a mix of registers and basic blocks interleaved. // We only care about the register ones. for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) { @@ -700,9 +678,6 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, } LLVM_DEBUG(dumpResult(MI, Known, Depth)); - - // Update the cache. - ComputeKnownBitsCache[R] = Known; } static bool outputDenormalIsIEEEOrPosZero(const MachineFunction &MF, LLT Ty) { diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll index c57383ad9b1e7..3934231cb0cfe 100644 --- a/llvm/test/CodeGen/AArch64/rem-by-const.ll +++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll @@ -88,7 +88,7 @@ define i8 @ui8_7(i8 %a, i8 %b) { ; CHECK-GI-NEXT: sub w9, w0, w8 ; CHECK-GI-NEXT: ubfx w9, w9, #1, #7 ; CHECK-GI-NEXT: add w8, w9, w8 -; CHECK-GI-NEXT: ubfx w8, w8, #2, #6 +; CHECK-GI-NEXT: lsr w8, w8, #2 ; CHECK-GI-NEXT: lsl w9, w8, #3 ; CHECK-GI-NEXT: sub w8, w9, w8 ; CHECK-GI-NEXT: sub w0, w0, w8 @@ -207,7 +207,7 @@ define i16 @ui16_7(i16 %a, i16 %b) { ; CHECK-GI-NEXT: sub w9, w0, w8 ; CHECK-GI-NEXT: ubfx w9, w9, #1, #15 ; CHECK-GI-NEXT: add w8, w9, w8 -; CHECK-GI-NEXT: ubfx w8, w8, #2, #14 +; CHECK-GI-NEXT: lsr w8, w8, #2 ; CHECK-GI-NEXT: lsl w9, w8, #3 ; CHECK-GI-NEXT: sub w8, w9, w8 ; CHECK-GI-NEXT: sub w0, w0, w8 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll index f57fc005b994b..9ffc565d9d47a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -1186,77 +1186,77 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: s_subb_u32 s6, 0, 0 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 -; GISEL-NEXT: v_trunc_f32_e32 v8, v5 -; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v8 -; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v4 -; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v7, 0 -; GISEL-NEXT: v_mov_b32_e32 v9, v5 -; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v6, v8, v[9:10] -; GISEL-NEXT: v_mul_hi_u32 v11, v7, v4 -; GISEL-NEXT: v_mul_hi_u32 v12, v8, v4 -; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v7, v[9:10] -; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 -; GISEL-NEXT: v_mul_lo_u32 v13, v7, v9 -; GISEL-NEXT: v_mul_lo_u32 v4, v8, v9 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v10, v13 +; GISEL-NEXT: v_trunc_f32_e32 v7, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v7 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0 +; GISEL-NEXT: v_mov_b32_e32 v7, v5 +; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8] +; GISEL-NEXT: v_mul_hi_u32 v12, v9, v4 +; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], s6, v8, v[10:11] +; GISEL-NEXT: v_mul_lo_u32 v10, v9, v4 +; GISEL-NEXT: v_mul_hi_u32 v11, v8, v4 +; GISEL-NEXT: v_mul_lo_u32 v7, v8, v13 +; GISEL-NEXT: v_mul_lo_u32 v4, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_mul_hi_u32 v14, v7, v9 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v14, v7 +; GISEL-NEXT: v_mul_hi_u32 v14, v8, v13 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v7, v4 +; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v14, v7 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v8, v4 ; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0 -; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v8, v9, vcc +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v9, v7, vcc ; GISEL-NEXT: v_mov_b32_e32 v4, v14 -; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5] -; GISEL-NEXT: v_mul_lo_u32 v4, v17, v13 +; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v7, v[4:5] +; GISEL-NEXT: v_mul_lo_u32 v4, v7, v13 ; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], s6, v16, v[14:15] ; GISEL-NEXT: s_mov_b32 s6, 1 ; GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GISEL-NEXT: v_mul_lo_u32 v9, v16, v14 +; GISEL-NEXT: v_mul_lo_u32 v15, v16, v14 ; GISEL-NEXT: s_subb_u32 s6, 0, 0 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v16, v13 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v15 +; GISEL-NEXT: v_mul_hi_u32 v15, v16, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_mul_hi_u32 v13, v7, v13 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v15, v7, v14 +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, v16, v14 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v4 ; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_mul_hi_u32 v9, v17, v13 -; GISEL-NEXT: v_mul_lo_u32 v13, v17, v14 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v15, v4 -; GISEL-NEXT: v_mul_hi_u32 v15, v16, v14 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v9, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v9 -; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc -; GISEL-NEXT: v_xor_b32_e32 v18, v0, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v4 -; GISEL-NEXT: v_mul_hi_u32 v4, v17, v14 -; GISEL-NEXT: v_xor_b32_e32 v19, v1, v9 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v4 +; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: v_xor_b32_e32 v18, v0, v4 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v17 +; GISEL-NEXT: v_mul_hi_u32 v13, v7, v14 +; GISEL-NEXT: v_xor_b32_e32 v19, v1, v4 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v15, v1 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v7, v1, vcc ; GISEL-NEXT: v_mul_lo_u32 v13, v19, v0 ; GISEL-NEXT: v_mul_lo_u32 v14, v18, v1 ; GISEL-NEXT: v_mul_hi_u32 v15, v18, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v19, v0 -; GISEL-NEXT: v_mov_b32_e32 v4, 0x12d8fb +; GISEL-NEXT: v_mov_b32_e32 v7, 0x12d8fb ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 @@ -1271,144 +1271,147 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v0, v13 ; GISEL-NEXT: v_mul_hi_u32 v16, v19, v1 -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v15, 0 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v7, v15, 0 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v13 -; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v16, v[1:2] +; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v7, v16, v[1:2] ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v18, v0 +; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], 0, v15, v[13:14] ; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v19, v13, vcc ; GISEL-NEXT: v_sub_i32_e64 v13, s[4:5], v19, v13 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 ; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v13, vcc -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 ; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v15 ; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v16, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; GISEL-NEXT: v_cndmask_b32_e32 v18, -1, v0, vcc ; GISEL-NEXT: v_mov_b32_e32 v0, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v14, -1, v14, s[4:5] -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1] -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v7, v[0:1] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, v[0:1] ; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v13 ; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v17, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 -; GISEL-NEXT: v_mul_lo_u32 v18, v7, v0 +; GISEL-NEXT: v_mul_lo_u32 v18, v8, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v1, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v18 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v8, v0 +; GISEL-NEXT: v_mul_lo_u32 v11, v9, v0 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; GISEL-NEXT: v_mul_hi_u32 v10, v7, v0 +; GISEL-NEXT: v_mul_hi_u32 v10, v8, v0 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_mul_hi_u32 v0, v8, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v9, v0 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v1 -; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v0, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v1 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v0, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; GISEL-NEXT: v_cndmask_b32_e32 v11, v16, v5, vcc -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v8, v[1:2] -; GISEL-NEXT: v_xor_b32_e32 v1, v11, v9 -; GISEL-NEXT: v_ashrrev_i32_e32 v11, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v7, v[5:6] +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2] ; GISEL-NEXT: v_cndmask_b32_e32 v10, v15, v13, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v11, vcc -; GISEL-NEXT: v_xor_b32_e32 v12, v2, v11 -; GISEL-NEXT: v_mul_lo_u32 v2, v8, v0 -; GISEL-NEXT: v_mul_lo_u32 v6, v7, v5 -; GISEL-NEXT: v_xor_b32_e32 v13, v3, v11 -; GISEL-NEXT: v_mul_hi_u32 v3, v7, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v8, v0 +; GISEL-NEXT: v_xor_b32_e32 v1, v10, v4 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v8, v[5:6] +; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc +; GISEL-NEXT: v_xor_b32_e32 v12, v2, v10 +; GISEL-NEXT: v_mul_lo_u32 v2, v9, v0 +; GISEL-NEXT: v_mul_lo_u32 v6, v8, v5 +; GISEL-NEXT: v_xor_b32_e32 v13, v3, v10 +; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v9, v0 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v8, v5 +; GISEL-NEXT: v_mul_lo_u32 v3, v9, v5 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; GISEL-NEXT: v_mul_hi_u32 v6, v7, v5 +; GISEL-NEXT: v_mul_hi_u32 v6, v8, v5 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; GISEL-NEXT: v_mul_hi_u32 v5, v8, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v0 -; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc -; GISEL-NEXT: v_mul_lo_u32 v5, v13, v3 -; GISEL-NEXT: v_mul_lo_u32 v6, v12, v2 -; GISEL-NEXT: v_xor_b32_e32 v10, v10, v9 -; GISEL-NEXT: v_mul_hi_u32 v7, v12, v3 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v9 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v9, v2, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v13, v0 +; GISEL-NEXT: v_mul_lo_u32 v5, v12, v2 +; GISEL-NEXT: v_mul_hi_u32 v6, v12, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 +; GISEL-NEXT: v_xor_b32_e32 v8, v11, v4 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v13, v2 -; GISEL-NEXT: v_mul_hi_u32 v3, v13, v3 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GISEL-NEXT: v_mul_hi_u32 v6, v12, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v6, v13, v2 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; GISEL-NEXT: v_mul_hi_u32 v5, v12, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v3, v5 -; GISEL-NEXT: v_mul_hi_u32 v8, v13, v2 -; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v7, 0 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v5 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v8, v[3:4] +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v0, v3 +; GISEL-NEXT: v_mul_hi_u32 v6, v13, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v9, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v6, v0 +; GISEL-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v7, v11, v[0:1] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v9, v[5:6] ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v12, v2 -; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc -; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5 -; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 -; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v3, -1, v6, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v7 -; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v8, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v13, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v13, v3 +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v7 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v4, -1, v5, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v9 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v11, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v6 -; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v9, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v6, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v4, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v4, v9, v5, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v4, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v11 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v11 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v11 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v11, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v7, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v10 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v10 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_sdiv_v2i64_oddk_denom: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll index 19dc20c510041..82279e641ed63 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -1112,67 +1112,67 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; GISEL-NEXT: s_subb_u32 s6, 0, 0 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 -; GISEL-NEXT: v_trunc_f32_e32 v8, v5 -; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v8 -; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v4 -; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v7, 0 -; GISEL-NEXT: v_mov_b32_e32 v9, v5 -; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v6, v8, v[9:10] -; GISEL-NEXT: v_mul_hi_u32 v11, v7, v4 -; GISEL-NEXT: v_mul_hi_u32 v12, v8, v4 -; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v7, v[9:10] -; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 -; GISEL-NEXT: v_mul_lo_u32 v13, v7, v9 -; GISEL-NEXT: v_mul_lo_u32 v4, v8, v9 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v10, v13 +; GISEL-NEXT: v_trunc_f32_e32 v7, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v7 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0 +; GISEL-NEXT: v_mov_b32_e32 v7, v5 +; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8] +; GISEL-NEXT: v_mul_hi_u32 v12, v9, v4 +; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], s6, v8, v[10:11] +; GISEL-NEXT: v_mul_lo_u32 v10, v9, v4 +; GISEL-NEXT: v_mul_hi_u32 v11, v8, v4 +; GISEL-NEXT: v_mul_lo_u32 v7, v8, v13 +; GISEL-NEXT: v_mul_lo_u32 v4, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_mul_hi_u32 v14, v7, v9 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v14, v7 +; GISEL-NEXT: v_mul_hi_u32 v14, v8, v13 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v7, v4 +; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v14, v7 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v8, v4 ; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0 -; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v8, v9, vcc +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v9, v7, vcc ; GISEL-NEXT: v_mov_b32_e32 v4, v14 ; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5] ; GISEL-NEXT: v_mul_lo_u32 v4, v17, v13 ; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], s6, v16, v[14:15] ; GISEL-NEXT: s_mov_b32 s6, 1 ; GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GISEL-NEXT: v_mul_lo_u32 v9, v16, v14 +; GISEL-NEXT: v_mul_lo_u32 v7, v16, v14 ; GISEL-NEXT: s_subb_u32 s6, 0, 0 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v16, v13 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, v16, v13 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_mul_hi_u32 v9, v17, v13 +; GISEL-NEXT: v_mul_hi_u32 v7, v17, v13 ; GISEL-NEXT: v_mul_lo_u32 v13, v17, v14 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v15, v4 ; GISEL-NEXT: v_mul_hi_u32 v15, v16, v14 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v9, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v9 -; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc -; GISEL-NEXT: v_xor_b32_e32 v18, v0, v9 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v7, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v7 +; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc +; GISEL-NEXT: v_xor_b32_e32 v18, v0, v7 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v4 ; GISEL-NEXT: v_mul_hi_u32 v4, v17, v14 -; GISEL-NEXT: v_xor_b32_e32 v19, v1, v9 +; GISEL-NEXT: v_xor_b32_e32 v19, v1, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1 @@ -1195,13 +1195,14 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; GISEL-NEXT: v_mul_hi_u32 v15, v19, v1 -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v0, v13 +; GISEL-NEXT: v_mul_hi_u32 v16, v19, v1 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v15, 0 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13 ; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2] +; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], 0, v15, v[13:14] ; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v18, v0 ; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v19, v13 ; GISEL-NEXT: v_subb_u32_e64 v15, s[4:5], v19, v13, vcc @@ -1217,94 +1218,96 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; GISEL-NEXT: v_cndmask_b32_e32 v18, -1, v0, vcc ; GISEL-NEXT: v_mov_b32_e32 v0, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v13, -1, v1, s[4:5] -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1] -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v7, v[0:1] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, v[0:1] ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v16, v4 ; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v17, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 -; GISEL-NEXT: v_mul_lo_u32 v18, v7, v0 +; GISEL-NEXT: v_mul_lo_u32 v18, v8, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v16, v16, v1, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v18 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v8, v0 +; GISEL-NEXT: v_mul_lo_u32 v11, v9, v0 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; GISEL-NEXT: v_mul_hi_u32 v10, v7, v0 +; GISEL-NEXT: v_mul_hi_u32 v10, v8, v0 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_mul_hi_u32 v0, v8, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v9, v0 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v1 -; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v0, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v1 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v0, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 ; GISEL-NEXT: v_cndmask_b32_e32 v11, v15, v5, vcc -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v8, v[1:2] -; GISEL-NEXT: v_xor_b32_e32 v1, v11, v9 -; GISEL-NEXT: v_ashrrev_i32_e32 v11, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v7, v[5:6] +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2] ; GISEL-NEXT: v_cndmask_b32_e32 v10, v14, v16, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v11, vcc -; GISEL-NEXT: v_xor_b32_e32 v12, v2, v11 -; GISEL-NEXT: v_mul_lo_u32 v2, v8, v0 -; GISEL-NEXT: v_mul_lo_u32 v6, v7, v5 -; GISEL-NEXT: v_xor_b32_e32 v13, v3, v11 -; GISEL-NEXT: v_mul_hi_u32 v3, v7, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v8, v0 +; GISEL-NEXT: v_xor_b32_e32 v1, v10, v7 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v8, v[5:6] +; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc +; GISEL-NEXT: v_xor_b32_e32 v12, v2, v10 +; GISEL-NEXT: v_mul_lo_u32 v2, v9, v0 +; GISEL-NEXT: v_mul_lo_u32 v6, v8, v5 +; GISEL-NEXT: v_xor_b32_e32 v13, v3, v10 +; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v9, v0 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v8, v5 +; GISEL-NEXT: v_mul_lo_u32 v3, v9, v5 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; GISEL-NEXT: v_mul_hi_u32 v6, v7, v5 +; GISEL-NEXT: v_mul_hi_u32 v6, v8, v5 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; GISEL-NEXT: v_mul_hi_u32 v5, v8, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v0 -; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc -; GISEL-NEXT: v_mul_lo_u32 v5, v13, v3 -; GISEL-NEXT: v_mul_lo_u32 v6, v12, v2 -; GISEL-NEXT: v_xor_b32_e32 v10, v10, v9 -; GISEL-NEXT: v_mul_hi_u32 v7, v12, v3 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v9 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v9, v2, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v13, v0 +; GISEL-NEXT: v_mul_lo_u32 v5, v12, v2 +; GISEL-NEXT: v_mul_hi_u32 v6, v12, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 +; GISEL-NEXT: v_xor_b32_e32 v8, v11, v7 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v13, v2 -; GISEL-NEXT: v_mul_hi_u32 v3, v13, v3 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GISEL-NEXT: v_mul_hi_u32 v6, v12, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v6, v13, v2 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; GISEL-NEXT: v_mul_hi_u32 v5, v12, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; GISEL-NEXT: v_mul_hi_u32 v7, v13, v2 -; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4] +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v0, v3 +; GISEL-NEXT: v_mul_hi_u32 v6, v13, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v9, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v0 +; GISEL-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[0:1] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v7 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], 0, v9, v[5:6] +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v7, vcc ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v12, v2 ; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc ; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5 @@ -1327,10 +1330,10 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v11 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v11 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v11 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v11, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v10 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v10 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_srem_v2i64_pow2k_denom: @@ -1705,67 +1708,67 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: s_subb_u32 s6, 0, 0 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 -; GISEL-NEXT: v_trunc_f32_e32 v8, v5 -; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v8 -; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v4 -; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v7, 0 -; GISEL-NEXT: v_mov_b32_e32 v9, v5 -; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v6, v8, v[9:10] -; GISEL-NEXT: v_mul_hi_u32 v11, v7, v4 -; GISEL-NEXT: v_mul_hi_u32 v12, v8, v4 -; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v7, v[9:10] -; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 -; GISEL-NEXT: v_mul_lo_u32 v13, v7, v9 -; GISEL-NEXT: v_mul_lo_u32 v4, v8, v9 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v10, v13 +; GISEL-NEXT: v_trunc_f32_e32 v7, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v7 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0 +; GISEL-NEXT: v_mov_b32_e32 v7, v5 +; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8] +; GISEL-NEXT: v_mul_hi_u32 v12, v9, v4 +; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], s6, v8, v[10:11] +; GISEL-NEXT: v_mul_lo_u32 v10, v9, v4 +; GISEL-NEXT: v_mul_hi_u32 v11, v8, v4 +; GISEL-NEXT: v_mul_lo_u32 v7, v8, v13 +; GISEL-NEXT: v_mul_lo_u32 v4, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_mul_hi_u32 v14, v7, v9 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v14, v7 +; GISEL-NEXT: v_mul_hi_u32 v14, v8, v13 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v7, v4 +; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v14, v7 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v8, v4 ; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0 -; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v8, v9, vcc +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v9, v7, vcc ; GISEL-NEXT: v_mov_b32_e32 v4, v14 ; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5] ; GISEL-NEXT: v_mul_lo_u32 v4, v17, v13 ; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], s6, v16, v[14:15] ; GISEL-NEXT: s_mov_b32 s6, 1 ; GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GISEL-NEXT: v_mul_lo_u32 v9, v16, v14 +; GISEL-NEXT: v_mul_lo_u32 v7, v16, v14 ; GISEL-NEXT: s_subb_u32 s6, 0, 0 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v16, v13 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, v16, v13 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_mul_hi_u32 v9, v17, v13 +; GISEL-NEXT: v_mul_hi_u32 v7, v17, v13 ; GISEL-NEXT: v_mul_lo_u32 v13, v17, v14 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v15, v4 ; GISEL-NEXT: v_mul_hi_u32 v15, v16, v14 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v9, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v9 -; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc -; GISEL-NEXT: v_xor_b32_e32 v18, v0, v9 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v7, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v7 +; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc +; GISEL-NEXT: v_xor_b32_e32 v18, v0, v7 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v4 ; GISEL-NEXT: v_mul_hi_u32 v4, v17, v14 -; GISEL-NEXT: v_xor_b32_e32 v19, v1, v9 +; GISEL-NEXT: v_xor_b32_e32 v19, v1, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1 @@ -1788,13 +1791,14 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; GISEL-NEXT: v_mul_hi_u32 v15, v19, v1 -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v0, v13 +; GISEL-NEXT: v_mul_hi_u32 v16, v19, v1 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v15, 0 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13 ; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2] +; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], 0, v15, v[13:14] ; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v18, v0 ; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v19, v13 ; GISEL-NEXT: v_subb_u32_e64 v15, s[4:5], v19, v13, vcc @@ -1810,94 +1814,96 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_cndmask_b32_e32 v18, -1, v0, vcc ; GISEL-NEXT: v_mov_b32_e32 v0, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v13, -1, v1, s[4:5] -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1] -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v7, v[0:1] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, v[0:1] ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v16, v4 ; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v17, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 -; GISEL-NEXT: v_mul_lo_u32 v18, v7, v0 +; GISEL-NEXT: v_mul_lo_u32 v18, v8, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v16, v16, v1, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v18 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v8, v0 +; GISEL-NEXT: v_mul_lo_u32 v11, v9, v0 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; GISEL-NEXT: v_mul_hi_u32 v10, v7, v0 +; GISEL-NEXT: v_mul_hi_u32 v10, v8, v0 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_mul_hi_u32 v0, v8, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v9, v0 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v1 -; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v0, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v1 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v0, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 ; GISEL-NEXT: v_cndmask_b32_e32 v11, v15, v5, vcc -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v8, v[1:2] -; GISEL-NEXT: v_xor_b32_e32 v1, v11, v9 -; GISEL-NEXT: v_ashrrev_i32_e32 v11, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v7, v[5:6] +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2] ; GISEL-NEXT: v_cndmask_b32_e32 v10, v14, v16, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v11, vcc -; GISEL-NEXT: v_xor_b32_e32 v12, v2, v11 -; GISEL-NEXT: v_mul_lo_u32 v2, v8, v0 -; GISEL-NEXT: v_mul_lo_u32 v6, v7, v5 -; GISEL-NEXT: v_xor_b32_e32 v13, v3, v11 -; GISEL-NEXT: v_mul_hi_u32 v3, v7, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v8, v0 +; GISEL-NEXT: v_xor_b32_e32 v1, v10, v7 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v8, v[5:6] +; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc +; GISEL-NEXT: v_xor_b32_e32 v12, v2, v10 +; GISEL-NEXT: v_mul_lo_u32 v2, v9, v0 +; GISEL-NEXT: v_mul_lo_u32 v6, v8, v5 +; GISEL-NEXT: v_xor_b32_e32 v13, v3, v10 +; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v9, v0 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v8, v5 +; GISEL-NEXT: v_mul_lo_u32 v3, v9, v5 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; GISEL-NEXT: v_mul_hi_u32 v6, v7, v5 +; GISEL-NEXT: v_mul_hi_u32 v6, v8, v5 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; GISEL-NEXT: v_mul_hi_u32 v5, v8, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v0 -; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc -; GISEL-NEXT: v_mul_lo_u32 v5, v13, v3 -; GISEL-NEXT: v_mul_lo_u32 v6, v12, v2 -; GISEL-NEXT: v_xor_b32_e32 v10, v10, v9 -; GISEL-NEXT: v_mul_hi_u32 v7, v12, v3 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v9 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v9, v2, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v13, v0 +; GISEL-NEXT: v_mul_lo_u32 v5, v12, v2 +; GISEL-NEXT: v_mul_hi_u32 v6, v12, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 +; GISEL-NEXT: v_xor_b32_e32 v8, v11, v7 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v13, v2 -; GISEL-NEXT: v_mul_hi_u32 v3, v13, v3 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GISEL-NEXT: v_mul_hi_u32 v6, v12, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v6, v13, v2 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; GISEL-NEXT: v_mul_hi_u32 v5, v12, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; GISEL-NEXT: v_mul_hi_u32 v7, v13, v2 -; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4] +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v0, v3 +; GISEL-NEXT: v_mul_hi_u32 v6, v13, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v9, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v0 +; GISEL-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[0:1] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v7 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], 0, v9, v[5:6] +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v7, vcc ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v12, v2 ; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc ; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5 @@ -1920,10 +1926,10 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v11 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v11 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v11 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v11, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v10 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v10 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_srem_v2i64_oddk_denom: diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll b/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll index 6864afe3855f4..225ceed9627b7 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll @@ -240,7 +240,6 @@ define i8 @udiv8_constant_add(i8 %a) nounwind { ; RV32-NEXT: zext.b a0, a0 ; RV32-NEXT: srli a0, a0, 1 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: zext.b a0, a0 ; RV32-NEXT: srli a0, a0, 2 ; RV32-NEXT: ret ; @@ -254,7 +253,6 @@ define i8 @udiv8_constant_add(i8 %a) nounwind { ; RV64-NEXT: zext.b a0, a0 ; RV64-NEXT: srli a0, a0, 1 ; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: zext.b a0, a0 ; RV64-NEXT: srli a0, a0, 2 ; RV64-NEXT: ret %1 = udiv i8 %a, 7 @@ -317,7 +315,6 @@ define i16 @udiv16_constant_add(i16 %a) nounwind { ; RV32IM-NEXT: and a0, a0, a2 ; RV32IM-NEXT: srli a0, a0, 1 ; RV32IM-NEXT: add a0, a0, a1 -; RV32IM-NEXT: and a0, a0, a2 ; RV32IM-NEXT: srli a0, a0, 2 ; RV32IM-NEXT: ret ; @@ -332,7 +329,6 @@ define i16 @udiv16_constant_add(i16 %a) nounwind { ; RV32IMZB-NEXT: zext.h a0, a0 ; RV32IMZB-NEXT: srli a0, a0, 1 ; RV32IMZB-NEXT: add a0, a0, a1 -; RV32IMZB-NEXT: zext.h a0, a0 ; RV32IMZB-NEXT: srli a0, a0, 2 ; RV32IMZB-NEXT: ret ; @@ -349,7 +345,6 @@ define i16 @udiv16_constant_add(i16 %a) nounwind { ; RV64IM-NEXT: and a0, a0, a2 ; RV64IM-NEXT: srli a0, a0, 1 ; RV64IM-NEXT: add a0, a0, a1 -; RV64IM-NEXT: and a0, a0, a2 ; RV64IM-NEXT: srli a0, a0, 2 ; RV64IM-NEXT: ret ; @@ -364,7 +359,6 @@ define i16 @udiv16_constant_add(i16 %a) nounwind { ; RV64IMZB-NEXT: zext.h a0, a0 ; RV64IMZB-NEXT: srli a0, a0, 1 ; RV64IMZB-NEXT: add a0, a0, a1 -; RV64IMZB-NEXT: zext.h a0, a0 ; RV64IMZB-NEXT: srli a0, a0, 2 ; RV64IMZB-NEXT: ret %1 = udiv i16 %a, 7 diff --git a/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp b/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp index 089fb00d6080d..8563d7f1f15c9 100644 --- a/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp @@ -190,7 +190,7 @@ TEST_F(AArch64GISelMITest, TestKnownBitsDecreasingCstPHIWithLoop) { // Therefore, %14's known zero are 0x80 shifted by one 0xC0. // If we had simulated the loop we could have more zero bits, basically // up to 0xFC (count leading zero of 5, + 1). - EXPECT_EQ((uint64_t)0xC0, Res.Zero.getZExtValue()); + EXPECT_EQ((uint64_t)0xFC, Res.Zero.getZExtValue()); KnownBits Res2 = Info.getKnownBits(DstReg); EXPECT_EQ(Res.One.getZExtValue(), Res2.One.getZExtValue()); diff --git a/llvm/unittests/CodeGen/GlobalISel/KnownBitsVectorTest.cpp b/llvm/unittests/CodeGen/GlobalISel/KnownBitsVectorTest.cpp index 73ddf0c88d3ed..6b70ae9739179 100644 --- a/llvm/unittests/CodeGen/GlobalISel/KnownBitsVectorTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/KnownBitsVectorTest.cpp @@ -220,7 +220,7 @@ TEST_F(AArch64GISelMITest, TestKnownBitsVectorDecreasingCstPHIWithLoop) { GISelValueTracking Info(*MF, /*MaxDepth=*/24); KnownBits Res = Info.getKnownBits(SrcReg); EXPECT_EQ((uint64_t)0, Res.One.getZExtValue()); - EXPECT_EQ((uint64_t)0xC0, Res.Zero.getZExtValue()); + EXPECT_EQ((uint64_t)0xFC, Res.Zero.getZExtValue()); KnownBits Res2 = Info.getKnownBits(DstReg); EXPECT_EQ(Res.One.getZExtValue(), Res2.One.getZExtValue());