diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp index d6f23b62519fe..af7689563eba8 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp @@ -643,6 +643,40 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, Known.Zero.setBitsFrom(LowBits); break; } + case TargetOpcode::G_EXTRACT_VECTOR_ELT: { + GExtractVectorElement &Extract = cast<GExtractVectorElement>(MI); + Register InVec = Extract.getVectorReg(); + Register EltNo = Extract.getIndexReg(); + + auto ConstEltNo = getIConstantVRegVal(EltNo, MRI); + + LLT VecVT = MRI.getType(InVec); + // computeKnownBits not yet implemented for scalable vectors. + if (VecVT.isScalableVector()) + break; + + const unsigned EltBitWidth = VecVT.getScalarSizeInBits(); + const unsigned NumSrcElts = VecVT.getNumElements(); + // A return type different from the vector's element type may lead to + // issues with pattern selection. Bail out to avoid that. + if (BitWidth > EltBitWidth) { + break; + } + + Known.Zero.setAllBits(); + Known.One.setAllBits(); + + // If we know the element index, just demand that vector element, else for + // an unknown element index, ignore DemandedElts and demand them all. 
+ APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts); + if (ConstEltNo && ConstEltNo->ult(NumSrcElts)) + DemandedSrcElts = + APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue()); + + computeKnownBitsImpl(InVec, Known, DemandedSrcElts, Depth + 1); + + break; + } case TargetOpcode::G_SHUFFLE_VECTOR: { APInt DemandedLHS, DemandedRHS; // Collect the known bits that are shared by every vector element referenced diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-buildvector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-buildvector.mir index 3f2bb1eed572b..94ea12d3c66d9 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-buildvector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-buildvector.mir @@ -22,7 +22,7 @@ body: | ; CHECK-NEXT: %1:_ KnownBits:00001010 SignBits:4 ; CHECK-NEXT: %2:_ KnownBits:0000?01? SignBits:4 ; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63 - ; CHECK-NEXT: %4:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %4:_ KnownBits:00001010 SignBits:4 %0:_(s8) = G_CONSTANT i8 3 %1:_(s8) = G_CONSTANT i8 10 %2:_(<2 x s8>) = G_BUILD_VECTOR %0, %1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-extract-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-extract-vector.mir new file mode 100644 index 0000000000000..ab576dfccc40c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-extract-vector.mir @@ -0,0 +1,133 @@ +# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple aarch64 -passes="print<gisel-value-tracking>" %s -filetype=null 2>&1 | FileCheck %s + +--- +name: all_knownbits_const_idx +body: | + bb.0: + ; CHECK-LABEL: name: @all_knownbits_const_idx + ; CHECK-NEXT: %0:_ KnownBits:00000011 SignBits:6 + ; CHECK-NEXT: %1:_ KnownBits:00001010 SignBits:4 + ; CHECK-NEXT: %2:_ KnownBits:0000?01? 
SignBits:4 + ; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63 + ; CHECK-NEXT: %4:_ KnownBits:00001010 SignBits:4 + %0:_(s8) = G_CONSTANT i8 3 + %1:_(s8) = G_CONSTANT i8 10 + %2:_(<2 x s8>) = G_BUILD_VECTOR %0, %1 + %idx:_(s64) = G_CONSTANT i64 1 + %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %idx +... +--- +name: all_knownbits +body: | + bb.0: + ; CHECK-LABEL: name: @all_knownbits + ; CHECK-NEXT: %0:_ KnownBits:00000011 SignBits:6 + ; CHECK-NEXT: %1:_ KnownBits:00001010 SignBits:4 + ; CHECK-NEXT: %2:_ KnownBits:0000?01? SignBits:4 + ; CHECK-NEXT: %idx:_ KnownBits:???????????????????????????????????????????????????????????????? SignBits:1 + ; CHECK-NEXT: %4:_ KnownBits:0000?01? SignBits:4 + %0:_(s8) = G_CONSTANT i8 3 + %1:_(s8) = G_CONSTANT i8 10 + %2:_(<2 x s8>) = G_BUILD_VECTOR %0, %1 + %idx:_(s64) = COPY $d0 + %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %idx +... +--- +name: no_knownbits_const_idx +body: | + bb.0: + ; CHECK-LABEL: name: @no_knownbits_const_idx + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63 + ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1 + %0:_(<2 x s8>) = COPY $h0 + %idx:_(s64) = G_CONSTANT i64 1 + %1:_(s8) = G_EXTRACT_VECTOR_ELT %0, %idx +... +--- +name: no_knownbits +body: | + bb.0: + ; CHECK-LABEL: name: @no_knownbits + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %idx:_ KnownBits:???????????????????????????????????????????????????????????????? SignBits:1 + ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1 + %0:_(<2 x s8>) = COPY $h0 + %idx:_(s64) = COPY $d1 + %1:_(s8) = G_EXTRACT_VECTOR_ELT %0, %idx +... +--- +name: zext_const_idx +body: | + bb.0: + ; CHECK-LABEL: name: @zext_const_idx + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %zext0:_ KnownBits:00000000???????? 
SignBits:8 + ; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63 + ; CHECK-NEXT: %3:_ KnownBits:00000000???????? SignBits:8 + %0:_(<2 x s8>) = COPY $h0 + %zext0:_(<2 x s16>) = G_ZEXT %0 + %idx:_(s64) = G_CONSTANT i64 1 + %1:_(s16) = G_EXTRACT_VECTOR_ELT %zext0, %idx +... +--- +name: zext +body: | + bb.0: + + ; CHECK-LABEL: name: @zext + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %zext0:_ KnownBits:00000000???????? SignBits:8 + ; CHECK-NEXT: %idx:_ KnownBits:???????????????????????????????????????????????????????????????? SignBits:1 + ; CHECK-NEXT: %3:_ KnownBits:00000000???????? SignBits:8 + %0:_(<2 x s8>) = COPY $h0 + %zext0:_(<2 x s16>) = G_ZEXT %0 + %idx:_(s64) = COPY $d1 + %1:_(s16) = G_EXTRACT_VECTOR_ELT %zext0, %idx +... +--- +name: sext_const_idx +body: | + bb.0: + ; CHECK-LABEL: name: @sext_const_idx + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %sext0:_ KnownBits:???????????????? SignBits:9 + ; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63 + ; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1 + %0:_(<2 x s8>) = COPY $h0 + %sext0:_(<2 x s16>) = G_SEXT %0 + %idx:_(s64) = G_CONSTANT i64 1 + %1:_(s16) = G_EXTRACT_VECTOR_ELT %sext0, %idx +... +--- +name: sext +body: | + bb.0: + ; CHECK-LABEL: name: @sext + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %sext0:_ KnownBits:???????????????? SignBits:9 + ; CHECK-NEXT: %idx:_ KnownBits:???????????????????????????????????????????????????????????????? SignBits:1 + ; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1 + %0:_(<2 x s8>) = COPY $h0 + %sext0:_(<2 x s16>) = G_SEXT %0 + %idx:_(s64) = COPY $d1 + %1:_(s16) = G_EXTRACT_VECTOR_ELT %sext0, %idx +... +--- +# Verifies known bit computation bails if return type differs from vector +# element type. Without bailing, the 8 lowest bits of %4 would be known. 
+name: bail_on_different_return_type +body: | + bb.0: + ; CHECK-LABEL: name: @bail_on_different_return_type + ; CHECK-NEXT: %0:_ KnownBits:00000011 SignBits:6 + ; CHECK-NEXT: %1:_ KnownBits:00001010 SignBits:4 + ; CHECK-NEXT: %2:_ KnownBits:0000?01? SignBits:4 + ; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63 + ; CHECK-NEXT: %4:_ KnownBits:???????????????? SignBits:1 + %0:_(s8) = G_CONSTANT i8 3 + %1:_(s8) = G_CONSTANT i8 10 + %2:_(<2 x s8>) = G_BUILD_VECTOR %0, %1 + %idx:_(s64) = G_CONSTANT i64 1 + %3:_(s16) = G_EXTRACT_VECTOR_ELT %2, %idx diff --git a/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll b/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll index 93a50ec305e1e..64cb3603f53a1 100644 --- a/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll +++ b/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll @@ -734,7 +734,7 @@ define <1 x i64> @mullu_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) { ; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: fmov x9, d1 -; CHECK-GI-NEXT: mul x8, x8, x9 +; CHECK-GI-NEXT: umull x8, w8, w9 ; CHECK-GI-NEXT: fmov d0, x8 ; CHECK-GI-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll index 4f2c454e13356..8224ec56c12da 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -2757,47 +2757,47 @@ define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CGP-NEXT: v_and_b32_e32 v3, 0xffffff, v4 ; CGP-NEXT: v_cvt_f32_u32_e32 v1, v3 -; CGP-NEXT: v_and_b32_e32 v4, 0xffffff, v6 -; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v3 +; CGP-NEXT: v_and_b32_e32 v4, 0xffffff, v0 +; CGP-NEXT: v_and_b32_e32 v5, 0xffffff, v6 +; CGP-NEXT: v_and_b32_e32 v8, 0xffffff, v2 ; CGP-NEXT: v_rcp_f32_e32 v1, v1 -; CGP-NEXT: v_and_b32_e32 v8, 0xffffff, v0 -; CGP-NEXT: v_mul_f32_e32 v1, 
0x4f7ffffe, v1 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v1 -; CGP-NEXT: v_cvt_f32_u32_e32 v1, v4 -; CGP-NEXT: v_mul_lo_u32 v6, v6, v5 -; CGP-NEXT: v_rcp_f32_e32 v7, v1 -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v6, 0 -; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v7 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v1 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v0 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v5, v1 -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v0, 0 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v5, v1, v3 -; CGP-NEXT: v_mul_lo_u32 v0, v0, v6 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v5 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v3 +; CGP-NEXT: v_mul_lo_u32 v1, v1, v6 +; CGP-NEXT: v_rcp_f32_e32 v7, v0 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v1, 0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v2, v0 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v6, v1 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, 0, v5 +; CGP-NEXT: v_mul_lo_u32 v6, v1, v3 +; CGP-NEXT: v_mul_lo_u32 v0, v0, v2 ; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v1 -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v3 -; CGP-NEXT: v_cndmask_b32_e32 v7, v1, v7, vcc -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v0, 0 -; CGP-NEXT: v_and_b32_e32 v8, 0xffffff, v2 -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v5, v3 -; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v6, v1 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v6 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v3 +; CGP-NEXT: v_cndmask_b32_e32 v6, v1, v7, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v0, 0 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v4, v3 +; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v2, v1 ; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v8, v1, 0 -; CGP-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v7 -; CGP-NEXT: v_mul_lo_u32 v5, v2, v4 +; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v6 +; CGP-NEXT: v_mul_lo_u32 v4, v2, v5 
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v1, vcc +; CGP-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc ; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v4 -; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc -; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v3, v4 -; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v4 -; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v4 +; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v5 +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v5 +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v5 +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; CGP-NEXT: s_setpc_b64 s[30:31] %num.mask = and <2 x i64> %num,