Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,40 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
Known.Zero.setBitsFrom(LowBits);
break;
}
// Known bits of an extracted vector element are taken from the known bits of
// the source vector, restricted to the source element(s) that may be read.
case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
GExtractVectorElement &Extract = cast<GExtractVectorElement>(MI);
Register InVec = Extract.getVectorReg();
Register EltNo = Extract.getIndexReg();

// Holds the index value only when the index register is a known integer
// constant; it is tested for presence before being used below.
auto ConstEltNo = getIConstantVRegVal(EltNo, MRI);

LLT VecVT = MRI.getType(InVec);
// computeKnownBits not yet implemented for scalable vectors.
if (VecVT.isScalableVector())
break;

const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
const unsigned NumSrcElts = VecVT.getNumElements();
// A result type wider than the vector's element type may lead to issues
// with pattern selection. Bail out to avoid that.
// NOTE(review): only the "wider" case is rejected here; a result narrower
// than the element type would still reach the recursive call below. Confirm
// whether that combination can occur.
if (BitWidth > EltBitWidth) {
break;
}

// Start with every bit marked as both known-zero and known-one.
// NOTE(review): Known is handed to the recursive call below as its output;
// presumably that call re-initialises it, which would make this redundant.
// Verify against computeKnownBitsImpl's prologue.
Known.Zero.setAllBits();
Known.One.setAllBits();

// If we know the element index, just demand that vector element, else for
// an unknown element index, ignore DemandedElts and demand them all.
// A constant index that is out of range (>= NumSrcElts) also falls back to
// demanding every source element.
APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
if (ConstEltNo && ConstEltNo->ult(NumSrcElts))
DemandedSrcElts =
APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());

// Recurse into the source vector with the narrowed element mask; the result
// is written directly into Known.
computeKnownBitsImpl(InVec, Known, DemandedSrcElts, Depth + 1);

break;
}
case TargetOpcode::G_SHUFFLE_VECTOR: {
APInt DemandedLHS, DemandedRHS;
// Collect the known bits that are shared by every vector element referenced
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ body: |
; CHECK-NEXT: %1:_ KnownBits:00001010 SignBits:4
; CHECK-NEXT: %2:_ KnownBits:0000?01? SignBits:4
; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63
; CHECK-NEXT: %4:_ KnownBits:???????? SignBits:1
; CHECK-NEXT: %4:_ KnownBits:00001010 SignBits:4
%0:_(s8) = G_CONSTANT i8 3
%1:_(s8) = G_CONSTANT i8 10
%2:_(<2 x s8>) = G_BUILD_VECTOR %0, %1
Expand Down
133 changes: 133 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/knownbits-extract-vector.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple aarch64 -passes="print<gisel-value-tracking>" %s -filetype=null 2>&1 | FileCheck %s

---
name: all_knownbits_const_idx
body: |
bb.0:
; CHECK-LABEL: name: @all_knownbits_const_idx
; CHECK-NEXT: %0:_ KnownBits:00000011 SignBits:6
; CHECK-NEXT: %1:_ KnownBits:00001010 SignBits:4
; CHECK-NEXT: %2:_ KnownBits:0000?01? SignBits:4
; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63
; CHECK-NEXT: %4:_ KnownBits:00001010 SignBits:4
%0:_(s8) = G_CONSTANT i8 3
%1:_(s8) = G_CONSTANT i8 10
%2:_(<2 x s8>) = G_BUILD_VECTOR %0, %1
%idx:_(s64) = G_CONSTANT i64 1
%3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %idx
...
---
name: all_knownbits
body: |
bb.0:
; CHECK-LABEL: name: @all_knownbits
; CHECK-NEXT: %0:_ KnownBits:00000011 SignBits:6
; CHECK-NEXT: %1:_ KnownBits:00001010 SignBits:4
; CHECK-NEXT: %2:_ KnownBits:0000?01? SignBits:4
; CHECK-NEXT: %idx:_ KnownBits:???????????????????????????????????????????????????????????????? SignBits:1
; CHECK-NEXT: %4:_ KnownBits:0000?01? SignBits:4
%0:_(s8) = G_CONSTANT i8 3
%1:_(s8) = G_CONSTANT i8 10
%2:_(<2 x s8>) = G_BUILD_VECTOR %0, %1
%idx:_(s64) = COPY $d0
%3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %idx
...
---
name: no_knownbits_const_idx
body: |
bb.0:
; CHECK-LABEL: name: @no_knownbits_const_idx
; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63
; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1
%0:_(<2 x s8>) = COPY $h0
%idx:_(s64) = G_CONSTANT i64 1
%1:_(s8) = G_EXTRACT_VECTOR_ELT %0, %idx
...
---
name: no_knownbits
body: |
bb.0:
; CHECK-LABEL: name: @no_knownbits
; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
; CHECK-NEXT: %idx:_ KnownBits:???????????????????????????????????????????????????????????????? SignBits:1
; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1
%0:_(<2 x s8>) = COPY $h0
%idx:_(s64) = COPY $d1
%1:_(s8) = G_EXTRACT_VECTOR_ELT %0, %idx
...
---
name: zext_const_idx
body: |
bb.0:
; CHECK-LABEL: name: @zext_const_idx
; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
; CHECK-NEXT: %zext0:_ KnownBits:00000000???????? SignBits:8
; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63
; CHECK-NEXT: %3:_ KnownBits:00000000???????? SignBits:8
%0:_(<2 x s8>) = COPY $h0
%zext0:_(<2 x s16>) = G_ZEXT %0
%idx:_(s64) = G_CONSTANT i64 1
%1:_(s16) = G_EXTRACT_VECTOR_ELT %zext0, %idx
...
---
name: zext
body: |
bb.0:

; CHECK-LABEL: name: @zext
; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
; CHECK-NEXT: %zext0:_ KnownBits:00000000???????? SignBits:8
; CHECK-NEXT: %idx:_ KnownBits:???????????????????????????????????????????????????????????????? SignBits:1
; CHECK-NEXT: %3:_ KnownBits:00000000???????? SignBits:8
%0:_(<2 x s8>) = COPY $h0
%zext0:_(<2 x s16>) = G_ZEXT %0
%idx:_(s64) = COPY $d1
%1:_(s16) = G_EXTRACT_VECTOR_ELT %zext0, %idx
...
---
name: sext_const_idx
body: |
bb.0:
; CHECK-LABEL: name: @sext_const_idx
; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
; CHECK-NEXT: %sext0:_ KnownBits:???????????????? SignBits:9
; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63
; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1
%0:_(<2 x s8>) = COPY $h0
%sext0:_(<2 x s16>) = G_SEXT %0
%idx:_(s64) = G_CONSTANT i64 1
%1:_(s16) = G_EXTRACT_VECTOR_ELT %sext0, %idx
...
---
name: sext
body: |
bb.0:
; CHECK-LABEL: name: @sext
; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
; CHECK-NEXT: %sext0:_ KnownBits:???????????????? SignBits:9
; CHECK-NEXT: %idx:_ KnownBits:???????????????????????????????????????????????????????????????? SignBits:1
; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1
%0:_(<2 x s8>) = COPY $h0
%sext0:_(<2 x s16>) = G_SEXT %0
%idx:_(s64) = COPY $d1
%1:_(s16) = G_EXTRACT_VECTOR_ELT %sext0, %idx
...
---
# Verifies that known-bits computation bails out when the result type (s16) is
# wider than the vector element type (s8). Without bailing out, the 8 lowest
# bits of the extracted value (%3 below, printed as %4) would be known.
name: bail_on_different_return_type
body: |
bb.0:
; CHECK-LABEL: name: @bail_on_different_return_type
; CHECK-NEXT: %0:_ KnownBits:00000011 SignBits:6
; CHECK-NEXT: %1:_ KnownBits:00001010 SignBits:4
; CHECK-NEXT: %2:_ KnownBits:0000?01? SignBits:4
; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63
; CHECK-NEXT: %4:_ KnownBits:???????????????? SignBits:1
%0:_(s8) = G_CONSTANT i8 3
%1:_(s8) = G_CONSTANT i8 10
%2:_(<2 x s8>) = G_BUILD_VECTOR %0, %1
%idx:_(s64) = G_CONSTANT i64 1
%3:_(s16) = G_EXTRACT_VECTOR_ELT %2, %idx
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/neon-extadd-extract.ll
Original file line number Diff line number Diff line change
Expand Up @@ -734,7 +734,7 @@ define <1 x i64> @mullu_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) {
; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: mul x8, x8, x9
; CHECK-GI-NEXT: umull x8, w8, w9
; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: ret
entry:
Expand Down
70 changes: 35 additions & 35 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2757,47 +2757,47 @@ define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: v_and_b32_e32 v3, 0xffffff, v4
; CGP-NEXT: v_cvt_f32_u32_e32 v1, v3
; CGP-NEXT: v_and_b32_e32 v4, 0xffffff, v6
; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v3
; CGP-NEXT: v_and_b32_e32 v4, 0xffffff, v0
; CGP-NEXT: v_and_b32_e32 v5, 0xffffff, v6
; CGP-NEXT: v_and_b32_e32 v8, 0xffffff, v2
; CGP-NEXT: v_rcp_f32_e32 v1, v1
; CGP-NEXT: v_and_b32_e32 v8, 0xffffff, v0
; CGP-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
; CGP-NEXT: v_cvt_u32_f32_e32 v5, v1
; CGP-NEXT: v_cvt_f32_u32_e32 v1, v4
; CGP-NEXT: v_mul_lo_u32 v6, v6, v5
; CGP-NEXT: v_rcp_f32_e32 v7, v1
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v6, 0
; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v7
; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v1
; CGP-NEXT: v_cvt_u32_f32_e32 v6, v0
; CGP-NEXT: v_add_i32_e32 v0, vcc, v5, v1
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v0, 0
; CGP-NEXT: v_sub_i32_e32 v0, vcc, 0, v4
; CGP-NEXT: v_mul_lo_u32 v5, v1, v3
; CGP-NEXT: v_mul_lo_u32 v0, v0, v6
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v5
; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v3
; CGP-NEXT: v_mul_lo_u32 v1, v1, v6
; CGP-NEXT: v_rcp_f32_e32 v7, v0
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v1, 0
; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v7
; CGP-NEXT: v_cvt_u32_f32_e32 v2, v0
; CGP-NEXT: v_add_i32_e32 v0, vcc, v6, v1
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
; CGP-NEXT: v_sub_i32_e32 v0, vcc, 0, v5
; CGP-NEXT: v_mul_lo_u32 v6, v1, v3
; CGP-NEXT: v_mul_lo_u32 v0, v0, v2
; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v1
; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v3
; CGP-NEXT: v_cndmask_b32_e32 v7, v1, v7, vcc
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v0, 0
; CGP-NEXT: v_and_b32_e32 v8, 0xffffff, v2
; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v5, v3
; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v6, v1
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v6
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v3
; CGP-NEXT: v_cndmask_b32_e32 v6, v1, v7, vcc
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v0, 0
; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v4, v3
; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v2, v1
; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v8, v1, 0
; CGP-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v7
; CGP-NEXT: v_mul_lo_u32 v5, v2, v4
; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v6
; CGP-NEXT: v_mul_lo_u32 v4, v2, v5
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v1, vcc
; CGP-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc
; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v5
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v4
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v3, v4
; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v4
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v4
; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v5
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v5
; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v5
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; CGP-NEXT: s_setpc_b64 s[30:31]
%num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>
Expand Down