diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index e1e95f7004fdad..3805f70df07a81 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2977,17 +2977,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Depth + 1); case ISD::SPLAT_VECTOR: { SDValue SrcOp = Op.getOperand(0); - if (SrcOp.getValueSizeInBits() != BitWidth) { - assert(SrcOp.getValueSizeInBits() > BitWidth && - "Expected SPLAT_VECTOR implicit truncation"); - // FIXME: We should be able to truncate the known bits here to match - // the official semantics of SPLAT_VECTOR, but doing so exposes a - // Hexagon target bug which results in an infinite loop during - // DAGCombine. (See D137140 for repo). Once that's fixed, we can - // strengthen this. - break; - } - Known = computeKnownBits(SrcOp, Depth + 1); + assert(SrcOp.getValueSizeInBits() >= BitWidth && + "Expected SPLAT_VECTOR implicit truncation"); + // Implicitly truncate the bits to match the official semantics of + // SPLAT_VECTOR. + Known = computeKnownBits(SrcOp, Depth + 1).trunc(BitWidth); break; } case ISD::BUILD_VECTOR: diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll index cb2b498f274b04..09cb0aa28b104f 100644 --- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll +++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll @@ -100,7 +100,6 @@ define @lane_mask_nxv8i1_i8(i8 %index, i8 %TC) { ; CHECK-NEXT: mov z1.h, w1 ; CHECK-NEXT: umin z0.h, z0.h, #255 ; CHECK-NEXT: and z1.h, z1.h, #0xff -; CHECK-NEXT: and z0.h, z0.h, #0xff ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmphi p0.h, p0/z, z1.h, z0.h ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll index 95c312db049ecd..553acc7930982f 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll @@ -40,7 +40,6 @@ define void @select_v16f16(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 { ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] ; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: and z2.h, z2.h, #0x1 ; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0 ; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h ; CHECK-NEXT: st1h { z0.h }, p0, [x0] @@ -56,15 +55,14 @@ define void @select_v32f16(ptr %a, ptr %b, i1 %mask) #0 { ; VBITS_GE_256-LABEL: select_v32f16: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: mov x8, #16 -; VBITS_GE_256-NEXT: and w9, w2, #0x1 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: and w9, w2, #0x1 ; VBITS_GE_256-NEXT: ptrue p1.h ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_GE_256-NEXT: mov z4.h, w9 -; VBITS_GE_256-NEXT: and z4.h, z4.h, #0x1 ; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z4.h, #0 ; VBITS_GE_256-NEXT: sel z1.h, p1, z1.h, z3.h ; VBITS_GE_256-NEXT: sel z0.h, p1, z0.h, z2.h @@ -80,7 +78,6 @@ define void @select_v32f16(ptr %a, ptr %b, i1 %mask) #0 { ; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1] ; VBITS_GE_512-NEXT: ptrue p1.h ; VBITS_GE_512-NEXT: mov z2.h, w8 -; VBITS_GE_512-NEXT: and z2.h, z2.h, #0x1 ; VBITS_GE_512-NEXT: cmpne p1.h, p1/z, z2.h, #0 ; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0] @@ -101,7 +98,6 @@ define void @select_v64f16(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 { ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] ; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: and z2.h, z2.h, #0x1 ; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0 ; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h ; CHECK-NEXT: st1h { z0.h }, p0, [x0] @@ -122,7 +118,6 @@ define void @select_v128f16(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 { ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] ; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: and z2.h, z2.h, #0x1 ; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0 ; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h ; CHECK-NEXT: st1h { z0.h }, p0, [x0] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll index e10a3531f8a0e4..19d7c4212842fd 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll @@ -40,7 +40,6 @@ define void @select_v32i8(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 { ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1] ; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: mov z2.b, w8 -; CHECK-NEXT: and z2.b, z2.b, #0x1 ; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0 ; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b ; CHECK-NEXT: st1b { z0.b }, p0, [x0] @@ -56,15 +55,14 @@ define void @select_v64i8(ptr %a, ptr %b, i1 %mask) #0 { ; VBITS_GE_256-LABEL: select_v64i8: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: mov w8, #32 -; VBITS_GE_256-NEXT: and w9, w2, #0x1 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: and w9, w2, #0x1 ; VBITS_GE_256-NEXT: ptrue p1.b ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] ; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1] ; VBITS_GE_256-NEXT: mov z4.b, w9 -; VBITS_GE_256-NEXT: and z4.b, z4.b, #0x1 ; VBITS_GE_256-NEXT: cmpne p1.b, p1/z, z4.b, #0 ; VBITS_GE_256-NEXT: sel z1.b, p1, z1.b, z3.b ; VBITS_GE_256-NEXT: sel z0.b, p1, z0.b, z2.b @@ -80,7 +78,6 @@ define void @select_v64i8(ptr %a, ptr %b, i1 %mask) #0 { ; VBITS_GE_512-NEXT: ld1b { z1.b }, p0/z, [x1] ; VBITS_GE_512-NEXT: ptrue p1.b ; VBITS_GE_512-NEXT: mov z2.b, w8 -; VBITS_GE_512-NEXT: and z2.b, z2.b, #0x1 ; VBITS_GE_512-NEXT: cmpne p1.b, p1/z, z2.b, #0 ; VBITS_GE_512-NEXT: sel z0.b, p1, z0.b, z1.b ; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0] @@ -101,7 +98,6 @@ define void @select_v128i8(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 { ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1] ; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: mov z2.b, w8 -; CHECK-NEXT: and z2.b, z2.b, #0x1 ; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0 ; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b ; CHECK-NEXT: st1b { z0.b }, p0, [x0] @@ -122,7 +118,6 @@ define void @select_v256i8(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 { ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1] ; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: mov z2.b, w8 -; CHECK-NEXT: and z2.b, z2.b, #0x1 ; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0 ; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b ; CHECK-NEXT: st1b { z0.b }, p0, [x0] @@ -169,7 +164,6 @@ define void @select_v16i16(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 { ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] ; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: and z2.h, z2.h, #0x1 ; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0 ; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h ; CHECK-NEXT: st1h { z0.h }, p0, [x0] @@ -185,15 +179,14 @@ define void @select_v32i16(ptr %a, ptr %b, i1 %mask) #0 { ; VBITS_GE_256-LABEL: select_v32i16: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: mov x8, #16 -; VBITS_GE_256-NEXT: and w9, w2, #0x1 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: and w9, w2, #0x1 ; VBITS_GE_256-NEXT: ptrue p1.h ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_GE_256-NEXT: mov z4.h, w9 -; VBITS_GE_256-NEXT: and z4.h, z4.h, #0x1 ; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z4.h, #0 ; VBITS_GE_256-NEXT: sel z1.h, p1, z1.h, z3.h ; VBITS_GE_256-NEXT: sel z0.h, p1, z0.h, z2.h @@ -209,7 +202,6 @@ define void @select_v32i16(ptr %a, ptr %b, i1 %mask) #0 { ; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1] ; VBITS_GE_512-NEXT: ptrue p1.h ; VBITS_GE_512-NEXT: mov z2.h, w8 -; VBITS_GE_512-NEXT: and z2.h, z2.h, #0x1 ; VBITS_GE_512-NEXT: cmpne p1.h, p1/z, z2.h, #0 ; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0] @@ -230,7 +222,6 @@ define void @select_v64i16(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 { ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] ; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: and z2.h, z2.h, #0x1 ; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0 ; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h ; CHECK-NEXT: st1h { z0.h }, p0, [x0] @@ -251,7 +242,6 @@ define void @select_v128i16(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 { ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] ; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: and z2.h, z2.h, #0x1 ; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0 ; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h ; CHECK-NEXT: st1h { z0.h }, p0, [x0] diff --git a/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll b/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll index 8b2455bff61e94..8ef7b8032cc0ff 100644 --- a/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll +++ b/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll @@ -49,15 +49,10 @@ define @umulo_nxv8i8( %x, % ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: and z1.h, z1.h, #0xff ; CHECK-NEXT: and z0.h, z0.h, #0xff -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: mul z2.h, p0/m, z2.h, z1.h -; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: lsr z1.h, z2.h, #8 -; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0 +; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: lsr z1.h, z0.h, #8 ; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0 -; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b -; CHECK-NEXT: mov z2.h, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %a = call { , } @llvm.umul.with.overflow.nxv8i8( %x, %y) %b = extractvalue { , } %a, 0 diff --git a/llvm/test/CodeGen/WebAssembly/pr59626.ll b/llvm/test/CodeGen/WebAssembly/pr59626.ll index 33b85fb2de098d..a2324b0b48a78b 100644 --- a/llvm/test/CodeGen/WebAssembly/pr59626.ll +++ b/llvm/test/CodeGen/WebAssembly/pr59626.ll @@ -5,79 +5,38 @@ define i8 @f(ptr %0, ptr %1) { ; CHECK-32-LABEL: f: ; CHECK-32: .functype f (i32, i32) -> (i32) -; CHECK-32-NEXT: .local v128 ; CHECK-32-NEXT: # %bb.0: # %BB ; CHECK-32-NEXT: local.get 0 ; CHECK-32-NEXT: i32.const 0 ; CHECK-32-NEXT: i32.store8 2 ; CHECK-32-NEXT: local.get 0 -; CHECK-32-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK-32-NEXT: v128.store16_lane 0, 0 -; CHECK-32-NEXT: local.get 1 -; CHECK-32-NEXT: i32.const 5 -; CHECK-32-NEXT: v128.const 0, 0 -; CHECK-32-NEXT: i32x4.extract_lane 0 -; CHECK-32-NEXT: i8x16.splat -; CHECK-32-NEXT: v128.const 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 -; CHECK-32-NEXT: v128.and -; CHECK-32-NEXT: local.tee 2 -; CHECK-32-NEXT: i8x16.extract_lane_u 2 -; CHECK-32-NEXT: i32.div_u -; CHECK-32-NEXT: i32.store8 2 +; CHECK-32-NEXT: i32.const 0 +; CHECK-32-NEXT: i32.store16 0 ; CHECK-32-NEXT: local.get 1 -; CHECK-32-NEXT: i32.const 1 -; CHECK-32-NEXT: local.get 2 -; CHECK-32-NEXT: i8x16.extract_lane_u 0 -; CHECK-32-NEXT: local.tee 0 -; CHECK-32-NEXT: i32.const 1 -; CHECK-32-NEXT: i32.and -; CHECK-32-NEXT: i32.div_u +; CHECK-32-NEXT: local.get 0 ; CHECK-32-NEXT: i8x16.splat -; CHECK-32-NEXT: i32.const 3 -; CHECK-32-NEXT: local.get 2 -; CHECK-32-NEXT: i8x16.extract_lane_u 1 -; CHECK-32-NEXT: i32.div_u -; CHECK-32-NEXT: i8x16.replace_lane 1 ; CHECK-32-NEXT: v128.store16_lane 0, 0 -; CHECK-32-NEXT: local.get 0 +; CHECK-32-NEXT: v128.const 0, 0 +; CHECK-32-NEXT: i32x4.extract_lane 0 ; CHECK-32-NEXT: # fallthrough-return ; ; CHECK-64-LABEL: f: ; CHECK-64: .functype f (i64, i64) -> (i32) -; CHECK-64-NEXT: .local v128, i32 +; CHECK-64-NEXT: .local i32 ; CHECK-64-NEXT: # %bb.0: # %BB ; CHECK-64-NEXT: local.get 0 ; CHECK-64-NEXT: i32.const 0 ; CHECK-64-NEXT: i32.store8 2 ; CHECK-64-NEXT: local.get 0 -; CHECK-64-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK-64-NEXT: v128.store16_lane 0, 0 -; CHECK-64-NEXT: drop -; CHECK-64-NEXT: local.get 1 -; CHECK-64-NEXT: i32.const 5 -; CHECK-64-NEXT: v128.const 0, 0 -; CHECK-64-NEXT: i32x4.extract_lane 0 -; CHECK-64-NEXT: i8x16.splat -; CHECK-64-NEXT: v128.const 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 -; CHECK-64-NEXT: v128.and -; CHECK-64-NEXT: local.tee 2 -; CHECK-64-NEXT: i8x16.extract_lane_u 2 -; CHECK-64-NEXT: i32.const 1 -; CHECK-64-NEXT: i32.and -; CHECK-64-NEXT: i32.div_u -; CHECK-64-NEXT: i32.store8 2 +; CHECK-64-NEXT: i32.const 0 +; CHECK-64-NEXT: i32.store16 0 ; CHECK-64-NEXT: local.get 1 -; CHECK-64-NEXT: i32.const 1 ; CHECK-64-NEXT: local.get 2 -; CHECK-64-NEXT: i8x16.extract_lane_u 0 -; CHECK-64-NEXT: local.tee 3 -; CHECK-64-NEXT: i32.const 1 -; CHECK-64-NEXT: i32.and -; CHECK-64-NEXT: i32.div_u ; CHECK-64-NEXT: i8x16.splat ; CHECK-64-NEXT: v128.store16_lane 0, 0 ; CHECK-64-NEXT: drop -; CHECK-64-NEXT: local.get 3 +; CHECK-64-NEXT: v128.const 0, 0 +; CHECK-64-NEXT: i32x4.extract_lane 0 ; CHECK-64-NEXT: # fallthrough-return BB: store <3 x i8> zeroinitializer, ptr %0