diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index fc6c2903471a8..de12e49228e28 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -319,6 +319,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( // Support vector extending for (auto T : MVT::integer_fixedlen_vector_valuetypes()) { + setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom); setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Custom); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Custom); } @@ -1136,7 +1137,27 @@ void WebAssemblyTargetLowering::computeKnownBitsForTargetNode( } break; } - + case WebAssemblyISD::EXTEND_LOW_U: + case WebAssemblyISD::EXTEND_HIGH_U: { + // The high half of each destination vector element is known to be zero. + SDValue SrcOp = Op.getOperand(0); + EVT VT = SrcOp.getSimpleValueType(); + unsigned BitWidth = Known.getBitWidth(); + if (VT == MVT::v8i8 || VT == MVT::v16i8) { + assert(BitWidth >= 8 && "Unexpected width!"); + APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8); + Known.Zero |= Mask; + } else if (VT == MVT::v4i16 || VT == MVT::v8i16) { + assert(BitWidth >= 16 && "Unexpected width!"); + APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16); + Known.Zero |= Mask; + } else if (VT == MVT::v2i32 || VT == MVT::v4i32) { + assert(BitWidth >= 32 && "Unexpected width!"); + APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 32); + Known.Zero |= Mask; + } + break; + } // For 128-bit addition if the upper bits are all zero then it's known that // the upper bits of the result will have all bits guaranteed zero except the // first. 
@@ -1705,6 +1726,7 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op, return LowerSIGN_EXTEND_INREG(Op, DAG); case ISD::ZERO_EXTEND_VECTOR_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ANY_EXTEND_VECTOR_INREG: return LowerEXTEND_VECTOR_INREG(Op, DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); @@ -2299,6 +2321,9 @@ WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op, unsigned Ext; switch (Op.getOpcode()) { + default: + llvm_unreachable("unexpected opcode"); + case ISD::ANY_EXTEND_VECTOR_INREG: case ISD::ZERO_EXTEND_VECTOR_INREG: Ext = WebAssemblyISD::EXTEND_LOW_U; break; diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll index d698fad745dfb..60b4a837f7c31 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll @@ -1997,38 +1997,30 @@ define void @avgr_undef_shuffle_lanes(ptr %res, <8 x i8> %a, <8 x i8> %b, <8 x i ; SIMD128: .functype avgr_undef_shuffle_lanes (i32, v128, v128, v128, v128) -> () ; SIMD128-NEXT: # %bb.0: ; SIMD128-NEXT: i8x16.avgr_u $push1=, $1, $2 -; SIMD128-NEXT: i8x16.shuffle $push12=, $pop1, $4, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0 -; SIMD128-NEXT: local.tee $push11=, $2=, $pop12 +; SIMD128-NEXT: i16x8.extend_low_i8x16_u $push8=, $pop1 +; SIMD128-NEXT: local.tee $push7=, $2=, $pop8 ; SIMD128-NEXT: i8x16.avgr_u $push0=, $3, $4 -; SIMD128-NEXT: i8x16.shuffle $push10=, $pop0, $4, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0 -; SIMD128-NEXT: local.tee $push9=, $4=, $pop10 -; SIMD128-NEXT: i8x16.shuffle $push4=, $pop11, $pop9, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 -; SIMD128-NEXT: v128.const $push8=, 255, 255, 255, 255, 255, 255, 255, 255 -; SIMD128-NEXT: local.tee $push7=, $3=, $pop8 -; SIMD128-NEXT: v128.and $push5=, $pop4, $pop7 +; SIMD128-NEXT: i16x8.extend_low_i8x16_u $push6=, $pop0 +; SIMD128-NEXT: local.tee $push5=, $4=, $pop6 +; SIMD128-NEXT: i8x16.shuffle $push3=, 
$pop7, $pop5, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 ; SIMD128-NEXT: i8x16.shuffle $push2=, $2, $4, 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 -; SIMD128-NEXT: v128.and $push3=, $pop2, $3 -; SIMD128-NEXT: i8x16.narrow_i16x8_u $push6=, $pop5, $pop3 -; SIMD128-NEXT: v128.store 0($0):p2align=0, $pop6 +; SIMD128-NEXT: i8x16.narrow_i16x8_u $push4=, $pop3, $pop2 +; SIMD128-NEXT: v128.store 0($0):p2align=0, $pop4 ; SIMD128-NEXT: return ; ; SIMD128-FAST-LABEL: avgr_undef_shuffle_lanes: ; SIMD128-FAST: .functype avgr_undef_shuffle_lanes (i32, v128, v128, v128, v128) -> () ; SIMD128-FAST-NEXT: # %bb.0: ; SIMD128-FAST-NEXT: i8x16.avgr_u $push1=, $1, $2 -; SIMD128-FAST-NEXT: i8x16.shuffle $push12=, $pop1, $4, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0 -; SIMD128-FAST-NEXT: local.tee $push11=, $2=, $pop12 +; SIMD128-FAST-NEXT: i16x8.extend_low_i8x16_u $push8=, $pop1 +; SIMD128-FAST-NEXT: local.tee $push7=, $2=, $pop8 ; SIMD128-FAST-NEXT: i8x16.avgr_u $push0=, $3, $4 -; SIMD128-FAST-NEXT: i8x16.shuffle $push10=, $pop0, $4, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0 -; SIMD128-FAST-NEXT: local.tee $push9=, $4=, $pop10 -; SIMD128-FAST-NEXT: i8x16.shuffle $push4=, $pop11, $pop9, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 -; SIMD128-FAST-NEXT: v128.const $push8=, 255, 255, 255, 255, 255, 255, 255, 255 -; SIMD128-FAST-NEXT: local.tee $push7=, $3=, $pop8 -; SIMD128-FAST-NEXT: v128.and $push5=, $pop4, $pop7 +; SIMD128-FAST-NEXT: i16x8.extend_low_i8x16_u $push6=, $pop0 +; SIMD128-FAST-NEXT: local.tee $push5=, $4=, $pop6 +; SIMD128-FAST-NEXT: i8x16.shuffle $push3=, $pop7, $pop5, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 ; SIMD128-FAST-NEXT: i8x16.shuffle $push2=, $2, $4, 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 -; SIMD128-FAST-NEXT: v128.and $push3=, $pop2, $3 -; SIMD128-FAST-NEXT: i8x16.narrow_i16x8_u $push6=, $pop5, $pop3 -; SIMD128-FAST-NEXT: v128.store 0($0):p2align=0, $pop6 +; SIMD128-FAST-NEXT: 
i8x16.narrow_i16x8_u $push4=, $pop3, $pop2 +; SIMD128-FAST-NEXT: v128.store 0($0):p2align=0, $pop4 ; SIMD128-FAST-NEXT: return ; ; NO-SIMD128-LABEL: avgr_undef_shuffle_lanes: diff --git a/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll b/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll index f7143711394fa..70c6baf2be005 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll @@ -276,7 +276,7 @@ define i1 @test_any_v8i8(<8 x i8> %x) { ; CHECK-LABEL: test_any_v8i8: ; CHECK: .functype test_any_v8i8 (v128) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i8x16.shuffle $push0=, $0, $0, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0 +; CHECK-NEXT: i16x8.extend_low_i8x16_u $push0=, $0 ; CHECK-NEXT: i32.const $push1=, 15 ; CHECK-NEXT: i16x8.shl $push2=, $pop0, $pop1 ; CHECK-NEXT: i32.const $push5=, 15 @@ -292,7 +292,7 @@ define i1 @test_all_v8i8(<8 x i8> %x) { ; CHECK-LABEL: test_all_v8i8: ; CHECK: .functype test_all_v8i8 (v128) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i8x16.shuffle $push0=, $0, $0, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0 +; CHECK-NEXT: i16x8.extend_low_i8x16_u $push0=, $0 ; CHECK-NEXT: i32.const $push1=, 15 ; CHECK-NEXT: i16x8.shl $push2=, $pop0, $pop1 ; CHECK-NEXT: i32.const $push5=, 15