diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def index a3a33f4a5b3a3..b8954f4693f0a 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def @@ -46,7 +46,6 @@ HANDLE_NODETYPE(MEMORY_COPY) HANDLE_NODETYPE(MEMORY_FILL) // Memory intrinsics -HANDLE_MEM_NODETYPE(LOAD_SPLAT) HANDLE_MEM_NODETYPE(GLOBAL_GET) HANDLE_MEM_NODETYPE(GLOBAL_SET) HANDLE_MEM_NODETYPE(TABLE_GET) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index eee707bcd66ed..f32efde459024 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -194,6 +194,11 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( MVT::v2f64}) setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom); + // Support splatting + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, + MVT::v2f64}) + setOperationAction(ISD::SPLAT_VECTOR, T, Legal); + // Custom lowering since wasm shifts must have a scalar shift amount for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL}) for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) @@ -2161,18 +2166,8 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, return IsConstant(Lane); }; } else { - // Use a splat, but possibly a load_splat - LoadSDNode *SplattedLoad; - if ((SplattedLoad = dyn_cast(SplatValue)) && - SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) { - Result = DAG.getMemIntrinsicNode( - WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT), - {SplattedLoad->getChain(), SplattedLoad->getBasePtr(), - SplattedLoad->getOffset()}, - SplattedLoad->getMemoryVT(), SplattedLoad->getMemOperand()); - } else { - Result = DAG.getSplatBuildVector(VecT, DL, SplatValue); - } + // Use a splat (which might be selected as a load splat) + Result = DAG.getSplatBuildVector(VecT, DL, SplatValue); IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) { return Lane == SplatValue; }; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index ffd1f91a88288..ad2ec40b8b31b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -54,15 +54,6 @@ def ImmI#SIZE : ImmLeaf; -// Create vector with identical lanes: splat -def splat2 : PatFrag<(ops node:$x), (build_vector $x, $x)>; -def splat4 : PatFrag<(ops node:$x), (build_vector $x, $x, $x, $x)>; -def splat8 : PatFrag<(ops node:$x), (build_vector $x, $x, $x, $x, - $x, $x, $x, $x)>; -def splat16 : PatFrag<(ops node:$x), - (build_vector $x, $x, $x, $x, $x, $x, $x, $x, - $x, $x, $x, $x, $x, $x, $x, $x)>; - class Vec { ValueType vt; ValueType int_vt; @@ -70,6 +61,7 @@ class Vec { WebAssemblyRegClass lane_rc; int lane_bits; ImmLeaf lane_idx; + SDPatternOperator lane_load; PatFrag splat; string prefix; Vec split; @@ -82,7 +74,8 @@ def I8x16 : Vec { let lane_rc = I32; let lane_bits = 8; let lane_idx = LaneIdx16; - let splat = splat16; + let lane_load = extloadi8; + let splat = PatFrag<(ops node:$x), (v16i8 (splat_vector (i8 $x)))>; let prefix = "i8x16"; } @@ -93,7 +86,8 @@ def I16x8 : Vec { let lane_rc = I32; let lane_bits = 16; let lane_idx = LaneIdx8; - let splat = splat8; + let lane_load = extloadi16; + let splat = PatFrag<(ops node:$x), (v8i16 (splat_vector (i16 $x)))>; let prefix = "i16x8"; let split = I8x16; } @@ -105,7 +99,8 @@ def I32x4 : Vec { let lane_rc = I32; let lane_bits = 32; let lane_idx = LaneIdx4; - let splat = splat4; + let lane_load = load; + let splat = PatFrag<(ops node:$x), (v4i32 (splat_vector (i32 $x)))>; let prefix = "i32x4"; let split = I16x8; } @@ -117,7 +112,8 @@ def I64x2 : Vec { let lane_rc = I64; let lane_bits = 64; let lane_idx = LaneIdx2; - let splat = splat2; + let lane_load = load; + let splat = PatFrag<(ops node:$x), (v2i64 (splat_vector (i64 $x)))>; let prefix = "i64x2"; let split = I32x4; } @@ -129,7 +125,8 @@ def F32x4 : Vec { let lane_rc = F32; let lane_bits = 32; let lane_idx = LaneIdx4; - let splat = splat4; + let lane_load = load; + let splat = PatFrag<(ops node:$x), (v4f32 (splat_vector (f32 $x)))>; let prefix = "f32x4"; } @@ -140,7 +137,8 @@ def F64x2 : Vec { let lane_rc = F64; let lane_bits = 64; let lane_idx = LaneIdx2; - let splat = splat2; + let lane_load = load; + let splat = PatFrag<(ops node:$x), (v2f64 (splat_vector (f64 $x)))>; let prefix = "f64x2"; } @@ -195,14 +193,11 @@ defm "" : SIMDLoadSplat<16, 8>; defm "" : SIMDLoadSplat<32, 9>; defm "" : SIMDLoadSplat<64, 10>; -def wasm_load_splat_t : SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>; -def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t, - [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; -def load_splat : PatFrag<(ops node:$addr), (wasm_load_splat node:$addr)>; - foreach vec = AllVecs in { -defvar inst = "LOAD"#vec.lane_bits#"_SPLAT"; -defm : LoadPat; + defvar inst = "LOAD"#vec.lane_bits#"_SPLAT"; + defm : LoadPat, + inst>; } // Load and extend @@ -488,6 +483,17 @@ defm "" : ConstVec; +// Match splat(x) -> const.v128(x, ..., x) +foreach vec = AllVecs in { + defvar numEls = !div(vec.vt.Size, vec.lane_bits); + defvar isFloat = !or(!eq(vec.lane_vt, f32), !eq(vec.lane_vt, f64)); + defvar immKind = !if(isFloat, fpimm, imm); + def : Pat<(vec.splat (vec.lane_vt immKind:$x)), + !dag(!cast("CONST_V128_"#vec), + !listsplat((vec.lane_vt immKind:$x), numEls), + ?)>; +} + // Shuffle lanes: shuffle defm SHUFFLE : SIMD_I<(outs V128:$dst), diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll index e7bd9b53541f1..007802dd0c035 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll @@ -297,9 +297,9 @@ define <2 x i16> @stest_f64i16(<2 x double> %x) { ; CHECK-NEXT: f64x2.extract_lane 1 ; CHECK-NEXT: i32.trunc_sat_f64_s ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: v128.const 32767, 32767, 0, 0 +; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767 ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: v128.const -32768, -32768, 0, 0 +; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768 ; CHECK-NEXT: i32x4.max_s ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 @@ -326,7 +326,7 @@ define <2 x i16> @utest_f64i16(<2 x double> %x) { ; CHECK-NEXT: f64x2.extract_lane 1 ; CHECK-NEXT: i32.trunc_sat_f64_u ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: v128.const 65535, 65535, 0, 0 +; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 ; CHECK-NEXT: i32x4.min_u ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 @@ -351,7 +351,7 @@ define <2 x i16> @ustest_f64i16(<2 x double> %x) { ; CHECK-NEXT: f64x2.extract_lane 1 ; CHECK-NEXT: i32.trunc_sat_f64_s ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: v128.const 65535, 65535, 0, 0 +; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 ; CHECK-NEXT: i32x4.min_s ; CHECK-NEXT: v128.const 0, 0, 0, 0 ; CHECK-NEXT: i32x4.max_s @@ -1790,9 +1790,9 @@ define <2 x i16> @stest_f64i16_mm(<2 x double> %x) { ; CHECK-NEXT: f64x2.extract_lane 1 ; CHECK-NEXT: i32.trunc_sat_f64_s ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: v128.const 32767, 32767, 0, 0 +; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767 ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: v128.const -32768, -32768, 0, 0 +; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768 ; CHECK-NEXT: i32x4.max_s ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 @@ -1817,7 +1817,7 @@ define <2 x i16> @utest_f64i16_mm(<2 x double> %x) { ; CHECK-NEXT: f64x2.extract_lane 1 ; CHECK-NEXT: i32.trunc_sat_f64_u ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: v128.const 65535, 65535, 0, 0 +; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 ; CHECK-NEXT: i32x4.min_u ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 @@ -1841,7 +1841,7 @@ define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) { ; CHECK-NEXT: f64x2.extract_lane 1 ; CHECK-NEXT: i32.trunc_sat_f64_s ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: v128.const 65535, 65535, 0, 0 +; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 ; CHECK-NEXT: i32x4.min_s ; CHECK-NEXT: v128.const 0, 0, 0, 0 ; CHECK-NEXT: i32x4.max_s diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll index 1cc05fcf80f15..a51b358de2e89 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll @@ -118,107 +118,92 @@ define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) { ; CHECK-LABEL: swizzle_all_i8x16: ; CHECK: .functype swizzle_all_i8x16 (v128, v128) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: global.get $push80=, __stack_pointer -; CHECK-NEXT: i32.const $push81=, 16 -; CHECK-NEXT: i32.sub $push98=, $pop80, $pop81 -; CHECK-NEXT: local.tee $push97=, $2=, $pop98 -; CHECK-NEXT: v128.store 0($pop97), $0 -; CHECK-NEXT: i8x16.extract_lane_u $push61=, $1, 0 +; CHECK-NEXT: global.get $push65=, __stack_pointer +; CHECK-NEXT: i32.const $push66=, 16 +; CHECK-NEXT: i32.sub $push83=, $pop65, $pop66 +; CHECK-NEXT: local.tee $push82=, $2=, $pop83 +; CHECK-NEXT: v128.store 0($pop82), $0 +; CHECK-NEXT: i8x16.extract_lane_u $push61=, $1, 15 ; CHECK-NEXT: i32.const $push1=, 15 ; CHECK-NEXT: i32.and $push62=, $pop61, $pop1 ; CHECK-NEXT: i32.or $push63=, $2, $pop62 -; CHECK-NEXT: v128.load8_splat $push64=, 0($pop63) -; CHECK-NEXT: i8x16.extract_lane_u $push57=, $1, 1 -; CHECK-NEXT: i32.const $push96=, 15 -; CHECK-NEXT: i32.and $push58=, $pop57, $pop96 +; CHECK-NEXT: i8x16.extract_lane_u $push57=, $1, 14 +; CHECK-NEXT: i32.const $push81=, 15 +; CHECK-NEXT: i32.and $push58=, $pop57, $pop81 ; CHECK-NEXT: i32.or $push59=, $2, $pop58 -; CHECK-NEXT: i32.load8_u $push60=, 0($pop59) -; CHECK-NEXT: i8x16.replace_lane $push65=, $pop64, 1, $pop60 -; CHECK-NEXT: i8x16.extract_lane_u $push53=, $1, 2 -; CHECK-NEXT: i32.const $push95=, 15 -; CHECK-NEXT: i32.and $push54=, $pop53, $pop95 +; CHECK-NEXT: i8x16.extract_lane_u $push53=, $1, 13 +; CHECK-NEXT: i32.const $push80=, 15 +; CHECK-NEXT: i32.and $push54=, $pop53, $pop80 ; CHECK-NEXT: i32.or $push55=, $2, $pop54 -; CHECK-NEXT: i32.load8_u $push56=, 0($pop55) -; CHECK-NEXT: i8x16.replace_lane $push66=, $pop65, 2, $pop56 -; CHECK-NEXT: i8x16.extract_lane_u $push49=, $1, 3 -; CHECK-NEXT: i32.const $push94=, 15 -; CHECK-NEXT: i32.and $push50=, $pop49, $pop94 +; CHECK-NEXT: i8x16.extract_lane_u $push49=, $1, 12 +; CHECK-NEXT: i32.const $push79=, 15 +; CHECK-NEXT: i32.and $push50=, $pop49, $pop79 ; CHECK-NEXT: i32.or $push51=, $2, $pop50 -; CHECK-NEXT: i32.load8_u $push52=, 0($pop51) -; CHECK-NEXT: i8x16.replace_lane $push67=, $pop66, 3, $pop52 -; CHECK-NEXT: i8x16.extract_lane_u $push45=, $1, 4 -; CHECK-NEXT: i32.const $push93=, 15 -; CHECK-NEXT: i32.and $push46=, $pop45, $pop93 +; CHECK-NEXT: i8x16.extract_lane_u $push45=, $1, 11 +; CHECK-NEXT: i32.const $push78=, 15 +; CHECK-NEXT: i32.and $push46=, $pop45, $pop78 ; CHECK-NEXT: i32.or $push47=, $2, $pop46 -; CHECK-NEXT: i32.load8_u $push48=, 0($pop47) -; CHECK-NEXT: i8x16.replace_lane $push68=, $pop67, 4, $pop48 -; CHECK-NEXT: i8x16.extract_lane_u $push41=, $1, 5 -; CHECK-NEXT: i32.const $push92=, 15 -; CHECK-NEXT: i32.and $push42=, $pop41, $pop92 +; CHECK-NEXT: i8x16.extract_lane_u $push41=, $1, 10 +; CHECK-NEXT: i32.const $push77=, 15 +; CHECK-NEXT: i32.and $push42=, $pop41, $pop77 ; CHECK-NEXT: i32.or $push43=, $2, $pop42 -; CHECK-NEXT: i32.load8_u $push44=, 0($pop43) -; CHECK-NEXT: i8x16.replace_lane $push69=, $pop68, 5, $pop44 -; CHECK-NEXT: i8x16.extract_lane_u $push37=, $1, 6 -; CHECK-NEXT: i32.const $push91=, 15 -; CHECK-NEXT: i32.and $push38=, $pop37, $pop91 +; CHECK-NEXT: i8x16.extract_lane_u $push37=, $1, 9 +; CHECK-NEXT: i32.const $push76=, 15 +; CHECK-NEXT: i32.and $push38=, $pop37, $pop76 ; CHECK-NEXT: i32.or $push39=, $2, $pop38 -; CHECK-NEXT: i32.load8_u $push40=, 0($pop39) -; CHECK-NEXT: i8x16.replace_lane $push70=, $pop69, 6, $pop40 -; CHECK-NEXT: i8x16.extract_lane_u $push33=, $1, 7 -; CHECK-NEXT: i32.const $push90=, 15 -; CHECK-NEXT: i32.and $push34=, $pop33, $pop90 +; CHECK-NEXT: i8x16.extract_lane_u $push33=, $1, 8 +; CHECK-NEXT: i32.const $push75=, 15 +; CHECK-NEXT: i32.and $push34=, $pop33, $pop75 ; CHECK-NEXT: i32.or $push35=, $2, $pop34 -; CHECK-NEXT: i32.load8_u $push36=, 0($pop35) -; CHECK-NEXT: i8x16.replace_lane $push71=, $pop70, 7, $pop36 -; CHECK-NEXT: i8x16.extract_lane_u $push29=, $1, 8 -; CHECK-NEXT: i32.const $push89=, 15 -; CHECK-NEXT: i32.and $push30=, $pop29, $pop89 +; CHECK-NEXT: i8x16.extract_lane_u $push29=, $1, 7 +; CHECK-NEXT: i32.const $push74=, 15 +; CHECK-NEXT: i32.and $push30=, $pop29, $pop74 ; CHECK-NEXT: i32.or $push31=, $2, $pop30 -; CHECK-NEXT: i32.load8_u $push32=, 0($pop31) -; CHECK-NEXT: i8x16.replace_lane $push72=, $pop71, 8, $pop32 -; CHECK-NEXT: i8x16.extract_lane_u $push25=, $1, 9 -; CHECK-NEXT: i32.const $push88=, 15 -; CHECK-NEXT: i32.and $push26=, $pop25, $pop88 +; CHECK-NEXT: i8x16.extract_lane_u $push25=, $1, 6 +; CHECK-NEXT: i32.const $push73=, 15 +; CHECK-NEXT: i32.and $push26=, $pop25, $pop73 ; CHECK-NEXT: i32.or $push27=, $2, $pop26 -; CHECK-NEXT: i32.load8_u $push28=, 0($pop27) -; CHECK-NEXT: i8x16.replace_lane $push73=, $pop72, 9, $pop28 -; CHECK-NEXT: i8x16.extract_lane_u $push21=, $1, 10 -; CHECK-NEXT: i32.const $push87=, 15 -; CHECK-NEXT: i32.and $push22=, $pop21, $pop87 +; CHECK-NEXT: i8x16.extract_lane_u $push21=, $1, 5 +; CHECK-NEXT: i32.const $push72=, 15 +; CHECK-NEXT: i32.and $push22=, $pop21, $pop72 ; CHECK-NEXT: i32.or $push23=, $2, $pop22 -; CHECK-NEXT: i32.load8_u $push24=, 0($pop23) -; CHECK-NEXT: i8x16.replace_lane $push74=, $pop73, 10, $pop24 -; CHECK-NEXT: i8x16.extract_lane_u $push17=, $1, 11 -; CHECK-NEXT: i32.const $push86=, 15 -; CHECK-NEXT: i32.and $push18=, $pop17, $pop86 +; CHECK-NEXT: i8x16.extract_lane_u $push17=, $1, 4 +; CHECK-NEXT: i32.const $push71=, 15 +; CHECK-NEXT: i32.and $push18=, $pop17, $pop71 ; CHECK-NEXT: i32.or $push19=, $2, $pop18 -; CHECK-NEXT: i32.load8_u $push20=, 0($pop19) -; CHECK-NEXT: i8x16.replace_lane $push75=, $pop74, 11, $pop20 -; CHECK-NEXT: i8x16.extract_lane_u $push13=, $1, 12 -; CHECK-NEXT: i32.const $push85=, 15 -; CHECK-NEXT: i32.and $push14=, $pop13, $pop85 +; CHECK-NEXT: i8x16.extract_lane_u $push13=, $1, 3 +; CHECK-NEXT: i32.const $push70=, 15 +; CHECK-NEXT: i32.and $push14=, $pop13, $pop70 ; CHECK-NEXT: i32.or $push15=, $2, $pop14 -; CHECK-NEXT: i32.load8_u $push16=, 0($pop15) -; CHECK-NEXT: i8x16.replace_lane $push76=, $pop75, 12, $pop16 -; CHECK-NEXT: i8x16.extract_lane_u $push9=, $1, 13 -; CHECK-NEXT: i32.const $push84=, 15 -; CHECK-NEXT: i32.and $push10=, $pop9, $pop84 +; CHECK-NEXT: i8x16.extract_lane_u $push9=, $1, 2 +; CHECK-NEXT: i32.const $push69=, 15 +; CHECK-NEXT: i32.and $push10=, $pop9, $pop69 ; CHECK-NEXT: i32.or $push11=, $2, $pop10 -; CHECK-NEXT: i32.load8_u $push12=, 0($pop11) -; CHECK-NEXT: i8x16.replace_lane $push77=, $pop76, 13, $pop12 -; CHECK-NEXT: i8x16.extract_lane_u $push5=, $1, 14 -; CHECK-NEXT: i32.const $push83=, 15 -; CHECK-NEXT: i32.and $push6=, $pop5, $pop83 -; CHECK-NEXT: i32.or $push7=, $2, $pop6 -; CHECK-NEXT: i32.load8_u $push8=, 0($pop7) -; CHECK-NEXT: i8x16.replace_lane $push78=, $pop77, 14, $pop8 -; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 15 -; CHECK-NEXT: i32.const $push82=, 15 -; CHECK-NEXT: i32.and $push2=, $pop0, $pop82 +; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 1 +; CHECK-NEXT: i32.const $push68=, 15 +; CHECK-NEXT: i32.and $push2=, $pop0, $pop68 ; CHECK-NEXT: i32.or $push3=, $2, $pop2 -; CHECK-NEXT: i32.load8_u $push4=, 0($pop3) -; CHECK-NEXT: i8x16.replace_lane $push79=, $pop78, 15, $pop4 -; CHECK-NEXT: return $pop79 +; CHECK-NEXT: i8x16.extract_lane_u $push4=, $1, 0 +; CHECK-NEXT: i32.const $push67=, 15 +; CHECK-NEXT: i32.and $push5=, $pop4, $pop67 +; CHECK-NEXT: i32.or $push6=, $2, $pop5 +; CHECK-NEXT: v128.load8_splat $push7=, 0($pop6) +; CHECK-NEXT: v128.load8_lane $push8=, 0($pop3), $pop7, 1 +; CHECK-NEXT: v128.load8_lane $push12=, 0($pop11), $pop8, 2 +; CHECK-NEXT: v128.load8_lane $push16=, 0($pop15), $pop12, 3 +; CHECK-NEXT: v128.load8_lane $push20=, 0($pop19), $pop16, 4 +; CHECK-NEXT: v128.load8_lane $push24=, 0($pop23), $pop20, 5 +; CHECK-NEXT: v128.load8_lane $push28=, 0($pop27), $pop24, 6 +; CHECK-NEXT: v128.load8_lane $push32=, 0($pop31), $pop28, 7 +; CHECK-NEXT: v128.load8_lane $push36=, 0($pop35), $pop32, 8 +; CHECK-NEXT: v128.load8_lane $push40=, 0($pop39), $pop36, 9 +; CHECK-NEXT: v128.load8_lane $push44=, 0($pop43), $pop40, 10 +; CHECK-NEXT: v128.load8_lane $push48=, 0($pop47), $pop44, 11 +; CHECK-NEXT: v128.load8_lane $push52=, 0($pop51), $pop48, 12 +; CHECK-NEXT: v128.load8_lane $push56=, 0($pop55), $pop52, 13 +; CHECK-NEXT: v128.load8_lane $push60=, 0($pop59), $pop56, 14 +; CHECK-NEXT: v128.load8_lane $push64=, 0($pop63), $pop60, 15 +; CHECK-NEXT: return $pop64 %m0 = extractelement <16 x i8> %mask, i32 0 %s0 = extractelement <16 x i8> %src, i8 %m0 %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0 @@ -423,7 +408,7 @@ define <4 x float> @undef_const_insert_f32x4() { ; CHECK-LABEL: undef_const_insert_f32x4: ; CHECK: .functype undef_const_insert_f32x4 () -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const $push0=, 0x0p0, 0x1.5p5, 0x0p0, 0x0p0 +; CHECK-NEXT: v128.const $push0=, 0x1.5p5, 0x1.5p5, 0x1.5p5, 0x1.5p5 ; CHECK-NEXT: return $pop0 %v = insertelement <4 x float> undef, float 42., i32 1 ret <4 x float> %v diff --git a/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll b/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll index 1a2aaa1f97ecc..c7ef795cf3c0e 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll @@ -7,10 +7,11 @@ target triple = "wasm32-unknown-unknown" ; CHECK-LABEL: load_splat: ; CHECK-NEXT: .functype load_splat (i32, i32) -> (i32) -; CHECK-NEXT: i32.load8_u $[[E:[0-9]+]]=, 0($0){{$}} -; CHECK-NEXT: v128.load8_splat $push[[V:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: i32.load8_u $push[[E:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: local.tee $push[[T:[0-9]+]]=, $[[R:[0-9]+]]=, $pop[[E]]{{$}} +; CHECK-NEXT: i8x16.splat $push[[V:[0-9]+]]=, $pop[[T]]{{$}} ; CHECK-NEXT: v128.store 0($1), $pop[[V]]{{$}} -; CHECK-NEXT: return $[[E]]{{$}} +; CHECK-NEXT: return $[[R]]{{$}} define i8 @load_splat(ptr %p, ptr %out) { %e = load i8, ptr %p %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 diff --git a/llvm/test/CodeGen/WebAssembly/simd-pr51605.ll b/llvm/test/CodeGen/WebAssembly/simd-pr51605.ll index ba89f70824bb4..2c2002d6fa83b 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-pr51605.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-pr51605.ll @@ -13,11 +13,9 @@ define <4 x i8> @test_i8(<4 x i8> %b) { ; CHECK-LABEL: test_i8: ; CHECK: .functype test_i8 (v128) -> (v128) ; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.const 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.const -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK-NEXT: v128.xor -; CHECK-NEXT: v128.const 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK-NEXT: v128.and +; CHECK-NEXT: v128.andnot ; CHECK-NEXT: # fallthrough-return %c = and <4 x i8> %b, %d = xor <4 x i8> %c, @@ -28,11 +26,9 @@ define <4 x i16> @test_i16(<4 x i16> %b) { ; CHECK-LABEL: test_i16: ; CHECK: .functype test_i16 (v128) -> (v128) ; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.const 1, 1, 1, 1, 1, 1, 1, 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.const -1, -1, -1, -1, 0, 0, 0, 0 -; CHECK-NEXT: v128.xor -; CHECK-NEXT: v128.const 1, 1, 1, 1, 0, 0, 0, 0 -; CHECK-NEXT: v128.and +; CHECK-NEXT: v128.andnot ; CHECK-NEXT: # fallthrough-return %c = and <4 x i16> %b, %d = xor <4 x i16> %c, diff --git a/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll b/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll index 88494c0c6aff2..2b08f1c23b59a 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll @@ -47,11 +47,10 @@ define <16 x i8> @shl_abs(<16 x i8> %v, i8 %a) { ; CHECK-NEXT: i8x16.splat $push1=, $1 ; CHECK-NEXT: i8x16.splat $push0=, $2 ; CHECK-NEXT: i8x16.add $push2=, $pop1, $pop0 -; CHECK-NEXT: i8x16.shuffle $push3=, $pop2, $0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK-NEXT: i8x16.abs $push4=, $pop3 -; CHECK-NEXT: i8x16.extract_lane_u $push5=, $pop4, 0 -; CHECK-NEXT: i8x16.shl $push6=, $0, $pop5 -; CHECK-NEXT: return $pop6 +; CHECK-NEXT: i8x16.abs $push3=, $pop2 +; CHECK-NEXT: i8x16.extract_lane_u $push4=, $pop3, 0 +; CHECK-NEXT: i8x16.shl $push5=, $0, $pop4 +; CHECK-NEXT: return $pop5 define <16 x i8> @shl_abs_add(<16 x i8> %v, i8 %a, i8 %b) { %t1 = insertelement <16 x i8> undef, i8 %a, i32 0 %va = shufflevector <16 x i8> %t1, <16 x i8> undef, <16 x i32> zeroinitializer