diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index 70f7b889551a4..e3d01075ed5e9 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -13,6 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "WebAssemblyTargetTransformInfo.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsWebAssembly.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
 #include "llvm/CodeGen/CostTable.h"
 using namespace llvm;
 
@@ -493,3 +496,97 @@ bool WebAssemblyTTIImpl::isProfitableToSinkOperands(
 
   return false;
 }
+
+/// Attempt to convert [relaxed_]swizzle to shufflevector if the mask is
+/// constant.
+///
+/// \param II the swizzle intrinsic call; operand 0 is the vector being
+///        swizzled and operand 1 is the lane-index mask.
+/// \param Builder builder used to create the replacement shufflevector.
+/// \param IsRelaxed true when \p II is the relaxed_swizzle variant.
+/// \returns the shuffle replacing \p II, or nullptr when the mask is not a
+///          constant of undef/integer lanes or, for the relaxed variant, has
+///          a lane index above 15.
+static Value *simplifyWasmSwizzle(const IntrinsicInst &II,
+                                  InstCombiner::BuilderTy &Builder,
+                                  bool IsRelaxed) {
+  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
+  if (!V)
+    return nullptr;
+
+  auto *VecTy = cast<FixedVectorType>(II.getType());
+  unsigned NumElts = VecTy->getNumElements();
+  assert(NumElts == 16 && "wasm swizzle operates on <16 x i8> vectors");
+
+  // Construct a shuffle mask from constant integers or UNDEFs.
+  int Indexes[16];
+  bool AnyOutOfBounds = false;
+
+  for (unsigned I = 0; I < NumElts; ++I) {
+    Constant *COp = V->getAggregateElement(I);
+    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
+      return nullptr;
+
+    if (isa<UndefValue>(COp)) {
+      Indexes[I] = -1; // Undef lane maps to an undefined mask element.
+      continue;
+    }
+
+    if (IsRelaxed && cast<ConstantInt>(COp)->getSExtValue() >= NumElts) {
+      // The relaxed_swizzle operation always returns 0 if the lane index is
+      // less than 0 when interpreted as a signed value. For lane indices above
+      // 15, it may return either 0 or the lane at `Index % 16`, but the choice
+      // must be made consistently. As the WebAssembly spec states:
+      //
+      // "The result of relaxed operators are implementation-dependent, because
+      // the set of possible results may depend on properties of the host
+      // environment, such as its hardware. Technically, their behaviour is
+      // controlled by a set of global parameters to the semantics that an
+      // implementation can instantiate in different ways. These choices are
+      // fixed, that is, parameters are constant during the execution of any
+      // given program."
+      //
+      // The WebAssembly runtime may choose differently from us, so we can't
+      // optimize a relaxed swizzle with lane indices above 15.
+      return nullptr;
+    }
+
+    uint64_t Index = cast<ConstantInt>(COp)->getZExtValue();
+    if (Index >= NumElts) {
+      AnyOutOfBounds = true;
+      // If there are out-of-bounds indices, the swizzle instruction returns
+      // zeroes in those lanes. We'll provide an all-zeroes vector as the
+      // second argument to shufflevector and read the first element from it.
+      Indexes[I] = NumElts;
+      continue;
+    }
+
+    Indexes[I] = Index;
+  }
+
+  auto *V1 = II.getArgOperand(0);
+  auto *V2 =
+      AnyOutOfBounds ? Constant::getNullValue(VecTy) : PoisonValue::get(VecTy);
+
+  return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, NumElts));
+}
+
+/// TargetTransformInfo override that lets InstCombine replace a wasm
+/// [relaxed_]swizzle call with the shufflevector built by simplifyWasmSwizzle.
+/// Returns std::nullopt for any other intrinsic or when no fold applies.
+std::optional<Instruction *>
+WebAssemblyTTIImpl::instCombineIntrinsic(InstCombiner &IC,
+                                         IntrinsicInst &II) const {
+  Intrinsic::ID IID = II.getIntrinsicID();
+  switch (IID) {
+  case Intrinsic::wasm_swizzle:
+  case Intrinsic::wasm_relaxed_swizzle:
+    if (Value *V = simplifyWasmSwizzle(
+            II, IC.Builder, IID == Intrinsic::wasm_relaxed_swizzle)) {
+      return IC.replaceInstUsesWith(II, V);
+    }
+    break;
+  }
+
+  return std::nullopt;
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index 4146c0ec6ab07..3e9e8972395ab 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -103,6 +103,9 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
   bool isProfitableToSinkOperands(Instruction *I,
                                   SmallVectorImpl<Use *> &Ops) const override;
 
+  std::optional<Instruction *>
+  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;
+
   /// @}
 };
 
diff --git a/llvm/test/Transforms/InstCombine/WebAssembly/fold-swizzle.ll b/llvm/test/Transforms/InstCombine/WebAssembly/fold-swizzle.ll
new file mode 100644
index 0000000000000..d3223ec20faf8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/WebAssembly/fold-swizzle.ll
@@ -0,0 +1,116 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=instcombine -mtriple=wasm32-unknown-unknown -S | FileCheck %s
+
+; swizzle with a constant operand should be optimized to a shufflevector.
+
+declare <16 x i8> @llvm.wasm.swizzle(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8>, <16 x i8>)
+
+; Identity swizzle pattern: the call folds away and %v is returned unchanged.
+define <16 x i8> @swizzle_identity(<16 x i8> %v) {
+; CHECK-LABEL: define <16 x i8> @swizzle_identity(
+; CHECK-SAME: <16 x i8> [[V:%.*]]) {
+; CHECK-NEXT: ret <16 x i8> [[V]]
+;
+  %result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> )
+  ret <16 x i8> %result
+}
+
+; Reverse swizzle pattern: the call becomes a shufflevector of %v with a poison second operand.
+define <16 x i8> @swizzle_reverse(<16 x i8> %v) {
+; CHECK-LABEL: define <16 x i8> @swizzle_reverse(
+; CHECK-SAME: <16 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> poison, <16 x i32>
+; CHECK-NEXT: ret <16 x i8> [[RESULT]]
+;
+  %result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> )
+  ret <16 x i8> %result
+}
+
+; Poison elements in the mask: the call is still folded to a shufflevector of %v.
+define <16 x i8> @swizzle_with_poison(<16 x i8> %v) {
+; CHECK-LABEL: define <16 x i8> @swizzle_with_poison(
+; CHECK-SAME: <16 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> poison, <16 x i32>
+; CHECK-NEXT: ret <16 x i8> [[RESULT]]
+;
+  %result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> )
+  ret <16 x i8> %result
+}
+
+; Negative test: a non-constant mask operand leaves the swizzle call untouched.
+define <16 x i8> @swizzle_non_constant(<16 x i8> %v, <16 x i8> %mask) {
+; CHECK-LABEL: define <16 x i8> @swizzle_non_constant(
+; CHECK-SAME: <16 x i8> [[V:%.*]], <16 x i8> [[MASK:%.*]]) {
+; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[V]], <16 x i8> [[MASK]])
+; CHECK-NEXT: ret <16 x i8> [[RESULT]]
+;
+  %result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> %mask)
+  ret <16 x i8> %result
+}
+
+; Out-of-bounds index, otherwise identity pattern: the result is %v with lane 15 replaced by zero.
+define <16 x i8> @swizzle_out_of_bounds_1(<16 x i8> %v) {
+; CHECK-LABEL: define <16 x i8> @swizzle_out_of_bounds_1(
+; CHECK-SAME: <16 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[RESULT:%.*]] = insertelement <16 x i8> [[V]], i8 0, i64 15
+; CHECK-NEXT: ret <16 x i8> [[RESULT]]
+;
+  %result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> )
+  ret <16 x i8> %result
+}
+
+; Out-of-bounds indices, both negative and positive: the call is still folded to a shufflevector.
+define <16 x i8> @swizzle_out_of_bounds_2(<16 x i8> %v) {
+; CHECK-LABEL: define <16 x i8> @swizzle_out_of_bounds_2(
+; CHECK-SAME: <16 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> , <16 x i32>
+; CHECK-NEXT: ret <16 x i8> [[RESULT]]
+;
+  %result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> )
+  ret <16 x i8> %result
+}
+
+; Identity swizzle pattern (relaxed_swizzle): the call folds away and %v is returned unchanged.
+define <16 x i8> @relaxed_swizzle_identity(<16 x i8> %v) {
+; CHECK-LABEL: define <16 x i8> @relaxed_swizzle_identity(
+; CHECK-SAME: <16 x i8> [[V:%.*]]) {
+; CHECK-NEXT: ret <16 x i8> [[V]]
+;
+  %result = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> %v, <16 x i8> )
+  ret <16 x i8> %result
+}
+
+; Reverse swizzle pattern (relaxed_swizzle): folded to a shufflevector of %v with a poison second operand.
+define <16 x i8> @relaxed_swizzle_reverse(<16 x i8> %v) {
+; CHECK-LABEL: define <16 x i8> @relaxed_swizzle_reverse(
+; CHECK-SAME: <16 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> poison, <16 x i32>
+; CHECK-NEXT: ret <16 x i8> [[RESULT]]
+;
+  %result = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> %v, <16 x i8> )
+  ret <16 x i8> %result
+}
+
+; Out-of-bounds index, only negative (relaxed_swizzle): the call is still folded to a shufflevector.
+define <16 x i8> @relaxed_swizzle_out_of_bounds(<16 x i8> %v) {
+; CHECK-LABEL: define <16 x i8> @relaxed_swizzle_out_of_bounds(
+; CHECK-SAME: <16 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> , <16 x i32>
+; CHECK-NEXT: ret <16 x i8> [[RESULT]]
+;
+  %result = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> %v, <16 x i8> )
+  ret <16 x i8> %result
+}
+
+; Negative test: out-of-bounds index, both positive and negative (relaxed_swizzle); the call must be left as is.
+; The choice between different relaxed semantics can only be made at runtime, since it must be consistent.
+define <16 x i8> @relaxed_swizzle_out_of_bounds_positive(<16 x i8> %v) {
+; CHECK-LABEL: define <16 x i8> @relaxed_swizzle_out_of_bounds_positive(
+; CHECK-SAME: <16 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> [[V]], <16 x i8> )
+; CHECK-NEXT: ret <16 x i8> [[RESULT]]
+;
+  %result = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> %v, <16 x i8> )
+  ret <16 x i8> %result
+}