From 9746078bd5243021d43ef5b8690f36f1ffc9a7d1 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Fri, 21 Nov 2025 15:53:53 -0500 Subject: [PATCH 1/5] [WebAssembly] Add InstCombine test for constant swizzles --- .../InstCombine/WebAssembly/fold-swizzle.ll | 126 ++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/WebAssembly/fold-swizzle.ll diff --git a/llvm/test/Transforms/InstCombine/WebAssembly/fold-swizzle.ll b/llvm/test/Transforms/InstCombine/WebAssembly/fold-swizzle.ll new file mode 100644 index 0000000000000..cc4dd71abc96f --- /dev/null +++ b/llvm/test/Transforms/InstCombine/WebAssembly/fold-swizzle.ll @@ -0,0 +1,126 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=instcombine -mtriple=wasm32-unknown-unknown -S | FileCheck %s + +; swizzle with a constant operand should be optimized to a shufflevector. + +declare <16 x i8> @llvm.wasm.swizzle(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8>, <16 x i8>) + +; Identity swizzle pattern +; TODO: Should simplify to nothing. +define <16 x i8> @swizzle_identity(<16 x i8> %v) { +; CHECK-LABEL: define <16 x i8> @swizzle_identity( +; CHECK-SAME: <16 x i8> [[V:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[V]], <16 x i8> ) +; CHECK-NEXT: ret <16 x i8> [[RESULT]] +; + %result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> ) + ret <16 x i8> %result +} + +; Reverse swizzle pattern +; TODO: Should simplify to shufflevector. +define <16 x i8> @swizzle_reverse(<16 x i8> %v) { +; CHECK-LABEL: define <16 x i8> @swizzle_reverse( +; CHECK-SAME: <16 x i8> [[V:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[V]], <16 x i8> ) +; CHECK-NEXT: ret <16 x i8> [[RESULT]] +; + %result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> ) + ret <16 x i8> %result +} + +; undef elements +; TODO: Should simplify to shufflevector. +define <16 x i8> @swizzle_with_undef(<16 x i8> %v) { +; CHECK-LABEL: define <16 x i8> @swizzle_with_undef( +; CHECK-SAME: <16 x i8> [[V:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[V]], <16 x i8> ) +; CHECK-NEXT: ret <16 x i8> [[RESULT]] +; + %result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> ) + ret <16 x i8> %result +} + +; Negative test: non-constant operand +define <16 x i8> @swizzle_non_constant(<16 x i8> %v, <16 x i8> %mask) { +; CHECK-LABEL: define <16 x i8> @swizzle_non_constant( +; CHECK-SAME: <16 x i8> [[V:%.*]], <16 x i8> [[MASK:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[V]], <16 x i8> [[MASK]]) +; CHECK-NEXT: ret <16 x i8> [[RESULT]] +; + %result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> %mask) + ret <16 x i8> %result +} + +; Out-of-bounds index, otherwise identity pattern +; TODO: Should simplify to insertelement. +define <16 x i8> @swizzle_out_of_bounds_1(<16 x i8> %v) { +; CHECK-LABEL: define <16 x i8> @swizzle_out_of_bounds_1( +; CHECK-SAME: <16 x i8> [[V:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[V]], <16 x i8> ) +; CHECK-NEXT: ret <16 x i8> [[RESULT]] +; + %result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> ) + ret <16 x i8> %result +} + +; Out-of-bounds indices, both negative and positive +; TODO: Should simplify to shufflevector. +define <16 x i8> @swizzle_out_of_bounds_2(<16 x i8> %v) { +; CHECK-LABEL: define <16 x i8> @swizzle_out_of_bounds_2( +; CHECK-SAME: <16 x i8> [[V:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[V]], <16 x i8> ) +; CHECK-NEXT: ret <16 x i8> [[RESULT]] +; + %result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> ) + ret <16 x i8> %result +} + +; Identity swizzle pattern (relaxed_swizzle) +; TODO: Should simplify to nothing. +define <16 x i8> @relaxed_swizzle_identity(<16 x i8> %v) { +; CHECK-LABEL: define <16 x i8> @relaxed_swizzle_identity( +; CHECK-SAME: <16 x i8> [[V:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> [[V]], <16 x i8> ) +; CHECK-NEXT: ret <16 x i8> [[RESULT]] +; + %result = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> %v, <16 x i8> ) + ret <16 x i8> %result +} + +; Reverse swizzle pattern (relaxed_swizzle) +; TODO: Should simplify to shufflevector. +define <16 x i8> @relaxed_swizzle_reverse(<16 x i8> %v) { +; CHECK-LABEL: define <16 x i8> @relaxed_swizzle_reverse( +; CHECK-SAME: <16 x i8> [[V:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> [[V]], <16 x i8> ) +; CHECK-NEXT: ret <16 x i8> [[RESULT]] +; + %result = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> %v, <16 x i8> ) + ret <16 x i8> %result +} + +; Out-of-bounds index, only negative (relaxed_swizzle) +; TODO: Should simplify to shufflevector. +define <16 x i8> @relaxed_swizzle_out_of_bounds(<16 x i8> %v) { +; CHECK-LABEL: define <16 x i8> @relaxed_swizzle_out_of_bounds( +; CHECK-SAME: <16 x i8> [[V:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> [[V]], <16 x i8> ) +; CHECK-NEXT: ret <16 x i8> [[RESULT]] +; + %result = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> %v, <16 x i8> ) + ret <16 x i8> %result +} + +; Negative test: out-of-bounds index, both positive and negative (relaxed_swizzle) +; The choice between different relaxed semantics can only be made at runtime, since it must be consistent. +define <16 x i8> @relaxed_swizzle_out_of_bounds_positive(<16 x i8> %v) { +; CHECK-LABEL: define <16 x i8> @relaxed_swizzle_out_of_bounds_positive( +; CHECK-SAME: <16 x i8> [[V:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> [[V]], <16 x i8> ) +; CHECK-NEXT: ret <16 x i8> [[RESULT]] +; + %result = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> %v, <16 x i8> ) + ret <16 x i8> %result +} From ffc29511e8c65db9998e840cd5459871577afd88 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Fri, 21 Nov 2025 16:22:54 -0500 Subject: [PATCH 2/5] [WebAssembly] Fold constant i8x16.swizzle to shufflevector --- llvm/lib/Target/WebAssembly/CMakeLists.txt | 1 + .../WebAssemblyInstCombineIntrinsic.cpp | 107 ++++++++++++++++++ .../WebAssemblyTargetTransformInfo.h | 2 + .../InstCombine/WebAssembly/fold-swizzle.ll | 26 ++--- .../llvm/lib/Target/WebAssembly/BUILD.gn | 1 + 5 files changed, 119 insertions(+), 18 deletions(-) create mode 100644 llvm/lib/Target/WebAssembly/WebAssemblyInstCombineIntrinsic.cpp diff --git a/llvm/lib/Target/WebAssembly/CMakeLists.txt b/llvm/lib/Target/WebAssembly/CMakeLists.txt index 17df119d62709..13fff96fc6a33 100644 --- a/llvm/lib/Target/WebAssembly/CMakeLists.txt +++ b/llvm/lib/Target/WebAssembly/CMakeLists.txt @@ -32,6 +32,7 @@ add_llvm_target(WebAssemblyCodeGen WebAssemblyFixIrreducibleControlFlow.cpp WebAssemblyFixFunctionBitcasts.cpp WebAssemblyFrameLowering.cpp + WebAssemblyInstCombineIntrinsic.cpp WebAssemblyISelDAGToDAG.cpp WebAssemblyISelLowering.cpp WebAssemblyInstrInfo.cpp diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstCombineIntrinsic.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstCombineIntrinsic.cpp new file mode 100644 index 0000000000000..2fa00b3c5d50d --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstCombineIntrinsic.cpp @@ -0,0 +1,107 @@ +//=== WebAssemblyInstCombineIntrinsic.cpp - +// WebAssembly specific InstCombine pass ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to +/// WebAssembly. It uses the target's detailed information to provide more +/// precise answers to certain TTI queries, while letting the target independent +/// and default TTI implementations handle the rest. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyTargetTransformInfo.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IntrinsicsWebAssembly.h" +#include "llvm/Transforms/InstCombine/InstCombiner.h" +#include + +using namespace llvm; +using namespace llvm::PatternMatch; + +/// Attempt to convert [relaxed_]swizzle to shufflevector if the mask is +/// constant. +static Value *simplifyWasmSwizzle(const IntrinsicInst &II, + InstCombiner::BuilderTy &Builder, + bool IsRelaxed) { + auto *V = dyn_cast(II.getArgOperand(1)); + if (!V) + return nullptr; + + auto *VecTy = cast(II.getType()); + unsigned NumElts = VecTy->getNumElements(); + assert(NumElts == 16); + + // Construct a shuffle mask from constant integers or UNDEFs. + int Indexes[16]; + bool AnyOutOfBounds = false; + + for (unsigned I = 0; I < NumElts; ++I) { + Constant *COp = V->getAggregateElement(I); + if (!COp || (!isa(COp) && !isa(COp))) + return nullptr; + + if (isa(COp)) { + Indexes[I] = -1; + continue; + } + + int64_t Index = cast(COp)->getSExtValue(); + + if (Index >= NumElts && IsRelaxed) { + // For lane indices above 15, the relaxed_swizzle operation can choose + // between returning 0 or the lane at `Index % 16`. However, the choice + // must be made consistently. As the WebAssembly spec states: + // + // "The result of relaxed operators are implementation-dependent, because + // the set of possible results may depend on properties of the host + // environment, such as its hardware. Technically, their behaviour is + // controlled by a set of global parameters to the semantics that an + // implementation can instantiate in different ways. These choices are + // fixed, that is, parameters are constant during the execution of any + // given program." + // + // The WebAssembly runtime may choose differently from us, so we can't + // optimize a relaxed swizzle with lane indices above 15. + return nullptr; + } + + if (Index >= NumElts || Index < 0) { + AnyOutOfBounds = true; + // If there are out-of-bounds indices, the swizzle instruction returns + // zeroes in those lanes. We'll provide an all-zeroes vector as the + // second argument to shufflevector and read the first element from it. + Indexes[I] = NumElts; + continue; + } + + Indexes[I] = Index; + } + + auto *V1 = II.getArgOperand(0); + auto *V2 = + AnyOutOfBounds ? Constant::getNullValue(VecTy) : PoisonValue::get(VecTy); + + return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, NumElts)); +} + +std::optional +WebAssemblyTTIImpl::instCombineIntrinsic(InstCombiner &IC, + IntrinsicInst &II) const { + Intrinsic::ID IID = II.getIntrinsicID(); + switch (IID) { + case Intrinsic::wasm_swizzle: + case Intrinsic::wasm_relaxed_swizzle: + if (Value *V = simplifyWasmSwizzle( + II, IC.Builder, IID == Intrinsic::wasm_relaxed_swizzle)) { + return IC.replaceInstUsesWith(II, V); + } + break; + } + + return std::nullopt; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index 4146c0ec6ab07..11f7efc625399 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -90,6 +90,8 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase { TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override; + std::optional + instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override; InstructionCost getPartialReductionCost( unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, diff --git a/llvm/test/Transforms/InstCombine/WebAssembly/fold-swizzle.ll b/llvm/test/Transforms/InstCombine/WebAssembly/fold-swizzle.ll index cc4dd71abc96f..ba251929c3739 100644 --- a/llvm/test/Transforms/InstCombine/WebAssembly/fold-swizzle.ll +++ b/llvm/test/Transforms/InstCombine/WebAssembly/fold-swizzle.ll @@ -7,23 +7,20 @@ declare <16 x i8> @llvm.wasm.swizzle(<16 x i8>, <16 x i8>) declare <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8>, <16 x i8>) ; Identity swizzle pattern -; TODO: Should simplify to nothing. define <16 x i8> @swizzle_identity(<16 x i8> %v) { ; CHECK-LABEL: define <16 x i8> @swizzle_identity( ; CHECK-SAME: <16 x i8> [[V:%.*]]) { -; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[V]], <16 x i8> ) -; CHECK-NEXT: ret <16 x i8> [[RESULT]] +; CHECK-NEXT: ret <16 x i8> [[V]] ; %result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> ) ret <16 x i8> %result } ; Reverse swizzle pattern -; TODO: Should simplify to shufflevector. define <16 x i8> @swizzle_reverse(<16 x i8> %v) { ; CHECK-LABEL: define <16 x i8> @swizzle_reverse( ; CHECK-SAME: <16 x i8> [[V:%.*]]) { -; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[V]], <16 x i8> ) +; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[RESULT]] ; %result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> ) @@ -31,11 +28,10 @@ define <16 x i8> @swizzle_reverse(<16 x i8> %v) { } ; undef elements -; TODO: Should simplify to shufflevector. define <16 x i8> @swizzle_with_undef(<16 x i8> %v) { ; CHECK-LABEL: define <16 x i8> @swizzle_with_undef( ; CHECK-SAME: <16 x i8> [[V:%.*]]) { -; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[V]], <16 x i8> ) +; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[RESULT]] ; %result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> ) @@ -54,11 +50,10 @@ define <16 x i8> @swizzle_non_constant(<16 x i8> %v, <16 x i8> %mask) { } ; Out-of-bounds index, otherwise identity pattern -; TODO: Should simplify to insertelement. define <16 x i8> @swizzle_out_of_bounds_1(<16 x i8> %v) { ; CHECK-LABEL: define <16 x i8> @swizzle_out_of_bounds_1( ; CHECK-SAME: <16 x i8> [[V:%.*]]) { -; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[V]], <16 x i8> ) +; CHECK-NEXT: [[RESULT:%.*]] = insertelement <16 x i8> [[V]], i8 0, i64 15 ; CHECK-NEXT: ret <16 x i8> [[RESULT]] ; %result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> ) @@ -66,11 +61,10 @@ define <16 x i8> @swizzle_out_of_bounds_1(<16 x i8> %v) { } ; Out-of-bounds indices, both negative and positive -; TODO: Should simplify to shufflevector. define <16 x i8> @swizzle_out_of_bounds_2(<16 x i8> %v) { ; CHECK-LABEL: define <16 x i8> @swizzle_out_of_bounds_2( ; CHECK-SAME: <16 x i8> [[V:%.*]]) { -; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[V]], <16 x i8> ) +; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> , <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[RESULT]] ; %result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> ) @@ -78,23 +72,20 @@ define <16 x i8> @swizzle_out_of_bounds_2(<16 x i8> %v) { } ; Identity swizzle pattern (relaxed_swizzle) -; TODO: Should simplify to nothing. define <16 x i8> @relaxed_swizzle_identity(<16 x i8> %v) { ; CHECK-LABEL: define <16 x i8> @relaxed_swizzle_identity( ; CHECK-SAME: <16 x i8> [[V:%.*]]) { -; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> [[V]], <16 x i8> ) -; CHECK-NEXT: ret <16 x i8> [[RESULT]] +; CHECK-NEXT: ret <16 x i8> [[V]] ; %result = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> %v, <16 x i8> ) ret <16 x i8> %result } ; Reverse swizzle pattern (relaxed_swizzle) -; TODO: Should simplify to shufflevector. define <16 x i8> @relaxed_swizzle_reverse(<16 x i8> %v) { ; CHECK-LABEL: define <16 x i8> @relaxed_swizzle_reverse( ; CHECK-SAME: <16 x i8> [[V:%.*]]) { -; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> [[V]], <16 x i8> ) +; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[RESULT]] ; %result = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> %v, <16 x i8> ) @@ -102,11 +93,10 @@ define <16 x i8> @relaxed_swizzle_reverse(<16 x i8> %v) { } ; Out-of-bounds index, only negative (relaxed_swizzle) -; TODO: Should simplify to shufflevector. define <16 x i8> @relaxed_swizzle_out_of_bounds(<16 x i8> %v) { ; CHECK-LABEL: define <16 x i8> @relaxed_swizzle_out_of_bounds( ; CHECK-SAME: <16 x i8> [[V:%.*]]) { -; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> [[V]], <16 x i8> ) +; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> , <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[RESULT]] ; %result = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> %v, <16 x i8> ) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn index 11a57fcb008cd..8d976a33ce9db 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn @@ -54,6 +54,7 @@ static_library("LLVMWebAssemblyCodeGen") { "WebAssemblyFixFunctionBitcasts.cpp", "WebAssemblyFixIrreducibleControlFlow.cpp", "WebAssemblyFrameLowering.cpp", + "WebAssemblyInstCombineIntrinsic.cpp", "WebAssemblyISelDAGToDAG.cpp", "WebAssemblyISelLowering.cpp", "WebAssemblyInstrInfo.cpp", From 41edf7d6737ea672dcfc6da9bd8eed8fb274f50b Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 22 Nov 2025 02:48:03 -0500 Subject: [PATCH 3/5] [WebAssembly] Use poison instead of undef for swizzle test --- .../Transforms/InstCombine/WebAssembly/fold-swizzle.ll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/WebAssembly/fold-swizzle.ll b/llvm/test/Transforms/InstCombine/WebAssembly/fold-swizzle.ll index ba251929c3739..d3223ec20faf8 100644 --- a/llvm/test/Transforms/InstCombine/WebAssembly/fold-swizzle.ll +++ b/llvm/test/Transforms/InstCombine/WebAssembly/fold-swizzle.ll @@ -27,14 +27,14 @@ define <16 x i8> @swizzle_reverse(<16 x i8> %v) { ret <16 x i8> %result } -; undef elements -define <16 x i8> @swizzle_with_undef(<16 x i8> %v) { -; CHECK-LABEL: define <16 x i8> @swizzle_with_undef( +; poison elements +define <16 x i8> @swizzle_with_poison(<16 x i8> %v) { +; CHECK-LABEL: define <16 x i8> @swizzle_with_poison( ; CHECK-SAME: <16 x i8> [[V:%.*]]) { ; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[RESULT]] ; - %result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> ) + %result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> ) ret <16 x i8> %result } From 07ed82b03c8a748d51d690f8eec4c9991c7a6126 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Mon, 24 Nov 2025 12:25:27 -0500 Subject: [PATCH 4/5] [WebAssembly] Rearrange new TTI hook code --- llvm/lib/Target/WebAssembly/CMakeLists.txt | 1 - .../WebAssemblyInstCombineIntrinsic.cpp | 107 ------------------ .../WebAssemblyTargetTransformInfo.cpp | 86 ++++++++++++++ .../WebAssemblyTargetTransformInfo.h | 5 +- .../llvm/lib/Target/WebAssembly/BUILD.gn | 1 - 5 files changed, 89 insertions(+), 111 deletions(-) delete mode 100644 llvm/lib/Target/WebAssembly/WebAssemblyInstCombineIntrinsic.cpp diff --git a/llvm/lib/Target/WebAssembly/CMakeLists.txt b/llvm/lib/Target/WebAssembly/CMakeLists.txt index 13fff96fc6a33..17df119d62709 100644 --- a/llvm/lib/Target/WebAssembly/CMakeLists.txt +++ b/llvm/lib/Target/WebAssembly/CMakeLists.txt @@ -32,7 +32,6 @@ add_llvm_target(WebAssemblyCodeGen WebAssemblyFixIrreducibleControlFlow.cpp WebAssemblyFixFunctionBitcasts.cpp WebAssemblyFrameLowering.cpp - WebAssemblyInstCombineIntrinsic.cpp WebAssemblyISelDAGToDAG.cpp WebAssemblyISelLowering.cpp WebAssemblyInstrInfo.cpp diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstCombineIntrinsic.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstCombineIntrinsic.cpp deleted file mode 100644 index 2fa00b3c5d50d..0000000000000 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstCombineIntrinsic.cpp +++ /dev/null @@ -1,107 +0,0 @@ -//=== WebAssemblyInstCombineIntrinsic.cpp - -// WebAssembly specific InstCombine pass ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -/// This file implements a TargetTransformInfo analysis pass specific to -/// WebAssembly. It uses the target's detailed information to provide more -/// precise answers to certain TTI queries, while letting the target independent -/// and default TTI implementations handle the rest. -/// -//===----------------------------------------------------------------------===// - -#include "WebAssemblyTargetTransformInfo.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/IntrinsicsWebAssembly.h" -#include "llvm/Transforms/InstCombine/InstCombiner.h" -#include - -using namespace llvm; -using namespace llvm::PatternMatch; - -/// Attempt to convert [relaxed_]swizzle to shufflevector if the mask is -/// constant. -static Value *simplifyWasmSwizzle(const IntrinsicInst &II, - InstCombiner::BuilderTy &Builder, - bool IsRelaxed) { - auto *V = dyn_cast(II.getArgOperand(1)); - if (!V) - return nullptr; - - auto *VecTy = cast(II.getType()); - unsigned NumElts = VecTy->getNumElements(); - assert(NumElts == 16); - - // Construct a shuffle mask from constant integers or UNDEFs. - int Indexes[16]; - bool AnyOutOfBounds = false; - - for (unsigned I = 0; I < NumElts; ++I) { - Constant *COp = V->getAggregateElement(I); - if (!COp || (!isa(COp) && !isa(COp))) - return nullptr; - - if (isa(COp)) { - Indexes[I] = -1; - continue; - } - - int64_t Index = cast(COp)->getSExtValue(); - - if (Index >= NumElts && IsRelaxed) { - // For lane indices above 15, the relaxed_swizzle operation can choose - // between returning 0 or the lane at `Index % 16`. However, the choice - // must be made consistently. As the WebAssembly spec states: - // - // "The result of relaxed operators are implementation-dependent, because - // the set of possible results may depend on properties of the host - // environment, such as its hardware. Technically, their behaviour is - // controlled by a set of global parameters to the semantics that an - // implementation can instantiate in different ways. These choices are - // fixed, that is, parameters are constant during the execution of any - // given program." - // - // The WebAssembly runtime may choose differently from us, so we can't - // optimize a relaxed swizzle with lane indices above 15. - return nullptr; - } - - if (Index >= NumElts || Index < 0) { - AnyOutOfBounds = true; - // If there are out-of-bounds indices, the swizzle instruction returns - // zeroes in those lanes. We'll provide an all-zeroes vector as the - // second argument to shufflevector and read the first element from it. - Indexes[I] = NumElts; - continue; - } - - Indexes[I] = Index; - } - - auto *V1 = II.getArgOperand(0); - auto *V2 = - AnyOutOfBounds ? Constant::getNullValue(VecTy) : PoisonValue::get(VecTy); - - return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, NumElts)); -} - -std::optional -WebAssemblyTTIImpl::instCombineIntrinsic(InstCombiner &IC, - IntrinsicInst &II) const { - Intrinsic::ID IID = II.getIntrinsicID(); - switch (IID) { - case Intrinsic::wasm_swizzle: - case Intrinsic::wasm_relaxed_swizzle: - if (Value *V = simplifyWasmSwizzle( - II, IC.Builder, IID == Intrinsic::wasm_relaxed_swizzle)) { - return IC.replaceInstUsesWith(II, V); - } - break; - } - - return std::nullopt; -} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index 70f7b889551a4..c9c1f4948e081 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -13,6 +13,9 @@ //===----------------------------------------------------------------------===// #include "WebAssemblyTargetTransformInfo.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IntrinsicsWebAssembly.h" +#include "llvm/Transforms/InstCombine/InstCombiner.h" #include "llvm/CodeGen/CostTable.h" using namespace llvm; @@ -493,3 +496,86 @@ bool WebAssemblyTTIImpl::isProfitableToSinkOperands( return false; } + +/// Attempt to convert [relaxed_]swizzle to shufflevector if the mask is +/// constant. +static Value *simplifyWasmSwizzle(const IntrinsicInst &II, + InstCombiner::BuilderTy &Builder, + bool IsRelaxed) { + auto *V = dyn_cast(II.getArgOperand(1)); + if (!V) + return nullptr; + + auto *VecTy = cast(II.getType()); + unsigned NumElts = VecTy->getNumElements(); + assert(NumElts == 16); + + // Construct a shuffle mask from constant integers or UNDEFs. + int Indexes[16]; + bool AnyOutOfBounds = false; + + for (unsigned I = 0; I < NumElts; ++I) { + Constant *COp = V->getAggregateElement(I); + if (!COp || (!isa(COp) && !isa(COp))) + return nullptr; + + if (isa(COp)) { + Indexes[I] = -1; + continue; + } + + int64_t Index = cast(COp)->getSExtValue(); + + if (Index >= NumElts && IsRelaxed) { + // For lane indices above 15, the relaxed_swizzle operation can choose + // between returning 0 or the lane at `Index % 16`. However, the choice + // must be made consistently. As the WebAssembly spec states: + // + // "The result of relaxed operators are implementation-dependent, because + // the set of possible results may depend on properties of the host + // environment, such as its hardware. Technically, their behaviour is + // controlled by a set of global parameters to the semantics that an + // implementation can instantiate in different ways. These choices are + // fixed, that is, parameters are constant during the execution of any + // given program." + // + // The WebAssembly runtime may choose differently from us, so we can't + // optimize a relaxed swizzle with lane indices above 15. + return nullptr; + } + + if (Index >= NumElts || Index < 0) { + AnyOutOfBounds = true; + // If there are out-of-bounds indices, the swizzle instruction returns + // zeroes in those lanes. We'll provide an all-zeroes vector as the + // second argument to shufflevector and read the first element from it. + Indexes[I] = NumElts; + continue; + } + + Indexes[I] = Index; + } + + auto *V1 = II.getArgOperand(0); + auto *V2 = + AnyOutOfBounds ? Constant::getNullValue(VecTy) : PoisonValue::get(VecTy); + + return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, NumElts)); +} + +std::optional +WebAssemblyTTIImpl::instCombineIntrinsic(InstCombiner &IC, + IntrinsicInst &II) const { + Intrinsic::ID IID = II.getIntrinsicID(); + switch (IID) { + case Intrinsic::wasm_swizzle: + case Intrinsic::wasm_relaxed_swizzle: + if (Value *V = simplifyWasmSwizzle( + II, IC.Builder, IID == Intrinsic::wasm_relaxed_swizzle)) { + return IC.replaceInstUsesWith(II, V); + } + break; + } + + return std::nullopt; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index 11f7efc625399..3e9e8972395ab 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -90,8 +90,6 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase { TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override; - std::optional - instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override; InstructionCost getPartialReductionCost( unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, @@ -105,6 +103,9 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase { bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl &Ops) const override; + std::optional + instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override; + /// @} }; diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn index 8d976a33ce9db..11a57fcb008cd 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn @@ -54,7 +54,6 @@ static_library("LLVMWebAssemblyCodeGen") { "WebAssemblyFixFunctionBitcasts.cpp", "WebAssemblyFixIrreducibleControlFlow.cpp", "WebAssemblyFrameLowering.cpp", - "WebAssemblyInstCombineIntrinsic.cpp", "WebAssemblyISelDAGToDAG.cpp", "WebAssemblyISelLowering.cpp", "WebAssemblyInstrInfo.cpp", From 14300e4d65b37701dba81cd3ab8e37a46be0262c Mon Sep 17 00:00:00 2001 From: valadaptive Date: Fri, 28 Nov 2025 11:30:56 -0500 Subject: [PATCH 5/5] [WebAssembly] Treat swizzle indices as unsigned more --- .../WebAssemblyTargetTransformInfo.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index c9c1f4948e081..e3d01075ed5e9 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -524,12 +524,12 @@ static Value *simplifyWasmSwizzle(const IntrinsicInst &II, continue; } - int64_t Index = cast(COp)->getSExtValue(); - - if (Index >= NumElts && IsRelaxed) { - // For lane indices above 15, the relaxed_swizzle operation can choose - // between returning 0 or the lane at `Index % 16`. However, the choice - // must be made consistently. As the WebAssembly spec states: + if (IsRelaxed && cast(COp)->getSExtValue() >= NumElts) { + // The relaxed_swizzle operation always returns 0 if the lane index is + // less than 0 when interpreted as a signed value. For lane indices above + // 15, however, it can choose between returning 0 or the lane at `Index % + // 16`. However, the choice must be made consistently. As the WebAssembly + // spec states: // // "The result of relaxed operators are implementation-dependent, because // the set of possible results may depend on properties of the host @@ -544,7 +544,8 @@ static Value *simplifyWasmSwizzle(const IntrinsicInst &II, return nullptr; } - if (Index >= NumElts || Index < 0) { + uint64_t Index = cast(COp)->getZExtValue(); + if (Index >= NumElts) { AnyOutOfBounds = true; // If there are out-of-bounds indices, the swizzle instruction returns // zeroes in those lanes. We'll provide an all-zeroes vector as the