7 changes: 5 additions & 2 deletions llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -12,5 +12,8 @@
///
//===----------------------------------------------------------------------===//

// TODO: Implement SIMD instructions.
// Note: use Requires<[HasSIMD128]>.
let isCommutable = 1 in {
defm ADD : SIMDBinary<add, fadd, "add ">;
defm MUL: SIMDBinary<mul, fmul, "mul ">;
} // isCommutable = 1
defm SUB: SIMDBinary<sub, fsub, "sub ">;
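ADD and MUL sit inside the `let isCommutable = 1` block while SUB stays outside because integer and floating-point add/mul commute and sub does not, so the selector may only swap operands for the first two. A standalone C++ illustration of that property (not part of the patch, just a sanity check of the reasoning):

#include <cassert>

int main() {
  int x = 7, y = 3;
  float a = 1.5f, b = 2.25f;
  // add and mul commute, so swapping their operands is always safe.
  assert(x + y == y + x && x * y == y * x);
  assert(a + b == b + a && a * b == b * a);
  // sub does not commute, which is why SUB is left outside the block.
  assert(x - y != y - x);
  return 0;
}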
31 changes: 30 additions & 1 deletion llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -108,7 +108,8 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {

MachineRegisterInfo &MRI = MF.getRegInfo();
WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
const auto &Subtarget = MF.getSubtarget<WebAssemblySubtarget>();
const auto &TII = *Subtarget.getInstrInfo();
const WebAssemblyTargetLowering &TLI =
*MF.getSubtarget<WebAssemblySubtarget>().getTargetLowering();
auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
@@ -186,6 +187,34 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F64,
WebAssembly::COPY_LOCAL_F64);
break;
case WebAssembly::RETURN_v16i8:
Changed |=
Subtarget.hasSIMD128() &&
MaybeRewriteToFallthrough(MI, MBB, MF, MFI, MRI, TII,
WebAssembly::FALLTHROUGH_RETURN_v16i8,
WebAssembly::COPY_LOCAL_V128);
break;
case WebAssembly::RETURN_v8i16:
Changed |=
Subtarget.hasSIMD128() &&
MaybeRewriteToFallthrough(MI, MBB, MF, MFI, MRI, TII,
WebAssembly::FALLTHROUGH_RETURN_v8i16,
WebAssembly::COPY_LOCAL_V128);
break;
case WebAssembly::RETURN_v4i32:
Changed |=
Subtarget.hasSIMD128() &&
MaybeRewriteToFallthrough(MI, MBB, MF, MFI, MRI, TII,
WebAssembly::FALLTHROUGH_RETURN_v4i32,
WebAssembly::COPY_LOCAL_V128);
break;
case WebAssembly::RETURN_v4f32:
Changed |=
Subtarget.hasSIMD128() &&
MaybeRewriteToFallthrough(MI, MBB, MF, MFI, MRI, TII,
WebAssembly::FALLTHROUGH_RETURN_v4f32,
WebAssembly::COPY_LOCAL_V128);
break;
case WebAssembly::RETURN_VOID:
if (!DisableWebAssemblyFallthroughReturnOpt &&
&MBB == &MF.back() && &MI == &MBB.back())
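The four new vector cases above all follow the same shape: gate on `Subtarget.hasSIMD128()` and fold the result of `MaybeRewriteToFallthrough` into `Changed`, with `&&` short-circuiting so nothing runs when SIMD128 is disabled. A minimal standalone sketch of that guard pattern, using stub names in place of the LLVM APIs (the identifiers below are placeholders for illustration, not the real functions):

#include <iostream>

// Stand-in for MaybeRewriteToFallthrough: pretend the rewrite always succeeds.
static bool maybeRewriteToFallthrough(unsigned FallthroughOpc) {
  std::cout << "rewrote return to fallthrough opcode " << FallthroughOpc << "\n";
  return true;
}

int main() {
  bool Changed = false;
  bool HasSIMD128 = false;                   // plays the role of Subtarget.hasSIMD128()
  const unsigned FallthroughReturnV16I8 = 1; // placeholder opcode value

  // Mirrors: Changed |= Subtarget.hasSIMD128() && MaybeRewriteToFallthrough(...);
  // With SIMD128 disabled, && short-circuits and the rewrite never runs.
  Changed |= HasSIMD128 && maybeRewriteToFallthrough(FallthroughReturnV16I8);
  std::cout << (Changed ? "changed\n" : "unchanged\n");
  return 0;
}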
@@ -65,6 +65,10 @@ static bool IsArgument(const MachineInstr *MI) {
case WebAssembly::ARGUMENT_I64:
case WebAssembly::ARGUMENT_F32:
case WebAssembly::ARGUMENT_F64:
case WebAssembly::ARGUMENT_v16i8:
case WebAssembly::ARGUMENT_v8i16:
case WebAssembly::ARGUMENT_v4i32:
case WebAssembly::ARGUMENT_v4f32:
return true;
default:
return false;
@@ -73,7 +77,7 @@

// Test whether the given register has an ARGUMENT def.
static bool HasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
for (auto &Def : MRI.def_instructions(Reg))
for (const auto &Def : MRI.def_instructions(Reg))
if (IsArgument(&Def))
return true;
return false;
6 changes: 5 additions & 1 deletion llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
@@ -72,7 +72,11 @@ bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) {
case WebAssembly::ARGUMENT_I32:
case WebAssembly::ARGUMENT_I64:
case WebAssembly::ARGUMENT_F32:
case WebAssembly::ARGUMENT_F64: {
case WebAssembly::ARGUMENT_F64:
case WebAssembly::ARGUMENT_v16i8:
case WebAssembly::ARGUMENT_v8i16:
case WebAssembly::ARGUMENT_v4i32:
case WebAssembly::ARGUMENT_v4f32: {
int64_t Imm = MI.getOperand(1).getImm();
DEBUG(dbgs() << "Arg VReg " << MI.getOperand(0).getReg() << " -> WAReg "
<< Imm << "\n");
8 changes: 7 additions & 1 deletion llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -418,6 +418,8 @@ static unsigned GetTeeLocalOpcode(const TargetRegisterClass *RC) {
return WebAssembly::TEE_LOCAL_F32;
if (RC == &WebAssembly::F64RegClass)
return WebAssembly::TEE_LOCAL_F64;
if (RC == &WebAssembly::V128RegClass)
return WebAssembly::TEE_LOCAL_V128;
llvm_unreachable("Unexpected register class");
}

@@ -765,7 +767,11 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 ||
Def->getOpcode() == WebAssembly::ARGUMENT_I64 ||
Def->getOpcode() == WebAssembly::ARGUMENT_F32 ||
Def->getOpcode() == WebAssembly::ARGUMENT_F64)
Def->getOpcode() == WebAssembly::ARGUMENT_F64 ||
Def->getOpcode() == WebAssembly::ARGUMENT_v16i8 ||
Def->getOpcode() == WebAssembly::ARGUMENT_v8i16 ||
Def->getOpcode() == WebAssembly::ARGUMENT_v4i32 ||
Def->getOpcode() == WebAssembly::ARGUMENT_v4f32)
continue;

// Decide which strategy to take. Prefer to move a single-use value
4 changes: 4 additions & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
@@ -39,6 +39,8 @@ def SP64 : WebAssemblyReg<"%SP64">;
def F32_0 : WebAssemblyReg<"%f32.0">;
def F64_0 : WebAssemblyReg<"%f64.0">;

def V128_0: WebAssemblyReg<"%v128">;

// The expression stack "register". This is an opaque entity which serves to
// order uses and defs that must remain in LIFO order.
def EXPR_STACK : WebAssemblyReg<"STACK">;
@@ -56,3 +58,5 @@ def I32 : WebAssemblyRegClass<[i32], 32, (add FP32, SP32)>;
def I64 : WebAssemblyRegClass<[i64], 64, (add FP64, SP64)>;
def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>;
def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>;
def V128 : WebAssemblyRegClass<[v4f32, v4i32, v16i8, v8i16], 128, (add V128_0)>;

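A single V128 class can carry v4f32, v4i32, v16i8, and v8i16 because they are just different lane interpretations of the same 128-bit payload. A small standalone C++ illustration of that reinterpretation (host code, not LLVM; the lane order shown assumes a little-endian host, matching wasm's byte order):

#include <array>
#include <cstdint>
#include <cstring>
#include <iostream>

int main() {
  std::array<std::uint32_t, 4> I32Lanes = {1, 2, 3, 4}; // the v4i32 view
  std::array<std::uint16_t, 8> I16Lanes{};              // the v8i16 view
  static_assert(sizeof(I32Lanes) == 16 && sizeof(I16Lanes) == 16,
                "both views cover the same 128 bits");

  // Reinterpret the identical 128-bit payload as eight i16 lanes.
  std::memcpy(I16Lanes.data(), I32Lanes.data(), 16);
  for (std::uint16_t Lane : I16Lanes)
    std::cout << Lane << ' ';  // prints "1 0 2 0 3 0 4 0" on a little-endian host
  std::cout << '\n';
  return 0;
}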
158 changes: 158 additions & 0 deletions llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -0,0 +1,158 @@
; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128
; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128
; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -mattr=-simd128 | FileCheck %s --check-prefixes CHECK,NO-SIMD128
; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -mattr=-simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,NO-SIMD128

; Test that basic SIMD128 arithmetic operations assemble as expected.

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

declare i32 @llvm.ctlz.i32(i32, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i32 @llvm.ctpop.i32(i32)

; ==============================================================================
; 16 x i8
; ==============================================================================
; CHECK-LABEL: add_v16i8
; NO-SIMD128-NOT: i8x16
; SIMD128: .param v128, v128{{$}}
; SIMD128: .result v128{{$}}
; SIMD128: i8x16.add $push0=, $0, $1{{$}}
; SIMD128: return $pop0{{$}}
define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) {
%a = add <16 x i8> %x, %y
ret <16 x i8> %a
}

; CHECK-LABEL: sub_v16i8
; NO-SIMD128-NOT: i8x16
; SIMD128: .param v128, v128{{$}}
; SIMD128: .result v128{{$}}
; SIMD128: i8x16.sub $push0=, $0, $1{{$}}
; SIMD128: return $pop0{{$}}
define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) {
%a = sub <16 x i8> %x, %y
ret <16 x i8> %a
}

; CHECK-LABEL: mul_v16i8
; NO-SIMD128-NOT: i8x16
; SIMD128: .param v128, v128{{$}}
; SIMD128: .result v128{{$}}
; SIMD128: i8x16.mul $push0=, $0, $1{{$}}
; SIMD128: return $pop0{{$}}
define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) {
%a = mul <16 x i8> %x, %y
ret <16 x i8> %a
}

; ==============================================================================
; 8 x i16
; ==============================================================================
; CHECK-LABEL: add_v8i16
; NO-SIMD128-NOT: i16x8
; SIMD128: .param v128, v128{{$}}
; SIMD128: .result v128{{$}}
; SIMD128: i16x8.add $push0=, $0, $1{{$}}
; SIMD128: return $pop0{{$}}
define <8 x i16> @add_v8i16(<8 x i16> %x, <8 x i16> %y) {
%a = add <8 x i16> %x, %y
ret <8 x i16> %a
}

; CHECK-LABEL: sub_v8i16
; NO-SIMD128-NOT: i16x8
; SIMD128: .param v128, v128{{$}}
; SIMD128: .result v128{{$}}
; SIMD128: i16x8.sub $push0=, $0, $1{{$}}
; SIMD128: return $pop0{{$}}
define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) {
%a = sub <8 x i16> %x, %y
ret <8 x i16> %a
}

; CHECK-LABEL: mul_v8i16
; NO-SIMD128-NOT: i16x8
; SIMD128: .param v128, v128{{$}}
; SIMD128: .result v128{{$}}
; SIMD128: i16x8.mul $push0=, $0, $1{{$}}
; SIMD128: return $pop0{{$}}
define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) {
%a = mul <8 x i16> %x, %y
ret <8 x i16> %a
}

; ==============================================================================
; 4 x i32
; ==============================================================================
; CHECK-LABEL: add_v4i32
; NO-SIMD128-NOT: i32x4
; SIMD128: .param v128, v128{{$}}
; SIMD128: .result v128{{$}}
; SIMD128: i32x4.add $push0=, $0, $1{{$}}
; SIMD128: return $pop0{{$}}
define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) {
%a = add <4 x i32> %x, %y
ret <4 x i32> %a
}

; CHECK-LABEL: sub_v4i32
; NO-SIMD128-NOT: i32x4
; SIMD128: .param v128, v128{{$}}
; SIMD128: .result v128{{$}}
; SIMD128: i32x4.sub $push0=, $0, $1{{$}}
; SIMD128: return $pop0{{$}}
define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) {
%a = sub <4 x i32> %x, %y
ret <4 x i32> %a
}

; CHECK-LABEL: mul_v4i32
; NO-SIMD128-NOT: i32x4
; SIMD128: .param v128, v128{{$}}
; SIMD128: .result v128{{$}}
; SIMD128: i32x4.mul $push0=, $0, $1{{$}}
; SIMD128: return $pop0{{$}}
define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) {
%a = mul <4 x i32> %x, %y
ret <4 x i32> %a
}

; ==============================================================================
; 4 x float
; ==============================================================================
; CHECK-LABEL: add_v4f32
; NO-SIMD128-NOT: f32x4
; SIMD128: .param v128, v128{{$}}
; SIMD128: .result v128{{$}}
; SIMD128: f32x4.add $push0=, $0, $1{{$}}
; SIMD128: return $pop0{{$}}
define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) {
%a = fadd <4 x float> %x, %y
ret <4 x float> %a
}

; CHECK-LABEL: sub_v4f32
; NO-SIMD128-NOT: f32x4
; SIMD128: .param v128, v128{{$}}
; SIMD128: .result v128{{$}}
; SIMD128: f32x4.sub $push0=, $0, $1{{$}}
; SIMD128: return $pop0{{$}}
define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) {
%a = fsub <4 x float> %x, %y
ret <4 x float> %a
}

; CHECK-LABEL: mul_v4f32
; NO-SIMD128-NOT: f32x4
; SIMD128: .param v128, v128{{$}}
; SIMD128: .result v128{{$}}
; SIMD128: f32x4.mul $push0=, $0, $1{{$}}
; SIMD128: return $pop0{{$}}
define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) {
%a = fmul <4 x float> %x, %y
ret <4 x float> %a
}