Skip to content

Commit

Permalink
[WebAssembly][NFC] Simplify vector shift lowering and add tests
Browse files Browse the repository at this point in the history
This patch builds on 0d7286a by simplifying the code for detecting
splat values and adding new tests demonstrating the lowering of
splatted absolute value shift amounts, which are common in code
generated by Halide. The lowering is currently very poor (the vector shift
is unrolled into per-lane scalar shifts), but subsequent patches will
improve it considerably. The tests will be useful for evaluating the
improvements in those patches.

Reviewed By: aheejin

Differential Revision: https://reviews.llvm.org/D83493
  • Loading branch information
tlively committed Jul 10, 2020
1 parent eb6b7c5 commit 043eaa9
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 8 deletions.
13 changes: 5 additions & 8 deletions llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
Expand Up @@ -1677,12 +1677,12 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
// Only manually lower vector shifts
assert(Op.getSimpleValueType().isVector());

auto ShiftVal = Op.getOperand(1);
if (!DAG.isSplatValue(ShiftVal, /*AllowUndefs=*/true))
auto ShiftVal = DAG.getSplatValue(Op.getOperand(1));
if (!ShiftVal)
return unrollVectorShift(Op, DAG);

auto SplatVal = DAG.getSplatValue(ShiftVal);
assert(SplatVal != SDValue());
// Use anyext because none of the high bits can affect the shift
ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);

unsigned Opcode;
switch (Op.getOpcode()) {
Expand All @@ -1699,10 +1699,7 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
llvm_unreachable("unexpected opcode");
}

// Use anyext because none of the high bits can affect the shift
auto ScalarShift = DAG.getAnyExtOrTrunc(SplatVal, DL, MVT::i32);
return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0),
ScalarShift);
return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
}

//===----------------------------------------------------------------------===//
Expand Down
76 changes: 76 additions & 0 deletions llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll
Expand Up @@ -25,3 +25,79 @@ define <16 x i8> @shl_add(<16 x i8> %v, i8 %a, i8 %b) {
%r = shl <16 x i8> %v, %shift
ret <16 x i8> %r
}

; CHECK-LABEL: shl_abs:
; CHECK-NEXT: .functype shl_abs (v128, i32) -> (v128)
; CHECK-NEXT: i8x16.extract_lane_u $push8=, $0, 0
; CHECK-NEXT: i8x16.splat $push0=, $1
; CHECK-NEXT: i8x16.abs $push98=, $pop0
; CHECK-NEXT: local.tee $push97=, $2=, $pop98
; CHECK-NEXT: i8x16.extract_lane_u $push6=, $pop97, 0
; CHECK-NEXT: i32.const $push2=, 7
; CHECK-NEXT: i32.and $push7=, $pop6, $pop2
; CHECK-NEXT: i32.shl $push9=, $pop8, $pop7
; CHECK-NEXT: i8x16.splat $push10=, $pop9
; CHECK-NEXT: i8x16.extract_lane_u $push4=, $0, 1
; CHECK-NEXT: i8x16.extract_lane_u $push1=, $2, 1
; CHECK-NEXT: i32.const $push96=, 7
; CHECK-NEXT: i32.and $push3=, $pop1, $pop96
; CHECK-NEXT: i32.shl $push5=, $pop4, $pop3
; CHECK-NEXT: i8x16.replace_lane $push11=, $pop10, 1, $pop5
; ...
; CHECK: i8x16.extract_lane_u $push79=, $0, 15
; CHECK-NEXT: i8x16.extract_lane_u $push77=, $2, 15
; CHECK-NEXT: i32.const $push82=, 7
; CHECK-NEXT: i32.and $push78=, $pop77, $pop82
; CHECK-NEXT: i32.shl $push80=, $pop79, $pop78
; CHECK-NEXT: i8x16.replace_lane $push81=, $pop76, 15, $pop80
; CHECK-NEXT: return $pop81
; Shifts every lane of %v left by the absolute value of a splat of the scalar
; %a. The absolute value is spelled out as a negate/compare/select sequence
; rather than as a single operation.
define <16 x i8> @shl_abs(<16 x i8> %v, i8 %a) {
; Splat %a: place it in lane 0, then shuffle with an all-zero mask so that
; every result lane reads lane 0.
%t1 = insertelement <16 x i8> undef, i8 %a, i32 0
%va = shufflevector <16 x i8> %t1, <16 x i8> undef, <16 x i32> zeroinitializer
; Lane-wise absolute value: keep %va where it is greater than zero, otherwise
; take 0 - %va.
%nva = sub <16 x i8> zeroinitializer, %va
%c = icmp sgt <16 x i8> %va, zeroinitializer
%shift = select <16 x i1> %c, <16 x i8> %va, <16 x i8> %nva
; The vector shift whose shift-amount operand is the value computed above.
%r = shl <16 x i8> %v, %shift
ret <16 x i8> %r
}

; CHECK-LABEL: shl_abs_add:
; CHECK-NEXT: .functype shl_abs_add (v128, i32, i32) -> (v128)
; CHECK-NEXT: i8x16.extract_lane_u $push11=, $0, 0
; CHECK-NEXT: i8x16.splat $push1=, $1
; CHECK-NEXT: i8x16.splat $push0=, $2
; CHECK-NEXT: i8x16.add $push2=, $pop1, $pop0
; CHECK-NEXT: v8x16.shuffle $push3=, $pop2, $0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: i8x16.abs $push101=, $pop3
; CHECK-NEXT: local.tee $push100=, $3=, $pop101
; CHECK-NEXT: i8x16.extract_lane_u $push9=, $pop100, 0
; CHECK-NEXT: i32.const $push5=, 7
; CHECK-NEXT: i32.and $push10=, $pop9, $pop5
; CHECK-NEXT: i32.shl $push12=, $pop11, $pop10
; CHECK-NEXT: i8x16.splat $push13=, $pop12
; CHECK-NEXT: i8x16.extract_lane_u $push7=, $0, 1
; CHECK-NEXT: i8x16.extract_lane_u $push4=, $3, 1
; CHECK-NEXT: i32.const $push99=, 7
; CHECK-NEXT: i32.and $push6=, $pop4, $pop99
; CHECK-NEXT: i32.shl $push8=, $pop7, $pop6
; CHECK-NEXT: i8x16.replace_lane $push14=, $pop13, 1, $pop8
; ...
; CHECK: i8x16.extract_lane_u $push82=, $0, 15
; CHECK-NEXT: i8x16.extract_lane_u $push80=, $3, 15
; CHECK-NEXT: i32.const $push85=, 7
; CHECK-NEXT: i32.and $push81=, $pop80, $pop85
; CHECK-NEXT: i32.shl $push83=, $pop82, $pop81
; CHECK-NEXT: i8x16.replace_lane $push84=, $pop79, 15, $pop83
; CHECK-NEXT: return $pop84
; Shifts every lane of %v left by the absolute value of (splat %a + splat %b).
; The addition is performed on the two splatted vectors, and the absolute value
; is spelled out as a negate/compare/select sequence.
define <16 x i8> @shl_abs_add(<16 x i8> %v, i8 %a, i8 %b) {
; Splat %a: place it in lane 0, then shuffle with an all-zero mask so that
; every result lane reads lane 0.
%t1 = insertelement <16 x i8> undef, i8 %a, i32 0
%va = shufflevector <16 x i8> %t1, <16 x i8> undef, <16 x i32> zeroinitializer
; Splat %b the same way.
%t2 = insertelement <16 x i8> undef, i8 %b, i32 0
%vb = shufflevector <16 x i8> %t2, <16 x i8> undef, <16 x i32> zeroinitializer
; Vector add of the two splats.
%vadd = add <16 x i8> %va, %vb
; Lane-wise absolute value of the sum: keep %vadd where it is greater than
; zero, otherwise take 0 - %vadd.
%nvadd = sub <16 x i8> zeroinitializer, %vadd
%c = icmp sgt <16 x i8> %vadd, zeroinitializer
%shift = select <16 x i1> %c, <16 x i8> %vadd, <16 x i8> %nvadd
; The vector shift whose shift-amount operand is the value computed above.
%r = shl <16 x i8> %v, %shift
ret <16 x i8> %r
}

0 comments on commit 043eaa9

Please sign in to comment.