Skip to content

Commit

Permalink
[PowerPC] Fix the xxperm swap requirements
Browse files Browse the repository at this point in the history
This patch fixes the xxperm vector operand swap condition so that the
single-use operand is placed in V2, which prevents an extra copy. It also
fixes the subtarget condition used to exploit xxperm.

Reviewed By: stefanp

Differential Revision: https://reviews.llvm.org/D146632
  • Loading branch information
Maryam Moghadas committed Apr 6, 2023
1 parent 8c88565 commit cf0395f
Show file tree
Hide file tree
Showing 5 changed files with 351 additions and 269 deletions.
10 changes: 6 additions & 4 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Expand Up @@ -10228,14 +10228,16 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
if (isLittleEndian)
std::swap(V1, V2);

if (Subtarget.isISA3_0() && (V1->hasOneUse() || V2->hasOneUse())) {
if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
(V1->hasOneUse() || V2->hasOneUse())) {
LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
"XXPERM instead\n");
Opcode = PPCISD::XXPERM;

// if V2 is dead, then we swap V1 and V2 so we can
// use V2 as the destination instead.
if (!V1->hasOneUse() && V2->hasOneUse()) {
// The second input to XXPERM is also an output so if the second input has
// multiple uses then copying is necessary, as a result we want the
// single-use operand to be used as the second input to prevent copying.
if (!V2->hasOneUse() && V1->hasOneUse()) {
std::swap(V1, V2);
NeedSwap = !NeedSwap;
}
Expand Down
191 changes: 89 additions & 102 deletions llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
Expand Up @@ -137,85 +137,81 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9LE-LABEL: test32:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: add 5, 3, 4
; P9LE-NEXT: lfiwzx 0, 3, 4
; P9LE-NEXT: lxsiwzx 2, 3, 4
; P9LE-NEXT: addis 3, 2, .LCPI1_0@toc@ha
; P9LE-NEXT: xxlxor 3, 3, 3
; P9LE-NEXT: xxlxor 0, 0, 0
; P9LE-NEXT: vspltisw 4, 8
; P9LE-NEXT: addi 3, 3, .LCPI1_0@toc@l
; P9LE-NEXT: lxv 1, 0(3)
; P9LE-NEXT: li 3, 4
; P9LE-NEXT: xxlxor 2, 2, 2
; P9LE-NEXT: vadduwm 4, 4, 4
; P9LE-NEXT: xxperm 3, 0, 1
; P9LE-NEXT: lfiwzx 0, 5, 3
; P9LE-NEXT: lxsiwzx 3, 5, 3
; P9LE-NEXT: xxperm 2, 0, 1
; P9LE-NEXT: vnegw 2, 2
; P9LE-NEXT: vslw 2, 2, 4
; P9LE-NEXT: vsubuwm 2, 2, 3
; P9LE-NEXT: xxperm 3, 0, 1
; P9LE-NEXT: vnegw 3, 3
; P9LE-NEXT: vslw 3, 3, 4
; P9LE-NEXT: vsubuwm 2, 3, 2
; P9LE-NEXT: xxswapd 0, 2
; P9LE-NEXT: stxv 0, 0(3)
; P9LE-NEXT: blr
;
; P9BE-LABEL: test32:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: add 5, 3, 4
; P9BE-NEXT: lfiwzx 0, 3, 4
; P9BE-NEXT: lxsiwzx 2, 3, 4
; P9BE-NEXT: addis 3, 2, .LCPI1_0@toc@ha
; P9BE-NEXT: xxlxor 3, 3, 3
; P9BE-NEXT: xxlxor 0, 0, 0
; P9BE-NEXT: vspltisw 4, 8
; P9BE-NEXT: addi 3, 3, .LCPI1_0@toc@l
; P9BE-NEXT: lxv 1, 0(3)
; P9BE-NEXT: li 3, 4
; P9BE-NEXT: xxlxor 2, 2, 2
; P9BE-NEXT: vadduwm 4, 4, 4
; P9BE-NEXT: xxperm 3, 0, 1
; P9BE-NEXT: lfiwzx 0, 5, 3
; P9BE-NEXT: lxsiwzx 3, 5, 3
; P9BE-NEXT: xxperm 2, 0, 1
; P9BE-NEXT: vnegw 2, 2
; P9BE-NEXT: vslw 2, 2, 4
; P9BE-NEXT: vsubuwm 2, 2, 3
; P9BE-NEXT: xxperm 3, 0, 1
; P9BE-NEXT: vnegw 3, 3
; P9BE-NEXT: vslw 3, 3, 4
; P9BE-NEXT: vsubuwm 2, 3, 2
; P9BE-NEXT: xxswapd 0, 2
; P9BE-NEXT: stxv 0, 0(3)
; P9BE-NEXT: blr
;
; P9BE-AIX-LABEL: test32:
; P9BE-AIX: # %bb.0: # %entry
; P9BE-AIX-NEXT: add 5, 3, 4
; P9BE-AIX-NEXT: lfiwzx 0, 3, 4
; P9BE-AIX-NEXT: lxsiwzx 2, 3, 4
; P9BE-AIX-NEXT: ld 3, L..C2(2) # %const.0
; P9BE-AIX-NEXT: xxlxor 3, 3, 3
; P9BE-AIX-NEXT: xxlxor 2, 2, 2
; P9BE-AIX-NEXT: xxlxor 0, 0, 0
; P9BE-AIX-NEXT: vspltisw 4, 8
; P9BE-AIX-NEXT: vadduwm 4, 4, 4
; P9BE-AIX-NEXT: lxv 1, 0(3)
; P9BE-AIX-NEXT: li 3, 4
; P9BE-AIX-NEXT: xxperm 3, 0, 1
; P9BE-AIX-NEXT: lfiwzx 0, 5, 3
; P9BE-AIX-NEXT: vadduwm 4, 4, 4
; P9BE-AIX-NEXT: lxsiwzx 3, 5, 3
; P9BE-AIX-NEXT: xxperm 2, 0, 1
; P9BE-AIX-NEXT: vnegw 2, 2
; P9BE-AIX-NEXT: vslw 2, 2, 4
; P9BE-AIX-NEXT: vsubuwm 2, 2, 3
; P9BE-AIX-NEXT: xxperm 3, 0, 1
; P9BE-AIX-NEXT: vnegw 3, 3
; P9BE-AIX-NEXT: vslw 3, 3, 4
; P9BE-AIX-NEXT: vsubuwm 2, 3, 2
; P9BE-AIX-NEXT: xxswapd 0, 2
; P9BE-AIX-NEXT: stxv 0, 0(3)
; P9BE-AIX-NEXT: blr
;
; P9BE-AIX32-LABEL: test32:
; P9BE-AIX32: # %bb.0: # %entry
; P9BE-AIX32-NEXT: add 5, 3, 4
; P9BE-AIX32-NEXT: lfiwzx 0, 3, 4
; P9BE-AIX32-NEXT: lxsiwzx 2, 3, 4
; P9BE-AIX32-NEXT: lwz 3, L..C2(2) # %const.0
; P9BE-AIX32-NEXT: xxlxor 3, 3, 3
; P9BE-AIX32-NEXT: xxlxor 2, 2, 2
; P9BE-AIX32-NEXT: xxlxor 0, 0, 0
; P9BE-AIX32-NEXT: vspltisw 4, 8
; P9BE-AIX32-NEXT: vadduwm 4, 4, 4
; P9BE-AIX32-NEXT: lxv 1, 0(3)
; P9BE-AIX32-NEXT: li 3, 4
; P9BE-AIX32-NEXT: xxperm 3, 0, 1
; P9BE-AIX32-NEXT: lfiwzx 0, 5, 3
; P9BE-AIX32-NEXT: vadduwm 4, 4, 4
; P9BE-AIX32-NEXT: lxsiwzx 3, 5, 3
; P9BE-AIX32-NEXT: xxperm 2, 0, 1
; P9BE-AIX32-NEXT: vnegw 2, 2
; P9BE-AIX32-NEXT: vslw 2, 2, 4
; P9BE-AIX32-NEXT: vsubuwm 2, 2, 3
; P9BE-AIX32-NEXT: xxperm 3, 0, 1
; P9BE-AIX32-NEXT: vnegw 3, 3
; P9BE-AIX32-NEXT: vslw 3, 3, 4
; P9BE-AIX32-NEXT: vsubuwm 2, 3, 2
; P9BE-AIX32-NEXT: xxswapd 0, 2
; P9BE-AIX32-NEXT: stxv 0, 0(3)
; P9BE-AIX32-NEXT: blr
Expand Down Expand Up @@ -272,25 +268,24 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
; P9BE-NEXT: sldi 4, 4, 1
; P9BE-NEXT: li 7, 16
; P9BE-NEXT: add 6, 3, 4
; P9BE-NEXT: lxsihzx 0, 6, 7
; P9BE-NEXT: lxsihzx 1, 3, 4
; P9BE-NEXT: addis 3, 2, .LCPI2_1@toc@ha
; P9BE-NEXT: lxsihzx 2, 6, 7
; P9BE-NEXT: addis 6, 2, .LCPI2_0@toc@ha
; P9BE-NEXT: addi 3, 3, .LCPI2_1@toc@l
; P9BE-NEXT: addi 6, 6, .LCPI2_0@toc@l
; P9BE-NEXT: lxv 1, 0(6)
; P9BE-NEXT: lxv 0, 0(6)
; P9BE-NEXT: li 6, 0
; P9BE-NEXT: mtvsrwz 2, 6
; P9BE-NEXT: vmr 3, 2
; P9BE-NEXT: vsplth 4, 2, 3
; P9BE-NEXT: xxperm 3, 0, 1
; P9BE-NEXT: lxsihzx 0, 3, 4
; P9BE-NEXT: addis 3, 2, .LCPI2_1@toc@ha
; P9BE-NEXT: addi 3, 3, .LCPI2_1@toc@l
; P9BE-NEXT: xxperm 2, 0, 1
; P9BE-NEXT: mtvsrwz 3, 6
; P9BE-NEXT: xxperm 2, 3, 0
; P9BE-NEXT: xxperm 1, 3, 0
; P9BE-NEXT: vsplth 3, 3, 3
; P9BE-NEXT: lxv 0, 0(3)
; P9BE-NEXT: li 3, 0
; P9BE-NEXT: xxmrghw 2, 4, 2
; P9BE-NEXT: xxperm 3, 2, 0
; P9BE-NEXT: xxspltw 2, 3, 1
; P9BE-NEXT: vadduwm 2, 3, 2
; P9BE-NEXT: xxmrghw 3, 3, 1
; P9BE-NEXT: xxperm 2, 3, 0
; P9BE-NEXT: xxspltw 3, 2, 1
; P9BE-NEXT: vadduwm 2, 2, 3
; P9BE-NEXT: vextuwlx 3, 3, 2
; P9BE-NEXT: cmpw 3, 5
; P9BE-NEXT: bgelr+ 0
Expand All @@ -301,23 +296,22 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
; P9BE-AIX-NEXT: sldi 4, 4, 1
; P9BE-AIX-NEXT: li 7, 16
; P9BE-AIX-NEXT: add 6, 3, 4
; P9BE-AIX-NEXT: lxsihzx 0, 6, 7
; P9BE-AIX-NEXT: ld 6, L..C3(2) # %const.0
; P9BE-AIX-NEXT: lxv 1, 0(6)
; P9BE-AIX-NEXT: lxsihzx 1, 3, 4
; P9BE-AIX-NEXT: ld 3, L..C3(2) # %const.1
; P9BE-AIX-NEXT: lxsihzx 2, 6, 7
; P9BE-AIX-NEXT: ld 6, L..C4(2) # %const.0
; P9BE-AIX-NEXT: lxv 0, 0(6)
; P9BE-AIX-NEXT: li 6, 0
; P9BE-AIX-NEXT: mtvsrwz 2, 6
; P9BE-AIX-NEXT: vmr 3, 2
; P9BE-AIX-NEXT: vsplth 4, 2, 3
; P9BE-AIX-NEXT: xxperm 3, 0, 1
; P9BE-AIX-NEXT: lxsihzx 0, 3, 4
; P9BE-AIX-NEXT: ld 3, L..C4(2) # %const.1
; P9BE-AIX-NEXT: xxperm 2, 0, 1
; P9BE-AIX-NEXT: mtvsrwz 3, 6
; P9BE-AIX-NEXT: xxperm 2, 3, 0
; P9BE-AIX-NEXT: xxperm 1, 3, 0
; P9BE-AIX-NEXT: vsplth 3, 3, 3
; P9BE-AIX-NEXT: lxv 0, 0(3)
; P9BE-AIX-NEXT: li 3, 0
; P9BE-AIX-NEXT: xxmrghw 2, 4, 2
; P9BE-AIX-NEXT: xxperm 3, 2, 0
; P9BE-AIX-NEXT: xxspltw 2, 3, 1
; P9BE-AIX-NEXT: vadduwm 2, 3, 2
; P9BE-AIX-NEXT: xxmrghw 3, 3, 1
; P9BE-AIX-NEXT: xxperm 2, 3, 0
; P9BE-AIX-NEXT: xxspltw 3, 2, 1
; P9BE-AIX-NEXT: vadduwm 2, 2, 3
; P9BE-AIX-NEXT: vextuwlx 3, 3, 2
; P9BE-AIX-NEXT: cmpw 3, 5
; P9BE-AIX-NEXT: bgelr+ 0
Expand Down Expand Up @@ -413,29 +407,26 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: add 6, 3, 4
; P9BE-NEXT: li 7, 8
; P9BE-NEXT: lxsibzx 3, 3, 4
; P9BE-NEXT: addis 3, 2, .LCPI3_1@toc@ha
; P9BE-NEXT: lxsibzx 0, 6, 7
; P9BE-NEXT: addis 6, 2, .LCPI3_0@toc@ha
; P9BE-NEXT: addi 3, 3, .LCPI3_1@toc@l
; P9BE-NEXT: addi 6, 6, .LCPI3_0@toc@l
; P9BE-NEXT: lxv 1, 0(6)
; P9BE-NEXT: li 6, 0
; P9BE-NEXT: mtvsrwz 2, 6
; P9BE-NEXT: vspltb 3, 2, 7
; P9BE-NEXT: xxperm 0, 2, 1
; P9BE-NEXT: lxsibzx 1, 3, 4
; P9BE-NEXT: addis 3, 2, .LCPI3_1@toc@ha
; P9BE-NEXT: addi 3, 3, .LCPI3_1@toc@l
; P9BE-NEXT: lxv 2, 0(3)
; P9BE-NEXT: addis 3, 2, .LCPI3_2@toc@ha
; P9BE-NEXT: addi 3, 3, .LCPI3_2@toc@l
; P9BE-NEXT: xxperm 2, 1, 2
; P9BE-NEXT: xxspltw 1, 3, 0
; P9BE-NEXT: vmrghh 2, 2, 3
; P9BE-NEXT: xxmrghw 2, 2, 0
; P9BE-NEXT: xxperm 3, 2, 1
; P9BE-NEXT: vspltb 2, 2, 7
; P9BE-NEXT: vmrghh 3, 3, 2
; P9BE-NEXT: xxspltw 1, 2, 0
; P9BE-NEXT: xxmrghw 3, 3, 0
; P9BE-NEXT: lxv 0, 0(3)
; P9BE-NEXT: li 3, 0
; P9BE-NEXT: xxperm 2, 1, 0
; P9BE-NEXT: xxspltw 3, 2, 1
; P9BE-NEXT: vadduwm 2, 2, 3
; P9BE-NEXT: xxperm 3, 1, 0
; P9BE-NEXT: xxspltw 2, 3, 1
; P9BE-NEXT: vadduwm 2, 3, 2
; P9BE-NEXT: vextuwlx 3, 3, 2
; P9BE-NEXT: cmpw 3, 5
; P9BE-NEXT: bgelr+ 0
Expand All @@ -445,26 +436,24 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
; P9BE-AIX: # %bb.0: # %entry
; P9BE-AIX-NEXT: add 6, 3, 4
; P9BE-AIX-NEXT: li 7, 8
; P9BE-AIX-NEXT: lxsibzx 3, 3, 4
; P9BE-AIX-NEXT: ld 3, L..C5(2) # %const.1
; P9BE-AIX-NEXT: lxsibzx 0, 6, 7
; P9BE-AIX-NEXT: ld 6, L..C5(2) # %const.0
; P9BE-AIX-NEXT: ld 6, L..C6(2) # %const.0
; P9BE-AIX-NEXT: lxv 1, 0(6)
; P9BE-AIX-NEXT: li 6, 0
; P9BE-AIX-NEXT: mtvsrwz 2, 6
; P9BE-AIX-NEXT: vspltb 3, 2, 7
; P9BE-AIX-NEXT: xxperm 0, 2, 1
; P9BE-AIX-NEXT: lxsibzx 1, 3, 4
; P9BE-AIX-NEXT: ld 3, L..C6(2) # %const.1
; P9BE-AIX-NEXT: lxv 2, 0(3)
; P9BE-AIX-NEXT: ld 3, L..C7(2) # %const.2
; P9BE-AIX-NEXT: xxperm 2, 1, 2
; P9BE-AIX-NEXT: xxspltw 1, 3, 0
; P9BE-AIX-NEXT: vmrghh 2, 2, 3
; P9BE-AIX-NEXT: xxmrghw 2, 2, 0
; P9BE-AIX-NEXT: xxperm 3, 2, 1
; P9BE-AIX-NEXT: vspltb 2, 2, 7
; P9BE-AIX-NEXT: vmrghh 3, 3, 2
; P9BE-AIX-NEXT: xxspltw 1, 2, 0
; P9BE-AIX-NEXT: xxmrghw 3, 3, 0
; P9BE-AIX-NEXT: lxv 0, 0(3)
; P9BE-AIX-NEXT: li 3, 0
; P9BE-AIX-NEXT: xxperm 2, 1, 0
; P9BE-AIX-NEXT: xxspltw 3, 2, 1
; P9BE-AIX-NEXT: vadduwm 2, 2, 3
; P9BE-AIX-NEXT: xxperm 3, 1, 0
; P9BE-AIX-NEXT: xxspltw 2, 3, 1
; P9BE-AIX-NEXT: vadduwm 2, 3, 2
; P9BE-AIX-NEXT: vextuwlx 3, 3, 2
; P9BE-AIX-NEXT: cmpw 3, 5
; P9BE-AIX-NEXT: bgelr+ 0
Expand All @@ -474,25 +463,23 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
; P9BE-AIX32: # %bb.0: # %entry
; P9BE-AIX32-NEXT: add 6, 3, 4
; P9BE-AIX32-NEXT: li 7, 8
; P9BE-AIX32-NEXT: lxsibzx 3, 3, 4
; P9BE-AIX32-NEXT: lwz 3, L..C4(2) # %const.1
; P9BE-AIX32-NEXT: lxsibzx 0, 6, 7
; P9BE-AIX32-NEXT: lwz 6, L..C4(2) # %const.0
; P9BE-AIX32-NEXT: lwz 6, L..C5(2) # %const.0
; P9BE-AIX32-NEXT: lxv 1, 0(6)
; P9BE-AIX32-NEXT: li 6, 0
; P9BE-AIX32-NEXT: mtvsrwz 2, 6
; P9BE-AIX32-NEXT: vspltb 3, 2, 7
; P9BE-AIX32-NEXT: xxperm 0, 2, 1
; P9BE-AIX32-NEXT: lxsibzx 1, 3, 4
; P9BE-AIX32-NEXT: lwz 3, L..C5(2) # %const.1
; P9BE-AIX32-NEXT: lxv 2, 0(3)
; P9BE-AIX32-NEXT: lwz 3, L..C6(2) # %const.2
; P9BE-AIX32-NEXT: xxperm 2, 1, 2
; P9BE-AIX32-NEXT: xxspltw 1, 3, 0
; P9BE-AIX32-NEXT: vmrghh 2, 2, 3
; P9BE-AIX32-NEXT: xxmrghw 2, 2, 0
; P9BE-AIX32-NEXT: xxperm 3, 2, 1
; P9BE-AIX32-NEXT: vspltb 2, 2, 7
; P9BE-AIX32-NEXT: vmrghh 3, 3, 2
; P9BE-AIX32-NEXT: xxspltw 1, 2, 0
; P9BE-AIX32-NEXT: xxmrghw 3, 3, 0
; P9BE-AIX32-NEXT: lxv 0, 0(3)
; P9BE-AIX32-NEXT: xxperm 2, 1, 0
; P9BE-AIX32-NEXT: xxspltw 3, 2, 1
; P9BE-AIX32-NEXT: vadduwm 2, 2, 3
; P9BE-AIX32-NEXT: xxperm 3, 1, 0
; P9BE-AIX32-NEXT: xxspltw 2, 3, 1
; P9BE-AIX32-NEXT: vadduwm 2, 3, 2
; P9BE-AIX32-NEXT: stxv 2, -16(1)
; P9BE-AIX32-NEXT: lwz 3, -16(1)
; P9BE-AIX32-NEXT: cmpw 3, 5
Expand Down

0 comments on commit cf0395f

Please sign in to comment.