Skip to content

Commit

Permalink
[PowerPC] Fix vperm codegen
Browse files Browse the repository at this point in the history
Commit rG934d5fa2b8672695c335deed0e19d0e777c98403 changed the vperm codegen
for cases where vperm is not replaced by xxperm; this patch reverts that change.

Reviewed By: stefanp

Differential Revision: https://reviews.llvm.org/D138736
  • Loading branch information
Maryam Moghadas committed Nov 29, 2022
1 parent ca856ff commit 7614ba0
Show file tree
Hide file tree
Showing 13 changed files with 140 additions and 131 deletions.
62 changes: 32 additions & 30 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Expand Up @@ -10186,9 +10186,6 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
}
}

bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;

// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
// that it is in input element units, not in bytes. Convert now.

Expand All @@ -10199,6 +10196,9 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
EVT EltVT = V1.getValueType().getVectorElementType();
unsigned BytesPerElement = EltVT.getSizeInBits() / 8;

bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;

/*
Vectors will be appended like so: [ V1 | V2 ]
XXSWAPD on V1:
Expand All @@ -10219,24 +10219,27 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

if (V1HasXXSWAPD) {
if (SrcElt < 8)
SrcElt += 8;
else if (SrcElt < 16)
SrcElt -= 8;
}
if (V2HasXXSWAPD) {
if (SrcElt > 23)
SrcElt -= 8;
else if (SrcElt > 15)
SrcElt += 8;
}
if (NeedSwap) {
if (SrcElt < 16)
SrcElt += 16;
else
SrcElt -= 16;
if (Opcode == PPCISD::XXPERM) {
if (V1HasXXSWAPD) {
if (SrcElt < 8)
SrcElt += 8;
else if (SrcElt < 16)
SrcElt -= 8;
}
if (V2HasXXSWAPD) {
if (SrcElt > 23)
SrcElt -= 8;
else if (SrcElt > 15)
SrcElt += 8;
}
if (NeedSwap) {
if (SrcElt < 16)
SrcElt += 16;
else
SrcElt -= 16;
}
}

for (unsigned j = 0; j != BytesPerElement; ++j)
if (isLittleEndian)
ResultMask.push_back(
Expand All @@ -10246,16 +10249,15 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
}

if (V1HasXXSWAPD) {
dl = SDLoc(V1->getOperand(0));
V1 = V1->getOperand(0)->getOperand(1);
}
if (V2HasXXSWAPD) {
dl = SDLoc(V2->getOperand(0));
V2 = V2->getOperand(0)->getOperand(1);
}

if (V1HasXXSWAPD || V2HasXXSWAPD || Opcode == PPCISD::XXPERM) {
if (Opcode == PPCISD::XXPERM && (V1HasXXSWAPD || V2HasXXSWAPD)) {
if (V1HasXXSWAPD) {
dl = SDLoc(V1->getOperand(0));
V1 = V1->getOperand(0)->getOperand(1);
}
if (V2HasXXSWAPD) {
dl = SDLoc(V2->getOperand(0));
V2 = V2->getOperand(0)->getOperand(1);
}
if (isPPC64 && ValType != MVT::v2f64)
V1 = DAG.getBitcast(MVT::v2f64, V1);
if (isPPC64 && V2.getValueType() != MVT::v2f64)
Expand Down
36 changes: 20 additions & 16 deletions llvm/test/CodeGen/PowerPC/build-vector-tests.ll
Expand Up @@ -1058,14 +1058,15 @@ define <4 x i32> @fromDiffMemVarDi(ptr nocapture readonly %arr, i32 signext %ele
;
; P8LE-LABEL: fromDiffMemVarDi:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r5, r2, .LCPI9_0@toc@ha
; P8LE-NEXT: sldi r4, r4, 2
; P8LE-NEXT: addi r5, r5, .LCPI9_0@toc@l
; P8LE-NEXT: addis r5, r2, .LCPI9_0@toc@ha
; P8LE-NEXT: add r3, r3, r4
; P8LE-NEXT: lxvd2x vs0, 0, r5
; P8LE-NEXT: addi r4, r5, .LCPI9_0@toc@l
; P8LE-NEXT: addi r3, r3, -12
; P8LE-NEXT: lxvd2x v2, 0, r3
; P8LE-NEXT: xxswapd v3, vs0
; P8LE-NEXT: lxvd2x vs1, 0, r4
; P8LE-NEXT: lxvd2x vs0, 0, r3
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: vperm v2, v2, v2, v3
; P8LE-NEXT: blr
entry:
Expand Down Expand Up @@ -1478,10 +1479,11 @@ define <4 x i32> @fromDiffMemConsDConvftoi(ptr nocapture readonly %ptr) {
; P8LE-LABEL: fromDiffMemConsDConvftoi:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r4, r2, .LCPI18_0@toc@ha
; P8LE-NEXT: lxvd2x v2, 0, r3
; P8LE-NEXT: lxvd2x vs0, 0, r3
; P8LE-NEXT: addi r4, r4, .LCPI18_0@toc@l
; P8LE-NEXT: lxvd2x vs0, 0, r4
; P8LE-NEXT: xxswapd v3, vs0
; P8LE-NEXT: lxvd2x vs1, 0, r4
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: vperm v2, v2, v2, v3
; P8LE-NEXT: xvcvspsxws v2, v2
; P8LE-NEXT: blr
Expand Down Expand Up @@ -2578,14 +2580,15 @@ define <4 x i32> @fromDiffMemVarDui(ptr nocapture readonly %arr, i32 signext %el
;
; P8LE-LABEL: fromDiffMemVarDui:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r5, r2, .LCPI41_0@toc@ha
; P8LE-NEXT: sldi r4, r4, 2
; P8LE-NEXT: addi r5, r5, .LCPI41_0@toc@l
; P8LE-NEXT: addis r5, r2, .LCPI41_0@toc@ha
; P8LE-NEXT: add r3, r3, r4
; P8LE-NEXT: lxvd2x vs0, 0, r5
; P8LE-NEXT: addi r4, r5, .LCPI41_0@toc@l
; P8LE-NEXT: addi r3, r3, -12
; P8LE-NEXT: lxvd2x v2, 0, r3
; P8LE-NEXT: xxswapd v3, vs0
; P8LE-NEXT: lxvd2x vs1, 0, r4
; P8LE-NEXT: lxvd2x vs0, 0, r3
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: vperm v2, v2, v2, v3
; P8LE-NEXT: blr
entry:
Expand Down Expand Up @@ -2998,10 +3001,11 @@ define <4 x i32> @fromDiffMemConsDConvftoui(ptr nocapture readonly %ptr) {
; P8LE-LABEL: fromDiffMemConsDConvftoui:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r4, r2, .LCPI50_0@toc@ha
; P8LE-NEXT: lxvd2x v2, 0, r3
; P8LE-NEXT: lxvd2x vs0, 0, r3
; P8LE-NEXT: addi r4, r4, .LCPI50_0@toc@l
; P8LE-NEXT: lxvd2x vs0, 0, r4
; P8LE-NEXT: xxswapd v3, vs0
; P8LE-NEXT: lxvd2x vs1, 0, r4
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: vperm v2, v2, v2, v3
; P8LE-NEXT: xvcvspuxws v2, v2
; P8LE-NEXT: blr
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
Expand Up @@ -491,10 +491,10 @@ define dso_local <8 x i16> @testmrglb3(ptr nocapture readonly %a) local_unnamed_
; CHECK-P9-BE: # %bb.0: # %entry
; CHECK-P9-BE-NEXT: lxsd v2, 0(r3)
; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI12_0@toc@ha
; CHECK-P9-BE-NEXT: xxlxor vs0, vs0, vs0
; CHECK-P9-BE-NEXT: xxlxor vs1, vs1, vs1
; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI12_0@toc@l
; CHECK-P9-BE-NEXT: lxv vs1, 0(r3)
; CHECK-P9-BE-NEXT: xxperm v2, vs0, vs1
; CHECK-P9-BE-NEXT: lxv vs0, 0(r3)
; CHECK-P9-BE-NEXT: xxperm v2, vs1, vs0
; CHECK-P9-BE-NEXT: blr
;
; CHECK-NOVSX-LABEL: testmrglb3:
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll
Expand Up @@ -66,11 +66,11 @@ define dso_local <4 x i16> @shufflevector_combine(<4 x i32> %0) #0 {
; BE-LABEL: shufflevector_combine:
; BE: # %bb.0: # %newFuncRoot
; BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; BE-NEXT: xxlxor vs0, vs0, vs0
; BE-NEXT: xxlxor vs1, vs1, vs1
; BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
; BE-NEXT: lxv vs1, 0(r3)
; BE-NEXT: lxv vs0, 0(r3)
; BE-NEXT: li r3, 0
; BE-NEXT: xxperm v2, vs0, vs1
; BE-NEXT: xxperm v2, vs1, vs0
; BE-NEXT: vinsw v2, r3, 8
; BE-NEXT: vpkuwum v2, v2, v2
; BE-NEXT: blr
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
Expand Up @@ -44,15 +44,15 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-NEXT: add 5, 3, 4
; P9BE-NEXT: lxsdx 2, 3, 4
; P9BE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; P9BE-NEXT: xxlxor 0, 0, 0
; P9BE-NEXT: xxlxor 1, 1, 1
; P9BE-NEXT: vspltisw 4, 8
; P9BE-NEXT: lxsd 3, 4(5)
; P9BE-NEXT: addi 3, 3, .LCPI0_0@toc@l
; P9BE-NEXT: vadduwm 4, 4, 4
; P9BE-NEXT: lxv 1, 0(3)
; P9BE-NEXT: lxv 0, 0(3)
; P9BE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
; P9BE-NEXT: addi 3, 3, .LCPI0_1@toc@l
; P9BE-NEXT: xxperm 2, 0, 1
; P9BE-NEXT: xxperm 2, 1, 0
; P9BE-NEXT: lxv 0, 0(3)
; P9BE-NEXT: xxperm 3, 3, 0
; P9BE-NEXT: vnegw 3, 3
Expand Down Expand Up @@ -285,10 +285,10 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
; P9BE-NEXT: addis 3, 2, .LCPI2_1@toc@ha
; P9BE-NEXT: addi 3, 3, .LCPI2_1@toc@l
; P9BE-NEXT: xxperm 2, 0, 1
; P9BE-NEXT: lxv 1, 0(3)
; P9BE-NEXT: lxv 0, 0(3)
; P9BE-NEXT: li 3, 0
; P9BE-NEXT: xxmrghw 0, 4, 2
; P9BE-NEXT: xxperm 3, 0, 1
; P9BE-NEXT: xxmrghw 2, 4, 2
; P9BE-NEXT: xxperm 3, 2, 0
; P9BE-NEXT: xxspltw 2, 3, 1
; P9BE-NEXT: vadduwm 2, 3, 2
; P9BE-NEXT: vextuwlx 3, 3, 2
Expand All @@ -312,10 +312,10 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
; P9BE-AIX-NEXT: lxsihzx 0, 3, 4
; P9BE-AIX-NEXT: ld 3, L..C4(2) # %const.1
; P9BE-AIX-NEXT: xxperm 2, 0, 1
; P9BE-AIX-NEXT: lxv 1, 0(3)
; P9BE-AIX-NEXT: lxv 0, 0(3)
; P9BE-AIX-NEXT: li 3, 0
; P9BE-AIX-NEXT: xxmrghw 0, 4, 2
; P9BE-AIX-NEXT: xxperm 3, 0, 1
; P9BE-AIX-NEXT: xxmrghw 2, 4, 2
; P9BE-AIX-NEXT: xxperm 3, 2, 0
; P9BE-AIX-NEXT: xxspltw 2, 3, 1
; P9BE-AIX-NEXT: vadduwm 2, 3, 2
; P9BE-AIX-NEXT: vextuwlx 3, 3, 2
Expand Down Expand Up @@ -395,13 +395,13 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
; P9LE-NEXT: vmrghb 2, 3, 2
; P9LE-NEXT: addi 3, 3, .LCPI3_0@toc@l
; P9LE-NEXT: vmrglh 2, 2, 4
; P9LE-NEXT: lxv 1, 0(3)
; P9LE-NEXT: lxv 0, 0(3)
; P9LE-NEXT: li 3, 0
; P9LE-NEXT: vmrghb 3, 3, 5
; P9LE-NEXT: xxmrglw 2, 2, 4
; P9LE-NEXT: vmrglh 3, 3, 4
; P9LE-NEXT: xxmrglw 0, 4, 3
; P9LE-NEXT: xxperm 2, 0, 1
; P9LE-NEXT: xxmrglw 3, 4, 3
; P9LE-NEXT: xxperm 2, 3, 0
; P9LE-NEXT: xxspltw 3, 2, 2
; P9LE-NEXT: vadduwm 2, 2, 3
; P9LE-NEXT: vextuwrx 3, 3, 2
Expand Down
14 changes: 8 additions & 6 deletions llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
Expand Up @@ -184,11 +184,12 @@ define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) {
; CHECK-LE-P8-LABEL: test_none_v16i8:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha
; CHECK-LE-P8-NEXT: lxvd2x v2, 0, r4
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT: mtvsrd v4, r3
; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5
; CHECK-LE-P8-NEXT: xxswapd v3, vs0
; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5
; CHECK-LE-P8-NEXT: xxswapd v2, vs0
; CHECK-LE-P8-NEXT: xxswapd v3, vs1
; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P8-NEXT: blr
;
Expand Down Expand Up @@ -431,11 +432,12 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
; CHECK-LE-P8-LABEL: test_none_v8i16:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI5_0@toc@ha
; CHECK-LE-P8-NEXT: lxvd2x v2, 0, r4
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT: mtvsrd v4, r3
; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5
; CHECK-LE-P8-NEXT: xxswapd v3, vs0
; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5
; CHECK-LE-P8-NEXT: xxswapd v2, vs0
; CHECK-LE-P8-NEXT: xxswapd v3, vs1
; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P8-NEXT: blr
;
Expand Down
37 changes: 19 additions & 18 deletions llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
Expand Up @@ -389,10 +389,10 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3
; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; CHECK-LE-P9-NEXT: xxlxor vs1, vs1, vs1
; CHECK-LE-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l
; CHECK-LE-P9-NEXT: lxv vs2, 0(r3)
; CHECK-LE-P9-NEXT: xxperm vs0, vs1, vs2
; CHECK-LE-P9-NEXT: lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT: xxperm vs0, vs2, vs1
; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT: blr
;
Expand All @@ -411,10 +411,10 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3
; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; CHECK-BE-P9-NEXT: xxlxor vs1, vs1, vs1
; CHECK-BE-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l
; CHECK-BE-P9-NEXT: lxv vs2, 0(r3)
; CHECK-BE-P9-NEXT: xxperm vs0, vs1, vs2
; CHECK-BE-P9-NEXT: lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT: xxperm vs0, vs2, vs1
; CHECK-BE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT: blr
;
Expand Down Expand Up @@ -470,14 +470,15 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl
; CHECK-LE-P8-LABEL: test_none_v2i64:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI4_0@toc@ha
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT: lxsdx v2, 0, r3
; CHECK-LE-P8-NEXT: lxvd2x v3, 0, r4
; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI4_1@toc@ha
; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI4_0@toc@l
; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI4_1@toc@l
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5
; CHECK-LE-P8-NEXT: xxswapd v4, vs0
; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5
; CHECK-LE-P8-NEXT: xxswapd v3, vs0
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT: xxswapd v4, vs1
; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4
; CHECK-LE-P8-NEXT: xxswapd v3, vs0
; CHECK-LE-P8-NEXT: xxlxor v4, v4, v4
Expand Down Expand Up @@ -544,10 +545,10 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3)
; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0
; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r4)
; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r4)
; CHECK-AIX-64-P9-NEXT: xxlxor v3, v3, v3
; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT: xxperm v2, vs1, vs0
; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2
; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3)
; CHECK-AIX-64-P9-NEXT: blr
Expand Down Expand Up @@ -603,10 +604,10 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lfd f0, 0(r3)
; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha
; CHECK-LE-P9-NEXT: xxlxor vs1, vs1, vs1
; CHECK-LE-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l
; CHECK-LE-P9-NEXT: lxv vs2, 0(r3)
; CHECK-LE-P9-NEXT: xxperm vs0, vs1, vs2
; CHECK-LE-P9-NEXT: lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT: xxperm vs0, vs2, vs1
; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT: blr
;
Expand All @@ -625,10 +626,10 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: lfd f0, 0(r3)
; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha
; CHECK-BE-P9-NEXT: xxlxor vs1, vs1, vs1
; CHECK-BE-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l
; CHECK-BE-P9-NEXT: lxv vs2, 0(r3)
; CHECK-BE-P9-NEXT: xxperm vs0, vs1, vs2
; CHECK-BE-P9-NEXT: lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT: xxperm vs0, vs2, vs1
; CHECK-BE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT: blr
;
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/PowerPC/vec-itofp.ll
Expand Up @@ -203,13 +203,13 @@ define void @test2(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
;
; CHECK-P9-LABEL: test2:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs1, 0(r4)
; CHECK-P9-NEXT: lxv vs0, 0(r4)
; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha
; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0
; CHECK-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l
; CHECK-P9-NEXT: lxv vs2, 0(r4)
; CHECK-P9-NEXT: xxperm vs1, vs0, vs2
; CHECK-P9-NEXT: xvcvuxddp vs0, vs1
; CHECK-P9-NEXT: lxv vs1, 0(r4)
; CHECK-P9-NEXT: xxperm vs0, vs2, vs1
; CHECK-P9-NEXT: xvcvuxddp vs0, vs0
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
Expand Down

0 comments on commit 7614ba0

Please sign in to comment.