Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 39 additions & 2 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

// Hidden opt-out for the P10 store-forwarding-friendly lowering. While this
// flag is unset, 64-bit big-endian targets lower an integer SCALAR_TO_VECTOR
// through two adjacent 64-bit stack stores that together cover the 128-bit
// vector load issued right after them. Defaults to false.
static cl::opt<bool> DisableP10StoreForward(
    "disable-p10-store-forward", cl::Hidden, cl::init(false),
    cl::desc("disable P10 store forward-friendly conversion"));

// Hidden switch that, per its description, turns off preincrement load/store
// generation on PPC. No cl::init is supplied for it here.
static cl::opt<bool> DisablePPCPreinc(
    "disable-ppc-preinc", cl::Hidden,
    cl::desc("disable preincrement load/store generation on PPC"));

Expand Down Expand Up @@ -985,6 +990,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,

setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
// LE is P8+/64-bit so direct moves are supported and these operations
// are legal. The custom transformation requires 64-bit since we need a
// pair of stores that will cover a 128-bit load for P10.
if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
}

setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
Expand Down Expand Up @@ -11479,9 +11492,33 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

SDValue Val = Op.getOperand(0);
EVT ValVT = Val.getValueType();
// P10 hardware store forwarding requires that a single store contains all
// the data for the load. P10 is able to merge a pair of adjacent stores. Try
// to avoid load hit store on P10 when running binaries compiled for older
// processors by generating two mergeable scalar stores to forward with the
// vector load.
if (!DisableP10StoreForward && Subtarget.isPPC64() &&
!Subtarget.isLittleEndian() && ValVT.isInteger() &&
ValVT.getSizeInBits() <= 64) {
Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
SDValue ShiftBy = DAG.getConstant(
64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
SDValue Plus8 =
DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
SDValue Store2 =
DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
MachinePointerInfo());
}

// Store the input value into Value#0 of the stack slot.
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
MachinePointerInfo());
SDValue Store =
DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
// Load it out.
return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
}
Expand Down
8 changes: 6 additions & 2 deletions llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ define void @test_aix_splatimm(i32 %arg, i32 %arg1, i32 %arg2) {
; CHECK-AIX-NEXT: slwi 3, 3, 8
; CHECK-AIX-NEXT: neg 3, 3
; CHECK-AIX-NEXT: lwz 6, 0(3)
; CHECK-AIX-NEXT: sth 3, -16(1)
; CHECK-AIX-NEXT: sldi 3, 3, 48
; CHECK-AIX-NEXT: std 3, -16(1)
; CHECK-AIX-NEXT: std 3, -8(1)
; CHECK-AIX-NEXT: addi 3, 1, -16
; CHECK-AIX-NEXT: lxvw4x 34, 0, 3
; CHECK-AIX-NEXT: srwi 3, 4, 16
Expand All @@ -24,9 +26,11 @@ define void @test_aix_splatimm(i32 %arg, i32 %arg1, i32 %arg2) {
; CHECK-AIX-NEXT: mullw 3, 3, 4
; CHECK-AIX-NEXT: li 4, 0
; CHECK-AIX-NEXT: neg 3, 3
; CHECK-AIX-NEXT: sldi 3, 3, 48
; CHECK-AIX-NEXT: vsplth 2, 2, 0
; CHECK-AIX-NEXT: stxvw4x 34, 0, 4
; CHECK-AIX-NEXT: sth 3, -32(1)
; CHECK-AIX-NEXT: std 3, -32(1)
; CHECK-AIX-NEXT: std 3, -24(1)
; CHECK-AIX-NEXT: addi 3, 1, -32
; CHECK-AIX-NEXT: lxvw4x 34, 0, 3
; CHECK-AIX-NEXT: vsplth 2, 2, 0
Expand Down
52 changes: 24 additions & 28 deletions llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
Original file line number Diff line number Diff line change
Expand Up @@ -338,17 +338,16 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
; PWR7-BE-LABEL: build_v4i32_load_0:
; PWR7-BE: # %bb.0: # %entry
; PWR7-BE-NEXT: lwz 3, 0(3)
; PWR7-BE-NEXT: li 4, 0
; PWR7-BE-NEXT: stw 4, -16(1)
; PWR7-BE-NEXT: stw 3, -32(1)
; PWR7-BE-NEXT: xxlxor 36, 36, 36
; PWR7-BE-NEXT: sldi 3, 3, 32
; PWR7-BE-NEXT: std 3, -32(1)
; PWR7-BE-NEXT: std 3, -24(1)
; PWR7-BE-NEXT: addis 3, 2, .LCPI8_0@toc@ha
; PWR7-BE-NEXT: addi 3, 3, .LCPI8_0@toc@l
; PWR7-BE-NEXT: lxvw4x 34, 0, 3
; PWR7-BE-NEXT: addi 3, 1, -16
; PWR7-BE-NEXT: lxvw4x 35, 0, 3
; PWR7-BE-NEXT: addi 3, 1, -32
; PWR7-BE-NEXT: lxvw4x 36, 0, 3
; PWR7-BE-NEXT: vperm 2, 4, 3, 2
; PWR7-BE-NEXT: lxvw4x 35, 0, 3
; PWR7-BE-NEXT: vperm 2, 3, 4, 2
; PWR7-BE-NEXT: blr
;
; PWR8-BE-LABEL: build_v4i32_load_0:
Expand Down Expand Up @@ -402,17 +401,16 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
; PWR7-BE-LABEL: build_v4i32_load_1:
; PWR7-BE: # %bb.0: # %entry
; PWR7-BE-NEXT: lwz 3, 0(3)
; PWR7-BE-NEXT: li 4, 0
; PWR7-BE-NEXT: stw 4, -32(1)
; PWR7-BE-NEXT: stw 3, -16(1)
; PWR7-BE-NEXT: xxlxor 36, 36, 36
; PWR7-BE-NEXT: sldi 3, 3, 32
; PWR7-BE-NEXT: std 3, -16(1)
; PWR7-BE-NEXT: std 3, -8(1)
; PWR7-BE-NEXT: addis 3, 2, .LCPI9_0@toc@ha
; PWR7-BE-NEXT: addi 3, 3, .LCPI9_0@toc@l
; PWR7-BE-NEXT: lxvw4x 34, 0, 3
; PWR7-BE-NEXT: addi 3, 1, -32
; PWR7-BE-NEXT: lxvw4x 35, 0, 3
; PWR7-BE-NEXT: addi 3, 1, -16
; PWR7-BE-NEXT: lxvw4x 36, 0, 3
; PWR7-BE-NEXT: vperm 2, 3, 4, 2
; PWR7-BE-NEXT: lxvw4x 35, 0, 3
; PWR7-BE-NEXT: vperm 2, 4, 3, 2
; PWR7-BE-NEXT: blr
;
; PWR8-BE-LABEL: build_v4i32_load_1:
Expand Down Expand Up @@ -466,17 +464,16 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
; PWR7-BE-LABEL: build_v4i32_load_2:
; PWR7-BE: # %bb.0: # %entry
; PWR7-BE-NEXT: lwz 3, 0(3)
; PWR7-BE-NEXT: li 4, 0
; PWR7-BE-NEXT: stw 4, -32(1)
; PWR7-BE-NEXT: stw 3, -16(1)
; PWR7-BE-NEXT: xxlxor 36, 36, 36
; PWR7-BE-NEXT: sldi 3, 3, 32
; PWR7-BE-NEXT: std 3, -16(1)
; PWR7-BE-NEXT: std 3, -8(1)
; PWR7-BE-NEXT: addis 3, 2, .LCPI10_0@toc@ha
; PWR7-BE-NEXT: addi 3, 3, .LCPI10_0@toc@l
; PWR7-BE-NEXT: lxvw4x 34, 0, 3
; PWR7-BE-NEXT: addi 3, 1, -32
; PWR7-BE-NEXT: lxvw4x 35, 0, 3
; PWR7-BE-NEXT: addi 3, 1, -16
; PWR7-BE-NEXT: lxvw4x 36, 0, 3
; PWR7-BE-NEXT: vperm 2, 3, 4, 2
; PWR7-BE-NEXT: lxvw4x 35, 0, 3
; PWR7-BE-NEXT: vperm 2, 4, 3, 2
; PWR7-BE-NEXT: blr
;
; PWR8-BE-LABEL: build_v4i32_load_2:
Expand Down Expand Up @@ -530,17 +527,16 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
; PWR7-BE-LABEL: build_v4i32_load_3:
; PWR7-BE: # %bb.0: # %entry
; PWR7-BE-NEXT: lwz 3, 0(3)
; PWR7-BE-NEXT: li 4, 0
; PWR7-BE-NEXT: stw 4, -32(1)
; PWR7-BE-NEXT: stw 3, -16(1)
; PWR7-BE-NEXT: xxlxor 36, 36, 36
; PWR7-BE-NEXT: sldi 3, 3, 32
; PWR7-BE-NEXT: std 3, -16(1)
; PWR7-BE-NEXT: std 3, -8(1)
; PWR7-BE-NEXT: addis 3, 2, .LCPI11_0@toc@ha
; PWR7-BE-NEXT: addi 3, 3, .LCPI11_0@toc@l
; PWR7-BE-NEXT: lxvw4x 34, 0, 3
; PWR7-BE-NEXT: addi 3, 1, -32
; PWR7-BE-NEXT: lxvw4x 35, 0, 3
; PWR7-BE-NEXT: addi 3, 1, -16
; PWR7-BE-NEXT: lxvw4x 36, 0, 3
; PWR7-BE-NEXT: vperm 2, 3, 4, 2
; PWR7-BE-NEXT: lxvw4x 35, 0, 3
; PWR7-BE-NEXT: vperm 2, 4, 3, 2
; PWR7-BE-NEXT: blr
;
; PWR8-BE-LABEL: build_v4i32_load_3:
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/PowerPC/load-and-splat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,7 @@ define <16 x i8> @adjusted_lxvwsx(ptr %s, ptr %t) {
; P7: # %bb.0: # %entry
; P7-NEXT: ld r3, 0(r3)
; P7-NEXT: std r3, -16(r1)
; P7-NEXT: std r3, -8(r1)
; P7-NEXT: addi r3, r1, -16
; P7-NEXT: lxvw4x vs0, 0, r3
; P7-NEXT: xxspltw v2, vs0, 1
Expand Down
Loading