diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 4bdaf097aedb2b..99bc1bbb07f07b 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1152,22 +1152,6 @@ let SubtargetPredicate = isGFX10Plus in { // MUBUF Patterns //===----------------------------------------------------------------------===// -def extract_glc : SDNodeXFormgetTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i8); -}]>; - -def extract_slc : SDNodeXFormgetTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8); -}]>; - -def extract_dlc : SDNodeXFormgetTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8); -}]>; - -def extract_swz : SDNodeXFormgetTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8); -}]>; - //===----------------------------------------------------------------------===// // buffer_load/store_format patterns //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 48f2bad6891175..134de6124b3e94 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -5622,7 +5622,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op, } SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, - SDValue Offset, SDValue GLC, SDValue DLC, + SDValue Offset, SDValue CachePolicy, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -5640,8 +5640,7 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Ops[] = { Rsrc, Offset, // Offset - GLC, - DLC, + CachePolicy }; // Widen vec3 load to vec4. @@ -5675,7 +5674,6 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, } SDVTList VTList = DAG.getVTList({LoadVT, MVT::Glue}); - unsigned CachePolicy = cast(GLC)->getZExtValue(); SDValue Ops[] = { DAG.getEntryNode(), // Chain Rsrc, // rsrc @@ -5683,7 +5681,7 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, {}, // voffset {}, // soffset {}, // offset - DAG.getTargetConstant(CachePolicy, DL, MVT::i32), // cachepolicy + CachePolicy, // cachepolicy DAG.getTargetConstant(0, DL, MVT::i1), // idxen }; @@ -5867,7 +5865,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, if (!parseCachePolicy(Op.getOperand(3), DAG, &GLC, nullptr, IsGFX10 ? &DLC : nullptr)) return Op; - return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2), GLC, DLC, + return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3), DAG); } case Intrinsic::amdgcn_fdiv_fast: diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index dbdac2722c871f..2ba9c43e9353d4 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -60,7 +60,7 @@ class SITargetLowering final : public AMDGPUTargetLowering { SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr, SelectionDAG &DAG) const; SDValue lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Offset, - SDValue GLC, SDValue DLC, SelectionDAG &DAG) const; + SDValue CachePolicy, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index f2226cdfba49b4..bf6f02965475d0 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -39,8 +39,7 @@ def SIEncodingFamily { def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>; def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD", - SDTypeProfile<1, 4, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i1>, - SDTCisVT<4, i1>]>, + SDTypeProfile<1, 3, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>, [SDNPMayLoad, SDNPMemOperand] >; @@ -794,6 +793,26 @@ def NegSubInlineConstV216 : PatLeaf<(build_vector), [{ (isNullConstantOrUndef(Src1) && isNegInlineImmediate(Src0.getNode())); }], getNegV2I16Imm>; +//===----------------------------------------------------------------------===// +// MUBUF/SMEM Patterns +//===----------------------------------------------------------------------===// + +def extract_glc : SDNodeXFormgetTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i8); +}]>; + +def extract_slc : SDNodeXFormgetTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8); +}]>; + +def extract_dlc : SDNodeXFormgetTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8); +}]>; + +def extract_swz : SDNodeXFormgetTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8); +}]>; + //===----------------------------------------------------------------------===// // Custom Operands //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index e054434ba0e1b3..2119691544031d 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -768,23 +768,23 @@ multiclass SMRD_Pattern { multiclass SMLoad_Pattern { // 1. Offset as an immediate def : GCNPat < - (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc, i1:$dlc), - (vt (!cast(Instr#"_IMM") $sbase, $offset, (as_i1timm $glc), - (as_i1timm $dlc))) + (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy), + (vt (!cast(Instr#"_IMM") $sbase, $offset, (extract_glc $cachepolicy), + (extract_dlc $cachepolicy))) >; // 2. 32-bit IMM offset on CI def : GCNPat < - (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc, i1:$dlc)), - (!cast(Instr#"_IMM_ci") $sbase, $offset, (as_i1timm $glc), (as_i1timm $dlc))> { + (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)), + (!cast(Instr#"_IMM_ci") $sbase, $offset, (extract_glc $cachepolicy), (extract_dlc $cachepolicy))> { let OtherPredicates = [isGFX7Only]; } // 3. Offset loaded in an 32bit SGPR def : GCNPat < - (SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc, i1:$dlc), - (vt (!cast(Instr#"_SGPR") $sbase, $offset, (as_i1timm $glc), - (as_i1timm $dlc))) + (SIsbuffer_load v4i32:$sbase, i32:$offset, timm:$cachepolicy), + (vt (!cast(Instr#"_SGPR") $sbase, $offset, (extract_glc $cachepolicy), + (extract_dlc $cachepolicy))) >; }