Skip to content

Commit

Permalink
[AMDGPU] Fix v3f16 interaction with image store workaround
Browse files Browse the repository at this point in the history
In some cases, the wrong number of registers was reserved.

Also enable more v3f16 tests.

Differential Revision: https://reviews.llvm.org/D90847
  • Loading branch information
Flakebi committed Nov 18, 2020
1 parent 3abaf6c commit 72ccec1
Show file tree
Hide file tree
Showing 6 changed files with 418 additions and 76 deletions.
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Expand Up @@ -3560,9 +3560,9 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
auto Unmerge = B.buildUnmerge(S16, Reg);
for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
PackedRegs.push_back(Unmerge.getReg(I));
PackedRegs.resize(8, B.buildUndef(S16).getReg(0));
Reg = B.buildBuildVector(LLT::vector(8, S16), PackedRegs).getReg(0);
return B.buildBitcast(LLT::vector(4, S32), Reg).getReg(0);
PackedRegs.resize(6, B.buildUndef(S16).getReg(0));
Reg = B.buildBuildVector(LLT::vector(6, S16), PackedRegs).getReg(0);
return B.buildBitcast(LLT::vector(3, S32), Reg).getReg(0);
}

if (StoreVT.getNumElements() == 4) {
Expand Down
34 changes: 22 additions & 12 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Expand Up @@ -7455,17 +7455,6 @@ SDValue SITargetLowering::handleD16VData(SDValue VData, SelectionDAG &DAG,
EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElements);
SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, EquivStoreVT, IntVData);
return DAG.UnrollVectorOp(ZExt.getNode());
} else if (NumElements == 3) {
EVT IntStoreVT =
EVT::getIntegerVT(*DAG.getContext(), StoreVT.getStoreSizeInBits());
SDValue IntVData = DAG.getNode(ISD::BITCAST, DL, IntStoreVT, VData);

EVT WidenedStoreVT = EVT::getVectorVT(
*DAG.getContext(), StoreVT.getVectorElementType(), NumElements + 1);
EVT WidenedIntVT = EVT::getIntegerVT(*DAG.getContext(),
WidenedStoreVT.getStoreSizeInBits());
SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenedIntVT, IntVData);
return DAG.getNode(ISD::BITCAST, DL, WidenedStoreVT, ZExt);
}

// The sq block of gfx8.1 does not estimate register use correctly for d16
Expand All @@ -7488,16 +7477,37 @@ SDValue SITargetLowering::handleD16VData(SDValue VData, SelectionDAG &DAG,
SDValue IntPair = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Pair);
PackedElts.push_back(IntPair);
}
if ((NumElements % 2) == 1) {
// Handle v3i16
unsigned I = Elts.size() / 2;
SDValue Pair = DAG.getBuildVector(MVT::v2i16, DL,
{Elts[I * 2], DAG.getUNDEF(MVT::i16)});
SDValue IntPair = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Pair);
PackedElts.push_back(IntPair);
}

// Pad using UNDEF
PackedElts.resize(PackedElts.size() * 2, DAG.getUNDEF(MVT::i32));
PackedElts.resize(Elts.size(), DAG.getUNDEF(MVT::i32));

// Build final vector
EVT VecVT =
EVT::getVectorVT(*DAG.getContext(), MVT::i32, PackedElts.size());
return DAG.getBuildVector(VecVT, DL, PackedElts);
}

if (NumElements == 3) {
EVT IntStoreVT =
EVT::getIntegerVT(*DAG.getContext(), StoreVT.getStoreSizeInBits());
SDValue IntVData = DAG.getNode(ISD::BITCAST, DL, IntStoreVT, VData);

EVT WidenedStoreVT = EVT::getVectorVT(
*DAG.getContext(), StoreVT.getVectorElementType(), NumElements + 1);
EVT WidenedIntVT = EVT::getIntegerVT(*DAG.getContext(),
WidenedStoreVT.getStoreSizeInBits());
SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenedIntVT, IntVData);
return DAG.getNode(ISD::BITCAST, DL, WidenedStoreVT, ZExt);
}

assert(isTypeLegal(StoreVT));
return VData;
}
Expand Down

0 comments on commit 72ccec1

Please sign in to comment.