Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 20 additions & 6 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,13 @@ std::pair<Register, Register> RegBankLegalizeHelper::unpackAExt(Register Reg) {
return {Lo.getReg(0), Hi.getReg(0)};
}

// Splits Reg with unpackAExt and then builds a trunc to SgprRB_S16 for each
// of the two resulting registers. Returns the truncated pair in the same
// {first, second} order that unpackAExt produced; the first element's trunc
// is built before the second's.
std::pair<Register, Register>
RegBankLegalizeHelper::unpackAExtTruncS16(Register Reg) {
  std::pair<Register, Register> Unpacked = unpackAExt(Reg);
  Register LoTrunc = B.buildTrunc(SgprRB_S16, Unpacked.first).getReg(0);
  Register HiTrunc = B.buildTrunc(SgprRB_S16, Unpacked.second).getReg(0);
  return {LoTrunc, HiTrunc};
}

void RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &MI) {
Register Lo, Hi;
switch (MI.getOpcode()) {
Expand Down Expand Up @@ -629,14 +636,21 @@ void RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) {
void RegBankLegalizeHelper::lowerSplitTo16(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
assert(MRI.getType(Dst) == V2S16);
auto [Op1Lo32, Op1Hi32] = unpackAExt(MI.getOperand(1).getReg());
auto [Op2Lo32, Op2Hi32] = unpackAExt(MI.getOperand(2).getReg());
unsigned Opc = MI.getOpcode();
auto Flags = MI.getFlags();
auto Op1Lo = B.buildTrunc(SgprRB_S16, Op1Lo32);
auto Op1Hi = B.buildTrunc(SgprRB_S16, Op1Hi32);
auto Op2Lo = B.buildTrunc(SgprRB_S16, Op2Lo32);
auto Op2Hi = B.buildTrunc(SgprRB_S16, Op2Hi32);

if (MI.getNumOperands() == 2) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this here? These don't require splitting to handle?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is for unary opcodes; the old code below only handled binary ones.

auto [Op1Lo, Op1Hi] = unpackAExtTruncS16(MI.getOperand(1).getReg());
auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Op1Lo}, Flags);
auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Op1Hi}, Flags);
B.buildMergeLikeInstr(Dst, {Lo, Hi});
MI.eraseFromParent();
return;
}

assert(MI.getNumOperands() == 3);
auto [Op1Lo, Op1Hi] = unpackAExtTruncS16(MI.getOperand(1).getReg());
auto [Op2Lo, Op2Hi] = unpackAExtTruncS16(MI.getOperand(2).getReg());
auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Op1Lo, Op2Lo}, Flags);
auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Op1Hi, Op2Hi}, Flags);
B.buildMergeLikeInstr(Dst, {Lo, Hi});
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ class RegBankLegalizeHelper {
std::pair<Register, Register> unpackZExt(Register Reg);
std::pair<Register, Register> unpackSExt(Register Reg);
std::pair<Register, Register> unpackAExt(Register Reg);
std::pair<Register, Register> unpackAExtTruncS16(Register Reg);
void lowerUnpackBitShift(MachineInstr &MI);
void lowerV_BFE(MachineInstr &MI);
void lowerS_BFE(MachineInstr &MI);
Expand Down
19 changes: 19 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -951,6 +951,25 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32, VgprV2S32}}})
.Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32, VgprV2S32}}});

// FNEG and FABS are either folded as source modifiers or can be selected as
// bitwise XOR and AND with a mask. XOR and AND are available on SALU, but for
// targets without SALU float we still select them as VGPR since there would
// be no real SGPR use.
addRulesForGOpcs({G_FNEG, G_FABS}, Standard)
.Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasSALUFloat)
.Uni(S16, {{Sgpr16}, {Sgpr16}}, hasSALUFloat)
.Div(S16, {{Vgpr16}, {Vgpr16}})
.Uni(S32, {{UniInVgprS32}, {Vgpr32}}, !hasSALUFloat)
.Uni(S32, {{Sgpr32}, {Sgpr32}}, hasSALUFloat)
.Div(S32, {{Vgpr32}, {Vgpr32}})
.Uni(S64, {{UniInVgprS64}, {Vgpr64}})
.Div(S64, {{Vgpr64}, {Vgpr64}})
.Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}}, !hasSALUFloat)
.Uni(V2S16, {{SgprV2S16}, {SgprV2S16}, ScalarizeToS16}, hasSALUFloat)
.Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
.Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
.Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});

addRulesForGOpcs({G_FPTOUI})
.Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
.Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);
Expand Down
Loading
Loading