Skip to content

Commit 73f2bf8

Browse files
AMDGPU/GlobalISel: RegBankLegalize rules for G_FABS and G_FNEG
1 parent 9e70882 commit 73f2bf8

File tree

5 files changed

+683
-6
lines changed

5 files changed

+683
-6
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,13 @@ std::pair<Register, Register> RegBankLegalizeHelper::unpackAExt(Register Reg) {
437437
return {Lo.getReg(0), Hi.getReg(0)};
438438
}
439439

440+
// Unpacks Reg into a (Lo, Hi) register pair via unpackAExt, then truncates
// each of the two results to SgprRB_S16 with B.buildTrunc. Returns the
// truncated registers as {Lo, Hi}, in that order.
std::pair<Register, Register>
441+
RegBankLegalizeHelper::unpackAExtTruncS16(Register Reg) {
442+
auto [Lo32, Hi32] = unpackAExt(Reg);
443+
return {B.buildTrunc(SgprRB_S16, Lo32).getReg(0),
444+
B.buildTrunc(SgprRB_S16, Hi32).getReg(0)};
445+
}
446+
440447
void RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &MI) {
441448
Register Lo, Hi;
442449
switch (MI.getOpcode()) {
@@ -629,14 +636,21 @@ void RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) {
629636
void RegBankLegalizeHelper::lowerSplitTo16(MachineInstr &MI) {
630637
Register Dst = MI.getOperand(0).getReg();
631638
assert(MRI.getType(Dst) == V2S16);
632-
auto [Op1Lo32, Op1Hi32] = unpackAExt(MI.getOperand(1).getReg());
633-
auto [Op2Lo32, Op2Hi32] = unpackAExt(MI.getOperand(2).getReg());
634639
unsigned Opc = MI.getOpcode();
635640
auto Flags = MI.getFlags();
636-
auto Op1Lo = B.buildTrunc(SgprRB_S16, Op1Lo32);
637-
auto Op1Hi = B.buildTrunc(SgprRB_S16, Op1Hi32);
638-
auto Op2Lo = B.buildTrunc(SgprRB_S16, Op2Lo32);
639-
auto Op2Hi = B.buildTrunc(SgprRB_S16, Op2Hi32);
641+
642+
if (MI.getNumOperands() == 2) {
643+
auto [Op1Lo, Op1Hi] = unpackAExtTruncS16(MI.getOperand(1).getReg());
644+
auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Op1Lo}, Flags);
645+
auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Op1Hi}, Flags);
646+
B.buildMergeLikeInstr(Dst, {Lo, Hi});
647+
MI.eraseFromParent();
648+
return;
649+
}
650+
651+
assert(MI.getNumOperands() == 3);
652+
auto [Op1Lo, Op1Hi] = unpackAExtTruncS16(MI.getOperand(1).getReg());
653+
auto [Op2Lo, Op2Hi] = unpackAExtTruncS16(MI.getOperand(2).getReg());
640654
auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Op1Lo, Op2Lo}, Flags);
641655
auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Op1Hi, Op2Hi}, Flags);
642656
B.buildMergeLikeInstr(Dst, {Lo, Hi});

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ class RegBankLegalizeHelper {
118118
std::pair<Register, Register> unpackZExt(Register Reg);
119119
std::pair<Register, Register> unpackSExt(Register Reg);
120120
std::pair<Register, Register> unpackAExt(Register Reg);
121+
std::pair<Register, Register> unpackAExtTruncS16(Register Reg);
121122
void lowerUnpackBitShift(MachineInstr &MI);
122123
void lowerV_BFE(MachineInstr &MI);
123124
void lowerS_BFE(MachineInstr &MI);

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -951,6 +951,25 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
951951
.Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32, VgprV2S32}}})
952952
.Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32, VgprV2S32}}});
953953

954+
// FNEG and FABS are either folded as source modifiers or selected as bitwise
955+
// XOR and AND with a mask, respectively. XOR and AND are available on SALU,
956+
// but for targets without SALU float we still select them as VGPR since
957+
// there would be no real SGPR use.
958+
addRulesForGOpcs({G_FNEG, G_FABS}, Standard)
959+
.Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasSALUFloat)
960+
.Uni(S16, {{Sgpr16}, {Sgpr16}}, hasSALUFloat)
961+
.Div(S16, {{Vgpr16}, {Vgpr16}})
962+
.Uni(S32, {{UniInVgprS32}, {Vgpr32}}, !hasSALUFloat)
963+
.Uni(S32, {{Sgpr32}, {Sgpr32}}, hasSALUFloat)
964+
.Div(S32, {{Vgpr32}, {Vgpr32}})
965+
.Uni(S64, {{UniInVgprS64}, {Vgpr64}})
966+
.Div(S64, {{Vgpr64}, {Vgpr64}})
967+
.Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}}, !hasSALUFloat)
968+
.Uni(V2S16, {{SgprV2S16}, {SgprV2S16}, ScalarizeToS16}, hasSALUFloat)
969+
.Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
970+
.Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
971+
.Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});
972+
954973
addRulesForGOpcs({G_FPTOUI})
955974
.Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
956975
.Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);

0 commit comments

Comments
 (0)