111 changes: 75 additions & 36 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3573,54 +3573,93 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
     assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");
 
     Register DstReg = UseMI.getOperand(0).getReg();
-    unsigned OpSize = getOpSize(UseMI, 0);
-    bool Is16Bit = OpSize == 2;
-    bool Is64Bit = OpSize == 8;
-    bool isVGPRCopy = RI.isVGPR(*MRI, DstReg);
-    unsigned NewOpc = isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO
-                                           : AMDGPU::V_MOV_B32_e32
-                                 : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
-                                           : AMDGPU::S_MOV_B32;
-
-    std::optional<int64_t> SubRegImm =
-        extractSubregFromImm(Imm, UseMI.getOperand(1).getSubReg());
-
-    APInt Imm(Is64Bit ? 64 : 32, *SubRegImm,
-              /*isSigned=*/true, /*implicitTrunc=*/true);
-
-    if (RI.isAGPR(*MRI, DstReg)) {
-      if (Is64Bit || !isInlineConstant(Imm))
-        return false;
-      NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
-    }
+    Register UseSubReg = UseMI.getOperand(1).getSubReg();
+
+    const TargetRegisterClass *DstRC = RI.getRegClassForReg(*MRI, DstReg);
+
+    bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
+                   RI.getSubRegIdxSize(UseSubReg) == 16;
 
     if (Is16Bit) {
-      if (isVGPRCopy)
+      if (RI.hasVGPRs(DstRC))
         return false; // Do not clobber vgpr_hi16
 
-      if (DstReg.isVirtual() && UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
+      if (DstReg.isVirtual() && UseSubReg != AMDGPU::lo16)
         return false;
-
-      UseMI.getOperand(0).setSubReg(0);
-      if (DstReg.isPhysical()) {
-        DstReg = RI.get32BitRegister(DstReg);
-        UseMI.getOperand(0).setReg(DstReg);
-      }
-      assert(UseMI.getOperand(1).getReg().isVirtual());
     }
 
     MachineFunction *MF = UseMI.getMF();
-    const MCInstrDesc &NewMCID = get(NewOpc);
-    const TargetRegisterClass *NewDefRC = getRegClass(NewMCID, 0, &RI, *MF);
 
-    if (DstReg.isPhysical()) {
-      if (!NewDefRC->contains(DstReg))
-        return false;
-    } else if (!MRI->constrainRegClass(DstReg, NewDefRC))
+    unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;
+    MCRegister MovDstPhysReg =
+        DstReg.isPhysical() ? DstReg.asMCReg() : MCRegister();
+
+    std::optional<int64_t> SubRegImm = extractSubregFromImm(Imm, UseSubReg);
+
+    // TODO: Try to fold with AMDGPU::V_MOV_B16_t16_e64
+    for (unsigned MovOp :
+         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
+          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {
+      const MCInstrDesc &MovDesc = get(MovOp);
+
+      const TargetRegisterClass *MovDstRC = getRegClass(MovDesc, 0, &RI, *MF);
+      if (Is16Bit) {
+        // We just need to find a correctly sized register class, so the
+        // subregister index compatibility doesn't matter since we're statically
+        // extracting the immediate value.
+        MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);
+        if (!MovDstRC)
+          continue;
+
+        if (MovDstPhysReg) {
+          // FIXME: We probably should not do this. If there is a live value in
+          // the high half of the register, it will be corrupted.
+          MovDstPhysReg =
+              RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);
+          if (!MovDstPhysReg)
+            continue;
+        }
+      }
+
+      // Result class isn't the right size, try the next instruction.
+      if (MovDstPhysReg) {
+        if (!MovDstRC->contains(MovDstPhysReg))
+          return false;
+      } else if (!MRI->constrainRegClass(DstReg, MovDstRC)) {
+        // TODO: This will be overly conservative in the case of 16-bit virtual
+        // SGPRs. We could hack up the virtual register uses to use a compatible
+        // 32-bit class.
+        continue;
+      }
+
+      const MCOperandInfo &OpInfo = MovDesc.operands()[1];
+
+      // Ensure the interpreted immediate value is a valid operand in the new
+      // mov.
+      //
+      // FIXME: isImmOperandLegal should have form that doesn't require existing
+      // MachineInstr or MachineOperand
+      if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&
+          !isInlineConstant(*SubRegImm, OpInfo.OperandType))
+        break;
+
+      NewOpc = MovOp;
+      break;
+    }
+
+    if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)
       return false;
 
+    if (Is16Bit) {
+      UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
+      if (MovDstPhysReg)
+        UseMI.getOperand(0).setReg(MovDstPhysReg);
+      assert(UseMI.getOperand(1).getReg().isVirtual());
+    }
+
+    const MCInstrDesc &NewMCID = get(NewOpc);
     UseMI.setDesc(NewMCID);
-    UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
+    UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
     UseMI.addImplicitDefUseOperands(*MF);
     return true;
   }
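Instead of committing to a single mov opcode up front, the fold now scans a fixed candidate list and takes the first entry that fits. A deliberately simplified sketch of that first-fit shape (`MovCandidate`, `pickMov`, and both boolean fields are hypothetical stand-ins for the `MCInstrDesc` and register-class queries above; it models only operand size and literal legality, not the register-bank constraints or the lo16 super-class promotion the real loop also applies):

```cpp
#include <initializer_list>
#include <optional>

// Hypothetical stand-in for one candidate mov and the two properties this
// sketch checks; the real loop derives both from the MCInstrDesc instead.
struct MovCandidate {
  const char *Name;  // Candidate opcode, identified by name only here.
  unsigned DstBits;  // Size of the candidate's destination register class.
  bool TakesLiteral; // Whether the source operand may be a raw literal.
};

// First-fit scan mirroring the loop structure in the patch: a size mismatch
// means "try the next candidate" (continue), while an unencodable immediate
// on an otherwise matching candidate means "give up on the fold" (break).
std::optional<const char *> pickMov(unsigned NeedBits, bool ImmIsInline) {
  // Illustrative entries only; the real list is S_MOV_B32, V_MOV_B32_e32,
  // S_MOV_B64, V_MOV_B64_PSEUDO, and V_ACCVGPR_WRITE_B32_e64.
  for (const MovCandidate &C :
       {MovCandidate{"MOV_32_ANY_IMM", 32, true},
        MovCandidate{"MOV_64_INLINE_ONLY", 64, false}}) {
    if (C.DstBits != NeedBits)
      continue; // Result class isn't the right size; try the next one.
    if (!C.TakesLiteral && !ImmIsInline)
      break; // Immediate can't be encoded in the new mov; stop searching.
    return C.Name;
  }
  return std::nullopt; // Caller treats this like INSTRUCTION_LIST_END.
}
```

On failure every path leaves the instruction untouched, matching the patch: the `COPY` is only rewritten once a legal opcode and a legal interpretation of the immediate are both in hand.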
69 changes: 63 additions & 6 deletions llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir
@@ -188,8 +188,7 @@ body: |
 
     ; GCN-LABEL: name: fold_sreg_64_to_sreg_64
    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1311768467750121200
-    ; GCN-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
-    ; GCN-NEXT: SI_RETURN_TO_EPILOG [[S_MOV_B]]
+    ; GCN-NEXT: SI_RETURN_TO_EPILOG [[S_MOV_B64_]]
     %0:sreg_64 = S_MOV_B64 1311768467750121200
     %1:sreg_64 = COPY killed %0
     SI_RETURN_TO_EPILOG %1
@@ -761,8 +760,8 @@ body: |
   bb.0:
     ; GCN-LABEL: name: fold_av_mov_b32_imm_pseudo_inlineimm_to_av
     ; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY killed [[AV_MOV_]]
-    ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[COPY]]
+    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 64, implicit $exec
+    ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[V_MOV_B32_e32_]]
     %0:av_32 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
     %1:av_32 = COPY killed %0
     SI_RETURN_TO_EPILOG implicit %1
@@ -800,9 +799,67 @@ body: |
   bb.0:
     ; GCN-LABEL: name: fold_av_mov_b64_imm_pseudo_inlineimm_to_av
     ; GCN: [[AV_MOV_:%[0-9]+]]:av_64_align2 = AV_MOV_B64_IMM_PSEUDO 64, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY killed [[AV_MOV_]]
-    ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[COPY]]
+    ; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 64, implicit $exec
+    ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[V_MOV_B]]
     %0:av_64_align2 = AV_MOV_B64_IMM_PSEUDO 64, implicit $exec
     %1:av_64_align2 = COPY killed %0
     SI_RETURN_TO_EPILOG implicit %1
 ...
+
+---
+name: fold_simm_16_sub_to_lo_from_mov_64_virt_sgpr16
+body: |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_simm_16_sub_to_lo_from_mov_64_virt_sgpr16
+    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 64
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_lo16 = COPY killed [[S_MOV_B64_]].lo16
+    ; GCN-NEXT: SI_RETURN_TO_EPILOG [[COPY]]
+    %0:sreg_64 = S_MOV_B64 64
+    %1:sgpr_lo16 = COPY killed %0.lo16
+    SI_RETURN_TO_EPILOG %1
+
+...
+---
+name: fold_simm_16_sub_to_hi_from_mov_64_inline_imm_virt_sgpr16
+body: |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_simm_16_sub_to_hi_from_mov_64_inline_imm_virt_sgpr16
+    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 64
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_lo16 = COPY killed [[S_MOV_B64_]].hi16
+    ; GCN-NEXT: SI_RETURN_TO_EPILOG [[COPY]]
+    %0:sreg_64 = S_MOV_B64 64
+    %1:sgpr_lo16 = COPY killed %0.hi16
+    SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name: fold_simm_16_sub_to_lo_from_mov_64_phys_sgpr16_lo
+body: |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_simm_16_sub_to_lo_from_mov_64_phys_sgpr16_lo
+    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 64
+    ; GCN-NEXT: $sgpr0 = S_MOV_B32 64
+    ; GCN-NEXT: SI_RETURN_TO_EPILOG $sgpr0_lo16
+    %0:sreg_64 = S_MOV_B64 64
+    $sgpr0_lo16 = COPY killed %0.lo16
+    SI_RETURN_TO_EPILOG $sgpr0_lo16
+
+...
+---
+name: fold_simm_16_sub_to_hi_from_mov_64_inline_imm_phys_sgpr16_lo
+body: |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_simm_16_sub_to_hi_from_mov_64_inline_imm_phys_sgpr16_lo
+    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 64
+    ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
+    ; GCN-NEXT: SI_RETURN_TO_EPILOG $sgpr0_lo16
+    %0:sreg_64 = S_MOV_B64 64
+    $sgpr0_lo16 = COPY killed %0.hi16
+    SI_RETURN_TO_EPILOG $sgpr0_lo16
+
+...
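The new sgpr16 tests pin down the arithmetic behind `extractSubregFromImm`: reading a 16-bit subregister of an immediate def is a shift-and-mask of the value. A minimal model, assuming lo16 maps to bit offset 0 and hi16 to offset 16 (the helper name and parameters are ours, and this sketch simply zero-extends the field):

```cpp
#include <cassert>
#include <cstdint>
#include <optional>

// Model of reading a subregister of a known immediate: shift the value down
// to the field's offset, then mask to the field's width.
std::optional<int64_t> extractImmField(int64_t Imm, unsigned Offset,
                                       unsigned Width) {
  assert(Width >= 1 && Offset + Width <= 64);
  if (Width == 64)
    return Imm; // Full-width read: the use sees the whole value.
  uint64_t Field =
      (static_cast<uint64_t>(Imm) >> Offset) & ((uint64_t(1) << Width) - 1);
  return static_cast<int64_t>(Field);
}

int main() {
  // Mirrors the tests above for %0:sreg_64 = S_MOV_B64 64:
  assert(*extractImmField(64, /*Offset=*/0, /*Width=*/16) == 64);  // %0.lo16
  assert(*extractImmField(64, /*Offset=*/16, /*Width=*/16) == 0);  // %0.hi16
  return 0;
}
```

This is why the `.lo16` physical case folds to `$sgpr0 = S_MOV_B32 64` while the `.hi16` case folds to `$sgpr0 = S_MOV_B32 0`; note that both rewrite all of `$sgpr0`, which is exactly the high-half clobber called out by the FIXME in the C++ hunk.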