// Patch summary (SIShrinkInstructions.cpp plus the new test shrink-i32-kimm.mir):
// - isKImmOperand and isKImmOrKUImmOperand now apply isInt<16> to
//   SignExtend64(Src.getImm(), 32) instead of to the raw getImm() value.
// - shrinkScalarCompare binds Src1 as a non-const reference so it can call
//   setImm on it: in the !HasUImm path that switches the opcode to
//   S_CMPK_EQ_I32 / S_CMPK_LG_I32, and in the path guarded by
//   !TII->sopkIsZext(SOPKOpc) just before MI.setDesc(NewDesc).
// - runOnMachineFunction stores the sign-extended immediate when it selects
//   S_ADDK_I32 / S_MULK_I32 and when it selects S_MOVK_I32.
// - The new MIR test feeds the immediate 4294965248 to S_MOV_B32, S_CMP_EQ_U32,
//   S_CMP_GT_I32, S_ADD_I32 and S_MUL_I32 and checks for the corresponding
//   S_MOVK/S_CMPK/S_ADDK/S_MULK instruction with the immediate -2048.
// NOTE(review): the !HasUImm hunk calls SignExtend32(..., 32) while every other
// hunk calls SignExtend64(..., 32) -- presumably equivalent once the result is
// widened by setImm; confirm, and consider using one helper consistently.
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 88c75a0f86a6c..856121be78031 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -159,7 +159,7 @@ bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const { } bool SIShrinkInstructions::isKImmOperand(const MachineOperand &Src) const { - return isInt<16>(Src.getImm()) && + return isInt<16>(SignExtend64(Src.getImm(), 32)) && !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo()); } @@ -170,7 +170,7 @@ bool SIShrinkInstructions::isKUImmOperand(const MachineOperand &Src) const { bool SIShrinkInstructions::isKImmOrKUImmOperand(const MachineOperand &Src, bool &IsUnsigned) const { - if (isInt<16>(Src.getImm())) { + if (isInt<16>(SignExtend64(Src.getImm(), 32))) { IsUnsigned = false; return !TII->isInlineConstant(Src); } @@ -221,7 +221,7 @@ void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const { if (!Src0.isReg()) return; - const MachineOperand &Src1 = MI.getOperand(1); + MachineOperand &Src1 = MI.getOperand(1); if (!Src1.isImm()) return; @@ -237,6 +237,7 @@ void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const { if (!HasUImm) { SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ? AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32; + Src1.setImm(SignExtend32(Src1.getImm(), 32)); } MI.setDesc(TII->get(SOPKOpc)); @@ -249,6 +250,8 @@ void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const { if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(Src1)) || (!TII->sopkIsZext(SOPKOpc) && isKImmOperand(Src1))) { + if (!TII->sopkIsZext(SOPKOpc)) + Src1.setImm(SignExtend64(Src1.getImm(), 32)); MI.setDesc(NewDesc); } } @@ -838,6 +841,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ? 
AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32; + Src1->setImm(SignExtend64(Src1->getImm(), 32)); MI.setDesc(TII->get(Opc)); MI.tieOperands(0, 1); } @@ -857,9 +861,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { if (Src.isImm() && Dst.getReg().isPhysical()) { int32_t ReverseImm; - if (isKImmOperand(Src)) + if (isKImmOperand(Src)) { MI.setDesc(TII->get(AMDGPU::S_MOVK_I32)); - else if (isReverseInlineImm(Src, ReverseImm)) { + Src.setImm(SignExtend64(Src.getImm(), 32)); + } else if (isReverseInlineImm(Src, ReverseImm)) { MI.setDesc(TII->get(AMDGPU::S_BREV_B32)); Src.setImm(ReverseImm); } diff --git a/llvm/test/CodeGen/AMDGPU/shrink-i32-kimm.mir b/llvm/test/CodeGen/AMDGPU/shrink-i32-kimm.mir new file mode 100644 index 0000000000000..e2198faf13f71 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/shrink-i32-kimm.mir @@ -0,0 +1,57 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-shrink-instructions -o - %s | FileCheck -check-prefix=GCN %s + +--- +name: shrink_kimm32_mov_b32 +tracksRegLiveness: true +body: | + bb.0: + + ; GCN-LABEL: name: shrink_kimm32_mov_b32 + ; GCN: $sgpr0 = S_MOVK_I32 -2048 + $sgpr0 = S_MOV_B32 4294965248 +... + +--- +name: shrink_kimm32_cmp_eq_u32 +tracksRegLiveness: true +body: | + bb.0: + + ; GCN-LABEL: name: shrink_kimm32_cmp_eq_u32 + ; GCN: S_CMPK_EQ_I32 undef $sgpr0, -2048, implicit-def $scc + S_CMP_EQ_U32 undef $sgpr0, 4294965248, implicit-def $scc +... + +--- +name: shrink_kimm32_cmp_gt_i32 +tracksRegLiveness: true +body: | + bb.0: + + ; GCN-LABEL: name: shrink_kimm32_cmp_gt_i32 + ; GCN: S_CMPK_GT_I32 undef $sgpr0, -2048, implicit-def $scc + S_CMP_GT_I32 undef $sgpr0, 4294965248, implicit-def $scc +... 
+ +--- +name: shrink_kimm32_add_i32 +tracksRegLiveness: true +body: | + bb.0: + + ; GCN-LABEL: name: shrink_kimm32_add_i32 + ; GCN: $sgpr0 = S_ADDK_I32 undef $sgpr0, -2048, implicit-def $scc + $sgpr0 = S_ADD_I32 undef $sgpr0, 4294965248, implicit-def $scc +... + +--- +name: shrink_kimm32_mul_i32 +tracksRegLiveness: true +body: | + bb.0: + + ; GCN-LABEL: name: shrink_kimm32_mul_i32 + ; GCN: $sgpr0 = S_MULK_I32 undef $sgpr0, -2048, implicit-def $scc + $sgpr0 = S_MUL_I32 undef $sgpr0, 4294965248, implicit-def $scc +...