# Summary of this patch (free-form text ahead of the first "diff --git" header).
#
# llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
#   optimizeCmpAnd, a lambda inside SIInstrInfo::optimizeCompareInstr, gains a
#   third parameter, IsReversable. When it is true and the compare immediate
#   equals ExpectedValue ^ 1, the compare is treated as the reversed
#   condition: provided the S_AND result has exactly one non-debug use, the
#   S_BITCMP0_B32/B64 opcode is selected instead of S_BITCMP1_B32/B64.
#   The EQ and LG compare opcodes pass true; the GE and GT opcodes pass false
#   and therefore still bail out whenever the immediate differs from
#   ExpectedValue.
#
# llvm/test/CodeGen/AMDGPU/optimize-compare.mir
#   The existing test and_1_cmp_eq_0 is renamed to and_1_cmp_eq_2 and now
#   compares against 2; its CHECK lines still expect the S_AND and the
#   compare to remain. Nine tests are appended: six expect S_BITCMP0 and
#   three expect the input to be left unchanged.
#
# NOTE(review): "IsReversable" looks like a misspelling of "IsReversible";
# consider renaming it before landing.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d4f747dc9e8fe..64ba869ed2b91 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -8009,7 +8009,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
 
   const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
                                this](int64_t ExpectedValue,
-                                     unsigned SrcSize) -> bool {
+                                     unsigned SrcSize,
+                                     bool IsReversable) -> bool {
     // s_cmp_eq_u32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
     // s_cmp_eq_i32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
     // s_cmp_ge_u32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
@@ -8023,9 +8024,22 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     //
     // If result of the AND is unused except in the compare:
     // s_and_b(32|64) $src, 1 => s_bitcmp1_b(32|64) $src, 0
-
-    if (CmpValue != ExpectedValue)
-      return false;
+    // Reversed conditions, folded only when the AND has a single use:
+    // s_cmp_eq_u32 (s_and_b32 $src, 1), 0 => s_bitcmp0_b32 $src, 0
+    // s_cmp_eq_i32 (s_and_b32 $src, 1), 0 => s_bitcmp0_b32 $src, 0
+    // s_cmp_eq_u64 (s_and_b64 $src, 1), 0 => s_bitcmp0_b64 $src, 0
+    // s_cmp_lg_u32 (s_and_b32 $src, 1), 1 => s_bitcmp0_b32 $src, 0
+    // s_cmp_lg_i32 (s_and_b32 $src, 1), 1 => s_bitcmp0_b32 $src, 0
+    // s_cmp_lg_u64 (s_and_b64 $src, 1), 1 => s_bitcmp0_b64 $src, 0
+
+    bool IsReversedCC = false;
+    if (CmpValue != ExpectedValue) {
+      if (!IsReversable)
+        return false;
+      IsReversedCC = CmpValue == (ExpectedValue ^ 1); // flipped 0/1
+      if (!IsReversedCC)
+        return false;
+    }
 
     MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg);
     if (!Def || Def->getParent() != CmpInstr.getParent())
@@ -8041,6 +8055,10 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     else if (!Def->getOperand(2).isImm() || Def->getOperand(2).getImm() != 1)
       return false;
 
+    Register DefReg = Def->getOperand(0).getReg();
+    if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
+      return false; // reversed fold requires a single-use AND
+
     for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
          I != E; ++I) {
       if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
@@ -8052,17 +8070,20 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     SccDef->setIsDead(false);
     CmpInstr.eraseFromParent();
 
-    if (!MRI->use_nodbg_empty(Def->getOperand(0).getReg()))
+    if (!MRI->use_nodbg_empty(DefReg)) {
+      assert(!IsReversedCC); // reversed folds were limited to single-use ANDs
       return true;
+    }
 
     // Replace AND with unused result with a S_BITCMP.
     // TODO: If s_bitcmp can be used we are not limited to 1 and 0 but can
     //       process any power of 2.
     MachineBasicBlock *MBB = Def->getParent();
 
-    // TODO: Reverse conditions can use S_BITCMP0_*.
-    unsigned NewOpc = (SrcSize == 32) ? AMDGPU::S_BITCMP1_B32
-                                      : AMDGPU::S_BITCMP1_B64;
+    unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
+                                                     : AMDGPU::S_BITCMP1_B32
+                                      : IsReversedCC ? AMDGPU::S_BITCMP0_B64
+                                                     : AMDGPU::S_BITCMP1_B64;
 
     BuildMI(*MBB, Def, Def->getDebugLoc(), get(NewOpc))
       .add(*SrcOp)
@@ -8077,26 +8098,28 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     break;
   case AMDGPU::S_CMP_EQ_U32:
   case AMDGPU::S_CMP_EQ_I32:
-  case AMDGPU::S_CMP_GE_U32:
-  case AMDGPU::S_CMP_GE_I32:
   case AMDGPU::S_CMPK_EQ_U32:
   case AMDGPU::S_CMPK_EQ_I32:
+    return optimizeCmpAnd(1, 32, true);
+  case AMDGPU::S_CMP_GE_U32:
+  case AMDGPU::S_CMP_GE_I32:
   case AMDGPU::S_CMPK_GE_U32:
   case AMDGPU::S_CMPK_GE_I32:
-    return optimizeCmpAnd(1, 32);
+    return optimizeCmpAnd(1, 32, false);
   case AMDGPU::S_CMP_EQ_U64:
-    return optimizeCmpAnd(1, 64);
+    return optimizeCmpAnd(1, 64, true);
   case AMDGPU::S_CMP_LG_U32:
   case AMDGPU::S_CMP_LG_I32:
-  case AMDGPU::S_CMP_GT_U32:
-  case AMDGPU::S_CMP_GT_I32:
   case AMDGPU::S_CMPK_LG_U32:
   case AMDGPU::S_CMPK_LG_I32:
+    return optimizeCmpAnd(0, 32, true);
+  case AMDGPU::S_CMP_GT_U32:
+  case AMDGPU::S_CMP_GT_I32:
   case AMDGPU::S_CMPK_GT_U32:
   case AMDGPU::S_CMPK_GT_I32:
-    return optimizeCmpAnd(0, 32);
+    return optimizeCmpAnd(0, 32, false);
   case AMDGPU::S_CMP_LG_U64:
-    return optimizeCmpAnd(0, 64);
+    return optimizeCmpAnd(0, 64, true);
   }
 
   return false;
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-compare.mir b/llvm/test/CodeGen/AMDGPU/optimize-compare.mir
index d38bb221553eb..b38a2252e3793 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-compare.mir
+++ b/llvm/test/CodeGen/AMDGPU/optimize-compare.mir
@@ -408,14 +408,14 @@ body: |
 ...
 
 ---
-name: and_1_cmp_eq_0
+name: and_1_cmp_eq_2
 body: |
-  ; GCN-LABEL: name: and_1_cmp_eq_0
+  ; GCN-LABEL: name: and_1_cmp_eq_2
   ; GCN: bb.0:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
   ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def dead $scc
-  ; GCN:   S_CMP_EQ_U32 killed [[S_AND_B32_]], 0, implicit-def $scc
+  ; GCN:   S_CMP_EQ_U32 killed [[S_AND_B32_]], 2, implicit-def $scc
   ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN:   S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -428,7 +428,7 @@ body: |
 
     %0:sreg_32 = COPY $sgpr0
     %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
-    S_CMP_EQ_U32 killed %1:sreg_32, 0, implicit-def $scc
+    S_CMP_EQ_U32 killed %1:sreg_32, 2, implicit-def $scc
     S_CBRANCH_SCC0 %bb.2, implicit $scc
     S_BRANCH %bb.1
 
@@ -1047,3 +1047,296 @@ body: |
     S_ENDPGM 0
 
 ...
+# s_cmp_eq_u32 (s_and_b32 $src, 1), 0 => s_bitcmp0_b32 $src, 0
+---
+name: and_1_cmp_eq_u32_0
+body: |
+  ; GCN-LABEL: name: and_1_cmp_eq_u32_0
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GCN:   S_BITCMP0_B32 killed [[COPY]], 0, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0, $vgpr0_vgpr1
+
+    %0:sreg_32 = COPY $sgpr0
+    %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+    S_CMP_EQ_U32 killed %1:sreg_32, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    S_ENDPGM 0
+
+...
+# s_cmp_eq_i32 (s_and_b32 $src, 1), 0 => s_bitcmp0_b32 $src, 0
+---
+name: and_1_cmp_eq_i32_0
+body: |
+  ; GCN-LABEL: name: and_1_cmp_eq_i32_0
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GCN:   S_BITCMP0_B32 killed [[COPY]], 0, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0, $vgpr0_vgpr1
+
+    %0:sreg_32 = COPY $sgpr0
+    %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+    S_CMP_EQ_I32 killed %1:sreg_32, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    S_ENDPGM 0
+
+...
+# s_cmp_eq_u64 (s_and_b64 $src, 1), 0 => s_bitcmp0_b64 $src, 0
+---
+name: and_1_cmp_eq_u64_0
+body: |
+  ; GCN-LABEL: name: and_1_cmp_eq_u64_0
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+  ; GCN:   S_BITCMP0_B64 killed [[COPY]], 0, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+
+    %0:sreg_64 = COPY $sgpr0_sgpr1
+    %1:sreg_64 = S_AND_B64 1, killed %0, implicit-def dead $scc
+    S_CMP_EQ_U64 killed %1:sreg_64, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    S_ENDPGM 0
+
+...
+# s_cmp_lg_u32 (s_and_b32 $src, 1), 1 => s_bitcmp0_b32 $src, 0
+---
+name: and_1_cmp_lg_u32_1
+body: |
+  ; GCN-LABEL: name: and_1_cmp_lg_u32_1
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GCN:   S_BITCMP0_B32 killed [[COPY]], 0, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0, $vgpr0_vgpr1
+
+    %0:sreg_32 = COPY $sgpr0
+    %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+    S_CMP_LG_U32 killed %1:sreg_32, 1, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    S_ENDPGM 0
+
+...
+# s_cmp_lg_i32 (s_and_b32 $src, 1), 1 => s_bitcmp0_b32 $src, 0
+---
+name: and_1_cmp_lg_i32_1
+body: |
+  ; GCN-LABEL: name: and_1_cmp_lg_i32_1
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GCN:   S_BITCMP0_B32 killed [[COPY]], 0, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0, $vgpr0_vgpr1
+
+    %0:sreg_32 = COPY $sgpr0
+    %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+    S_CMP_LG_I32 killed %1:sreg_32, 1, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    S_ENDPGM 0
+
+...
+# s_cmp_lg_u64 (s_and_b64 $src, 1), 1 => s_bitcmp0_b64 $src, 0
+---
+name: and_1_cmp_lg_u64_1
+body: |
+  ; GCN-LABEL: name: and_1_cmp_lg_u64_1
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+  ; GCN:   S_BITCMP0_B64 killed [[COPY]], 0, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+
+    %0:sreg_64 = COPY $sgpr0_sgpr1
+    %1:sreg_64 = S_AND_B64 1, killed %0, implicit-def dead $scc
+    S_CMP_LG_U64 killed %1:sreg_64, 1, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    S_ENDPGM 0
+
+...
+# Negative test: the AND result is also used by S_NOP in bb.1; nothing is folded.
+---
+name: and_1_cmp_eq_u32_0_used_and
+body: |
+  ; GCN-LABEL: name: and_1_cmp_eq_u32_0_used_and
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def dead $scc
+  ; GCN:   S_CMP_EQ_U32 killed [[S_AND_B32_]], 0, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN:   S_NOP 0, implicit [[S_AND_B32_]]
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0, $vgpr0_vgpr1
+
+    %0:sreg_32 = COPY $sgpr0
+    %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+    S_CMP_EQ_U32 killed %1:sreg_32, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+    S_NOP 0, implicit %1
+  bb.2:
+    S_ENDPGM 0
+
+...
+# Negative test: s_cmp_ge_u32 against 0 is not treated as reversible; nothing is folded.
+---
+name: and_1_cmp_ge_u32_0
+body: |
+  ; GCN-LABEL: name: and_1_cmp_ge_u32_0
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def dead $scc
+  ; GCN:   S_CMP_GE_U32 killed [[S_AND_B32_]], 0, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0, $vgpr0_vgpr1
+
+    %0:sreg_32 = COPY $sgpr0
+    %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+    S_CMP_GE_U32 killed %1:sreg_32, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    S_ENDPGM 0
+
+...
+# Negative test: s_cmp_gt_u32 against 1 is not treated as reversible; nothing is folded.
+---
+name: and_1_cmp_gt_u32_1
+body: |
+  ; GCN-LABEL: name: and_1_cmp_gt_u32_1
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def dead $scc
+  ; GCN:   S_CMP_GT_U32 killed [[S_AND_B32_]], 1, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0, $vgpr0_vgpr1
+
+    %0:sreg_32 = COPY $sgpr0
+    %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+    S_CMP_GT_U32 killed %1:sreg_32, 1, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    S_ENDPGM 0
+
+...