diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 162e96655df24..5f89f38266833 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -416,15 +416,23 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
         continue;
 
       Register SavedExec = I->getOperand(0).getReg();
-      if (SavedExec.isVirtual() && MRI->hasOneNonDBGUse(SavedExec) &&
-          MRI->use_instr_nodbg_begin(SavedExec)->getParent() ==
-              I->getParent()) {
-        LLVM_DEBUG(dbgs() << "Redundant EXEC COPY: " << *I << '\n');
-        LIS->RemoveMachineInstrFromMaps(*I);
-        I->eraseFromParent();
-        MRI->replaceRegWith(SavedExec, ExecReg);
-        LIS->removeInterval(SavedExec);
-        Changed = true;
+      if (SavedExec.isVirtual() && MRI->hasOneNonDBGUse(SavedExec)) {
+        MachineInstr *SingleExecUser = &*MRI->use_instr_nodbg_begin(SavedExec);
+        int Idx = SingleExecUser->findRegisterUseOperandIdx(SavedExec);
+        assert(Idx != -1 && "SavedExec use operand not found");
+        // Fold the copy only if its single user is in the same block, reads
+        // SavedExec through an explicit operand, and may legally take the
+        // copy's source operand in that position.
+        if (SingleExecUser->getParent() == I->getParent() &&
+            !SingleExecUser->getOperand(Idx).isImplicit() &&
+            TII->isOperandLegal(*SingleExecUser, Idx, &I->getOperand(1))) {
+          LLVM_DEBUG(dbgs() << "Redundant EXEC COPY: " << *I << '\n');
+          LIS->RemoveMachineInstrFromMaps(*I);
+          I->eraseFromParent();
+          MRI->replaceRegWith(SavedExec, ExecReg);
+          LIS->removeInterval(SavedExec);
+          Changed = true;
+        }
       }
       break;
     }
diff --git a/llvm/test/CodeGen/AMDGPU/opt_exec_copy_fold.mir b/llvm/test/CodeGen/AMDGPU/opt_exec_copy_fold.mir
new file mode 100644
index 0000000000000..4ebfa2cc9643a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/opt_exec_copy_fold.mir
@@ -0,0 +1,25 @@
+# RUN: llc -run-pass si-optimize-exec-masking-pre-ra -march=amdgcn -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
+---
+# The single user of %2 is V_CMP_EQ_U64_e64; check that the pass keeps the
+# COPY from $exec instead of folding $exec into that user.
+# GCN-LABEL: name: opt_exec_copy_fold
+# GCN: %2:vreg_64 = COPY $exec
+name: opt_exec_copy_fold
+tracksRegLiveness: true
+liveins:
+  - { reg: '$sgpr0_sgpr1' }
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+
+    %0:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr0_sgpr1, implicit $exec
+    %1:sreg_64 = V_CMP_NE_U32_e64 0, %0, implicit $exec
+    %2:vreg_64 = COPY $exec
+    %3:sreg_64 = V_CMP_EQ_U64_e64 %1, %2, implicit $exec
+    $scc = COPY %3
+    S_CBRANCH_SCC0 %bb.1, implicit $scc
+
+  bb.1:
+...
+
+