From eb1092ada32d6855dcb4f763ce48ede21f4d7441 Mon Sep 17 00:00:00 2001 From: alex-t Date: Mon, 18 May 2020 23:40:27 +0300 Subject: [PATCH] [AMDGPU] Fix for the lost CarryOut/CarryIn register operands in S_ADD/SUB_CO_PSEUDO. Summary: This fixes the bug from 5b898bddff51 in which the carry-in and carry-out registers were lost when lowering S_ADD/SUB_CO_PSEUDO. Reviewers: rampitec, arsenm Reviewed By: arsenm Subscribers: msearles, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D80158 --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 18 ++++--- .../AMDGPU/s_add_co_pseudo_lowering.mir | 50 +++++++++++++++++++ 2 files changed, 62 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 18d08362512d4..5392abfa8f6e5 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5248,18 +5248,24 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst, ?
AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64; const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); - Register DummyCReg = MRI.createVirtualRegister(CarryRC); - Register CarryReg = MRI.createVirtualRegister(CarryRC); + + Register CarryInReg = Inst.getOperand(4).getReg(); + if (!MRI.constrainRegClass(CarryInReg, CarryRC)) { + Register NewCarryReg = MRI.createVirtualRegister(CarryRC); + BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY), NewCarryReg) + .addReg(CarryInReg); + } + + Register CarryOutReg = Inst.getOperand(1).getReg(); + Register DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass( MRI.getRegClass(Inst.getOperand(0).getReg()))); - BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY), CarryReg) - .addReg(Inst.getOperand(4).getReg()); MachineInstr *CarryOp = BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(Opc), DestReg) - .addReg(DummyCReg, RegState::Define | RegState::Dead) + .addReg(CarryOutReg, RegState::Define) .add(Inst.getOperand(2)) .add(Inst.getOperand(3)) - .addReg(CarryReg, RegState::Kill) + .addReg(CarryInReg) .addImm(0); legalizeOperands(*CarryOp); MRI.replaceRegWith(Inst.getOperand(0).getReg(), DestReg); diff --git a/llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir b/llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir new file mode 100644 index 0000000000000..40bdf8e643175 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir @@ -0,0 +1,50 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fix-sgpr-copies %s -o - | FileCheck -check-prefix=GCN %s +--- +name: s_add_co_pseudo_test +tracksRegLiveness: true +body: | + + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0, $sgpr1, $sgpr2 + ; GCN-LABEL: name: s_add_co_pseudo_test + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0, $sgpr1, $sgpr2 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GCN: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN: [[COPY6:%[0-9]+]]:sgpr_32 = COPY [[COPY3]] + ; GCN: [[V_MUL_LO_U32_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[COPY]], [[COPY4]], implicit $exec + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 killed [[V_MUL_LO_U32_]], [[COPY6]], 0, implicit $exec + ; GCN: [[S_MUL_HI_U32_:%[0-9]+]]:sreg_32 = S_MUL_HI_U32 [[COPY4]], [[COPY5]] + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -614296167 + ; GCN: [[V_MUL_LO_U32_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[COPY]], [[COPY3]], implicit $exec + ; GCN: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_]] + ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 killed [[V_MUL_LO_U32_1]], [[COPY7]], [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GCN: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY4]], [[V_ADDC_U32_e64_]], implicit $exec + ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -181084736 + ; GCN: [[V_MUL_LO_U32_2:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[V_MUL_HI_U32_]], [[S_MOV_B32_1]], implicit $exec + ; GCN: [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_1]] + ; GCN: [[V_ADDC_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY8]], killed [[V_MUL_LO_U32_2]], [[V_ADDC_U32_e64_1]], 0, implicit $exec + %0:vgpr_32 = COPY $vgpr0 + %6:sreg_32 = COPY %0 + %1:vgpr_32 = COPY $vgpr1 + %2:vgpr_32 = COPY $vgpr2 + %3:sreg_32 = COPY $sgpr0 + %4:sreg_32 = COPY $sgpr1 + %5:sreg_32 = COPY $sgpr2 + %20:vgpr_32 = COPY %3 + %7:sreg_32 = S_MUL_I32 %6, %4 + %9:vgpr_32, %10:sreg_64_xexec = V_ADD_I32_e64 killed %7, %20, 0, implicit $exec + %8:sreg_32 = S_MUL_HI_U32 %4, %5 + %11:sreg_32 = S_MOV_B32 -614296167 + %12:sreg_32 = S_MUL_I32 %6, %3 + %14:sreg_32, 
%13:sreg_64_xexec = S_ADD_CO_PSEUDO killed %12, killed %11, killed %10, implicit-def dead $scc + %15:sreg_32 = S_MUL_HI_U32 %4, %14 + %16:sreg_32 = S_MOV_B32 -181084736 + %17:sreg_32 = S_MUL_I32 %15, %16 + %19:sreg_32, %18:sreg_64_xexec = S_ADD_CO_PSEUDO killed %16, killed %17, killed %13, implicit-def dead $scc +...