Skip to content

Commit

Permalink
[AMDGPU] Fix for the lost CarryOut/CarryIn register operands in S_ADD…
Browse files Browse the repository at this point in the history
…/SUB_CO_PSEUDO.

Summary: This fixes a bug introduced by 5b898bd, where the carry-in and carry-out registers were lost when lowering S_ADD/SUB_CO_PSEUDO.

Reviewers: rampitec, arsenm

Reviewed By: arsenm

Subscribers: msearles, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D80158
  • Loading branch information
alex-t committed May 27, 2020
1 parent 3345521 commit eb1092a
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 6 deletions.
18 changes: 12 additions & 6 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Expand Up @@ -5248,18 +5248,24 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
? AMDGPU::V_ADDC_U32_e64
: AMDGPU::V_SUBB_U32_e64;
const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
Register DummyCReg = MRI.createVirtualRegister(CarryRC);
Register CarryReg = MRI.createVirtualRegister(CarryRC);

Register CarryInReg = Inst.getOperand(4).getReg();
if (!MRI.constrainRegClass(CarryInReg, CarryRC)) {
Register NewCarryReg = MRI.createVirtualRegister(CarryRC);
BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY), NewCarryReg)
.addReg(CarryInReg);
}

Register CarryOutReg = Inst.getOperand(1).getReg();

Register DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(
MRI.getRegClass(Inst.getOperand(0).getReg())));
BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY), CarryReg)
.addReg(Inst.getOperand(4).getReg());
MachineInstr *CarryOp =
BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(Opc), DestReg)
.addReg(DummyCReg, RegState::Define | RegState::Dead)
.addReg(CarryOutReg, RegState::Define)
.add(Inst.getOperand(2))
.add(Inst.getOperand(3))
.addReg(CarryReg, RegState::Kill)
.addReg(CarryInReg)
.addImm(0);
legalizeOperands(*CarryOp);
MRI.replaceRegWith(Inst.getOperand(0).getReg(), DestReg);
Expand Down
50 changes: 50 additions & 0 deletions llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir
@@ -0,0 +1,50 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fix-sgpr-copies %s -o - | FileCheck -check-prefix=GCN %s
#
# Runs only the si-fix-sgpr-copies pass over a chain of two S_ADD_CO_PSEUDO
# instructions that is fed by the carry-out of a V_ADD_I32_e64.
# The checks expect each S_ADD_CO_PSEUDO to become a V_ADDC_U32_e64 and the
# carry chain to stay connected:
#   - the first V_ADDC_U32_e64 takes the V_ADD_I32_e64 carry-out as its carry-in;
#   - the second V_ADDC_U32_e64 takes the first one's carry-out as its carry-in.
---
name: s_add_co_pseudo_test
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0, $sgpr1, $sgpr2
; GCN-LABEL: name: s_add_co_pseudo_test
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0, $sgpr1, $sgpr2
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GCN: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GCN: [[COPY6:%[0-9]+]]:sgpr_32 = COPY [[COPY3]]
; GCN: [[V_MUL_LO_U32_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[COPY]], [[COPY4]], implicit $exec
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 killed [[V_MUL_LO_U32_]], [[COPY6]], 0, implicit $exec
; GCN: [[S_MUL_HI_U32_:%[0-9]+]]:sreg_32 = S_MUL_HI_U32 [[COPY4]], [[COPY5]]
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -614296167
; GCN: [[V_MUL_LO_U32_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[COPY]], [[COPY3]], implicit $exec
; GCN: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_]]
; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 killed [[V_MUL_LO_U32_1]], [[COPY7]], [[V_ADD_I32_e64_1]], 0, implicit $exec
; GCN: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY4]], [[V_ADDC_U32_e64_]], implicit $exec
; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -181084736
; GCN: [[V_MUL_LO_U32_2:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[V_MUL_HI_U32_]], [[S_MOV_B32_1]], implicit $exec
; GCN: [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_1]]
; GCN: [[V_ADDC_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY8]], killed [[V_MUL_LO_U32_2]], [[V_ADDC_U32_e64_1]], 0, implicit $exec
%0:vgpr_32 = COPY $vgpr0
%6:sreg_32 = COPY %0
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = COPY $vgpr2
%3:sreg_32 = COPY $sgpr0
%4:sreg_32 = COPY $sgpr1
%5:sreg_32 = COPY $sgpr2
%20:vgpr_32 = COPY %3
%7:sreg_32 = S_MUL_I32 %6, %4
%9:vgpr_32, %10:sreg_64_xexec = V_ADD_I32_e64 killed %7, %20, 0, implicit $exec
%8:sreg_32 = S_MUL_HI_U32 %4, %5
%11:sreg_32 = S_MOV_B32 -614296167
%12:sreg_32 = S_MUL_I32 %6, %3
%14:sreg_32, %13:sreg_64_xexec = S_ADD_CO_PSEUDO killed %12, killed %11, killed %10, implicit-def dead $scc
%15:sreg_32 = S_MUL_HI_U32 %4, %14
%16:sreg_32 = S_MOV_B32 -181084736
%17:sreg_32 = S_MUL_I32 %15, %16
%19:sreg_32, %18:sreg_64_xexec = S_ADD_CO_PSEUDO killed %16, killed %17, killed %13, implicit-def dead $scc
...

0 comments on commit eb1092a

Please sign in to comment.