Skip to content

Commit

Permalink
[TwoAddressInstruction] Recompute live intervals for partial defs (#7…
Browse files Browse the repository at this point in the history
…4431)

Force live interval recomputation for a register if its definition is
narrowed to become partial. The live interval repair process cannot
otherwise detect these changes.
  • Loading branch information
perlfu committed Jan 12, 2024
1 parent cc0065a commit 6752f15
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 2 deletions.
13 changes: 11 additions & 2 deletions llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1936,13 +1936,16 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
}

bool DefEmitted = false;
bool DefIsPartial = false;
for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) {
MachineOperand &UseMO = MI.getOperand(i);
Register SrcReg = UseMO.getReg();
unsigned SubIdx = MI.getOperand(i+1).getImm();
// Nothing needs to be inserted for undef operands.
if (UseMO.isUndef())
if (UseMO.isUndef()) {
DefIsPartial = true;
continue;
}

// Defer any kill flag to the last operand using SrcReg. Otherwise, we
// might insert a COPY that uses SrcReg after is was killed.
Expand Down Expand Up @@ -1987,8 +1990,14 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
for (int j = MI.getNumOperands() - 1, ee = 0; j > ee; --j)
MI.removeOperand(j);
} else {
if (LIS)
if (LIS) {
// Force interval recomputation if we moved from full definition
// of register to partial.
if (DefIsPartial && LIS->hasInterval(DstReg) &&
MRI->shouldTrackSubRegLiveness(DstReg))
LIS->removeInterval(DstReg);
LIS->RemoveMachineInstrFromMaps(MI);
}

LLVM_DEBUG(dbgs() << "Eliminated: " << MI);
MI.eraseFromParent();
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/ds_gws_align.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX908 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -early-live-intervals -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX908 %s

; GCN-LABEL: {{^}}gws_init_odd_reg:
; GFX908-DAG: ds_gws_init v1 gds
Expand Down
52 changes: 52 additions & 0 deletions llvm/test/CodeGen/AMDGPU/early-lis-two-address-partial-def.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=liveintervals -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefix=GFX90A %s

---
name: aligned_partial_vgpr_64
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1
; GFX90A-LABEL: name: aligned_partial_vgpr_64
; GFX90A: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr7
; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr6
; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr5
; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr4
; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr3
; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr2
; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GFX90A-NEXT: undef [[COPY10:%[0-9]+]].sub0:sgpr_256 = COPY [[COPY9]]
; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub1:sgpr_256 = COPY [[COPY8]]
; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub2:sgpr_256 = COPY [[COPY7]]
; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub3:sgpr_256 = COPY [[COPY6]]
; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub4:sgpr_256 = COPY [[COPY5]]
; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub5:sgpr_256 = COPY [[COPY4]]
; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub6:sgpr_256 = COPY [[COPY3]]
; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub7:sgpr_256 = COPY [[COPY2]]
; GFX90A-NEXT: undef [[COPY11:%[0-9]+]].sub0:vreg_64_align2 = COPY [[COPY]]
; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = IMAGE_ATOMIC_SWAP_V1_V1_gfx90a [[COPY12]], [[COPY11]].sub0, [[COPY10]], 1, -1, 1, 0, 0, 0, implicit $exec, implicit [[COPY11]] :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX90A-NEXT: $vgpr0 = COPY [[COPY12]]
; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0
%9:vgpr_32 = COPY $vgpr1
%8:vgpr_32 = COPY $vgpr0
%7:sgpr_32 = COPY $sgpr7
%6:sgpr_32 = COPY $sgpr6
%5:sgpr_32 = COPY $sgpr5
%4:sgpr_32 = COPY $sgpr4
%3:sgpr_32 = COPY $sgpr3
%2:sgpr_32 = COPY $sgpr2
%1:sgpr_32 = COPY $sgpr1
%0:sgpr_32 = COPY $sgpr0
%11:sgpr_256 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3, %4:sgpr_32, %subreg.sub4, %5:sgpr_32, %subreg.sub5, %6:sgpr_32, %subreg.sub6, %7:sgpr_32, %subreg.sub7
%14:vreg_64_align2 = REG_SEQUENCE %9:vgpr_32, %subreg.sub0, undef %13:vgpr_32, %subreg.sub1
%12:vgpr_32 = IMAGE_ATOMIC_SWAP_V1_V1_gfx90a %8:vgpr_32, %14.sub0:vreg_64_align2, %11:sgpr_256, 1, -1, 1, 0, 0, 0, implicit $exec, implicit %14:vreg_64_align2 :: (volatile dereferenceable load store (s32), addrspace 8)
$vgpr0 = COPY %12:vgpr_32
SI_RETURN_TO_EPILOG $vgpr0
...
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789 %s
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX90A %s
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -early-live-intervals < %s | FileCheck -check-prefixes=GCN,GFX90A %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX12 %s
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -early-live-intervals < %s | FileCheck -check-prefixes=GCN %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s

; GCN-LABEL: {{^}}load_1d:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,SDAG %s
; RUN: llc -march=amdgcn -mcpu=gfx90a -early-live-intervals -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,SDAG %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,GISEL %s

; GFX90A-LABEL: {{^}}sample_1d:
Expand Down

0 comments on commit 6752f15

Please sign in to comment.