Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1823,6 +1823,16 @@ void SIRegisterInfo::buildSpillLoadStore(
}
}

Register FinalValueReg = ValueReg;
if (LoadStoreOp == AMDGPU::SCRATCH_LOAD_USHORT_SADDR) {
// If we are loading a 16-bit value with SRAMECC enabled we need a temp
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no check for sramecc enabled?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SCRATCH_LOAD_USHORT_SADDR is only used with sramecc, that was checked earlier.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this also an issue for the 8-bit cases?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do not have V8 spills.

// 32-bit VGPR to load and extract 16-bits into the final register.
ValueReg =
RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
SubReg = ValueReg;
IsKill = false;
}

MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RegOffset);
MachineMemOperand *NewMMO =
MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
Expand Down Expand Up @@ -1863,6 +1873,17 @@ void SIRegisterInfo::buildSpillLoadStore(
MIB.addImm(0); // swz
MIB.addMemOperand(NewMMO);

if (FinalValueReg != ValueReg) {
// Extract the low 16 bits from the loaded 32-bit value.
ValueReg = getSubReg(ValueReg, AMDGPU::lo16);
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically I can even use the same SCRATCH_LOAD_SHORT_D16_SADDR_t16 as w/o sramecc, but I would need to choose lo16 or hi16 here. I do not think this is really needed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed. It is simpler the way you have it now.

MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B16_t16_e64))
.addReg(FinalValueReg, getDefRegState(true))
.addImm(0)
.addReg(ValueReg, getKillRegState(true))
.addImm(0);
ValueReg = FinalValueReg;
}

if (!IsAGPR && NeedSuperRegDef)
MIB.addReg(ValueReg, RegState::ImplicitDefine);

Expand Down Expand Up @@ -2505,7 +2526,9 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
unsigned Opc;
if (MI->getOpcode() == AMDGPU::SI_SPILL_V16_RESTORE) {
assert(ST.enableFlatScratch() && "Flat Scratch is not enabled!");
Opc = AMDGPU::SCRATCH_LOAD_SHORT_D16_SADDR_t16;
Opc = ST.d16PreservesUnusedBits()
? AMDGPU::SCRATCH_LOAD_SHORT_D16_SADDR_t16
: AMDGPU::SCRATCH_LOAD_USHORT_SADDR;
} else {
Opc = MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE
? AMDGPU::SCRATCH_LOAD_BLOCK_SADDR
Expand Down
67 changes: 67 additions & 0 deletions llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -verify-machineinstrs -mcpu=gfx1100 -mattr=+real-true16 -run-pass=prologepilog -o - %s | FileCheck -check-prefix=EXPANDED %s
# RUN: llc -march=amdgcn -verify-machineinstrs -mcpu=gfx1250 -mattr=+real-true16 -run-pass=prologepilog -o - %s | FileCheck -check-prefix=SRAMECC-EXPANDED %s

---
name: spill_restore_vgpr16
Expand Down Expand Up @@ -31,6 +32,28 @@ body: |
; EXPANDED-NEXT: $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.0, align 4, addrspace 5)
; EXPANDED-NEXT: $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, align 4, addrspace 5)
; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16
;
; SRAMECC-EXPANDED-LABEL: name: spill_restore_vgpr16
; SRAMECC-EXPANDED: bb.0:
; SRAMECC-EXPANDED-NEXT: successors: %bb.1(0x80000000)
; SRAMECC-EXPANDED-NEXT: {{ $}}
; SRAMECC-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
; SRAMECC-EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5)
; SRAMECC-EXPANDED-NEXT: S_NOP 0, implicit renamable $vgpr0_lo16
; SRAMECC-EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, align 4, addrspace 5)
; SRAMECC-EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; SRAMECC-EXPANDED-NEXT: {{ $}}
; SRAMECC-EXPANDED-NEXT: bb.1:
; SRAMECC-EXPANDED-NEXT: successors: %bb.2(0x80000000)
; SRAMECC-EXPANDED-NEXT: {{ $}}
; SRAMECC-EXPANDED-NEXT: S_NOP 1
; SRAMECC-EXPANDED-NEXT: {{ $}}
; SRAMECC-EXPANDED-NEXT: bb.2:
; SRAMECC-EXPANDED-NEXT: $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.0, align 4, addrspace 5)
; SRAMECC-EXPANDED-NEXT: $vgpr0_lo16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
; SRAMECC-EXPANDED-NEXT: $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, align 4, addrspace 5)
; SRAMECC-EXPANDED-NEXT: $vgpr0_hi16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
; SRAMECC-EXPANDED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16
bb.0:
successors: %bb.1(0x80000000)
S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
Expand Down Expand Up @@ -78,6 +101,29 @@ body: |
; EXPANDED-NEXT: $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.0, align 4, addrspace 5)
; EXPANDED-NEXT: $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, align 4, addrspace 5)
; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16
;
; SRAMECC-EXPANDED-LABEL: name: spill_restore_vgpr16_middle_of_block
; SRAMECC-EXPANDED: bb.0:
; SRAMECC-EXPANDED-NEXT: successors: %bb.1(0x80000000)
; SRAMECC-EXPANDED-NEXT: {{ $}}
; SRAMECC-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
; SRAMECC-EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5)
; SRAMECC-EXPANDED-NEXT: S_NOP 0, implicit renamable $vgpr0_lo16
; SRAMECC-EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, align 4, addrspace 5)
; SRAMECC-EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; SRAMECC-EXPANDED-NEXT: {{ $}}
; SRAMECC-EXPANDED-NEXT: bb.1:
; SRAMECC-EXPANDED-NEXT: successors: %bb.2(0x80000000)
; SRAMECC-EXPANDED-NEXT: {{ $}}
; SRAMECC-EXPANDED-NEXT: S_NOP 1
; SRAMECC-EXPANDED-NEXT: {{ $}}
; SRAMECC-EXPANDED-NEXT: bb.2:
; SRAMECC-EXPANDED-NEXT: S_NOP 1
; SRAMECC-EXPANDED-NEXT: $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.0, align 4, addrspace 5)
; SRAMECC-EXPANDED-NEXT: $vgpr0_lo16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
; SRAMECC-EXPANDED-NEXT: $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, align 4, addrspace 5)
; SRAMECC-EXPANDED-NEXT: $vgpr0_hi16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
; SRAMECC-EXPANDED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16
bb.0:
successors: %bb.1(0x80000000)
S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
Expand Down Expand Up @@ -124,6 +170,27 @@ body: |
; EXPANDED-NEXT: bb.2:
; EXPANDED-NEXT: $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.0, align 4, addrspace 5)
; EXPANDED-NEXT: $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, align 4, addrspace 5)
;
; SRAMECC-EXPANDED-LABEL: name: spill_restore_vgpr16_end_of_block
; SRAMECC-EXPANDED: bb.0:
; SRAMECC-EXPANDED-NEXT: successors: %bb.1(0x80000000)
; SRAMECC-EXPANDED-NEXT: {{ $}}
; SRAMECC-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
; SRAMECC-EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5)
; SRAMECC-EXPANDED-NEXT: S_NOP 0, implicit renamable $vgpr0_lo16
; SRAMECC-EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, align 4, addrspace 5)
; SRAMECC-EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; SRAMECC-EXPANDED-NEXT: {{ $}}
; SRAMECC-EXPANDED-NEXT: bb.1:
; SRAMECC-EXPANDED-NEXT: successors: %bb.2(0x80000000)
; SRAMECC-EXPANDED-NEXT: {{ $}}
; SRAMECC-EXPANDED-NEXT: S_NOP 1
; SRAMECC-EXPANDED-NEXT: {{ $}}
; SRAMECC-EXPANDED-NEXT: bb.2:
; SRAMECC-EXPANDED-NEXT: $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.0, align 4, addrspace 5)
; SRAMECC-EXPANDED-NEXT: $vgpr0_lo16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
; SRAMECC-EXPANDED-NEXT: $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, align 4, addrspace 5)
; SRAMECC-EXPANDED-NEXT: $vgpr0_hi16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
bb.0:
successors: %bb.1(0x80000000)
S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
Expand Down
Loading