From f52c3cf27251cb1e254d9829b74c538be7adfd06 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Mon, 11 Jul 2016 21:44:40 +0000 Subject: [PATCH] AMDGPU: fix local stack slot allocation bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: The main bug fix here is using the 32-bit encoding of V_ADD_I32 in materializeFrameBaseRegister and resolveFrameIndex, so that arbitrary immediates work. The second part is that we may now require the SegmentWaveByteOffset even when there are initially no stack objects and VGPR spilling isn't enabled, for stack slots that are allocated later. This means that some bits become effectively dead and can be cleaned up. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96602 Tested-by: Kai Wasserbäch Reviewers: arsenm, tstellarAMD Subscribers: arsenm, llvm-commits, kzhuravl Differential Revision: http://reviews.llvm.org/D21551 llvm-svn: 275108 --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 10 +++++++-- .../CodeGen/AMDGPU/local-stack-slot-bug.ll | 22 +++++++++++++++++++ .../CodeGen/AMDGPU/selected-stack-object.ll | 12 ++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll create mode 100644 llvm/test/CodeGen/AMDGPU/selected-stack-object.ll diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 748209bd06585..a57f4a0fe91a2 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -285,10 +285,13 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) + .addImm(Offset); BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg) .addReg(UnusedCarry, RegState::Define | RegState::Dead) - .addImm(Offset) + .addReg(OffsetReg, RegState::Kill) .addFrameIndex(FrameIdx); } @@ -335,13 +338,16 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, assert(Offset != 0 && "Non-zero offset expected"); unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); // In the case the instruction already had an immediate offset, here only // the requested new offset is added because we are leaving the original // immediate in place. + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) + .addImm(Offset); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), NewReg) .addReg(UnusedCarry, RegState::Define | RegState::Dead) - .addImm(Offset) + .addReg(OffsetReg, RegState::Kill) .addReg(BaseReg); FIOp->ChangeToRegister(NewReg, false); diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll b/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll new file mode 100644 index 0000000000000..6e6f289f5d6d6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll @@ -0,0 +1,22 @@ +; RUN: llc -march=amdgcn -mcpu=verde -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck %s + +; This used to fail due to a v_add_i32 instruction with an illegal immediate +; operand that was created during Local Stack Slot Allocation. Test case derived +; from https://bugs.freedesktop.org/show_bug.cgi?id=96602 +; +; CHECK-LABEL: {{^}}main: +; CHECK: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0 +; CHECK: v_mov_b32_e32 [[HI_CONST:v[0-9]+]], 0x200 +; CHECK: v_mov_b32_e32 [[LO_CONST:v[0-9]+]], 0 +; CHECK: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, [[BYTES]], [[HI_CONST]] +; CHECK: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, [[BYTES]], [[LO_CONST]] +; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen +; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen +define amdgpu_ps float @main(i32 %idx) { +main_body: + %v1 = extractelement <81 x float> , i32 %idx + %v2 = extractelement <81 x float> , i32 %idx + %r = fadd float %v1, %v2 + ret float %r +} diff --git a/llvm/test/CodeGen/AMDGPU/selected-stack-object.ll b/llvm/test/CodeGen/AMDGPU/selected-stack-object.ll new file mode 100644 index 0000000000000..0519914ee5a18 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/selected-stack-object.ll @@ -0,0 +1,12 @@ +; XFAIL: * +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s + +; See also local-stack-slot-bug.ll +; This fails because a stack object is created during instruction selection. + +; CHECK-LABEL: {{^}}main: +define amdgpu_ps float @main(i32 %idx) { +main_body: + %v1 = extractelement <81 x float> , i32 %idx + ret float %v1 +}