diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 4f106bf0dfb11..eae666ab0e7d7 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -683,6 +683,12 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, } assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg); + if (hasFP(MF)) { + Register FPReg = MFI->getFrameOffsetReg(); + assert(FPReg != AMDGPU::FP_REG); + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0); + } + if (requiresStackPointerReference(MF)) { Register SPReg = MFI->getStackPtrOffsetReg(); assert(SPReg != AMDGPU::SP_REG); @@ -690,12 +696,6 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, .addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST)); } - if (hasFP(MF)) { - Register FPReg = MFI->getFrameOffsetReg(); - assert(FPReg != AMDGPU::FP_REG); - BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0); - } - bool NeedsFlatScratchInit = MFI->getUserSGPRInfo().hasFlatScratchInit() && (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() || diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll index b940dc74839b2..eaaeb3dc77a41 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll @@ -16,8 +16,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache ; GCN-NEXT: s_load_dword s6, s[4:5], 0x8 ; GCN-NEXT: s_add_u32 s0, s0, s9 ; GCN-NEXT: s_addc_u32 s1, s1, 0 -; GCN-NEXT: s_movk_i32 s32, 0x400 ; GCN-NEXT: s_mov_b32 s33, 0 +; GCN-NEXT: s_movk_i32 s32, 0x400 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_cmp_lg_u32 s6, 0 ; GCN-NEXT: s_cbranch_scc1 .LBB0_3 @@ -87,8 +87,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache ; GCN-NEXT: s_load_dword s6, s[4:5], 0x8 ; GCN-NEXT: s_add_u32 s0, s0, s9 ; GCN-NEXT: s_addc_u32 s1, s1, 0 -; GCN-NEXT: s_movk_i32 s32, 0x1000 ; GCN-NEXT: s_mov_b32 s33, 0 +; GCN-NEXT: s_movk_i32 s32, 0x1000 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_cmp_lg_u32 s6, 0 ; GCN-NEXT: s_cbranch_scc1 .LBB1_2 diff --git a/llvm/test/CodeGen/AMDGPU/cc-update.ll b/llvm/test/CodeGen/AMDGPU/cc-update.ll index c674aebabcc8d..8e773cad3b335 100644 --- a/llvm/test/CodeGen/AMDGPU/cc-update.ll +++ b/llvm/test/CodeGen/AMDGPU/cc-update.ll @@ -321,8 +321,8 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 { ; GFX803-NEXT: s_mov_b64 s[10:11], s[8:9] ; GFX803-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX803-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX803-NEXT: s_mov_b32 s32, 0 ; GFX803-NEXT: s_mov_b32 s33, 0 +; GFX803-NEXT: s_mov_b32 s32, 0 ; GFX803-NEXT: s_getpc_b64 s[16:17] ; GFX803-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 ; GFX803-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 @@ -340,8 +340,8 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 { ; GFX900-NEXT: s_mov_b64 s[10:11], s[8:9] ; GFX900-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX900-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX900-NEXT: s_mov_b32 s32, 0 ; GFX900-NEXT: s_mov_b32 s33, 0 +; GFX900-NEXT: s_mov_b32 s32, 0 ; GFX900-NEXT: s_getpc_b64 s[16:17] ; GFX900-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 ; GFX900-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 @@ -351,8 +351,8 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 { ; GFX1010-LABEL: test_force_fp_kern_call: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_add_u32 s10, s10, s15 -; GFX1010-NEXT: s_mov_b32 s32, 0 ; GFX1010-NEXT: s_mov_b32 s33, 0 +; GFX1010-NEXT: s_mov_b32 s32, 0 ; GFX1010-NEXT: s_addc_u32 s11, s11, 0 ; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10 ; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11 @@ -378,16 +378,16 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 { ; GFX1100-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX1100-NEXT: s_mov_b32 s13, s14 ; GFX1100-NEXT: s_mov_b32 s14, s15 -; GFX1100-NEXT: s_mov_b32 s32, 0 ; GFX1100-NEXT: s_mov_b32 s33, 0 +; GFX1100-NEXT: s_mov_b32 s32, 0 ; GFX1100-NEXT: s_getpc_b64 s[6:7] ; GFX1100-NEXT: s_add_u32 s6, s6, ex@rel32@lo+4 ; GFX1100-NEXT: s_addc_u32 s7, s7, ex@rel32@hi+12 ; GFX1100-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX1100-NEXT: s_endpgm ; GFX1010-NEXT s_add_u32 s12, s12, s17 -; GFX1010-NEXT s_mov_b32 s32, 0 ; GFX1010-NEXT s_mov_b32 s33, 0 +; GFX1010-NEXT s_mov_b32 s32, 0 ; GFX1010-NEXT s_addc_u32 s13, s13, 0 ; GFX1010-NEXT s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 ; GFX1010-NEXT s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 @@ -459,8 +459,8 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add ; GFX1010-LABEL: test_force_fp_kern_stack_and_call: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_add_u32 s10, s10, s15 -; GFX1010-NEXT: s_movk_i32 s32, 0x200 ; GFX1010-NEXT: s_mov_b32 s33, 0 +; GFX1010-NEXT: s_movk_i32 s32, 0x200 ; GFX1010-NEXT: s_addc_u32 s11, s11, 0 ; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10 ; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11 diff --git a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll index fbf2ee1145ae9..ec446f1f3bf27 100644 --- a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll +++ b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll @@ -7,8 +7,8 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 { ; CHECK-LABEL: test_kernel: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_mov_b32 s32, 0x180000 ; CHECK-NEXT: s_mov_b32 s33, 0 +; CHECK-NEXT: s_mov_b32 s32, 0x180000 ; CHECK-NEXT: s_add_u32 flat_scratch_lo, s10, s15 ; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 ; CHECK-NEXT: s_add_u32 s0, s0, s15 diff --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll index 125e6bc0f787f..ba012b208c957 100644 --- a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll @@ -21,8 +21,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache ; MUBUF-NEXT: s_add_u32 s0, s0, s9 ; MUBUF-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x8 ; MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; MUBUF-NEXT: s_movk_i32 s32, 0x400 ; MUBUF-NEXT: s_mov_b32 s33, 0 +; MUBUF-NEXT: s_movk_i32 s32, 0x400 ; MUBUF-NEXT: s_waitcnt lgkmcnt(0) ; MUBUF-NEXT: s_cmp_lg_u32 s8, 0 ; MUBUF-NEXT: s_cbranch_scc1 .LBB0_3 @@ -57,8 +57,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache ; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; FLATSCR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; FLATSCR-NEXT: s_mov_b32 s32, 16 ; FLATSCR-NEXT: s_mov_b32 s33, 0 +; FLATSCR-NEXT: s_mov_b32 s32, 16 ; FLATSCR-NEXT: s_waitcnt lgkmcnt(0) ; FLATSCR-NEXT: s_cmp_lg_u32 s4, 0 ; FLATSCR-NEXT: s_cbranch_scc1 .LBB0_3 @@ -125,8 +125,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache ; MUBUF-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x8 ; MUBUF-NEXT: s_add_u32 s0, s0, s9 ; MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; MUBUF-NEXT: s_movk_i32 s32, 0x1000 ; MUBUF-NEXT: s_mov_b32 s33, 0 +; MUBUF-NEXT: s_movk_i32 s32, 0x1000 ; MUBUF-NEXT: s_waitcnt lgkmcnt(0) ; MUBUF-NEXT: s_cmp_lg_u32 s6, 0 ; MUBUF-NEXT: s_cbranch_scc1 .LBB1_2 @@ -159,8 +159,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache ; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; FLATSCR-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8 -; FLATSCR-NEXT: s_mov_b32 s32, 64 ; FLATSCR-NEXT: s_mov_b32 s33, 0 +; FLATSCR-NEXT: s_mov_b32 s32, 64 ; FLATSCR-NEXT: s_waitcnt lgkmcnt(0) ; FLATSCR-NEXT: s_cmp_lg_u32 s2, 0 ; FLATSCR-NEXT: s_cbranch_scc1 .LBB1_2