diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 0a16a07cb5ec3..ffcbf48461aa5 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -292,6 +292,7 @@ def S_BITSET0_B64 : SOP1_64_32 <"s_bitset0_b64", [], 1>;
 def S_BITSET1_B32 : SOP1_32 <"s_bitset1_b32", [], 1>;
 def S_BITSET1_B64 : SOP1_64_32 <"s_bitset1_b64", [], 1>;
 
+let isReMaterializable = 1 in // NOTE(review): a re-emitted S_GETPC_B64 sits at a different address, so its PC result may differ; confirm users tolerate that.
 def S_GETPC_B64 : SOP1_64_0 <"s_getpc_b64",
   [(set i64:$sdst, (int_amdgcn_s_getpc))]
 >;
diff --git a/llvm/test/CodeGen/AMDGPU/remat-sop.mir b/llvm/test/CodeGen/AMDGPU/remat-sop.mir
index 649f0d7f77996..e41c42c4f40b8 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-sop.mir
+++ b/llvm/test/CodeGen/AMDGPU/remat-sop.mir
@@ -573,3 +573,84 @@ body: |
     S_NOP 0, implicit %2
     S_ENDPGM 0
 ...
+# Three S_GETPC_B64 defs, each read once by an S_NOP: the checks expect the third def after the first two uses and no spill code.
+---
+name: test_remat_s_getpc_b64
+tracksRegLiveness: true
+body: |
+  bb.0:
+
+    ; GCN-LABEL: name: test_remat_s_getpc_b64
+    ; GCN: renamable $sgpr0_sgpr1 = S_GETPC_B64
+    ; GCN-NEXT: renamable $sgpr2_sgpr3 = S_GETPC_B64
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr2_sgpr3
+    ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_GETPC_B64
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1
+    ; GCN-NEXT: S_ENDPGM 0
+    %0:sgpr_64 = S_GETPC_B64
+    %1:sgpr_64 = S_GETPC_B64
+    %2:sgpr_64 = S_GETPC_B64
+    S_NOP 0, implicit %0
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_ENDPGM 0
+...
+# S_GETPC_B64 results are read via sub0/sub1 COPYs feeding S_ADD_U32/S_ADDC_U32: the checks expect SI_SPILL_S32_SAVE/RESTORE of the 32-bit halves.
+---
+name: test_remat_s_getpc_b64_2
+tracksRegLiveness: true
+body: |
+  bb.0:
+
+    ; GCN-LABEL: name: test_remat_s_getpc_b64_2
+    ; GCN: renamable $sgpr0_sgpr1 = S_GETPC_B64
+    ; GCN-NEXT: renamable $sgpr2_sgpr3 = S_GETPC_B64
+    ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.3, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.3, addrspace 5)
+    ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.0, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $sgpr1 = COPY renamable $sgpr2
+    ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.1, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $sgpr1 = COPY killed renamable $sgpr3
+    ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.2, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.2, addrspace 5)
+    ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_GETPC_B64
+    ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.5, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.5, addrspace 5)
+    ; GCN-NEXT: renamable $sgpr0 = COPY killed renamable $sgpr1
+    ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.4, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.4, addrspace 5)
+    ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.3, addrspace 5)
+    ; GCN-NEXT: dead renamable $sgpr0 = S_ADD_U32 killed renamable $sgpr1, killed renamable $sgpr0, implicit-def $scc
+    ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.2, addrspace 5)
+    ; GCN-NEXT: dead renamable $sgpr0 = S_ADDC_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc, implicit $scc
+    ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.3, addrspace 5)
+    ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.5, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.5, addrspace 5)
+    ; GCN-NEXT: dead renamable $sgpr0 = S_ADD_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc
+    ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.4, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.4, addrspace 5)
+    ; GCN-NEXT: dead renamable $sgpr0 = S_ADDC_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc, implicit $scc
+    ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.5, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.5, addrspace 5)
+    ; GCN-NEXT: dead renamable $sgpr0 = S_ADD_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc
+    ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.2, addrspace 5)
+    ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.4, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.4, addrspace 5)
+    ; GCN-NEXT: dead renamable $sgpr0 = S_ADDC_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc, implicit $scc
+    ; GCN-NEXT: S_ENDPGM 0
+    %0:sreg_64 = S_GETPC_B64
+    %1:sreg_64 = S_GETPC_B64
+    %2:sreg_64 = S_GETPC_B64
+    %4:sreg_32 = COPY %0.sub0:sreg_64
+    %5:sreg_32 = COPY %0.sub1:sreg_64
+    %6:sreg_32 = COPY %1.sub0:sreg_64
+    %7:sreg_32 = COPY %1.sub1:sreg_64
+    %8:sreg_32 = COPY %2.sub0:sreg_64
+    %9:sreg_32 = COPY %2.sub1:sreg_64
+    %10:sreg_32 = S_ADD_U32 %4:sreg_32, %6:sreg_32, implicit-def $scc
+    %11:sreg_32 = S_ADDC_U32 %5:sreg_32, %7:sreg_32, implicit-def $scc, implicit $scc
+    %12:sreg_32 = S_ADD_U32 %4:sreg_32, %8:sreg_32, implicit-def $scc
+    %13:sreg_32 = S_ADDC_U32 %5:sreg_32, %9:sreg_32, implicit-def $scc, implicit $scc
+    %14:sreg_32 = S_ADD_U32 %6:sreg_32, %8:sreg_32, implicit-def $scc
+    %15:sreg_32 = S_ADDC_U32 %7:sreg_32, %9:sreg_32, implicit-def $scc, implicit $scc
+    S_ENDPGM 0
+...
+
+
diff --git a/llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll b/llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll
new file mode 100644
index 0000000000000..598d7a8033c2e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stress-regalloc=2 -verify-machineinstrs < %s | FileCheck %s
+; The s.getpc result is used before and after an asm that clobbers s0-s31: the checks expect a second s_getpc_b64, not a spill of the value.
+
+define void @test_remat_s_getpc_b64() {
+; CHECK-LABEL: test_remat_s_getpc_b64:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; CHECK-NEXT: s_mov_b64 exec, s[4:5]
+; CHECK-NEXT: v_writelane_b32 v0, s30, 0
+; CHECK-NEXT: s_getpc_b64 s[4:5]
+; CHECK-NEXT: v_writelane_b32 v0, s31, 1
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_getpc_b64 s[4:5]
+; CHECK-NEXT: v_mov_b32_e32 v1, s4
+; CHECK-NEXT: v_mov_b32_e32 v2, s5
+; CHECK-NEXT: global_store_dwordx2 v[1:2], v[1:2], off
+; CHECK-NEXT: v_readlane_b32 s31, v0, 1
+; CHECK-NEXT: v_readlane_b32 s30, v0, 0
+; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT: s_mov_b64 exec, s[4:5]
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call i64 @llvm.amdgcn.s.getpc()
+  tail call void asm sideeffect "", "s"(i64 %0)
+  tail call void asm sideeffect "", "~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"()
+  store i64 %0, ptr addrspace(1) undef
+  ret void
+}
+
+declare i64 @llvm.amdgcn.s.getpc()