-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AMDGPU][GISel] Add RegBankLegalize support for G_SI_CALL #165747
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -170,6 +170,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID, | |
| return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg); | ||
| case _: | ||
| return true; | ||
| case PhysReg: | ||
| return true; | ||
| default: | ||
| llvm_unreachable("missing matchUniformityAndLLT"); | ||
| } | ||
|
|
@@ -915,6 +917,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, | |
|
|
||
| addRulesForGOpcs({G_READSTEADYCOUNTER}, Standard).Uni(S64, {{Sgpr64}, {}}); | ||
|
|
||
| addRulesForGOpcs({G_SI_CALL}) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What about other address spaces? Are P0 and P4 the only ones that are required to be supported? |
||
| .Any({{PhysReg, UniP0}, {{None}, {SgprP0}}}) | ||
| .Any({{PhysReg, DivP0}, {{None}, {VgprP0}, WaterfallCall}}) | ||
| .Any({{PhysReg, UniP4}, {{None}, {SgprP4}}}) | ||
| .Any({{PhysReg, DivP4}, {{None}, {VgprP4}, WaterfallCall}}); | ||
|
|
||
| bool hasSALUFloat = ST->hasSALUFloatInsts(); | ||
|
|
||
| addRulesForGOpcs({G_FADD}, Standard) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -38,6 +38,7 @@ bool isAnyPtr(LLT Ty, unsigned Width); | |
| // be checked. | ||
| enum UniformityLLTOpPredicateID { | ||
| _, | ||
| PhysReg, | ||
| // scalars | ||
| S1, | ||
| S16, | ||
|
|
@@ -134,6 +135,7 @@ enum RegBankLLTMappingApplyID { | |
| Sgpr32, | ||
| Sgpr64, | ||
| Sgpr128, | ||
| SgprP0, | ||
| SgprP1, | ||
| SgprP3, | ||
| SgprP4, | ||
|
|
@@ -224,7 +226,8 @@ enum LoweringMethodID { | |
| SplitLoad, | ||
| WidenLoad, | ||
| WidenMMOToS32, | ||
| UnpackAExt | ||
| UnpackAExt, | ||
| WaterfallCall | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think it makes sense to treat this as its own standalone action. The waterfall is an implementation detail of particular scenarios, and isn't associated with a specific type or operand. |
||
| }; | ||
|
|
||
| enum FastRulesTypes { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,5 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py | ||
| ; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s | ||
| ; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. -new-reg-bank-select |
||
|
|
||
| ; amdgpu_gfx calling convention | ||
| declare hidden amdgpu_gfx void @external_gfx_void_func_void() #0 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,5 @@ | ||
| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py | ||
| # RUN: llc -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs -run-pass=localizer -o - %s | FileCheck %s | ||
| # RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs -run-pass=localizer -o - %s | FileCheck %s | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. -global-isel -new-reg-bank-select |
||
|
|
||
| # Previously this was placing the new G_CONSTANT after the use call | ||
| --- | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,215 @@ | ||
| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py | ||
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize -o - %s | FileCheck %s | ||
|
|
||
| --- | ||
| name: waterfall_divergent_call_p0_no_args | ||
| legalized: true | ||
| body: | | ||
| bb.0: | ||
| liveins: $sgpr0_sgpr1 | ||
|
|
||
| ; CHECK-LABEL: name: waterfall_divergent_call_p0_no_args | ||
| ; CHECK: successors: %bb.1(0x80000000) | ||
| ; CHECK-NEXT: liveins: $sgpr0_sgpr1 | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF | ||
| ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: .1: | ||
| ; CHECK-NEXT: successors: %bb.2(0x80000000) | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES %func_ptr(p0) | ||
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) | ||
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) | ||
| ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p0) = G_MERGE_VALUES [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32) | ||
| ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[MV]](p0), %func_ptr | ||
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) | ||
| ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT2]](s64), implicit-def $exec, implicit-def $scc, implicit $exec | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: .2: | ||
| ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc | ||
| ; CHECK-NEXT: %g_ptr:sgpr(p0) = COPY $sgpr0_sgpr1 | ||
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY %g_ptr(p0) | ||
| ; CHECK-NEXT: %func_ptr:vgpr(p0) = G_LOAD [[COPY]](p0) :: (load (p0)) | ||
| ; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu | ||
| ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc | ||
| ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc | ||
| ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: .3: | ||
| ; CHECK-NEXT: successors: %bb.4(0x80000000) | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: .4: | ||
| ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3 | ||
| ADJCALLSTACKUP 0, 0, implicit-def $scc | ||
| %g_ptr:_(p0) = COPY $sgpr0_sgpr1 | ||
| %func_ptr:_(p0) = G_LOAD %g_ptr(p0) :: (load (p0)) | ||
| $sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu | ||
| ADJCALLSTACKDOWN 0, 0, implicit-def $scc | ||
| S_SETPC_B64_return undef $sgpr2_sgpr3 | ||
|
|
||
| ... | ||
|
|
||
| --- | ||
| name: waterfall_divergent_call_p4_no_args | ||
| legalized: true | ||
| body: | | ||
| bb.0: | ||
| liveins: $sgpr0_sgpr1 | ||
|
|
||
| ; CHECK-LABEL: name: waterfall_divergent_call_p4_no_args | ||
| ; CHECK: successors: %bb.1(0x80000000) | ||
| ; CHECK-NEXT: liveins: $sgpr0_sgpr1 | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF | ||
| ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: .1: | ||
| ; CHECK-NEXT: successors: %bb.2(0x80000000) | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES %func_ptr(p4) | ||
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) | ||
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) | ||
| ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p4) = G_MERGE_VALUES [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32) | ||
| ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[MV]](p4), %func_ptr | ||
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) | ||
| ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT2]](s64), implicit-def $exec, implicit-def $scc, implicit $exec | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: .2: | ||
| ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc | ||
| ; CHECK-NEXT: %g_ptr:sgpr(p4) = COPY $sgpr0_sgpr1 | ||
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p4) = COPY %g_ptr(p4) | ||
| ; CHECK-NEXT: %func_ptr:vgpr(p4) = G_LOAD [[COPY]](p4) :: (load (p4)) | ||
| ; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p4), 0, csr_amdgpu | ||
| ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc | ||
| ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc | ||
| ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: .3: | ||
| ; CHECK-NEXT: successors: %bb.4(0x80000000) | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: .4: | ||
| ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3 | ||
| ADJCALLSTACKUP 0, 0, implicit-def $scc | ||
| %g_ptr:_(p4) = COPY $sgpr0_sgpr1 | ||
| %func_ptr:_(p4) = G_LOAD %g_ptr(p4) :: (load (p4)) | ||
| $sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu | ||
| ADJCALLSTACKDOWN 0, 0, implicit-def $scc | ||
| S_SETPC_B64_return undef $sgpr2_sgpr3 | ||
|
|
||
| ... | ||
|
|
||
| --- | ||
| name: waterfall_divergent_call_p0_with_args | ||
| legalized: true | ||
| body: | | ||
| bb.0: | ||
| liveins: $sgpr0_sgpr1 | ||
|
|
||
| ; CHECK-LABEL: name: waterfall_divergent_call_p0_with_args | ||
| ; CHECK: successors: %bb.1(0x80000000) | ||
| ; CHECK-NEXT: liveins: $sgpr0_sgpr1 | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF | ||
| ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: .1: | ||
| ; CHECK-NEXT: successors: %bb.2(0x80000000) | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES %func_ptr(p0) | ||
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) | ||
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) | ||
| ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p0) = G_MERGE_VALUES [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32) | ||
| ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[MV]](p0), %func_ptr | ||
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) | ||
| ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT2]](s64), implicit-def $exec, implicit-def $scc, implicit $exec | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: .2: | ||
| ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc | ||
| ; CHECK-NEXT: %g_ptr:sgpr(p0) = COPY $sgpr0_sgpr1 | ||
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY %g_ptr(p0) | ||
| ; CHECK-NEXT: %func_ptr:vgpr(p0) = G_LOAD [[COPY]](p0) :: (load (p0)) | ||
| ; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0 | ||
| ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc | ||
| ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc | ||
| ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: .3: | ||
| ; CHECK-NEXT: successors: %bb.4(0x80000000) | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: .4: | ||
| ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3 | ||
| ADJCALLSTACKUP 0, 0, implicit-def $scc | ||
| %g_ptr:_(p0) = COPY $sgpr0_sgpr1 | ||
| %func_ptr:_(p0) = G_LOAD %g_ptr(p0) :: (load (p0)) | ||
| $sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0 | ||
| ADJCALLSTACKDOWN 0, 0, implicit-def $scc | ||
| S_SETPC_B64_return undef $sgpr2_sgpr3 | ||
|
|
||
| ... | ||
|
|
||
| --- | ||
| name: waterfall_divergent_call_p4_with_args | ||
| legalized: true | ||
| body: | | ||
| bb.0: | ||
| liveins: $sgpr0_sgpr1 | ||
|
|
||
| ; CHECK-LABEL: name: waterfall_divergent_call_p4_with_args | ||
| ; CHECK: successors: %bb.1(0x80000000) | ||
| ; CHECK-NEXT: liveins: $sgpr0_sgpr1 | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF | ||
| ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: .1: | ||
| ; CHECK-NEXT: successors: %bb.2(0x80000000) | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES %func_ptr(p4) | ||
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) | ||
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) | ||
| ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p4) = G_MERGE_VALUES [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32) | ||
| ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[MV]](p4), %func_ptr | ||
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) | ||
| ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT2]](s64), implicit-def $exec, implicit-def $scc, implicit $exec | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: .2: | ||
| ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc | ||
| ; CHECK-NEXT: %g_ptr:sgpr(p4) = COPY $sgpr0_sgpr1 | ||
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p4) = COPY %g_ptr(p4) | ||
| ; CHECK-NEXT: %func_ptr:vgpr(p4) = G_LOAD [[COPY]](p4) :: (load (p4)) | ||
| ; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p4), 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0 | ||
| ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc | ||
| ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc | ||
| ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: .3: | ||
| ; CHECK-NEXT: successors: %bb.4(0x80000000) | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: .4: | ||
| ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3 | ||
| ADJCALLSTACKUP 0, 0, implicit-def $scc | ||
| %g_ptr:_(p4) = COPY $sgpr0_sgpr1 | ||
| %func_ptr:_(p4) = G_LOAD %g_ptr(p4) :: (load (p4)) | ||
| $sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0 | ||
| ADJCALLSTACKDOWN 0, 0, implicit-def $scc | ||
| S_SETPC_B64_return undef $sgpr2_sgpr3 | ||
|
|
||
| ... | ||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Physical registers shouldn't require consideration by any of this code
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
True, but call instructions are an exception, since they are the only G_ instructions that use physical registers.
The "_" does not work, since it is for non-register operands.
Do you have suggestions for how to deal with this other than introducing PhysReg?