From 21a65ff5739db06725ed2aac983199d73de6fe85 Mon Sep 17 00:00:00 2001 From: Anshil Gandhi Date: Fri, 19 Sep 2025 12:06:18 -0500 Subject: [PATCH] [AMDGPU] Add regbankselect rules for G_FSHR --- .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 10 ++ .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 11 ++ .../AMDGPU/GlobalISel/regbankselect-fshr.mir | 158 +++++++++++++++--- 3 files changed, 158 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 73b2660727342..89287e13d4cc1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -461,6 +461,16 @@ void RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &MI) { Hi = B.buildAShr(SgprRB_S32, Val1, Amt1).getReg(0); break; } + case AMDGPU::G_FSHR: { + auto [X0, X1] = unpackAExt(MI.getOperand(1).getReg()); + auto [Y0, Y1] = unpackAExt(MI.getOperand(2).getReg()); + auto [S0, S1] = unpackZExt(MI.getOperand(3).getReg()); + + const RegisterBank *DstRB = MRI.getRegBank(MI.getOperand(0).getReg()); + Lo = B.buildInstr(AMDGPU::G_FSHR, {{DstRB, S32}}, {X0, Y0, S0}).getReg(0); + Hi = B.buildInstr(AMDGPU::G_FSHR, {{DstRB, S32}}, {X1, Y1, S1}).getReg(0); + break; + } default: llvm_unreachable("Unpack lowering not implemented"); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 0776d14a84067..7384c407b1573 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -514,6 +514,17 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}}) .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}}); + addRulesForGOpcs({G_FSHR}, Standard) + .Uni(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}}) + .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}}) + .Uni(V2S16, + {{VgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}, UnpackBitShift}) + .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}}) + .Uni(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}}) + .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}}) + .Uni(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr32}}) + .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr32}}); + addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}}); addRulesForGOpcs({G_UBFX, G_SBFX}, Standard) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir index b1a55fe7bc42f..8e94a8c10bf80 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir @@ -1,6 +1,46 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s + +--- +name: fshr_s16_sss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-LABEL: name: fshr_s16_sss + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s16) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s16) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s16) = COPY $sgpr2 + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:sgpr(s16) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s16) + %0:_(s16) = COPY $sgpr0 + %1:_(s16) = COPY $sgpr1 + %2:_(s16) = COPY $sgpr2 + %3:_(s16) = G_FSHR %0, %1, %2 +... + +--- +name: fshr_s16_vvv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-LABEL: name: fshr_s16_vvv + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s16) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY $vgpr2 + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s16) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s16) + %0:_(s16) = COPY $vgpr0 + %1:_(s16) = COPY $vgpr1 + %2:_(s16) = COPY $vgpr2 + %3:_(s16) = G_FSHR %0, %1, %2 +... --- name: fshr_sss @@ -15,10 +55,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY4]], [[COPY5]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:sgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 @@ -37,9 +74,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY3]], [[COPY4]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -58,9 +93,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY1]], [[COPY4]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $sgpr1 @@ -79,9 +112,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY4]], [[COPY2]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -100,8 +131,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY3]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $sgpr0 @@ -120,8 +150,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY3]], [[COPY2]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr1 @@ -140,8 +169,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY1]], [[COPY2]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $vgpr1 @@ -166,3 +194,91 @@ body: | %2:_(s32) = COPY $vgpr2 %3:_(s32) = G_FSHR %0, %1, %2 ... + +--- +name: fshr_v2s16_sss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-LABEL: name: fshr_v2s16_sss + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr2 + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:sgpr(<2 x s16>) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](<2 x s16>) + %0:_(<2 x s16>) = COPY $sgpr0 + %1:_(<2 x s16>) = COPY $sgpr1 + %2:_(<2 x s16>) = COPY $sgpr2 + %3:_(<2 x s16>) = G_FSHR %0, %1, %2 +... + +--- +name: fshr_v2s16_vvv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-LABEL: name: fshr_v2s16_vvv + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<2 x s16>) = COPY $vgpr2 + %3:_(<2 x s16>) = G_FSHR %0, %1, %2 +... + +--- +name: fshr_s64_sss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-LABEL: name: fshr_s64_sss + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s64) = G_ZEXT [[COPY]](s32) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s64) = G_ZEXT [[COPY1]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:sgpr(s64) = G_FSHR [[ZEXT]], [[ZEXT1]], [[COPY2]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s64) = G_ZEXT %0 + %4:_(s64) = G_ZEXT %1 + %5:_(s64) = G_FSHR %3, %4, %2 +... + +--- +name: fshr_s64_vvv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-LABEL: name: fshr_s64_vvv + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[COPY]](s32) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:vgpr(s64) = G_ZEXT [[COPY1]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s64) = G_FSHR [[ZEXT]], [[ZEXT1]], [[COPY2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s64) = G_ZEXT %0 + %4:_(s64) = G_ZEXT %1 + %5:_(s64) = G_FSHR %3, %4, %2 +...