-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AMDGPU] Drop high 32 bits of aperture registers #158725
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Drop high 32 bits of aperture registers #158725
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu Author: Stanislav Mekhanoshin (rampitec) ChangesPatch is 63.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/158725.diff 9 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 31dd6b9e8d84d..af92422db4e55 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -228,16 +228,12 @@ def SGPR_NULL64 :
// need them, we need to do a 64 bit load and extract the bits manually.
multiclass ApertureRegister<string name, bits<10> regIdx> {
let isConstant = true in {
- // FIXME: We shouldn't need to define subregisters for these (nor add them to any 16 bit
- // register classes), but if we don't it seems to confuse the TableGen
- // backend and we end up with a lot of weird register pressure sets and classes.
defm _LO : SIRegLoHi16 <name, regIdx>;
- defm _HI : SIRegLoHi16 <"", regIdx>;
-
- def "" : RegisterWithSubRegs<name, [!cast<Register>(NAME#_LO), !cast<Register>(NAME#_HI)]> {
+ def "" : RegisterWithSubRegs<name, [!cast<Register>(NAME#_LO)]> {
let Namespace = "AMDGPU";
- let SubRegIndices = [sub0, sub1];
+ let SubRegIndices = [sub0];
let HWEncoding = !cast<Register>(NAME#_LO).HWEncoding;
+ let CoveredBySubRegs = 0;
}
} // isConstant = true
}
@@ -790,8 +786,7 @@ let GeneratePressureSet = 0, HasSGPR = 1 in {
def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
SGPR_NULL, SGPR_NULL_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE_LO,
- SRC_SHARED_LIMIT_LO, SRC_PRIVATE_BASE_LO, SRC_PRIVATE_LIMIT_LO, SRC_SHARED_BASE_HI,
- SRC_SHARED_LIMIT_HI, SRC_PRIVATE_BASE_HI, SRC_PRIVATE_LIMIT_HI, SRC_POPS_EXITING_WAVE_ID,
+ SRC_SHARED_LIMIT_LO, SRC_PRIVATE_BASE_LO, SRC_PRIVATE_LIMIT_LO, SRC_POPS_EXITING_WAVE_ID,
SRC_VCCZ, SRC_EXECZ, SRC_SCC, SRC_FLAT_SCRATCH_BASE_LO, SRC_FLAT_SCRATCH_BASE_HI)> {
let AllocationPriority = 0;
}
@@ -801,10 +796,9 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, SGPR_NULL_HI_LO16, TTMP_LO16,
TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO_LO16,
SRC_SHARED_LIMIT_LO_LO16, SRC_PRIVATE_BASE_LO_LO16, SRC_PRIVATE_LIMIT_LO_LO16,
- SRC_SHARED_BASE_HI_LO16, SRC_SHARED_LIMIT_HI_LO16, SRC_PRIVATE_BASE_HI_LO16,
- SRC_PRIVATE_LIMIT_HI_LO16, SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16,
- SRC_EXECZ_LO16, SRC_SCC_LO16, EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16,
- SRC_FLAT_SCRATCH_BASE_LO_LO16, SRC_FLAT_SCRATCH_BASE_HI_LO16)> {
+ SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, SRC_SCC_LO16,
+ EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16, SRC_FLAT_SCRATCH_BASE_LO_LO16,
+ SRC_FLAT_SCRATCH_BASE_HI_LO16)> {
let Size = 16;
let isAllocatable = 0;
let BaseClassOrder = 16;
@@ -825,6 +819,13 @@ def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2
let AllocationPriority = 0;
}
+def APERTURE_Class : SIRegisterClass<"AMDGPU", Reg64Types.types, 32,
+ (add SRC_SHARED_BASE, SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
+ let isAllocatable = 0;
+ let Size = 64;
+ let BaseClassOrder = 10000;
+}
+
} // End GeneratePressureSet = 0
// Register class for all scalar registers (SGPRs + Special Registers)
@@ -876,8 +877,7 @@ def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16, v4bf16],
}
def SReg_64_XEXEC_XNULL : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
- (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SRC_SHARED_BASE,
- SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA,
+ (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA,
SRC_FLAT_SCRATCH_BASE)> {
let CopyCost = 1;
let AllocationPriority = 1;
@@ -900,6 +900,14 @@ def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f1
let Size = 64;
}
+def SReg_64_Plus : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
+ (add SReg_64, APERTURE_Class)> {
+ let CopyCost = 1;
+ let isAllocatable = 0;
+ let HasSGPR = 1;
+ let Size = 64;
+}
+
def SReg_1_XEXEC : SIRegisterClass<"AMDGPU", [i1], 32,
(add SReg_64_XEXEC, SReg_32_XEXEC)> {
let CopyCost = 1;
@@ -1225,7 +1233,7 @@ def SSrc_bf16: SrcRegOrImm9 <SReg_32, "OPERAND_REG_IMM_BF16">;
def SSrc_f16 : SrcRegOrImm9 <SReg_32, "OPERAND_REG_IMM_FP16">;
def SSrc_b32 : SrcRegOrImm9 <SReg_32, "OPERAND_REG_IMM_INT32">;
def SSrc_f32 : SrcRegOrImm9 <SReg_32, "OPERAND_REG_IMM_FP32">;
-def SSrc_b64 : SrcRegOrImm9 <SReg_64, "OPERAND_REG_IMM_INT64">;
+def SSrc_b64 : SrcRegOrImm9 <SReg_64_Plus, "OPERAND_REG_IMM_INT64">;
def SSrcOrLds_b32 : SrcRegOrImm9 <SRegOrLds_32, "OPERAND_REG_IMM_INT32">;
diff --git a/llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir b/llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir
index 9345e92789327..04cb0b14679bb 100644
--- a/llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir
@@ -72,7 +72,7 @@ body: |
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0:areg_96 = COPY [[COPY]]
; CHECK-NEXT: [[COPY3:%[0-9]+]].sub1:areg_96 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY3:%[0-9]+]].sub2:areg_96 = COPY [[COPY2]]
- ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 5832713 /* reguse:AReg_96 */, [[COPY3]]
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_96 */, [[COPY3]]
; CHECK-NEXT: SI_RETURN
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
@@ -80,7 +80,7 @@ body: |
undef %3.sub0:areg_96 = COPY %0
%3.sub1:areg_96 = COPY %1
%3.sub2:areg_96 = COPY %2
- INLINEASM &"; use $0", 0 /* attdialect */, 5832713 /* reguse:AReg_96 */, %3
+ INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_96 */, %3
SI_RETURN
...
@@ -101,7 +101,7 @@ body: |
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0:areg_96_align2 = COPY [[COPY]]
; CHECK-NEXT: [[COPY3:%[0-9]+]].sub1:areg_96_align2 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY3:%[0-9]+]].sub2:areg_96_align2 = COPY [[COPY2]]
- ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96_Align2 */, [[COPY3]]
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6357001 /* reguse:AReg_96_Align2 */, [[COPY3]]
; CHECK-NEXT: SI_RETURN
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
@@ -109,7 +109,7 @@ body: |
undef %3.sub0:areg_96_align2 = COPY %0
%3.sub1:areg_96_align2 = COPY %1
%3.sub2:areg_96_align2 = COPY %2
- INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96_Align2 */, %3
+ INLINEASM &"; use $0", 0 /* attdialect */, 6357001 /* reguse:AReg_96_Align2 */, %3
SI_RETURN
...
@@ -128,13 +128,13 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0_sub1:areg_128 = COPY [[COPY]]
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub2_sub3:areg_128 = COPY [[COPY1]]
- ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 7733257 /* reguse:AReg_128 */, [[COPY2]]
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 7929865 /* reguse:AReg_128 */, [[COPY2]]
; CHECK-NEXT: SI_RETURN
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
undef %2.sub0_sub1:areg_128 = COPY %0
%2.sub2_sub3:areg_128 = COPY %1
- INLINEASM &"; use $0", 0 /* attdialect */, 7733257 /* reguse:AReg_128 */, killed %2
+ INLINEASM &"; use $0", 0 /* attdialect */, 7929865 /* reguse:AReg_128 */, killed %2
SI_RETURN
...
@@ -153,13 +153,13 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[COPY]]
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub2_sub3:areg_128_align2 = COPY [[COPY1]]
- ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 8060937 /* reguse:AReg_128_Align2 */, [[COPY2]]
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 8257545 /* reguse:AReg_128_Align2 */, [[COPY2]]
; CHECK-NEXT: SI_RETURN
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
undef %2.sub0_sub1:areg_128_align2 = COPY %0
%2.sub2_sub3:areg_128_align2 = COPY %1
- INLINEASM &"; use $0", 0 /* attdialect */, 8060937 /* reguse:AReg_128_Align2 */, %2
+ INLINEASM &"; use $0", 0 /* attdialect */, 8257545 /* reguse:AReg_128_Align2 */, %2
SI_RETURN
...
@@ -203,13 +203,13 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2
; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0:areg_96 = COPY [[COPY]]
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub1_sub2:areg_96 = COPY [[COPY1]]
- ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 5832713 /* reguse:AReg_96 */, [[COPY2]]
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_96 */, [[COPY2]]
; CHECK-NEXT: SI_RETURN
%0:vgpr_32 = COPY $vgpr0
%1:vreg_64 = COPY $vgpr1_vgpr2
undef %2.sub0:areg_96 = COPY %0
%2.sub1_sub2:areg_96 = COPY %1
- INLINEASM &"; use $0", 0 /* attdialect */, 5832713 /* reguse:AReg_96 */, %2
+ INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_96 */, %2
SI_RETURN
...
@@ -253,13 +253,13 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0_sub1:areg_96 = COPY [[COPY]]
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub2:areg_96 = COPY [[COPY1]]
- ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 5832713 /* reguse:AReg_96 */, [[COPY2]]
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_96 */, [[COPY2]]
; CHECK-NEXT: SI_RETURN
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
undef %2.sub0_sub1:areg_96 = COPY %0
%2.sub2:areg_96 = COPY %1
- INLINEASM &"; use $0", 0 /* attdialect */, 5832713 /* reguse:AReg_96 */, %2
+ INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_96 */, %2
SI_RETURN
...
@@ -350,12 +350,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96 = COPY [[COPY]]
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96 = COPY [[COPY]]
- ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 5832713 /* reguse:AReg_96 */, [[COPY1]]
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_96 */, [[COPY1]]
; CHECK-NEXT: SI_RETURN
%0:vgpr_32 = COPY $vgpr0
undef %1.sub0:areg_96 = COPY %0
%1.sub1:areg_96 = COPY %0
- INLINEASM &"; use $0", 0 /* attdialect */, 5832713 /* reguse:AReg_96 */, %1
+ INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_96 */, %1
SI_RETURN
...
@@ -373,12 +373,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96_align2 = COPY [[COPY]]
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96_align2 = COPY [[COPY]]
- ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96_Align2 */, [[COPY1]]
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6357001 /* reguse:AReg_96_Align2 */, [[COPY1]]
; CHECK-NEXT: SI_RETURN
%0:vgpr_32 = COPY $vgpr0
undef %1.sub0:areg_96_align2 = COPY %0
%1.sub1:areg_96_align2 = COPY %0
- INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96_Align2 */, %1
+ INLINEASM &"; use $0", 0 /* attdialect */, 6357001 /* reguse:AReg_96_Align2 */, %1
SI_RETURN
...
@@ -398,14 +398,14 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_128 = COPY [[COPY]]
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_128 = COPY [[COPY]]
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub3:areg_128 = COPY [[COPY]]
- ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 7733257 /* reguse:AReg_128 */, [[COPY1]]
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 7929865 /* reguse:AReg_128 */, [[COPY1]]
; CHECK-NEXT: SI_RETURN
%0:vgpr_32 = COPY $vgpr0
undef %1.sub0:areg_128 = COPY %0
%1.sub1:areg_128 = COPY %0
%1.sub2:areg_128 = COPY %0
%1.sub3:areg_128 = COPY %0
- INLINEASM &"; use $0", 0 /* attdialect */, 7733257 /* reguse:AReg_128 */, killed %1
+ INLINEASM &"; use $0", 0 /* attdialect */, 7929865 /* reguse:AReg_128 */, killed %1
SI_RETURN
...
@@ -425,14 +425,14 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_128_align2 = COPY [[COPY]]
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_128_align2 = COPY [[COPY]]
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub3:areg_128_align2 = COPY [[COPY]]
- ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 8060937 /* reguse:AReg_128_Align2 */, [[COPY1]]
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 8257545 /* reguse:AReg_128_Align2 */, [[COPY1]]
; CHECK-NEXT: SI_RETURN
%0:vgpr_32 = COPY $vgpr0
undef %1.sub0:areg_128_align2 = COPY %0
%1.sub1:areg_128_align2 = COPY %0
%1.sub2:areg_128_align2 = COPY %0
%1.sub3:areg_128_align2 = COPY %0
- INLINEASM &"; use $0", 0 /* attdialect */, 8060937 /* reguse:AReg_128_Align2 */, %1
+ INLINEASM &"; use $0", 0 /* attdialect */, 8257545 /* reguse:AReg_128_Align2 */, %1
SI_RETURN
...
@@ -585,7 +585,7 @@ body: |
; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96 = COPY [[COPY]].sub1
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96 = COPY [[COPY]].sub2
- ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 5832713 /* reguse:AReg_96 */, [[COPY1]]
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_96 */, [[COPY1]]
; CHECK-NEXT: SI_RETURN
undef %0.sub0:vreg_96 =COPY $vgpr0
%0.sub1:vreg_96 = COPY $vgpr1
@@ -593,7 +593,7 @@ body: |
undef %3.sub0:areg_96 = COPY %0.sub0
%3.sub1:areg_96 = COPY %0.sub1
%3.sub2:areg_96 = COPY %0.sub2
- INLINEASM &"; use $0", 0 /* attdialect */, 5832713 /* reguse:AReg_96 */, %3
+ INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_96 */, %3
SI_RETURN
...
@@ -614,7 +614,7 @@ body: |
; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96_align2 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96_align2 = COPY [[COPY]].sub1
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96_align2 = COPY [[COPY]].sub2
- ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96_Align2 */, [[COPY1]]
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6357001 /* reguse:AReg_96_Align2 */, [[COPY1]]
; CHECK-NEXT: SI_RETURN
undef %0.sub0:vreg_96 =COPY $vgpr0
%0.sub1:vreg_96 = COPY $vgpr1
@@ -622,7 +622,7 @@ body: |
undef %3.sub0:areg_96_align2 = COPY %0.sub0
%3.sub1:areg_96_align2 = COPY %0.sub1
%3.sub2:areg_96_align2 = COPY %0.sub2
- INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96_Align2 */, %3
+ INLINEASM &"; use $0", 0 /* attdialect */, 6357001 /* reguse:AReg_96_Align2 */, %3
SI_RETURN
...
@@ -641,13 +641,13 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]].sub2_sub3:vreg_128 = COPY $vgpr2_vgpr3
; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0_sub1:areg_128 = COPY [[COPY]].sub0_sub1
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2_sub3:areg_128 = COPY [[COPY]].sub2_sub3
- ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 7733257 /* reguse:AReg_128 */, [[COPY1]]
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 7929865 /* reguse:AReg_128 */, [[COPY1]]
; CHECK-NEXT: SI_RETURN
undef %0.sub0_sub1:vreg_128 =COPY $vgpr0_vgpr1
%0.sub2_sub3:vreg_128 = COPY $vgpr2_vgpr3
undef %2.sub0_sub1:areg_128 = COPY %0.sub0_sub1
%2.sub2_sub3:areg_128 = COPY %0.sub2_sub3
- INLINEASM &"; use $0", 0 /* attdialect */, 7733257 /* reguse:AReg_128 */, killed %2
+ INLINEASM &"; use $0", 0 /* attdialect */, 7929865 /* reguse:AReg_128 */, killed %2
SI_RETURN
...
@@ -668,13 +668,13 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_128 = COPY $vgpr2_vgpr3
; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2_sub3:areg_128_align2 = COPY [[COPY]].sub1
- ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 8060937 /* reguse:AReg_128_Align2 */, [[COPY1]]
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 8257545 /* reguse:AReg_128_Align2 */, [[COPY1]]
; CHECK-NEXT: SI_RETURN
undef %0.sub0:vreg_128 =COPY $vgpr0_vgpr1
%0.sub1:vreg_128 = COPY $vgpr2_vgpr3
undef %2.sub0_sub1:areg_128_align2 = COPY %0.sub0
%2.sub2_sub3:areg_128_align2 = COPY %0.sub1
- INLINEASM &"; use $0", 0 /* attdialect */, 8060937 /* reguse:AReg_128_Align2 */, %2
+ INLINEASM &"; use $0", 0 /* attdialect */, 8257545 /* reguse:AReg_128_Align2 */, %2
SI_RETURN
...
@@ -718,13 +718,13 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]].sub1_sub2:vreg_96 = COPY $vgpr1_vgpr2
; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1_sub2:areg_96 = COPY [[COPY]].sub1_sub2
- ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 5832713 /* reguse:AReg_96 */, [[COPY1]]
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_96 */, [[COPY1]]
; CHECK-NEXT: SI_RETURN
undef %0.sub0:vreg_96 =COPY $vgpr0
%0.sub1_sub2:vreg_96 = COPY $vgpr1_vgpr2
undef %2.sub0:areg_96 = COPY %0.sub0
%2.sub1_sub2:areg_96 = COPY %0.sub1_sub2
- INLINEASM &"; use $0", 0 /* attdialect */, 5832713 /* reguse:AReg_96 */, %2
+ INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_96 */, %2
SI_RETURN
...
@@ -768,13 +768,13 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]].sub2:vreg_96 = COPY $vgpr2
; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0_sub1:areg_96 = COPY [[COPY]].sub0_sub1
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96 = COPY [[COPY]].sub2
- ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 5832713 /* reguse:AReg_96 */, [[COPY1]]
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_96 */, [[COPY1]]
; CHECK-NEXT: SI_RETURN
undef %0.sub0_sub1:vreg_96 = COPY $vgpr0_vgpr1
%0.sub2:vreg_96 = COPY $vgpr2
undef %2.sub0_sub1:areg_96 = COPY %0.sub0_sub1
%2.sub2:areg_96 = COPY %0.sub2
- INLINEASM &"; use $0", 0 /* attdialect */, 5832713 /* reguse:AReg_96 */, %2
+ INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_96 */, %2
SI_RETURN
...
@@ -841,13 +841,13 @@ body: |
; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96 = COPY [[COPY]].sub0
- ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 5832713 /* reguse:AReg_96 */, [[COPY1]]
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_96 */, [[COPY1]]
; CHECK-NEXT: SI_RETURN
undef %0.sub0:vreg_64 = COPY $vgpr0
undef %1.sub0:areg_96 = COPY %0.sub0
%1.sub1:areg_96 = COPY %0.sub0
%1.sub2:areg_96 = COPY %0.sub0
- INLINEASM &"; use $0", 0 /* attdialect */, 5832713 /* reguse:AReg_96 */, %1
+ INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_96 */, %1
SI_RETURN
...
@@ -865,12 +865,12 @@ body: |
; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr0
; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96_align2 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96_align2 = COPY [[COPY]].sub0
- ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6160393 /* reguse:AReg_96_Align2 */, [[COPY1]]
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6357001 /* reguse:AReg_96_Align2 */, [[COPY1]]
; CHECK-NEXT: SI_RETURN
undef %0.sub0:vreg_64 = COPY $vgpr0
undef %1.sub0:areg_96_align2 = COPY %0.sub0
%...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm
d73a2b6
to
f09aff5
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should also be the class used for VS_64's SGPR component
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
And that is I am not so sure. First there is only one context where aperture regs appear in the codegen, that is copy from reg. Then it is not really useful on its low part. And then extending register classes for no reason will inflate generated inc, disturb tests, and give no positive outcome. Likely. I.e., this is a question to explore, but to me as a separate PR if any.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I.e., 9 of your new tests with INLINEASM and opaque constraint numbers fail again that way. For no apparent benefit. Nothing really beneficial comes out of it. Let's do it separately if you insist? That's all of the outcome of the adding it to the VS_64:
Failed Tests (9):
LLVM :: CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
LLVM :: CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir
LLVM :: CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir
LLVM :: CodeGen/AMDGPU/inline-asm.i128.ll
LLVM :: CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
LLVM :: CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir
LLVM :: CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-subreg-insert-extract.mir
LLVM :: CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-subreg-src2-chain.mir
LLVM :: CodeGen/AMDGPU/spill-vector-superclass.ll
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Like, I am not even sure if you add it to a pointer in a 64-bit integer context it will give you a correct result, this is really a separate change. But you cannot anyway, because both lowering paths discard low32.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These tests are incredibly stupid, and I think it does not do anything, but if it is needed to unblock this, then: #158823. I still insist it shall not be a part of this PR.
Fixes: SWDEV-551181
f09aff5
to
1d04421
Compare
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/66/builds/19239 Here is the relevant piece of the build log for the reference
|
Fixes: SWDEV-551181