-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[TableGen] Fix regunit superset calculation #81850
base: main
Are you sure you want to change the base?
Conversation
// In theory, this is combinatorial. In practice, it needs to be bounded | ||
// by a small number of sets for regpressure to be efficient. | ||
// If the assert is hit, we need to implement pruning. | ||
assert(Idx < (2 * NumRegUnitSubSets) && "runaway unit set inference"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Previously this assertion was trivially true because Idx could never exceed NumRegUnitSubSets.
|
||
// Compare new sets with all original classes. | ||
for (unsigned SearchIdx = (Idx >= NumRegUnitSubSets) ? 0 : Idx + 1; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Previously Idx >= NumRegUnitSubSets was trivially false.
This gives a measurable X86 -O3 compile time speedup: https://llvm-compile-time-tracker.com/compare.php?from=594d57e07a92e3a2cefb262114db2608989f874d&to=aa8fca82b83239cfb186801238c282bd01fc48c2&stat=instructions:u
This is still a draft because I'd like to understand why it affected X86 codegen. I assume it is due to some difference in the pressure sets affecting the scheduler.
@llvm/pr-subscribers-backend-x86 Author: Jay Foad (jayfoad) Changes: Keep creating new supersets from new supersets until we reach a fixed point. For most targets this ends up with fewer regunitsets overall because it makes pruning more effective. Patch is 587.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/81850.diff 39 Files Affected:
diff --git a/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll b/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
index 65ff22f960f233..606253b35326c6 100644
--- a/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
+++ b/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
@@ -17,21 +17,20 @@ target triple = "x86_64-unknown-linux-gnu"
define dso_local i32 @main() nounwind uwtable {
; CHECK-LABEL: main:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movl i(%rip), %esi
+; CHECK-NEXT: movl i(%rip), %edx
; CHECK-NEXT: movl j(%rip), %eax
-; CHECK-NEXT: movl %esi, %edx
-; CHECK-NEXT: shrl $8, %edx
+; CHECK-NEXT: movl %edx, %esi
+; CHECK-NEXT: shrl $8, %esi
; CHECK-NEXT: movsbl %al, %ecx
; CHECK-NEXT: shrl $8, %eax
; CHECK-NEXT: cbtw
-; CHECK-NEXT: idivb %dl
-; CHECK-NEXT: movl %eax, %edx
-; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: idivb %sil
-; CHECK-NEXT: movzbl %dl, %ecx
+; CHECK-NEXT: movzbl %al, %esi
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: idivb %dl
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: movd %eax, %xmm0
-; CHECK-NEXT: pinsrb $1, %ecx, %xmm0
+; CHECK-NEXT: pinsrb $1, %esi, %xmm0
; CHECK-NEXT: pextrw $0, %xmm0, res(%rip)
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll
index 5969aae43f82e8..6a07fe0633d7fd 100644
--- a/llvm/test/CodeGen/X86/abs.ll
+++ b/llvm/test/CodeGen/X86/abs.ll
@@ -487,10 +487,10 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind {
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: subl $12, %esp
+; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
; X86-NEXT: movb {{[0-9]+}}(%esp), %bh
; X86-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-NEXT: movb {{[0-9]+}}(%esp), %dh
-; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
@@ -509,26 +509,23 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind {
; X86-NEXT: xorb %al, %ah
; X86-NEXT: subb %al, %ah
; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: movb %ch, %al
-; X86-NEXT: sarb $7, %al
-; X86-NEXT: xorb %al, %ch
-; X86-NEXT: subb %al, %ch
-; X86-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movb %dh, %al
; X86-NEXT: sarb $7, %al
; X86-NEXT: xorb %al, %dh
; X86-NEXT: subb %al, %dh
-; X86-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: sarb $7, %al
; X86-NEXT: xorb %al, %bl
; X86-NEXT: subb %al, %bl
-; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movb %bh, %al
; X86-NEXT: sarb $7, %al
; X86-NEXT: xorb %al, %bh
; X86-NEXT: subb %al, %bh
-; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: movb %ch, %al
+; X86-NEXT: sarb $7, %al
+; X86-NEXT: xorb %al, %ch
+; X86-NEXT: subb %al, %ch
+; X86-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sarb $7, %al
@@ -547,21 +544,24 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind {
; X86-NEXT: xorb %al, %cl
; X86-NEXT: subb %al, %cl
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: movb {{[0-9]+}}(%esp), %bh
-; X86-NEXT: movb %bh, %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sarb $7, %al
-; X86-NEXT: xorb %al, %bh
-; X86-NEXT: subb %al, %bh
-; X86-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: xorb %al, %cl
+; X86-NEXT: subb %al, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sarb $7, %al
-; X86-NEXT: xorb %al, %bl
-; X86-NEXT: subb %al, %bl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %dh
-; X86-NEXT: movb %dh, %al
+; X86-NEXT: xorb %al, %cl
+; X86-NEXT: subb %al, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sarb $7, %al
-; X86-NEXT: xorb %al, %dh
-; X86-NEXT: subb %al, %dh
+; X86-NEXT: xorb %al, %cl
+; X86-NEXT: subb %al, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
; X86-NEXT: movb %ch, %al
; X86-NEXT: sarb $7, %al
@@ -581,9 +581,12 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind {
; X86-NEXT: movb %cl, 15(%eax)
; X86-NEXT: movb %dl, 14(%eax)
; X86-NEXT: movb %ch, 13(%eax)
-; X86-NEXT: movb %dh, 12(%eax)
-; X86-NEXT: movb %bl, 11(%eax)
-; X86-NEXT: movb %bh, 10(%eax)
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT: movb %cl, 12(%eax)
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT: movb %cl, 11(%eax)
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT: movb %cl, 10(%eax)
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movb %cl, 9(%eax)
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
@@ -592,12 +595,9 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind {
; X86-NEXT: movb %cl, 7(%eax)
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movb %cl, 6(%eax)
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X86-NEXT: movb %cl, 5(%eax)
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X86-NEXT: movb %cl, 4(%eax)
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X86-NEXT: movb %cl, 3(%eax)
+; X86-NEXT: movb %bh, 5(%eax)
+; X86-NEXT: movb %bl, 4(%eax)
+; X86-NEXT: movb %dh, 3(%eax)
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movb %cl, 2(%eax)
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
diff --git a/llvm/test/CodeGen/X86/apx/mul-i1024.ll b/llvm/test/CodeGen/X86/apx/mul-i1024.ll
index 2b99c44fc769a2..e66ca42b45bee9 100644
--- a/llvm/test/CodeGen/X86/apx/mul-i1024.ll
+++ b/llvm/test/CodeGen/X86/apx/mul-i1024.ll
@@ -759,20 +759,20 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: adcq %r15, %r27
; EGPR-NEXT: adcq %r14, %r24
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
-; EGPR-NEXT: movq 80(%r11), %rbx
-; EGPR-NEXT: movq %rbx, %rax
+; EGPR-NEXT: movq 88(%r11), %r28
+; EGPR-NEXT: movq 80(%r11), %r14
+; EGPR-NEXT: movq %r14, %rax
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r19 # 8-byte Reload
; EGPR-NEXT: mulq %r19
-; EGPR-NEXT: movq %rax, %r21
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq 88(%r11), %r28
+; EGPR-NEXT: movq %rax, %r21
; EGPR-NEXT: movq %r28, %rax
; EGPR-NEXT: mulq %r19
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r16
; EGPR-NEXT: addq %r8, %r16
; EGPR-NEXT: adcq $0, %r9
-; EGPR-NEXT: movq %rbx, %rax
+; EGPR-NEXT: movq %r14, %rax
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r17 # 8-byte Reload
; EGPR-NEXT: mulq %r17
; EGPR-NEXT: movq %rdx, %r8
@@ -787,13 +787,13 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: addq %r8, %r16
; EGPR-NEXT: movzbl %cl, %eax
; EGPR-NEXT: adcq %rax, %r9
+; EGPR-NEXT: movq 72(%r11), %rbx
; EGPR-NEXT: movq 64(%r11), %r15
; EGPR-NEXT: movq %r15, %rax
; EGPR-NEXT: mulq %r19
-; EGPR-NEXT: movq %rax, %r23
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq 72(%r11), %r14
-; EGPR-NEXT: movq %r14, %rax
+; EGPR-NEXT: movq %rax, %r23
+; EGPR-NEXT: movq %rbx, %rax
; EGPR-NEXT: mulq %r19
; EGPR-NEXT: movq %rdx, %r30
; EGPR-NEXT: movq %rax, %r31
@@ -806,7 +806,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: addq %r31, %r29
; EGPR-NEXT: adcq %r30, %r8
; EGPR-NEXT: setb %cl
-; EGPR-NEXT: movq %r14, %rax
+; EGPR-NEXT: movq %rbx, %rax
; EGPR-NEXT: mulq %r17
; EGPR-NEXT: movq %rdx, %r31
; EGPR-NEXT: movq %rax, %r13
@@ -822,7 +822,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: mulq %rdi
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, %r30
-; EGPR-NEXT: movq %r14, %rax
+; EGPR-NEXT: movq %rbx, %rax
; EGPR-NEXT: mulq %rdi
; EGPR-NEXT: movq %rdx, %r26
; EGPR-NEXT: movq %rax, %rcx
@@ -836,7 +836,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: addq %rcx, %r21
; EGPR-NEXT: adcq %r26, %r10
; EGPR-NEXT: setb %cl
-; EGPR-NEXT: movq %r14, %rax
+; EGPR-NEXT: movq %rbx, %rax
; EGPR-NEXT: mulq %r18
; EGPR-NEXT: movq %rdx, %r26
; EGPR-NEXT: movq %rax, %r8
@@ -850,7 +850,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: addq %r16, %r8
; EGPR-NEXT: adcq %r9, %r26
; EGPR-NEXT: setb %sil
-; EGPR-NEXT: movq %rbx, %rax
+; EGPR-NEXT: movq %r14, %rax
; EGPR-NEXT: mulq %rdi
; EGPR-NEXT: movq %rdx, %rcx
; EGPR-NEXT: movq %rax, %r31
@@ -860,7 +860,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: movq %rax, %r10
; EGPR-NEXT: addq %rcx, %r10
; EGPR-NEXT: adcq $0, %r9
-; EGPR-NEXT: movq %rbx, %rax
+; EGPR-NEXT: movq %r14, %rax
; EGPR-NEXT: mulq %r18
; EGPR-NEXT: movq %rdx, %rcx
; EGPR-NEXT: movq %rax, %r13
@@ -935,13 +935,13 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: movq %rax, %rcx
; EGPR-NEXT: addq %rdi, %rdx
; EGPR-NEXT: movq %r8, %rax
-; EGPR-NEXT: imulq %r14, %rax
+; EGPR-NEXT: imulq %rbx, %rax
; EGPR-NEXT: addq %rdx, %rax
; EGPR-NEXT: movq %rax, %r18
-; EGPR-NEXT: movq %rbx, %rdi
+; EGPR-NEXT: movq %r14, %rdi
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r19 # 8-byte Reload
; EGPR-NEXT: imulq %r19, %rdi
-; EGPR-NEXT: movq %rbx, %rax
+; EGPR-NEXT: movq %r14, %rax
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
; EGPR-NEXT: mulq %r8
; EGPR-NEXT: movq %rax, %r26
@@ -957,19 +957,19 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: movq %rax, %r8
; EGPR-NEXT: movq %r19, %rax
; EGPR-NEXT: mulq %r15
-; EGPR-NEXT: movq %rdx, %rbx
+; EGPR-NEXT: movq %rdx, %r14
; EGPR-NEXT: movq %rax, %r15
; EGPR-NEXT: addq %rcx, %r15
-; EGPR-NEXT: adcq $0, %rbx
+; EGPR-NEXT: adcq $0, %r14
; EGPR-NEXT: movq %rdi, %rax
-; EGPR-NEXT: mulq %r14
+; EGPR-NEXT: mulq %rbx
; EGPR-NEXT: movq %rdx, %rcx
; EGPR-NEXT: movq %rax, %r18
; EGPR-NEXT: addq %r15, %r18
-; EGPR-NEXT: adcq %rbx, %rcx
+; EGPR-NEXT: adcq %r14, %rcx
; EGPR-NEXT: setb %dil
; EGPR-NEXT: movq %r19, %rax
-; EGPR-NEXT: mulq %r14
+; EGPR-NEXT: mulq %rbx
; EGPR-NEXT: addq %rcx, %rax
; EGPR-NEXT: movzbl %dil, %ecx
; EGPR-NEXT: adcq %rcx, %rdx
@@ -1041,7 +1041,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: pushq %r13
; EGPR-NDD-NEXT: pushq %r12
; EGPR-NDD-NEXT: pushq %rbx
-; EGPR-NDD-NEXT: subq $104, %rsp
+; EGPR-NDD-NEXT: subq $96, %rsp
; EGPR-NDD-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: movq %rdi, %r20
; EGPR-NDD-NEXT: movq (%rdi), %r16
@@ -1298,7 +1298,6 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: adcq $0, %r29, %r8
; EGPR-NDD-NEXT: adcq $0, %rsi, %r9
; EGPR-NDD-NEXT: movq %r11, %r14
-; EGPR-NDD-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: movq 48(%r11), %r11
; EGPR-NDD-NEXT: movq %r10, %rsi
; EGPR-NDD-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -1393,22 +1392,22 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %rax, %rdi
; EGPR-NDD-NEXT: movzbl %r8b, %eax
; EGPR-NDD-NEXT: adcq %rax, %rdx, %r8
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; EGPR-NDD-NEXT: movq %r15, %rax
; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: movq %rdx, %r29
; EGPR-NDD-NEXT: movq %rax, %r25
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r15, %rax
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; EGPR-NDD-NEXT: movq %r12, %rax
; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: addq %r29, %rax, %r9
; EGPR-NDD-NEXT: adcq $0, %rdx, %r10
-; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: movq %r15, %rax
; EGPR-NDD-NEXT: mulq %r18
; EGPR-NDD-NEXT: addq %r9, %rax, %rbx
; EGPR-NDD-NEXT: adcq %rdx, %r10, %r9
; EGPR-NDD-NEXT: setb %r10b
-; EGPR-NDD-NEXT: movq %r15, %rax
+; EGPR-NDD-NEXT: movq %r12, %rax
; EGPR-NDD-NEXT: mulq %r18
; EGPR-NDD-NEXT: addq %r9, %rax
; EGPR-NDD-NEXT: movzbl %r10b, %r9d
@@ -1417,20 +1416,20 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: adcq %rdx, %rsi
; EGPR-NDD-NEXT: adcq $0, %rdi
; EGPR-NDD-NEXT: adcq $0, %r8
-; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: movq %r15, %rax
; EGPR-NDD-NEXT: mulq %r11
; EGPR-NDD-NEXT: movq %rdx, %r28
; EGPR-NDD-NEXT: movq %rax, %r29
-; EGPR-NDD-NEXT: movq %r15, %rax
+; EGPR-NDD-NEXT: movq %r12, %rax
; EGPR-NDD-NEXT: mulq %r11
; EGPR-NDD-NEXT: addq %r28, %rax, %r10
; EGPR-NDD-NEXT: adcq $0, %rdx, %r27
-; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: movq %r15, %rax
; EGPR-NDD-NEXT: mulq %r16
; EGPR-NDD-NEXT: addq %rax, %r10
; EGPR-NDD-NEXT: adcq %rdx, %r27
; EGPR-NDD-NEXT: setb %r28b
-; EGPR-NDD-NEXT: movq %r15, %rax
+; EGPR-NDD-NEXT: movq %r12, %rax
; EGPR-NDD-NEXT: mulq %r16
; EGPR-NDD-NEXT: addq %r27, %rax
; EGPR-NDD-NEXT: movzbl %r28b, %r27d
@@ -1446,7 +1445,6 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: mulq %r11
; EGPR-NDD-NEXT: movq %rdx, %r28
; EGPR-NDD-NEXT: movq %rax, %r29
-; EGPR-NDD-NEXT: movq %r23, %r14
; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: mulq %r11
; EGPR-NDD-NEXT: addq %r28, %rax, %r27
@@ -1476,9 +1474,9 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: movzbl %bpl, %ecx
; EGPR-NDD-NEXT: adcq %rdi, %rcx
-; EGPR-NDD-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq $0, %r8, %rcx
; EGPR-NDD-NEXT: movq %rcx, (%rsp) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq $0, %r8, %rcx
+; EGPR-NDD-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: adcq $0, %rax
; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: adcq $0, %rdx, %rax
@@ -1660,19 +1658,18 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: adcq %rcx, %r19, %rbx
; EGPR-NDD-NEXT: adcq %rax, %r31, %rbp
; EGPR-NDD-NEXT: adcq %rdx, %r12, %r30
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r18 # 8-byte Reload
-; EGPR-NDD-NEXT: movq 80(%r18), %r22
-; EGPR-NDD-NEXT: movq %r22, %rax
+; EGPR-NDD-NEXT: movq 88(%r14), %r20
+; EGPR-NDD-NEXT: movq 80(%r14), %r23
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r21 # 8-byte Reload
; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: movq %rax, %r26
; EGPR-NDD-NEXT: movq %rdx, %rdi
-; EGPR-NDD-NEXT: movq 88(%r18), %r20
+; EGPR-NDD-NEXT: movq %rax, %r26
; EGPR-NDD-NEXT: movq %r20, %rax
; EGPR-NDD-NEXT: mulq %r21
; EGPR-NDD-NEXT: addq %rdi, %rax, %rcx
; EGPR-NDD-NEXT: adcq $0, %rdx, %rsi
-; EGPR-NDD-NEXT: movq %r22, %rax
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
; EGPR-NDD-NEXT: mulq %r12
; EGPR-NDD-NEXT: addq %rax, %rcx
@@ -1683,13 +1680,13 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %rax, %rsi
; EGPR-NDD-NEXT: movzbl %dil, %eax
; EGPR-NDD-NEXT: adcq %rax, %rdx, %rdi
-; EGPR-NDD-NEXT: movq 64(%r18), %r24
+; EGPR-NDD-NEXT: movq 72(%r14), %r22
+; EGPR-NDD-NEXT: movq 64(%r14), %r24
; EGPR-NDD-NEXT: movq %r24, %rax
; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: movq %rax, %r29
; EGPR-NDD-NEXT: movq %rdx, %r27
-; EGPR-NDD-NEXT: movq 72(%r18), %r23
-; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: movq %rax, %r29
+; EGPR-NDD-NEXT: movq %r22, %rax
; EGPR-NDD-NEXT: mulq %r21
; EGPR-NDD-NEXT: addq %r27, %rax, %r8
; EGPR-NDD-NEXT: adcq $0, %rdx, %r9
@@ -1698,7 +1695,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %r8, %rax, %r31
; EGPR-NDD-NEXT: adcq %rdx, %r9, %r8
; EGPR-NDD-NEXT: setb %r9b
-; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: movq %r22, %rax
; EGPR-NDD-NEXT: mulq %r12
; EGPR-NDD-NEXT: addq %r8, %rax
; EGPR-NDD-NEXT: movzbl %r9b, %r8d
@@ -1712,7 +1709,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: mulq %r16
; EGPR-NDD-NEXT: movq %rdx, %r26
; EGPR-NDD-NEXT: movq %rax, %r27
-; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: movq %r22, %rax
; EGPR-NDD-NEXT: mulq %r16
; EGPR-NDD-NEXT: addq %r26, %rax, %r9
; EGPR-NDD-NEXT: adcq $0, %rdx, %r10
@@ -1721,7 +1718,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %rax, %r9
; EGPR-NDD-NEXT: adcq %rdx, %r10
; EGPR-NDD-NEXT: setb %r11b
-; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: movq %r22, %rax
; EGPR-NDD-NEXT: mulq %r17
; EGPR-NDD-NEXT: addq %r10, %rax
; EGPR-NDD-NEXT: movzbl %r11b, %r10d
@@ -1733,7 +1730,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %rax, %rsi
; EGPR-NDD-NEXT: adcq %rdi, %rcx
; EGPR-NDD-NEXT: setb %dil
-; EGPR-NDD-NEXT: movq %r22, %rax
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: mulq %r16
; EGPR-NDD-NEXT: movq %rdx, %r26
; EGPR-NDD-NEXT: movq %rax, %r27
@@ -1741,7 +1738,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: mulq %r16
; EGPR-NDD-NEXT: addq %r26, %rax, %r8
; EGPR-NDD-NEXT: adcq $0, %rdx, %r9
-; EGPR-NDD-NEXT: movq %r22, %rax
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: mulq %r17
; EGPR-NDD-NEXT: addq %rax, %r8
; EGPR-NDD-NEXT: adcq %rdx, %r9
@@ -1756,22 +1753,20 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: movzbl %dil, %ecx
; EGPR-NDD-NEXT: adcq %rax, %rcx
; EGPR-NDD-NEXT: adcq $0, %rdx, %rdi
-; EGPR-NDD-NEXT: movq %r18, %r9
-; EGPR-NDD-NEXT: movq 96(%r18), %r26
+; EGPR-NDD-NEXT: movq 96(%r14), %r26
; EGPR-NDD-NEXT: imulq %r17, %r26, %rsi
; EGPR-NDD-NEXT: movq %r26, %rax
; EGPR-NDD-NEXT: mulq %r16
; EGPR-NDD-NEXT: movq %rax, %r18
; EGPR-NDD-NEXT: addq %rsi, %rdx, %rax
-; EGPR-NDD-NEXT: movq 104(%r9), %r8
+; EGPR-NDD-NEXT: movq 104(%r14), %r8
; EGPR-NDD-NEXT: imulq %r16, %r8, %rdx
; EGPR-NDD-NEXT: addq %rdx, %rax, %rsi
-; EGPR-NDD-NEXT: movq 112(%r9), %rax
-; EGPR-NDD-NEXT: movq %r9, %r11
+; EGPR-NDD-NEXT: movq 112(%r14), %rax
; EGPR-NDD-NEXT: imulq %r12, %rax, %r9
; EGPR-NDD-NEXT: mulq %r21
; EGPR-NDD-NEXT: addq %r9, %rdx
-; EGPR-NDD-NEXT: imulq 120(%r11), %r21, %r9
+; EGPR-NDD-NEXT: imulq 120(%r14), %r21, %r9
; EGPR-NDD-NEXT: addq %r9, %rdx
; EGPR-NDD-NEXT: addq %r18, %rax, %r9
; EGPR-NDD-NEXT: adcq %rsi, %rdx, %r16
@@ -1795,16 +1790,16 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: adcq %r8, %rdx
; EGPR-NDD-NEXT: addq %r9, %rax, %r10
; EGPR-NDD-NEXT: adcq %r16, %rdx, %r17
-; EGPR-NDD-NEXT: imulq %r14, %r24, %r8
+; EGPR-NDD-NEXT: imulq {{[-0-9]+}}(%r{{[sb]}}p), %r24, %r8 # 8-byte Folded Reload
; EGPR-NDD-NEXT: movq %r24...
[truncated]
|
Keep creating new supersets from new supersets until we reach a fixed point. For most targets this ends up with fewer regunitsets overall because it makes pruning more effective.
Confirmed. I looked at a couple of cases and it was due to different scheduler heuristics that depend on pressure sets. |
Do you have an example of a diff of the generated files? I found it fascinating (in a good way) that changing how we compute these improves compile time, so I'm curious to see a diff firsthand.
jayfoad/llvm-generated@547ef8c#diff-54007a1ac1ec6e07cbf61a38f863ea3b1e8064558ab29655338ece8b7299acbd This is all related to how The comment on
Is that still a realistic way forward, and would it allow us to remove this pruning code? |
Thanks for the diff @jayfoad !
No idea, I don't think I ever touched this part of the code base.
I don't know, let me educate myself on that stuff and try to come up with useful insights :). |
Keep creating new supersets from new supersets until we reach a fixed
point. For most targets this ends up with fewer regunitsets overall
because it makes pruning more effective.