-
Notifications
You must be signed in to change notification settings - Fork 12k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[X86] AMD Znver2 (Rome) Scheduler enablement
This patch provides the details of the znver2 scheduler model. Compared with znver1, there are a few improvements in execution units, latencies, and throughput. The llvm-mca tests that existed for znver1 were replicated, and their latency, execution-unit, timeline, and throughput information was updated for znver2. Reviewers: craig.topper, Simon Pilgrim. Differential Revision: https://reviews.llvm.org/D66088
- Loading branch information
Showing
59 changed files
with
14,151 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
47 changes: 47 additions & 0 deletions
47
llvm/test/tools/llvm-mca/X86/Znver2/partial-reg-update-2.s
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -resource-pressure=false -timeline < %s | FileCheck %s | ||
|
||
imul %rax, %rbx | ||
lzcnt %ax, %bx | ||
add %ecx, %ebx | ||
|
||
# CHECK: Iterations: 1 | ||
# CHECK-NEXT: Instructions: 3 | ||
# CHECK-NEXT: Total Cycles: 9 | ||
# CHECK-NEXT: Total uOps: 4 | ||
|
||
# CHECK: Dispatch Width: 4 | ||
# CHECK-NEXT: uOps Per Cycle: 0.44 | ||
# CHECK-NEXT: IPC: 0.33 | ||
# CHECK-NEXT: Block RThroughput: 1.0 | ||
|
||
# CHECK: Instruction Info: | ||
# CHECK-NEXT: [1]: #uOps | ||
# CHECK-NEXT: [2]: Latency | ||
# CHECK-NEXT: [3]: RThroughput | ||
# CHECK-NEXT: [4]: MayLoad | ||
# CHECK-NEXT: [5]: MayStore | ||
# CHECK-NEXT: [6]: HasSideEffects (U) | ||
|
||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions: | ||
# CHECK-NEXT: 2 4 1.00 imulq %rax, %rbx | ||
# CHECK-NEXT: 1 1 0.25 lzcntw %ax, %bx | ||
# CHECK-NEXT: 1 1 0.25 addl %ecx, %ebx | ||
|
||
# CHECK: Timeline view: | ||
# CHECK-NEXT: Index 012345678 | ||
|
||
# CHECK: [0,0] DeeeeER . imulq %rax, %rbx | ||
# CHECK-NEXT: [0,1] D====eER. lzcntw %ax, %bx | ||
# CHECK-NEXT: [0,2] D=====eER addl %ecx, %ebx | ||
|
||
# CHECK: Average Wait times (based on the timeline view): | ||
# CHECK-NEXT: [0]: Executions | ||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue | ||
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready | ||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage | ||
|
||
# CHECK: [0] [1] [2] [3] | ||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rbx | ||
# CHECK-NEXT: 1. 1 5.0 0.0 0.0 lzcntw %ax, %bx | ||
# CHECK-NEXT: 2. 1 6.0 0.0 0.0 addl %ecx, %ebx |
91 changes: 91 additions & 0 deletions
91
llvm/test/tools/llvm-mca/X86/Znver2/partial-reg-update-3.s
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1500 -timeline -timeline-max-iterations=6 < %s | FileCheck %s | ||
|
||
# The ILP is limited by the false dependency on %dx. So, the mov cannot execute | ||
# in parallel with the add. | ||
|
||
add %cx, %dx | ||
mov %ax, %dx | ||
xor %bx, %dx | ||
|
||
# CHECK: Iterations: 1500 | ||
# CHECK-NEXT: Instructions: 4500 | ||
# CHECK-NEXT: Total Cycles: 4503 | ||
# CHECK-NEXT: Total uOps: 4500 | ||
|
||
# CHECK: Dispatch Width: 4 | ||
# CHECK-NEXT: uOps Per Cycle: 1.00 | ||
# CHECK-NEXT: IPC: 1.00 | ||
# CHECK-NEXT: Block RThroughput: 0.8 | ||
|
||
# CHECK: Instruction Info: | ||
# CHECK-NEXT: [1]: #uOps | ||
# CHECK-NEXT: [2]: Latency | ||
# CHECK-NEXT: [3]: RThroughput | ||
# CHECK-NEXT: [4]: MayLoad | ||
# CHECK-NEXT: [5]: MayStore | ||
# CHECK-NEXT: [6]: HasSideEffects (U) | ||
|
||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions: | ||
# CHECK-NEXT: 1 1 0.25 addw %cx, %dx | ||
# CHECK-NEXT: 1 1 0.25 movw %ax, %dx | ||
# CHECK-NEXT: 1 1 0.25 xorw %bx, %dx | ||
|
||
# CHECK: Resources: | ||
# CHECK-NEXT: [0] - Zn2AGU0 | ||
# CHECK-NEXT: [1] - Zn2AGU1 | ||
# CHECK-NEXT: [2] - Zn2AGU2 | ||
# CHECK-NEXT: [3] - Zn2ALU0 | ||
# CHECK-NEXT: [4] - Zn2ALU1 | ||
# CHECK-NEXT: [5] - Zn2ALU2 | ||
# CHECK-NEXT: [6] - Zn2ALU3 | ||
# CHECK-NEXT: [7] - Zn2Divider | ||
# CHECK-NEXT: [8] - Zn2FPU0 | ||
# CHECK-NEXT: [9] - Zn2FPU1 | ||
# CHECK-NEXT: [10] - Zn2FPU2 | ||
# CHECK-NEXT: [11] - Zn2FPU3 | ||
# CHECK-NEXT: [12] - Zn2Multiplier | ||
|
||
# CHECK: Resource pressure per iteration: | ||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] | ||
# CHECK-NEXT: - - - 0.75 0.75 0.75 0.75 - - - - - - | ||
|
||
# CHECK: Resource pressure by instruction: | ||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: | ||
# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - addw %cx, %dx | ||
# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movw %ax, %dx | ||
# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - xorw %bx, %dx | ||
|
||
# CHECK: Timeline view: | ||
# CHECK-NEXT: 0123456789 | ||
# CHECK-NEXT: Index 0123456789 0 | ||
|
||
# CHECK: [0,0] DeER . . . . addw %cx, %dx | ||
# CHECK-NEXT: [0,1] D=eER. . . . movw %ax, %dx | ||
# CHECK-NEXT: [0,2] D==eER . . . xorw %bx, %dx | ||
# CHECK-NEXT: [1,0] D===eER . . . addw %cx, %dx | ||
# CHECK-NEXT: [1,1] .D===eER . . . movw %ax, %dx | ||
# CHECK-NEXT: [1,2] .D====eER . . . xorw %bx, %dx | ||
# CHECK-NEXT: [2,0] .D=====eER. . . addw %cx, %dx | ||
# CHECK-NEXT: [2,1] .D======eER . . movw %ax, %dx | ||
# CHECK-NEXT: [2,2] . D======eER . . xorw %bx, %dx | ||
# CHECK-NEXT: [3,0] . D=======eER . . addw %cx, %dx | ||
# CHECK-NEXT: [3,1] . D========eER . . movw %ax, %dx | ||
# CHECK-NEXT: [3,2] . D=========eER. . xorw %bx, %dx | ||
# CHECK-NEXT: [4,0] . D=========eER . addw %cx, %dx | ||
# CHECK-NEXT: [4,1] . D==========eER . movw %ax, %dx | ||
# CHECK-NEXT: [4,2] . D===========eER . xorw %bx, %dx | ||
# CHECK-NEXT: [5,0] . D============eER . addw %cx, %dx | ||
# CHECK-NEXT: [5,1] . D============eER. movw %ax, %dx | ||
# CHECK-NEXT: [5,2] . D=============eER xorw %bx, %dx | ||
|
||
# CHECK: Average Wait times (based on the timeline view): | ||
# CHECK-NEXT: [0]: Executions | ||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue | ||
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready | ||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage | ||
|
||
# CHECK: [0] [1] [2] [3] | ||
# CHECK-NEXT: 0. 6 7.0 0.2 0.0 addw %cx, %dx | ||
# CHECK-NEXT: 1. 6 7.7 0.0 0.0 movw %ax, %dx | ||
# CHECK-NEXT: 2. 6 8.5 0.0 0.0 xorw %bx, %dx |
94 changes: 94 additions & 0 deletions
94
llvm/test/tools/llvm-mca/X86/Znver2/partial-reg-update-4.s
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1500 -timeline -timeline-max-iterations=7 < %s | FileCheck %s | ||
|
||
# The lzcnt cannot execute in parallel with the imul because there is a false | ||
# dependency on %bx. | ||
|
||
imul %ax, %bx | ||
lzcnt %ax, %bx | ||
add %cx, %bx | ||
|
||
# CHECK: Iterations: 1500 | ||
# CHECK-NEXT: Instructions: 4500 | ||
# CHECK-NEXT: Total Cycles: 7503 | ||
# CHECK-NEXT: Total uOps: 4500 | ||
|
||
# CHECK: Dispatch Width: 4 | ||
# CHECK-NEXT: uOps Per Cycle: 0.60 | ||
# CHECK-NEXT: IPC: 0.60 | ||
# CHECK-NEXT: Block RThroughput: 1.0 | ||
|
||
# CHECK: Instruction Info: | ||
# CHECK-NEXT: [1]: #uOps | ||
# CHECK-NEXT: [2]: Latency | ||
# CHECK-NEXT: [3]: RThroughput | ||
# CHECK-NEXT: [4]: MayLoad | ||
# CHECK-NEXT: [5]: MayStore | ||
# CHECK-NEXT: [6]: HasSideEffects (U) | ||
|
||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions: | ||
# CHECK-NEXT: 1 3 1.00 imulw %ax, %bx | ||
# CHECK-NEXT: 1 1 0.25 lzcntw %ax, %bx | ||
# CHECK-NEXT: 1 1 0.25 addw %cx, %bx | ||
|
||
# CHECK: Resources: | ||
# CHECK-NEXT: [0] - Zn2AGU0 | ||
# CHECK-NEXT: [1] - Zn2AGU1 | ||
# CHECK-NEXT: [2] - Zn2AGU2 | ||
# CHECK-NEXT: [3] - Zn2ALU0 | ||
# CHECK-NEXT: [4] - Zn2ALU1 | ||
# CHECK-NEXT: [5] - Zn2ALU2 | ||
# CHECK-NEXT: [6] - Zn2ALU3 | ||
# CHECK-NEXT: [7] - Zn2Divider | ||
# CHECK-NEXT: [8] - Zn2FPU0 | ||
# CHECK-NEXT: [9] - Zn2FPU1 | ||
# CHECK-NEXT: [10] - Zn2FPU2 | ||
# CHECK-NEXT: [11] - Zn2FPU3 | ||
# CHECK-NEXT: [12] - Zn2Multiplier | ||
|
||
# CHECK: Resource pressure per iteration: | ||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] | ||
# CHECK-NEXT: - - - 0.67 1.00 0.67 0.67 - - - - - 1.00 | ||
|
||
# CHECK: Resource pressure by instruction: | ||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: | ||
# CHECK-NEXT: - - - - 1.00 - - - - - - - 1.00 imulw %ax, %bx | ||
# CHECK-NEXT: - - - 0.33 - 0.33 0.33 - - - - - - lzcntw %ax, %bx | ||
# CHECK-NEXT: - - - 0.33 - 0.33 0.33 - - - - - - addw %cx, %bx | ||
|
||
# CHECK: Timeline view: | ||
# CHECK-NEXT: 0123456789 01234567 | ||
# CHECK-NEXT: Index 0123456789 0123456789 | ||
|
||
# CHECK: [0,0] DeeeER . . . . . . . imulw %ax, %bx | ||
# CHECK-NEXT: [0,1] D===eER . . . . . . . lzcntw %ax, %bx | ||
# CHECK-NEXT: [0,2] D====eER . . . . . . . addw %cx, %bx | ||
# CHECK-NEXT: [1,0] D=====eeeER . . . . . . imulw %ax, %bx | ||
# CHECK-NEXT: [1,1] .D=======eER . . . . . . lzcntw %ax, %bx | ||
# CHECK-NEXT: [1,2] .D========eER . . . . . . addw %cx, %bx | ||
# CHECK-NEXT: [2,0] .D=========eeeER . . . . . imulw %ax, %bx | ||
# CHECK-NEXT: [2,1] .D============eER . . . . . lzcntw %ax, %bx | ||
# CHECK-NEXT: [2,2] . D============eER . . . . . addw %cx, %bx | ||
# CHECK-NEXT: [3,0] . D=============eeeER . . . . imulw %ax, %bx | ||
# CHECK-NEXT: [3,1] . D================eER . . . . lzcntw %ax, %bx | ||
# CHECK-NEXT: [3,2] . D=================eER . . . . addw %cx, %bx | ||
# CHECK-NEXT: [4,0] . D=================eeeER . . . imulw %ax, %bx | ||
# CHECK-NEXT: [4,1] . D====================eER . . . lzcntw %ax, %bx | ||
# CHECK-NEXT: [4,2] . D=====================eER . . . addw %cx, %bx | ||
# CHECK-NEXT: [5,0] . D======================eeeER . . imulw %ax, %bx | ||
# CHECK-NEXT: [5,1] . D========================eER . . lzcntw %ax, %bx | ||
# CHECK-NEXT: [5,2] . D=========================eER . . addw %cx, %bx | ||
# CHECK-NEXT: [6,0] . D==========================eeeER . imulw %ax, %bx | ||
# CHECK-NEXT: [6,1] . D=============================eER. lzcntw %ax, %bx | ||
# CHECK-NEXT: [6,2] . D=============================eER addw %cx, %bx | ||
|
||
# CHECK: Average Wait times (based on the timeline view): | ||
# CHECK-NEXT: [0]: Executions | ||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue | ||
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready | ||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage | ||
|
||
# CHECK: [0] [1] [2] [3] | ||
# CHECK-NEXT: 0. 7 14.1 0.1 0.0 imulw %ax, %bx | ||
# CHECK-NEXT: 1. 7 16.9 0.0 0.0 lzcntw %ax, %bx | ||
# CHECK-NEXT: 2. 7 17.6 0.0 0.0 addw %cx, %bx |
70 changes: 70 additions & 0 deletions
70
llvm/test/tools/llvm-mca/X86/Znver2/partial-reg-update-5.s
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1500 -timeline -timeline-max-iterations=8 < %s | FileCheck %s | ||
|
||
lzcnt %ax, %bx ## partial register stall. | ||
|
||
# CHECK: Iterations: 1500 | ||
# CHECK-NEXT: Instructions: 1500 | ||
# CHECK-NEXT: Total Cycles: 1503 | ||
# CHECK-NEXT: Total uOps: 1500 | ||
|
||
# CHECK: Dispatch Width: 4 | ||
# CHECK-NEXT: uOps Per Cycle: 1.00 | ||
# CHECK-NEXT: IPC: 1.00 | ||
# CHECK-NEXT: Block RThroughput: 0.3 | ||
|
||
# CHECK: Instruction Info: | ||
# CHECK-NEXT: [1]: #uOps | ||
# CHECK-NEXT: [2]: Latency | ||
# CHECK-NEXT: [3]: RThroughput | ||
# CHECK-NEXT: [4]: MayLoad | ||
# CHECK-NEXT: [5]: MayStore | ||
# CHECK-NEXT: [6]: HasSideEffects (U) | ||
|
||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions: | ||
# CHECK-NEXT: 1 1 0.25 lzcntw %ax, %bx | ||
|
||
# CHECK: Resources: | ||
# CHECK-NEXT: [0] - Zn2AGU0 | ||
# CHECK-NEXT: [1] - Zn2AGU1 | ||
# CHECK-NEXT: [2] - Zn2AGU2 | ||
# CHECK-NEXT: [3] - Zn2ALU0 | ||
# CHECK-NEXT: [4] - Zn2ALU1 | ||
# CHECK-NEXT: [5] - Zn2ALU2 | ||
# CHECK-NEXT: [6] - Zn2ALU3 | ||
# CHECK-NEXT: [7] - Zn2Divider | ||
# CHECK-NEXT: [8] - Zn2FPU0 | ||
# CHECK-NEXT: [9] - Zn2FPU1 | ||
# CHECK-NEXT: [10] - Zn2FPU2 | ||
# CHECK-NEXT: [11] - Zn2FPU3 | ||
# CHECK-NEXT: [12] - Zn2Multiplier | ||
|
||
# CHECK: Resource pressure per iteration: | ||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] | ||
# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - | ||
|
||
# CHECK: Resource pressure by instruction: | ||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: | ||
# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - lzcntw %ax, %bx | ||
|
||
# CHECK: Timeline view: | ||
# CHECK-NEXT: 0 | ||
# CHECK-NEXT: Index 0123456789 | ||
|
||
# CHECK: [0,0] DeER . . lzcntw %ax, %bx | ||
# CHECK-NEXT: [1,0] D=eER. . lzcntw %ax, %bx | ||
# CHECK-NEXT: [2,0] D==eER . lzcntw %ax, %bx | ||
# CHECK-NEXT: [3,0] D===eER . lzcntw %ax, %bx | ||
# CHECK-NEXT: [4,0] .D===eER . lzcntw %ax, %bx | ||
# CHECK-NEXT: [5,0] .D====eER . lzcntw %ax, %bx | ||
# CHECK-NEXT: [6,0] .D=====eER. lzcntw %ax, %bx | ||
# CHECK-NEXT: [7,0] .D======eER lzcntw %ax, %bx | ||
|
||
# CHECK: Average Wait times (based on the timeline view): | ||
# CHECK-NEXT: [0]: Executions | ||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue | ||
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready | ||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage | ||
|
||
# CHECK: [0] [1] [2] [3] | ||
# CHECK-NEXT: 0. 8 4.0 0.1 0.0 lzcntw %ax, %bx |
Oops, something went wrong.