Skip to content

Commit

Permalink
[X86] Add schedule module for Alderlake-P
Browse files Browse the repository at this point in the history
The X86SchedAlderlakeP.td file is automatically generated by schedtool
(D130897). Most instructions' scheduling information is based on
measured ADL-P data from uops.info. Some data comes from the GLC
throughput/latency (tpt/lat) data provided in Intel's documentation. The
remaining instructions' scheduling information is taken from the Skylake
client schedule model in order to obtain a relatively complete model.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D130959
  • Loading branch information
HaohaiWen committed Aug 18, 2022
1 parent 0e0e8b6 commit f4410d4
Show file tree
Hide file tree
Showing 35 changed files with 15,229 additions and 1 deletion.
3 changes: 2 additions & 1 deletion llvm/lib/Target/X86/X86.td
Expand Up @@ -675,6 +675,7 @@ include "X86ScheduleBtVer2.td"
include "X86SchedSkylakeClient.td"
include "X86SchedSkylakeServer.td"
include "X86SchedIceLake.td"
include "X86SchedAlderlakeP.td"

//===----------------------------------------------------------------------===//
// X86 Processor Feature Lists
Expand Down Expand Up @@ -1479,7 +1480,7 @@ def : ProcModel<"tigerlake", IceLakeModel,
ProcessorFeatures.TGLFeatures, ProcessorFeatures.TGLTuning>;
def : ProcModel<"sapphirerapids", SkylakeServerModel,
ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
def : ProcModel<"alderlake", SkylakeClientModel,
def : ProcModel<"alderlake", AlderlakePModel,
ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;

// AMD CPUs.
Expand Down
2,448 changes: 2,448 additions & 0 deletions llvm/lib/Target/X86/X86SchedAlderlakeP.td

Large diffs are not rendered by default.

142 changes: 142 additions & 0 deletions llvm/test/tools/llvm-mca/X86/AlderlakeP/independent-load-stores.s
@@ -0,0 +1,142 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=alderlake -timeline -timeline-max-iterations=1 < %s | FileCheck %s -check-prefixes=ALL,NOALIAS
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=alderlake -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s -check-prefixes=ALL,YESALIAS

addq $44, 64(%r14)
addq $44, 128(%r14)
addq $44, 192(%r14)
addq $44, 256(%r14)
addq $44, 320(%r14)
addq $44, 384(%r14)
addq $44, 448(%r14)
addq $44, 512(%r14)
addq $44, 576(%r14)
addq $44, 640(%r14)

# ALL: Iterations: 100
# ALL-NEXT: Instructions: 1000

# NOALIAS-NEXT: Total Cycles: 1014
# YESALIAS-NEXT: Total Cycles: 12003

# ALL-NEXT: Total uOps: 4000

# ALL: Dispatch Width: 6

# NOALIAS-NEXT: uOps Per Cycle: 3.94
# NOALIAS-NEXT: IPC: 0.99

# YESALIAS-NEXT: uOps Per Cycle: 0.33
# YESALIAS-NEXT: IPC: 0.08

# ALL-NEXT: Block RThroughput: 6.7

# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)

# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# ALL-NEXT: 4 12 0.50 * * addq $44, 64(%r14)
# ALL-NEXT: 4 12 0.50 * * addq $44, 128(%r14)
# ALL-NEXT: 4 12 0.50 * * addq $44, 192(%r14)
# ALL-NEXT: 4 12 0.50 * * addq $44, 256(%r14)
# ALL-NEXT: 4 12 0.50 * * addq $44, 320(%r14)
# ALL-NEXT: 4 12 0.50 * * addq $44, 384(%r14)
# ALL-NEXT: 4 12 0.50 * * addq $44, 448(%r14)
# ALL-NEXT: 4 12 0.50 * * addq $44, 512(%r14)
# ALL-NEXT: 4 12 0.50 * * addq $44, 576(%r14)
# ALL-NEXT: 4 12 0.50 * * addq $44, 640(%r14)

# ALL: Resources:
# ALL-NEXT: [0] - ADLPPort00
# ALL-NEXT: [1] - ADLPPort01
# ALL-NEXT: [2] - ADLPPort02
# ALL-NEXT: [3] - ADLPPort03
# ALL-NEXT: [4] - ADLPPort04
# ALL-NEXT: [5] - ADLPPort05
# ALL-NEXT: [6] - ADLPPort06
# ALL-NEXT: [7] - ADLPPort07
# ALL-NEXT: [8] - ADLPPort08
# ALL-NEXT: [9] - ADLPPort09
# ALL-NEXT: [10] - ADLPPort10
# ALL-NEXT: [11] - ADLPPort11
# ALL-NEXT: [12] - ADLPPortInvalid

# ALL: Resource pressure per iteration:
# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
# ALL-NEXT: 2.00 2.00 3.33 3.33 5.00 2.00 2.00 5.00 5.00 5.00 2.00 3.34 -

# ALL: Resource pressure by instruction:
# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
# ALL-NEXT: - - 0.33 0.33 - - - - 1.00 1.00 1.00 0.34 - addq $44, 64(%r14)
# ALL-NEXT: - - 0.33 0.34 1.00 - 1.00 1.00 - - - 0.33 - addq $44, 128(%r14)
# ALL-NEXT: - - 0.34 0.33 - 1.00 - - 1.00 1.00 - 0.33 - addq $44, 192(%r14)
# ALL-NEXT: - 1.00 0.33 0.33 1.00 - - 1.00 - - - 0.34 - addq $44, 256(%r14)
# ALL-NEXT: 1.00 - 0.33 0.34 - - - - 1.00 1.00 - 0.33 - addq $44, 320(%r14)
# ALL-NEXT: - - 0.34 0.33 1.00 - - 1.00 - - 1.00 0.33 - addq $44, 384(%r14)
# ALL-NEXT: - - 0.33 0.33 - - 1.00 - 1.00 1.00 - 0.34 - addq $44, 448(%r14)
# ALL-NEXT: - - 0.33 0.34 1.00 1.00 - 1.00 - - - 0.33 - addq $44, 512(%r14)
# ALL-NEXT: - 1.00 0.34 0.33 - - - - 1.00 1.00 - 0.33 - addq $44, 576(%r14)
# ALL-NEXT: 1.00 - 0.33 0.33 1.00 - - 1.00 - - - 0.34 - addq $44, 640(%r14)

# ALL: Timeline view:

# NOALIAS-NEXT: 0123456789
# NOALIAS-NEXT: Index 0123456789 0123

# YESALIAS-NEXT: 0123456789 0123456789 0123456789 01234
# YESALIAS-NEXT: Index 0123456789 0123456789 0123456789 0123456789

# NOALIAS: [0,0] DeeeeeeeeeeeeER. . . addq $44, 64(%r14)
# NOALIAS-NEXT: [0,1] .DeeeeeeeeeeeeER . . addq $44, 128(%r14)
# NOALIAS-NEXT: [0,2] . DeeeeeeeeeeeeER . . addq $44, 192(%r14)
# NOALIAS-NEXT: [0,3] . DeeeeeeeeeeeeER . . addq $44, 256(%r14)
# NOALIAS-NEXT: [0,4] . DeeeeeeeeeeeeER . . addq $44, 320(%r14)
# NOALIAS-NEXT: [0,5] . DeeeeeeeeeeeeER. . addq $44, 384(%r14)
# NOALIAS-NEXT: [0,6] . .DeeeeeeeeeeeeER . addq $44, 448(%r14)
# NOALIAS-NEXT: [0,7] . . DeeeeeeeeeeeeER . addq $44, 512(%r14)
# NOALIAS-NEXT: [0,8] . . DeeeeeeeeeeeeER. addq $44, 576(%r14)
# NOALIAS-NEXT: [0,9] . . DeeeeeeeeeeeeER addq $44, 640(%r14)

# YESALIAS: [0,0] DeeeeeeeeeeeeER. . . . . . . . . . . . . addq $44, 64(%r14)
# YESALIAS-NEXT: [0,1] .D===========eeeeeeeeeeeeER . . . . . . . . . . addq $44, 128(%r14)
# YESALIAS-NEXT: [0,2] . D======================eeeeeeeeeeeeER . . . . . . . . addq $44, 192(%r14)
# YESALIAS-NEXT: [0,3] . D=================================eeeeeeeeeeeeER . . . . . addq $44, 256(%r14)
# YESALIAS-NEXT: [0,4] . D============================================eeeeeeeeeeeeER . . . addq $44, 320(%r14)
# YESALIAS-NEXT: [0,5] . D=======================================================eeeeeeeeeeeeER addq $44, 384(%r14)
# YESALIAS-NEXT: Truncated display due to cycle limit

# ALL: Average Wait times (based on the timeline view):
# ALL-NEXT: [0]: Executions
# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue
# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# ALL-NEXT: [3]: Average time elapsed from WB until retire stage

# ALL: [0] [1] [2] [3]
# ALL-NEXT: 0. 1 1.0 1.0 0.0 addq $44, 64(%r14)

# NOALIAS-NEXT: 1. 1 1.0 1.0 0.0 addq $44, 128(%r14)
# NOALIAS-NEXT: 2. 1 1.0 1.0 0.0 addq $44, 192(%r14)
# NOALIAS-NEXT: 3. 1 1.0 1.0 0.0 addq $44, 256(%r14)
# NOALIAS-NEXT: 4. 1 1.0 1.0 0.0 addq $44, 320(%r14)
# NOALIAS-NEXT: 5. 1 1.0 1.0 0.0 addq $44, 384(%r14)
# NOALIAS-NEXT: 6. 1 1.0 1.0 0.0 addq $44, 448(%r14)
# NOALIAS-NEXT: 7. 1 1.0 1.0 0.0 addq $44, 512(%r14)
# NOALIAS-NEXT: 8. 1 1.0 1.0 0.0 addq $44, 576(%r14)
# NOALIAS-NEXT: 9. 1 1.0 1.0 0.0 addq $44, 640(%r14)
# NOALIAS-NEXT: 1 1.0 1.0 0.0 <total>

# YESALIAS-NEXT: 1. 1 12.0 0.0 0.0 addq $44, 128(%r14)
# YESALIAS-NEXT: 2. 1 23.0 0.0 0.0 addq $44, 192(%r14)
# YESALIAS-NEXT: 3. 1 34.0 0.0 0.0 addq $44, 256(%r14)
# YESALIAS-NEXT: 4. 1 45.0 0.0 0.0 addq $44, 320(%r14)
# YESALIAS-NEXT: 5. 1 56.0 0.0 0.0 addq $44, 384(%r14)
# YESALIAS-NEXT: 6. 1 67.0 0.0 0.0 addq $44, 448(%r14)
# YESALIAS-NEXT: 7. 1 78.0 0.0 0.0 addq $44, 512(%r14)
# YESALIAS-NEXT: 8. 1 89.0 0.0 0.0 addq $44, 576(%r14)
# YESALIAS-NEXT: 9. 1 100.0 0.0 0.0 addq $44, 640(%r14)
# YESALIAS-NEXT: 1 50.5 0.1 0.0 <total>
60 changes: 60 additions & 0 deletions llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-adx.s
@@ -0,0 +1,60 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=alderlake -instruction-tables < %s | FileCheck %s

adcx %ebx, %ecx
adcx (%rbx), %ecx
adcx %rbx, %rcx
adcx (%rbx), %rcx

adox %ebx, %ecx
adox (%rbx), %ecx
adox %rbx, %rcx
adox (%rbx), %rcx

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.50 adcxl %ebx, %ecx
# CHECK-NEXT: 2 6 0.50 * adcxl (%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 adcxq %rbx, %rcx
# CHECK-NEXT: 2 6 0.50 * adcxq (%rbx), %rcx
# CHECK-NEXT: 1 1 0.50 adoxl %ebx, %ecx
# CHECK-NEXT: 2 6 0.50 * adoxl (%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 adoxq %rbx, %rcx
# CHECK-NEXT: 2 6 0.50 * adoxq (%rbx), %rcx

# CHECK: Resources:
# CHECK-NEXT: [0] - ADLPPort00
# CHECK-NEXT: [1] - ADLPPort01
# CHECK-NEXT: [2] - ADLPPort02
# CHECK-NEXT: [3] - ADLPPort03
# CHECK-NEXT: [4] - ADLPPort04
# CHECK-NEXT: [5] - ADLPPort05
# CHECK-NEXT: [6] - ADLPPort06
# CHECK-NEXT: [7] - ADLPPort07
# CHECK-NEXT: [8] - ADLPPort08
# CHECK-NEXT: [9] - ADLPPort09
# CHECK-NEXT: [10] - ADLPPort10
# CHECK-NEXT: [11] - ADLPPort11
# CHECK-NEXT: [12] - ADLPPortInvalid

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
# CHECK-NEXT: 4.00 - 1.33 1.33 - - 4.00 - - - - 1.33 -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
# CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcxl %ebx, %ecx
# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcxl (%rbx), %ecx
# CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcxq %rbx, %rcx
# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcxq (%rbx), %rcx
# CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adoxl %ebx, %ecx
# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adoxl (%rbx), %ecx
# CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adoxq %rbx, %rcx
# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adoxq (%rbx), %rcx
76 changes: 76 additions & 0 deletions llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-aes.s
@@ -0,0 +1,76 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=alderlake -instruction-tables < %s | FileCheck %s

aesdec %xmm0, %xmm2
aesdec (%rax), %xmm2

aesdeclast %xmm0, %xmm2
aesdeclast (%rax), %xmm2

aesenc %xmm0, %xmm2
aesenc (%rax), %xmm2

aesenclast %xmm0, %xmm2
aesenclast (%rax), %xmm2

aesimc %xmm0, %xmm2
aesimc (%rax), %xmm2

aeskeygenassist $22, %xmm0, %xmm2
aeskeygenassist $22, (%rax), %xmm2

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 5 0.50 aesdec %xmm0, %xmm2
# CHECK-NEXT: 2 12 0.50 * aesdec (%rax), %xmm2
# CHECK-NEXT: 1 5 0.50 aesdeclast %xmm0, %xmm2
# CHECK-NEXT: 2 12 0.50 * aesdeclast (%rax), %xmm2
# CHECK-NEXT: 1 5 0.50 aesenc %xmm0, %xmm2
# CHECK-NEXT: 2 12 0.50 * aesenc (%rax), %xmm2
# CHECK-NEXT: 1 5 0.50 aesenclast %xmm0, %xmm2
# CHECK-NEXT: 2 12 0.50 * aesenclast (%rax), %xmm2
# CHECK-NEXT: 2 8 1.00 aesimc %xmm0, %xmm2
# CHECK-NEXT: 3 15 1.00 * aesimc (%rax), %xmm2
# CHECK-NEXT: 14 7 4.00 aeskeygenassist $22, %xmm0, %xmm2
# CHECK-NEXT: 14 12 4.00 * aeskeygenassist $22, (%rax), %xmm2

# CHECK: Resources:
# CHECK-NEXT: [0] - ADLPPort00
# CHECK-NEXT: [1] - ADLPPort01
# CHECK-NEXT: [2] - ADLPPort02
# CHECK-NEXT: [3] - ADLPPort03
# CHECK-NEXT: [4] - ADLPPort04
# CHECK-NEXT: [5] - ADLPPort05
# CHECK-NEXT: [6] - ADLPPort06
# CHECK-NEXT: [7] - ADLPPort07
# CHECK-NEXT: [8] - ADLPPort08
# CHECK-NEXT: [9] - ADLPPort09
# CHECK-NEXT: [10] - ADLPPort10
# CHECK-NEXT: [11] - ADLPPort11
# CHECK-NEXT: [12] - ADLPPortInvalid

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
# CHECK-NEXT: 17.33 10.33 2.00 2.00 - 9.33 2.00 - - - - 2.00 -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - aesdec %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - aesdec (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - aesdeclast %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - aesdeclast (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - aesenc %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - aesenc (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - aesenclast %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - aesenclast (%rax), %xmm2
# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - aesimc %xmm0, %xmm2
# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - aesimc (%rax), %xmm2
# CHECK-NEXT: 5.83 2.33 - - - 4.83 1.00 - - - - - - aeskeygenassist $22, %xmm0, %xmm2
# CHECK-NEXT: 5.50 2.00 0.33 0.33 - 4.50 1.00 - - - - 0.33 - aeskeygenassist $22, (%rax), %xmm2

0 comments on commit f4410d4

Please sign in to comment.