1,957 changes: 1,957 additions & 0 deletions llvm/lib/Target/X86/X86ScheduleZnver4.td

Large diffs are not rendered by default.

155 changes: 155 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Znver4/independent-load-stores.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver4 -timeline -timeline-max-iterations=1 < %s | FileCheck %s -check-prefixes=ALL,NOALIAS
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver4 -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s -check-prefixes=ALL,YESALIAS

# Test input: ten read-modify-write adds. Each one adds the immediate 44 to a
# 64-bit memory operand addressed off %r14, and every instruction uses a
# different displacement (64, 128, ..., 640 -- a 64-byte step), so no two
# instructions name the same address.
# The two llvm-mca invocations at the top of this file differ only in
# -noalias=false, so this same sequence is simulated under both settings.
addq $44, 64(%r14)
addq $44, 128(%r14)
addq $44, 192(%r14)
addq $44, 256(%r14)
addq $44, 320(%r14)
addq $44, 384(%r14)
addq $44, 448(%r14)
addq $44, 512(%r14)
addq $44, 576(%r14)
addq $44, 640(%r14)

# ALL: Iterations: 100
# ALL-NEXT: Instructions: 1000

# NOALIAS-NEXT: Total Cycles: 675
# YESALIAS-NEXT: Total Cycles: 6003

# ALL-NEXT: Total uOps: 1000

# ALL: Dispatch Width: 6

# NOALIAS-NEXT: uOps Per Cycle: 1.48
# NOALIAS-NEXT: IPC: 1.48

# YESALIAS-NEXT: uOps Per Cycle: 0.17
# YESALIAS-NEXT: IPC: 0.17

# ALL-NEXT: Block RThroughput: 6.7

# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)

# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# ALL-NEXT: 1 6 0.67 * * addq $44, 64(%r14)
# ALL-NEXT: 1 6 0.67 * * addq $44, 128(%r14)
# ALL-NEXT: 1 6 0.67 * * addq $44, 192(%r14)
# ALL-NEXT: 1 6 0.67 * * addq $44, 256(%r14)
# ALL-NEXT: 1 6 0.67 * * addq $44, 320(%r14)
# ALL-NEXT: 1 6 0.67 * * addq $44, 384(%r14)
# ALL-NEXT: 1 6 0.67 * * addq $44, 448(%r14)
# ALL-NEXT: 1 6 0.67 * * addq $44, 512(%r14)
# ALL-NEXT: 1 6 0.67 * * addq $44, 576(%r14)
# ALL-NEXT: 1 6 0.67 * * addq $44, 640(%r14)

# ALL: Resources:
# ALL-NEXT: [0] - Zn4AGU0
# ALL-NEXT: [1] - Zn4AGU1
# ALL-NEXT: [2] - Zn4AGU2
# ALL-NEXT: [3] - Zn4ALU0
# ALL-NEXT: [4] - Zn4ALU1
# ALL-NEXT: [5] - Zn4ALU2
# ALL-NEXT: [6] - Zn4ALU3
# ALL-NEXT: [7] - Zn4BRU1
# ALL-NEXT: [8] - Zn4FP0
# ALL-NEXT: [9] - Zn4FP1
# ALL-NEXT: [10] - Zn4FP2
# ALL-NEXT: [11] - Zn4FP3
# ALL-NEXT: [12.0] - Zn4FP45
# ALL-NEXT: [12.1] - Zn4FP45
# ALL-NEXT: [13] - Zn4FPSt
# ALL-NEXT: [14.0] - Zn4LSU
# ALL-NEXT: [14.1] - Zn4LSU
# ALL-NEXT: [14.2] - Zn4LSU
# ALL-NEXT: [15.0] - Zn4Load
# ALL-NEXT: [15.1] - Zn4Load
# ALL-NEXT: [15.2] - Zn4Load
# ALL-NEXT: [16.0] - Zn4Store
# ALL-NEXT: [16.1] - Zn4Store

# ALL: Resource pressure per iteration:
# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
# ALL-NEXT: 6.66 6.66 6.68 2.50 2.50 2.50 2.50 - - - - - - - - 6.66 6.66 6.68 3.33 3.33 3.34 5.00 5.00

# ALL: Resource pressure by instruction:
# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
# ALL-NEXT: 0.66 0.66 0.68 - 0.50 - 0.50 - - - - - - - - 0.66 0.66 0.68 0.33 0.33 0.34 - 1.00 addq $44, 64(%r14)
# ALL-NEXT: 0.66 0.68 0.66 0.50 - 0.50 - - - - - - - - - 0.66 0.68 0.66 0.33 0.34 0.33 1.00 - addq $44, 128(%r14)
# ALL-NEXT: 0.68 0.66 0.66 - 0.50 - 0.50 - - - - - - - - 0.68 0.66 0.66 0.34 0.33 0.33 - 1.00 addq $44, 192(%r14)
# ALL-NEXT: 0.66 0.66 0.68 0.50 - 0.50 - - - - - - - - - 0.66 0.66 0.68 0.33 0.33 0.34 1.00 - addq $44, 256(%r14)
# ALL-NEXT: 0.66 0.68 0.66 - 0.50 - 0.50 - - - - - - - - 0.66 0.68 0.66 0.33 0.34 0.33 - 1.00 addq $44, 320(%r14)
# ALL-NEXT: 0.68 0.66 0.66 0.50 - 0.50 - - - - - - - - - 0.68 0.66 0.66 0.34 0.33 0.33 1.00 - addq $44, 384(%r14)
# ALL-NEXT: 0.66 0.66 0.68 - 0.50 - 0.50 - - - - - - - - 0.66 0.66 0.68 0.33 0.33 0.34 - 1.00 addq $44, 448(%r14)
# ALL-NEXT: 0.66 0.68 0.66 0.50 - 0.50 - - - - - - - - - 0.66 0.68 0.66 0.33 0.34 0.33 1.00 - addq $44, 512(%r14)
# ALL-NEXT: 0.68 0.66 0.66 - 0.50 - 0.50 - - - - - - - - 0.68 0.66 0.66 0.34 0.33 0.33 - 1.00 addq $44, 576(%r14)
# ALL-NEXT: 0.66 0.66 0.68 0.50 - 0.50 - - - - - - - - - 0.66 0.66 0.68 0.33 0.33 0.34 1.00 - addq $44, 640(%r14)

# ALL: Timeline view:

# NOALIAS-NEXT: 01234
# NOALIAS-NEXT: Index 0123456789

# YESALIAS-NEXT: 0123456789 0123456789 0123456789
# YESALIAS-NEXT: Index 0123456789 0123456789 0123456789 012

# NOALIAS: [0,0] DeeeeeeER . . addq $44, 64(%r14)
# NOALIAS-NEXT: [0,1] DeeeeeeER . . addq $44, 128(%r14)
# NOALIAS-NEXT: [0,2] D=eeeeeeER. . addq $44, 192(%r14)
# NOALIAS-NEXT: [0,3] D==eeeeeeER . addq $44, 256(%r14)
# NOALIAS-NEXT: [0,4] D==eeeeeeER . addq $44, 320(%r14)
# NOALIAS-NEXT: [0,5] D===eeeeeeER . addq $44, 384(%r14)
# NOALIAS-NEXT: [0,6] .D===eeeeeeER . addq $44, 448(%r14)
# NOALIAS-NEXT: [0,7] .D===eeeeeeER . addq $44, 512(%r14)
# NOALIAS-NEXT: [0,8] .D====eeeeeeER. addq $44, 576(%r14)
# NOALIAS-NEXT: [0,9] .D=====eeeeeeER addq $44, 640(%r14)

# YESALIAS: [0,0] DeeeeeeER . . . . . . . . . . . . addq $44, 64(%r14)
# YESALIAS-NEXT: [0,1] D======eeeeeeER. . . . . . . . . . . addq $44, 128(%r14)
# YESALIAS-NEXT: [0,2] D============eeeeeeER . . . . . . . . . addq $44, 192(%r14)
# YESALIAS-NEXT: [0,3] D==================eeeeeeER . . . . . . . . addq $44, 256(%r14)
# YESALIAS-NEXT: [0,4] D========================eeeeeeER . . . . . . . addq $44, 320(%r14)
# YESALIAS-NEXT: [0,5] D==============================eeeeeeER . . . . . . addq $44, 384(%r14)
# YESALIAS-NEXT: [0,6] .D===================================eeeeeeER. . . . . addq $44, 448(%r14)
# YESALIAS-NEXT: [0,7] .D=========================================eeeeeeER . . . addq $44, 512(%r14)
# YESALIAS-NEXT: [0,8] .D===============================================eeeeeeER . . addq $44, 576(%r14)
# YESALIAS-NEXT: [0,9] .D=====================================================eeeeeeER addq $44, 640(%r14)

# ALL: Average Wait times (based on the timeline view):
# ALL-NEXT: [0]: Executions
# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue
# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# ALL-NEXT: [3]: Average time elapsed from WB until retire stage

# ALL: [0] [1] [2] [3]
# ALL-NEXT: 0. 1 1.0 1.0 0.0 addq $44, 64(%r14)

# NOALIAS-NEXT: 1. 1 1.0 0.0 0.0 addq $44, 128(%r14)
# NOALIAS-NEXT: 2. 1 2.0 1.0 0.0 addq $44, 192(%r14)
# NOALIAS-NEXT: 3. 1 3.0 1.0 0.0 addq $44, 256(%r14)
# NOALIAS-NEXT: 4. 1 3.0 0.0 0.0 addq $44, 320(%r14)
# NOALIAS-NEXT: 5. 1 4.0 1.0 0.0 addq $44, 384(%r14)
# NOALIAS-NEXT: 6. 1 4.0 1.0 0.0 addq $44, 448(%r14)
# NOALIAS-NEXT: 7. 1 4.0 0.0 0.0 addq $44, 512(%r14)
# NOALIAS-NEXT: 8. 1 5.0 1.0 0.0 addq $44, 576(%r14)
# NOALIAS-NEXT: 9. 1 6.0 1.0 0.0 addq $44, 640(%r14)
# NOALIAS-NEXT: 1 3.3 0.7 0.0 <total>

# YESALIAS-NEXT: 1. 1 7.0 0.0 0.0 addq $44, 128(%r14)
# YESALIAS-NEXT: 2. 1 13.0 0.0 0.0 addq $44, 192(%r14)
# YESALIAS-NEXT: 3. 1 19.0 0.0 0.0 addq $44, 256(%r14)
# YESALIAS-NEXT: 4. 1 25.0 0.0 0.0 addq $44, 320(%r14)
# YESALIAS-NEXT: 5. 1 31.0 0.0 0.0 addq $44, 384(%r14)
# YESALIAS-NEXT: 6. 1 36.0 0.0 0.0 addq $44, 448(%r14)
# YESALIAS-NEXT: 7. 1 42.0 0.0 0.0 addq $44, 512(%r14)
# YESALIAS-NEXT: 8. 1 48.0 0.0 0.0 addq $44, 576(%r14)
# YESALIAS-NEXT: 9. 1 54.0 0.0 0.0 addq $44, 640(%r14)
# YESALIAS-NEXT: 1 27.6 0.1 0.0 <total>
70 changes: 70 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Znver4/resources-adx.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver4 -instruction-tables < %s | FileCheck %s

# Test input: ADX coverage. adcx and adox are each listed with a register
# source and a memory source, first with 32-bit operands and then with 64-bit
# operands. The order here is the order of the rows in the instruction tables
# below, so do not reorder without regenerating the assertions.
adcx %ebx, %ecx
adcx (%rbx), %ecx
adcx %rbx, %rcx
adcx (%rbx), %rcx

adox %ebx, %ecx
adox (%rbx), %ecx
adox %rbx, %rcx
adox (%rbx), %rcx

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 1.00 adcxl %ebx, %ecx
# CHECK-NEXT: 1 5 1.00 * adcxl (%rbx), %ecx
# CHECK-NEXT: 1 1 1.00 adcxq %rbx, %rcx
# CHECK-NEXT: 1 5 1.00 * adcxq (%rbx), %rcx
# CHECK-NEXT: 1 1 1.00 adoxl %ebx, %ecx
# CHECK-NEXT: 1 5 1.00 * adoxl (%rbx), %ecx
# CHECK-NEXT: 1 1 1.00 adoxq %rbx, %rcx
# CHECK-NEXT: 1 5 1.00 * adoxq (%rbx), %rcx

# CHECK: Resources:
# CHECK-NEXT: [0] - Zn4AGU0
# CHECK-NEXT: [1] - Zn4AGU1
# CHECK-NEXT: [2] - Zn4AGU2
# CHECK-NEXT: [3] - Zn4ALU0
# CHECK-NEXT: [4] - Zn4ALU1
# CHECK-NEXT: [5] - Zn4ALU2
# CHECK-NEXT: [6] - Zn4ALU3
# CHECK-NEXT: [7] - Zn4BRU1
# CHECK-NEXT: [8] - Zn4FP0
# CHECK-NEXT: [9] - Zn4FP1
# CHECK-NEXT: [10] - Zn4FP2
# CHECK-NEXT: [11] - Zn4FP3
# CHECK-NEXT: [12.0] - Zn4FP45
# CHECK-NEXT: [12.1] - Zn4FP45
# CHECK-NEXT: [13] - Zn4FPSt
# CHECK-NEXT: [14.0] - Zn4LSU
# CHECK-NEXT: [14.1] - Zn4LSU
# CHECK-NEXT: [14.2] - Zn4LSU
# CHECK-NEXT: [15.0] - Zn4Load
# CHECK-NEXT: [15.1] - Zn4Load
# CHECK-NEXT: [15.2] - Zn4Load
# CHECK-NEXT: [16.0] - Zn4Store
# CHECK-NEXT: [16.1] - Zn4Store

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
# CHECK-NEXT: 1.33 1.33 1.33 8.00 8.00 8.00 8.00 - - - - - - - - 1.33 1.33 1.33 1.33 1.33 1.33 - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcxl %ebx, %ecx
# CHECK-NEXT: 0.33 0.33 0.33 1.00 1.00 1.00 1.00 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - adcxl (%rbx), %ecx
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcxq %rbx, %rcx
# CHECK-NEXT: 0.33 0.33 0.33 1.00 1.00 1.00 1.00 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - adcxq (%rbx), %rcx
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adoxl %ebx, %ecx
# CHECK-NEXT: 0.33 0.33 0.33 1.00 1.00 1.00 1.00 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - adoxl (%rbx), %ecx
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adoxq %rbx, %rcx
# CHECK-NEXT: 0.33 0.33 0.33 1.00 1.00 1.00 1.00 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - adoxq (%rbx), %rcx
86 changes: 86 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Znver4/resources-aes.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver4 -instruction-tables < %s | FileCheck %s

# Test input: legacy-encoded AES instructions. Every mnemonic appears twice,
# once with an xmm register source and once with a memory source through %rax;
# the destination is always %xmm2. aeskeygenassist additionally takes an
# immediate ($22). The order here is the order of the rows in the instruction
# tables below.
aesdec %xmm0, %xmm2
aesdec (%rax), %xmm2

aesdeclast %xmm0, %xmm2
aesdeclast (%rax), %xmm2

aesenc %xmm0, %xmm2
aesenc (%rax), %xmm2

aesenclast %xmm0, %xmm2
aesenclast (%rax), %xmm2

aesimc %xmm0, %xmm2
aesimc (%rax), %xmm2

aeskeygenassist $22, %xmm0, %xmm2
aeskeygenassist $22, (%rax), %xmm2

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 4 0.50 aesdec %xmm0, %xmm2
# CHECK-NEXT: 1 11 0.50 * aesdec (%rax), %xmm2
# CHECK-NEXT: 1 4 0.50 aesdeclast %xmm0, %xmm2
# CHECK-NEXT: 1 11 0.50 * aesdeclast (%rax), %xmm2
# CHECK-NEXT: 1 4 0.50 aesenc %xmm0, %xmm2
# CHECK-NEXT: 1 11 0.50 * aesenc (%rax), %xmm2
# CHECK-NEXT: 1 4 0.50 aesenclast %xmm0, %xmm2
# CHECK-NEXT: 1 11 0.50 * aesenclast (%rax), %xmm2
# CHECK-NEXT: 1 4 0.50 aesimc %xmm0, %xmm2
# CHECK-NEXT: 1 11 0.50 * aesimc (%rax), %xmm2
# CHECK-NEXT: 1 4 0.50 aeskeygenassist $22, %xmm0, %xmm2
# CHECK-NEXT: 1 11 0.50 * aeskeygenassist $22, (%rax), %xmm2

# CHECK: Resources:
# CHECK-NEXT: [0] - Zn4AGU0
# CHECK-NEXT: [1] - Zn4AGU1
# CHECK-NEXT: [2] - Zn4AGU2
# CHECK-NEXT: [3] - Zn4ALU0
# CHECK-NEXT: [4] - Zn4ALU1
# CHECK-NEXT: [5] - Zn4ALU2
# CHECK-NEXT: [6] - Zn4ALU3
# CHECK-NEXT: [7] - Zn4BRU1
# CHECK-NEXT: [8] - Zn4FP0
# CHECK-NEXT: [9] - Zn4FP1
# CHECK-NEXT: [10] - Zn4FP2
# CHECK-NEXT: [11] - Zn4FP3
# CHECK-NEXT: [12.0] - Zn4FP45
# CHECK-NEXT: [12.1] - Zn4FP45
# CHECK-NEXT: [13] - Zn4FPSt
# CHECK-NEXT: [14.0] - Zn4LSU
# CHECK-NEXT: [14.1] - Zn4LSU
# CHECK-NEXT: [14.2] - Zn4LSU
# CHECK-NEXT: [15.0] - Zn4Load
# CHECK-NEXT: [15.1] - Zn4Load
# CHECK-NEXT: [15.2] - Zn4Load
# CHECK-NEXT: [16.0] - Zn4Store
# CHECK-NEXT: [16.1] - Zn4Store

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
# CHECK-NEXT: - - - - - - - - 6.00 6.00 - - 3.00 3.00 - 2.00 2.00 2.00 2.00 2.00 2.00 - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - aesdec %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - aesdec (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - aesdeclast %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - aesdeclast (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - aesenc %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - aesenc (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - aesenclast %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - aesenclast (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - aesimc %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - aesimc (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - aeskeygenassist $22, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - aeskeygenassist $22, (%rax), %xmm2
2,446 changes: 2,446 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s

Large diffs are not rendered by default.

1,096 changes: 1,096 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s

Large diffs are not rendered by default.

2,819 changes: 2,819 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s

Large diffs are not rendered by default.

95 changes: 95 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bitalg.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver4 -instruction-tables < %s | FileCheck %s

# Test input: 512-bit (zmm) forms of vpopcntb, vpopcntw and vpshufbitqmb.
# vpopcntb/vpopcntw are listed with a register source and a memory source, in
# three masking variants each: unmasked, masked with {%k1}, and masked with
# {%k1} {z}.
# vpshufbitqmb writes the mask register %k2 and is listed unmasked and with
# {%k1} only.
vpopcntb %zmm1, %zmm0
vpopcntb (%rdi), %zmm0
vpopcntb %zmm1, %zmm0 {%k1}
vpopcntb (%rdi), %zmm0 {%k1}
vpopcntb %zmm1, %zmm0 {%k1} {z}
vpopcntb (%rdi), %zmm0 {%k1} {z}

vpopcntw %zmm1, %zmm0
vpopcntw (%rdi), %zmm0
vpopcntw %zmm1, %zmm0 {%k1}
vpopcntw (%rdi), %zmm0 {%k1}
vpopcntw %zmm1, %zmm0 {%k1} {z}
vpopcntw (%rdi), %zmm0 {%k1} {z}

vpshufbitqmb %zmm16, %zmm17, %k2
vpshufbitqmb (%rdi), %zmm17, %k2
vpshufbitqmb %zmm16, %zmm17, %k2 {%k1}
vpshufbitqmb (%rdi), %zmm17, %k2 {%k1}

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.50 vpopcntb %zmm1, %zmm0
# CHECK-NEXT: 1 8 0.50 * vpopcntb (%rdi), %zmm0
# CHECK-NEXT: 1 1 0.50 vpopcntb %zmm1, %zmm0 {%k1}
# CHECK-NEXT: 1 8 0.50 * vpopcntb (%rdi), %zmm0 {%k1}
# CHECK-NEXT: 1 1 0.50 vpopcntb %zmm1, %zmm0 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * vpopcntb (%rdi), %zmm0 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vpopcntw %zmm1, %zmm0
# CHECK-NEXT: 1 8 0.50 * vpopcntw (%rdi), %zmm0
# CHECK-NEXT: 1 1 0.50 vpopcntw %zmm1, %zmm0 {%k1}
# CHECK-NEXT: 1 8 0.50 * vpopcntw (%rdi), %zmm0 {%k1}
# CHECK-NEXT: 1 1 0.50 vpopcntw %zmm1, %zmm0 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * vpopcntw (%rdi), %zmm0 {%k1} {z}
# CHECK-NEXT: 1 3 1.00 vpshufbitqmb %zmm16, %zmm17, %k2
# CHECK-NEXT: 1 10 1.00 * vpshufbitqmb (%rdi), %zmm17, %k2
# CHECK-NEXT: 1 3 1.00 vpshufbitqmb %zmm16, %zmm17, %k2 {%k1}
# CHECK-NEXT: 1 10 1.00 * vpshufbitqmb (%rdi), %zmm17, %k2 {%k1}

# CHECK: Resources:
# CHECK-NEXT: [0] - Zn4AGU0
# CHECK-NEXT: [1] - Zn4AGU1
# CHECK-NEXT: [2] - Zn4AGU2
# CHECK-NEXT: [3] - Zn4ALU0
# CHECK-NEXT: [4] - Zn4ALU1
# CHECK-NEXT: [5] - Zn4ALU2
# CHECK-NEXT: [6] - Zn4ALU3
# CHECK-NEXT: [7] - Zn4BRU1
# CHECK-NEXT: [8] - Zn4FP0
# CHECK-NEXT: [9] - Zn4FP1
# CHECK-NEXT: [10] - Zn4FP2
# CHECK-NEXT: [11] - Zn4FP3
# CHECK-NEXT: [12.0] - Zn4FP45
# CHECK-NEXT: [12.1] - Zn4FP45
# CHECK-NEXT: [13] - Zn4FPSt
# CHECK-NEXT: [14.0] - Zn4LSU
# CHECK-NEXT: [14.1] - Zn4LSU
# CHECK-NEXT: [14.2] - Zn4LSU
# CHECK-NEXT: [15.0] - Zn4Load
# CHECK-NEXT: [15.1] - Zn4Load
# CHECK-NEXT: [15.2] - Zn4Load
# CHECK-NEXT: [16.0] - Zn4Store
# CHECK-NEXT: [16.1] - Zn4Store

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
# CHECK-NEXT: - - - - - - - - 10.00 6.00 6.00 10.00 4.00 4.00 - 2.67 2.67 2.67 2.67 2.67 2.67 - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vpopcntb %zmm1, %zmm0
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpopcntb (%rdi), %zmm0
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vpopcntb %zmm1, %zmm0 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpopcntb (%rdi), %zmm0 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vpopcntb %zmm1, %zmm0 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpopcntb (%rdi), %zmm0 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vpopcntw %zmm1, %zmm0
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpopcntw (%rdi), %zmm0
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vpopcntw %zmm1, %zmm0 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpopcntw (%rdi), %zmm0 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vpopcntw %zmm1, %zmm0 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpopcntw (%rdi), %zmm0 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 - - - - - - - - - - - vpshufbitqmb %zmm16, %zmm17, %k2
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpshufbitqmb (%rdi), %zmm17, %k2
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 - - - - - - - - - - - vpshufbitqmb %zmm16, %zmm17, %k2 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpshufbitqmb (%rdi), %zmm17, %k2 {%k1}
146 changes: 146 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bitalgvl.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver4 -instruction-tables < %s | FileCheck %s

# Test input: 128-bit (xmm) and 256-bit (ymm) forms of vpopcntb, vpopcntw and
# vpshufbitqmb. For each mnemonic the xmm group comes first, then the ymm
# group. Within a vpopcnt group the variants are: register source / memory
# source, each unmasked, with {%k1}, and with {%k1} {z}.
# vpshufbitqmb writes the mask register %k2 and is listed unmasked and with
# {%k1} only.
vpopcntb %xmm1, %xmm0
vpopcntb (%rdi), %xmm0
vpopcntb %xmm1, %xmm0 {%k1}
vpopcntb (%rdi), %xmm0 {%k1}
vpopcntb %xmm1, %xmm0 {%k1} {z}
vpopcntb (%rdi), %xmm0 {%k1} {z}

vpopcntb %ymm1, %ymm0
vpopcntb (%rdi), %ymm0
vpopcntb %ymm1, %ymm0 {%k1}
vpopcntb (%rdi), %ymm0 {%k1}
vpopcntb %ymm1, %ymm0 {%k1} {z}
vpopcntb (%rdi), %ymm0 {%k1} {z}

vpopcntw %xmm1, %xmm0
vpopcntw (%rdi), %xmm0
vpopcntw %xmm1, %xmm0 {%k1}
vpopcntw (%rdi), %xmm0 {%k1}
vpopcntw %xmm1, %xmm0 {%k1} {z}
vpopcntw (%rdi), %xmm0 {%k1} {z}

vpopcntw %ymm1, %ymm0
vpopcntw (%rdi), %ymm0
vpopcntw %ymm1, %ymm0 {%k1}
vpopcntw (%rdi), %ymm0 {%k1}
vpopcntw %ymm1, %ymm0 {%k1} {z}
vpopcntw (%rdi), %ymm0 {%k1} {z}

vpshufbitqmb %xmm16, %xmm17, %k2
vpshufbitqmb (%rdi), %xmm17, %k2
vpshufbitqmb %xmm16, %xmm17, %k2 {%k1}
vpshufbitqmb (%rdi), %xmm17, %k2 {%k1}

vpshufbitqmb %ymm16, %ymm17, %k2
vpshufbitqmb (%rdi), %ymm17, %k2
vpshufbitqmb %ymm16, %ymm17, %k2 {%k1}
vpshufbitqmb (%rdi), %ymm17, %k2 {%k1}

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 2 1.00 vpopcntb %xmm1, %xmm0
# CHECK-NEXT: 1 8 0.50 * vpopcntb (%rdi), %xmm0
# CHECK-NEXT: 1 2 1.00 vpopcntb %xmm1, %xmm0 {%k1}
# CHECK-NEXT: 1 8 0.50 * vpopcntb (%rdi), %xmm0 {%k1}
# CHECK-NEXT: 1 2 1.00 vpopcntb %xmm1, %xmm0 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * vpopcntb (%rdi), %xmm0 {%k1} {z}
# CHECK-NEXT: 1 1 0.25 vpopcntb %ymm1, %ymm0
# CHECK-NEXT: 1 8 0.50 * vpopcntb (%rdi), %ymm0
# CHECK-NEXT: 1 1 0.25 vpopcntb %ymm1, %ymm0 {%k1}
# CHECK-NEXT: 1 8 0.50 * vpopcntb (%rdi), %ymm0 {%k1}
# CHECK-NEXT: 1 1 0.25 vpopcntb %ymm1, %ymm0 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * vpopcntb (%rdi), %ymm0 {%k1} {z}
# CHECK-NEXT: 1 2 1.00 vpopcntw %xmm1, %xmm0
# CHECK-NEXT: 1 8 0.50 * vpopcntw (%rdi), %xmm0
# CHECK-NEXT: 1 2 1.00 vpopcntw %xmm1, %xmm0 {%k1}
# CHECK-NEXT: 1 8 0.50 * vpopcntw (%rdi), %xmm0 {%k1}
# CHECK-NEXT: 1 2 1.00 vpopcntw %xmm1, %xmm0 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * vpopcntw (%rdi), %xmm0 {%k1} {z}
# CHECK-NEXT: 1 1 0.25 vpopcntw %ymm1, %ymm0
# CHECK-NEXT: 1 8 0.50 * vpopcntw (%rdi), %ymm0
# CHECK-NEXT: 1 1 0.25 vpopcntw %ymm1, %ymm0 {%k1}
# CHECK-NEXT: 1 8 0.50 * vpopcntw (%rdi), %ymm0 {%k1}
# CHECK-NEXT: 1 1 0.25 vpopcntw %ymm1, %ymm0 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * vpopcntw (%rdi), %ymm0 {%k1} {z}
# CHECK-NEXT: 1 2 1.00 vpshufbitqmb %xmm16, %xmm17, %k2
# CHECK-NEXT: 1 10 0.50 * vpshufbitqmb (%rdi), %xmm17, %k2
# CHECK-NEXT: 1 2 1.00 vpshufbitqmb %xmm16, %xmm17, %k2 {%k1}
# CHECK-NEXT: 1 10 0.50 * vpshufbitqmb (%rdi), %xmm17, %k2 {%k1}
# CHECK-NEXT: 1 3 0.50 vpshufbitqmb %ymm16, %ymm17, %k2
# CHECK-NEXT: 1 10 0.50 * vpshufbitqmb (%rdi), %ymm17, %k2
# CHECK-NEXT: 1 3 0.50 vpshufbitqmb %ymm16, %ymm17, %k2 {%k1}
# CHECK-NEXT: 1 10 0.50 * vpshufbitqmb (%rdi), %ymm17, %k2 {%k1}

# CHECK: Resources:
# CHECK-NEXT: [0] - Zn4AGU0
# CHECK-NEXT: [1] - Zn4AGU1
# CHECK-NEXT: [2] - Zn4AGU2
# CHECK-NEXT: [3] - Zn4ALU0
# CHECK-NEXT: [4] - Zn4ALU1
# CHECK-NEXT: [5] - Zn4ALU2
# CHECK-NEXT: [6] - Zn4ALU3
# CHECK-NEXT: [7] - Zn4BRU1
# CHECK-NEXT: [8] - Zn4FP0
# CHECK-NEXT: [9] - Zn4FP1
# CHECK-NEXT: [10] - Zn4FP2
# CHECK-NEXT: [11] - Zn4FP3
# CHECK-NEXT: [12.0] - Zn4FP45
# CHECK-NEXT: [12.1] - Zn4FP45
# CHECK-NEXT: [13] - Zn4FPSt
# CHECK-NEXT: [14.0] - Zn4LSU
# CHECK-NEXT: [14.1] - Zn4LSU
# CHECK-NEXT: [14.2] - Zn4LSU
# CHECK-NEXT: [15.0] - Zn4Load
# CHECK-NEXT: [15.1] - Zn4Load
# CHECK-NEXT: [15.2] - Zn4Load
# CHECK-NEXT: [16.0] - Zn4Store
# CHECK-NEXT: [16.1] - Zn4Store

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
# CHECK-NEXT: - - - - - - - - 15.50 12.50 4.50 7.50 8.00 8.00 - 5.33 5.33 5.33 5.33 5.33 5.33 - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vpopcntb %xmm1, %xmm0
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpopcntb (%rdi), %xmm0
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vpopcntb %xmm1, %xmm0 {%k1}
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpopcntb (%rdi), %xmm0 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vpopcntb %xmm1, %xmm0 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpopcntb (%rdi), %xmm0 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpopcntb %ymm1, %ymm0
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpopcntb (%rdi), %ymm0
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpopcntb %ymm1, %ymm0 {%k1}
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpopcntb (%rdi), %ymm0 {%k1}
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpopcntb %ymm1, %ymm0 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpopcntb (%rdi), %ymm0 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vpopcntw %xmm1, %xmm0
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpopcntw (%rdi), %xmm0
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vpopcntw %xmm1, %xmm0 {%k1}
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpopcntw (%rdi), %xmm0 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vpopcntw %xmm1, %xmm0 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpopcntw (%rdi), %xmm0 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpopcntw %ymm1, %ymm0
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpopcntw (%rdi), %ymm0
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpopcntw %ymm1, %ymm0 {%k1}
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpopcntw (%rdi), %ymm0 {%k1}
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpopcntw %ymm1, %ymm0 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpopcntw (%rdi), %ymm0 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vpshufbitqmb %xmm16, %xmm17, %k2
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpshufbitqmb (%rdi), %xmm17, %k2
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vpshufbitqmb %xmm16, %xmm17, %k2 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpshufbitqmb (%rdi), %xmm17, %k2 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpshufbitqmb %ymm16, %ymm17, %k2
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpshufbitqmb (%rdi), %ymm17, %k2
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpshufbitqmb %ymm16, %ymm17, %k2 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpshufbitqmb (%rdi), %ymm17, %k2 {%k1}
1,645 changes: 1,645 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bw.s

Large diffs are not rendered by default.

2,958 changes: 2,958 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bwvl.s

Large diffs are not rendered by default.

164 changes: 164 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512cd.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver4 -instruction-tables < %s | FileCheck %s

# Test input: 512-bit (zmm) AVX-512 CD instructions.
#  - vpbroadcastmb2q / vpbroadcastmw2d take the mask register %k0 as source.
#  - vpconflictd/q and vplzcntd/q are each listed with three source kinds
#    (zmm register, full memory operand, embedded-broadcast memory operand)
#    times three masking variants (unmasked, {k1}, {z}{k1}).
# The broadcast specifier is {1to16} for the dword (d) forms and {1to8} for
# the qword (q) forms.
# Mask operands are written here as {k1} / {z}{k1}; the instruction tables
# further down show the same operands printed as {%k1} / {%k1} {z}.
vpbroadcastmb2q %k0, %zmm16

vpbroadcastmw2d %k0, %zmm16

vpconflictd %zmm16, %zmm19
vpconflictd (%rax), %zmm19
vpconflictd (%rax){1to16}, %zmm19
vpconflictd %zmm16, %zmm19 {k1}
vpconflictd (%rax), %zmm19 {k1}
vpconflictd (%rax){1to16}, %zmm19 {k1}
vpconflictd %zmm16, %zmm19 {z}{k1}
vpconflictd (%rax), %zmm19 {z}{k1}
vpconflictd (%rax){1to16}, %zmm19 {z}{k1}

vpconflictq %zmm16, %zmm19
vpconflictq (%rax), %zmm19
vpconflictq (%rax){1to8}, %zmm19
vpconflictq %zmm16, %zmm19 {k1}
vpconflictq (%rax), %zmm19 {k1}
vpconflictq (%rax){1to8}, %zmm19 {k1}
vpconflictq %zmm16, %zmm19 {z}{k1}
vpconflictq (%rax), %zmm19 {z}{k1}
vpconflictq (%rax){1to8}, %zmm19 {z}{k1}

vplzcntd %zmm16, %zmm19
vplzcntd (%rax), %zmm19
vplzcntd (%rax){1to16}, %zmm19
vplzcntd %zmm16, %zmm19 {k1}
vplzcntd (%rax), %zmm19 {k1}
vplzcntd (%rax){1to16}, %zmm19 {k1}
vplzcntd %zmm16, %zmm19 {z}{k1}
vplzcntd (%rax), %zmm19 {z}{k1}
vplzcntd (%rax){1to16}, %zmm19 {z}{k1}

vplzcntq %zmm16, %zmm19
vplzcntq (%rax), %zmm19
vplzcntq (%rax){1to8}, %zmm19
vplzcntq %zmm16, %zmm19 {k1}
vplzcntq (%rax), %zmm19 {k1}
vplzcntq (%rax){1to8}, %zmm19 {k1}
vplzcntq %zmm16, %zmm19 {z}{k1}
vplzcntq (%rax), %zmm19 {z}{k1}
vplzcntq (%rax){1to8}, %zmm19 {z}{k1}

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.50 vpbroadcastmb2q %k0, %zmm16
# CHECK-NEXT: 1 1 0.50 vpbroadcastmw2d %k0, %zmm16
# CHECK-NEXT: 4 6 1.50 vpconflictd %zmm16, %zmm19
# CHECK-NEXT: 1 8 0.50 * vpconflictd (%rax), %zmm19
# CHECK-NEXT: 1 8 0.50 * vpconflictd (%rax){1to16}, %zmm19
# CHECK-NEXT: 4 6 1.50 vpconflictd %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 8 0.50 * vpconflictd (%rax), %zmm19 {%k1}
# CHECK-NEXT: 1 8 0.50 * vpconflictd (%rax){1to16}, %zmm19 {%k1}
# CHECK-NEXT: 4 6 1.50 vpconflictd %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * vpconflictd (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * vpconflictd (%rax){1to16}, %zmm19 {%k1} {z}
# CHECK-NEXT: 4 6 1.50 vpconflictq %zmm16, %zmm19
# CHECK-NEXT: 1 8 0.50 * vpconflictq (%rax), %zmm19
# CHECK-NEXT: 1 8 0.50 * vpconflictq (%rax){1to8}, %zmm19
# CHECK-NEXT: 4 6 1.50 vpconflictq %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 8 0.50 * vpconflictq (%rax), %zmm19 {%k1}
# CHECK-NEXT: 1 8 0.50 * vpconflictq (%rax){1to8}, %zmm19 {%k1}
# CHECK-NEXT: 4 6 1.50 vpconflictq %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * vpconflictq (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * vpconflictq (%rax){1to8}, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 2 1.00 vplzcntd %zmm16, %zmm19
# CHECK-NEXT: 1 10 1.00 * vplzcntd (%rax), %zmm19
# CHECK-NEXT: 1 10 1.00 * vplzcntd (%rax){1to16}, %zmm19
# CHECK-NEXT: 1 2 1.00 vplzcntd %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 10 1.00 * vplzcntd (%rax), %zmm19 {%k1}
# CHECK-NEXT: 1 10 1.00 * vplzcntd (%rax){1to16}, %zmm19 {%k1}
# CHECK-NEXT: 1 2 1.00 vplzcntd %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 10 1.00 * vplzcntd (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 10 1.00 * vplzcntd (%rax){1to16}, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 2 1.00 vplzcntq %zmm16, %zmm19
# CHECK-NEXT: 1 10 1.00 * vplzcntq (%rax), %zmm19
# CHECK-NEXT: 1 10 1.00 * vplzcntq (%rax){1to8}, %zmm19
# CHECK-NEXT: 1 2 1.00 vplzcntq %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 10 1.00 * vplzcntq (%rax), %zmm19 {%k1}
# CHECK-NEXT: 1 10 1.00 * vplzcntq (%rax){1to8}, %zmm19 {%k1}
# CHECK-NEXT: 1 2 1.00 vplzcntq %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 10 1.00 * vplzcntq (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 10 1.00 * vplzcntq (%rax){1to8}, %zmm19 {%k1} {z}

# CHECK: Resources:
# CHECK-NEXT: [0] - Zn4AGU0
# CHECK-NEXT: [1] - Zn4AGU1
# CHECK-NEXT: [2] - Zn4AGU2
# CHECK-NEXT: [3] - Zn4ALU0
# CHECK-NEXT: [4] - Zn4ALU1
# CHECK-NEXT: [5] - Zn4ALU2
# CHECK-NEXT: [6] - Zn4ALU3
# CHECK-NEXT: [7] - Zn4BRU1
# CHECK-NEXT: [8] - Zn4FP0
# CHECK-NEXT: [9] - Zn4FP1
# CHECK-NEXT: [10] - Zn4FP2
# CHECK-NEXT: [11] - Zn4FP3
# CHECK-NEXT: [12.0] - Zn4FP45
# CHECK-NEXT: [12.1] - Zn4FP45
# CHECK-NEXT: [13] - Zn4FPSt
# CHECK-NEXT: [14.0] - Zn4LSU
# CHECK-NEXT: [14.1] - Zn4LSU
# CHECK-NEXT: [14.2] - Zn4LSU
# CHECK-NEXT: [15.0] - Zn4Load
# CHECK-NEXT: [15.1] - Zn4Load
# CHECK-NEXT: [15.2] - Zn4Load
# CHECK-NEXT: [16.0] - Zn4Store
# CHECK-NEXT: [16.1] - Zn4Store

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
# CHECK-NEXT: - - - - - - - - 30.00 25.00 19.00 24.00 12.00 12.00 - 8.00 8.00 8.00 8.00 8.00 8.00 - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpbroadcastmb2q %k0, %zmm16
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpbroadcastmw2d %k0, %zmm16
# CHECK-NEXT: - - - - - - - - 1.00 2.00 2.00 1.00 - - - - - - - - - - - vpconflictd %zmm16, %zmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpconflictd (%rax), %zmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpconflictd (%rax){1to16}, %zmm19
# CHECK-NEXT: - - - - - - - - 1.00 2.00 2.00 1.00 - - - - - - - - - - - vpconflictd %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpconflictd (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpconflictd (%rax){1to16}, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 2.00 2.00 1.00 - - - - - - - - - - - vpconflictd %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpconflictd (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpconflictd (%rax){1to16}, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 1.00 2.00 2.00 1.00 - - - - - - - - - - - vpconflictq %zmm16, %zmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpconflictq (%rax), %zmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpconflictq (%rax){1to8}, %zmm19
# CHECK-NEXT: - - - - - - - - 1.00 2.00 2.00 1.00 - - - - - - - - - - - vpconflictq %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpconflictq (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpconflictq (%rax){1to8}, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 2.00 2.00 1.00 - - - - - - - - - - - vpconflictq %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpconflictq (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpconflictq (%rax){1to8}, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vplzcntd %zmm16, %zmm19
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vplzcntd (%rax), %zmm19
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vplzcntd (%rax){1to16}, %zmm19
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vplzcntd %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vplzcntd (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vplzcntd (%rax){1to16}, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vplzcntd %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vplzcntd (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vplzcntd (%rax){1to16}, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vplzcntq %zmm16, %zmm19
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vplzcntq (%rax), %zmm19
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vplzcntq (%rax){1to8}, %zmm19
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vplzcntq %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vplzcntq (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vplzcntq (%rax){1to8}, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vplzcntq %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vplzcntq (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vplzcntq (%rax){1to8}, %zmm19 {%k1} {z}
282 changes: 282 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512cdvl.s

Large diffs are not rendered by default.

1,277 changes: 1,277 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512dq.s

Large diffs are not rendered by default.

1,677 changes: 1,677 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512dqvl.s

Large diffs are not rendered by default.

119 changes: 119 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512gfni.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver4 -instruction-tables < %s | FileCheck %s

# GFNI, 512-bit (zmm) forms.
# The two affine mnemonics are each exercised with a register source, a
# full-width memory source and a {1to8} broadcast memory source, first
# unmasked, then with a {k1} write mask, then with {z}{k1}.
# vgf2p8mulb is exercised with register and memory sources only, with the
# same three masking variants.

vgf2p8affineinvqb $0, %zmm16, %zmm17, %zmm19
vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19
vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19
vgf2p8affineinvqb $0, %zmm16, %zmm17, %zmm19 {k1}
vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19 {k1}
vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19 {k1}
vgf2p8affineinvqb $0, %zmm16, %zmm17, %zmm19 {z}{k1}
vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19 {z}{k1}
vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19 {z}{k1}

# The middle three lines carry {k1} so that this group mirrors the
# vgf2p8affineinvqb group above and covers the write-masked forms.
vgf2p8affineqb $0, %zmm16, %zmm17, %zmm19
vgf2p8affineqb $0, (%rax), %zmm17, %zmm19
vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19
vgf2p8affineqb $0, %zmm16, %zmm17, %zmm19 {k1}
vgf2p8affineqb $0, (%rax), %zmm17, %zmm19 {k1}
vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19 {k1}
vgf2p8affineqb $0, %zmm16, %zmm17, %zmm19 {z}{k1}
vgf2p8affineqb $0, (%rax), %zmm17, %zmm19 {z}{k1}
vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19 {z}{k1}

vgf2p8mulb %zmm16, %zmm17, %zmm19
vgf2p8mulb (%rax), %zmm17, %zmm19
vgf2p8mulb %zmm16, %zmm17, %zmm19 {k1}
vgf2p8mulb (%rax), %zmm17, %zmm19 {k1}
vgf2p8mulb %zmm16, %zmm17, %zmm19 {z}{k1}
vgf2p8mulb (%rax), %zmm17, %zmm19 {z}{k1}

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 vgf2p8affineinvqb $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 1 10 1.00 * vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19
# CHECK-NEXT: 1 10 1.00 * vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19
# CHECK-NEXT: 1 3 1.00 vgf2p8affineinvqb $0, %zmm16, %zmm17, %zmm19 {%k1}
# CHECK-NEXT: 1 10 1.00 * vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19 {%k1}
# CHECK-NEXT: 1 10 1.00 * vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19 {%k1}
# CHECK-NEXT: 1 3 1.00 vgf2p8affineinvqb $0, %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 10 1.00 * vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 10 1.00 * vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 3 1.00 vgf2p8affineqb $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 1 10 1.00 * vgf2p8affineqb $0, (%rax), %zmm17, %zmm19
# CHECK-NEXT: 1 10 1.00 * vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19
# CHECK-NEXT: 1 3 1.00 vgf2p8affineqb $0, %zmm16, %zmm17, %zmm19 {%k1}
# CHECK-NEXT: 1 10 1.00 * vgf2p8affineqb $0, (%rax), %zmm17, %zmm19 {%k1}
# CHECK-NEXT: 1 10 1.00 * vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19 {%k1}
# CHECK-NEXT: 1 3 1.00 vgf2p8affineqb $0, %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 10 1.00 * vgf2p8affineqb $0, (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 10 1.00 * vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vgf2p8mulb %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 1 8 0.50 * vgf2p8mulb (%rax), %zmm17, %zmm19
# CHECK-NEXT: 1 1 0.50 vgf2p8mulb %zmm16, %zmm17, %zmm19 {%k1}
# CHECK-NEXT: 1 8 0.50 * vgf2p8mulb (%rax), %zmm17, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vgf2p8mulb %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * vgf2p8mulb (%rax), %zmm17, %zmm19 {%k1} {z}

# CHECK: Resources:
# CHECK-NEXT: [0] - Zn4AGU0
# CHECK-NEXT: [1] - Zn4AGU1
# CHECK-NEXT: [2] - Zn4AGU2
# CHECK-NEXT: [3] - Zn4ALU0
# CHECK-NEXT: [4] - Zn4ALU1
# CHECK-NEXT: [5] - Zn4ALU2
# CHECK-NEXT: [6] - Zn4ALU3
# CHECK-NEXT: [7] - Zn4BRU1
# CHECK-NEXT: [8] - Zn4FP0
# CHECK-NEXT: [9] - Zn4FP1
# CHECK-NEXT: [10] - Zn4FP2
# CHECK-NEXT: [11] - Zn4FP3
# CHECK-NEXT: [12.0] - Zn4FP45
# CHECK-NEXT: [12.1] - Zn4FP45
# CHECK-NEXT: [13] - Zn4FPSt
# CHECK-NEXT: [14.0] - Zn4LSU
# CHECK-NEXT: [14.1] - Zn4LSU
# CHECK-NEXT: [14.2] - Zn4LSU
# CHECK-NEXT: [15.0] - Zn4Load
# CHECK-NEXT: [15.1] - Zn4Load
# CHECK-NEXT: [15.2] - Zn4Load
# CHECK-NEXT: [16.0] - Zn4Store
# CHECK-NEXT: [16.1] - Zn4Store

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
# CHECK-NEXT: - - - - - - - - 21.00 3.00 3.00 21.00 7.50 7.50 - 5.00 5.00 5.00 5.00 5.00 5.00 - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 - - - - - - - - - - - vgf2p8affineinvqb $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 - - - - - - - - - - - vgf2p8affineinvqb $0, %zmm16, %zmm17, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 - - - - - - - - - - - vgf2p8affineinvqb $0, %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 - - - - - - - - - - - vgf2p8affineqb $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgf2p8affineqb $0, (%rax), %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 - - - - - - - - - - - vgf2p8affineqb $0, %zmm16, %zmm17, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgf2p8affineqb $0, (%rax), %zmm17, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 - - - - - - - - - - - vgf2p8affineqb $0, %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgf2p8affineqb $0, (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgf2p8mulb (%rax), %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %zmm16, %zmm17, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgf2p8mulb (%rax), %zmm17, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgf2p8mulb (%rax), %zmm17, %zmm19 {%k1} {z}
194 changes: 194 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512gfnivl.s

Large diffs are not rendered by default.

100 changes: 100 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512ifma.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver4 -instruction-tables < %s | FileCheck %s

# AVX512-IFMA, 512-bit (zmm) forms.
# Each mnemonic is listed with a register source, a full-width memory source
# and a {1to8} broadcast memory source: first unmasked, then with {%k1},
# then with {%k1} {z}.

# High-half variant.
vpmadd52huq     %zmm16,       %zmm17, %zmm19
vpmadd52huq     (%rdi),       %zmm17, %zmm19
vpmadd52huq     (%rdi){1to8}, %zmm17, %zmm19
vpmadd52huq     %zmm16,       %zmm17, %zmm19 {%k1}
vpmadd52huq     (%rdi),       %zmm17, %zmm19 {%k1}
vpmadd52huq     (%rdi){1to8}, %zmm17, %zmm19 {%k1}
vpmadd52huq     %zmm16,       %zmm17, %zmm19 {%k1} {z}
vpmadd52huq     (%rdi),       %zmm17, %zmm19 {%k1} {z}
vpmadd52huq     (%rdi){1to8}, %zmm17, %zmm19 {%k1} {z}

# Low-half variant.
vpmadd52luq     %zmm16,       %zmm17, %zmm19
vpmadd52luq     (%rdi),       %zmm17, %zmm19
vpmadd52luq     (%rdi){1to8}, %zmm17, %zmm19
vpmadd52luq     %zmm16,       %zmm17, %zmm19 {%k1}
vpmadd52luq     (%rdi),       %zmm17, %zmm19 {%k1}
vpmadd52luq     (%rdi){1to8}, %zmm17, %zmm19 {%k1}
vpmadd52luq     %zmm16,       %zmm17, %zmm19 {%k1} {z}
vpmadd52luq     (%rdi),       %zmm17, %zmm19 {%k1} {z}
vpmadd52luq     (%rdi){1to8}, %zmm17, %zmm19 {%k1} {z}

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 4 2.00 vpmadd52huq %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 1 10 1.00 * vpmadd52huq (%rdi), %zmm17, %zmm19
# CHECK-NEXT: 1 10 1.00 * vpmadd52huq (%rdi){1to8}, %zmm17, %zmm19
# CHECK-NEXT: 1 4 2.00 vpmadd52huq %zmm16, %zmm17, %zmm19 {%k1}
# CHECK-NEXT: 1 10 1.00 * vpmadd52huq (%rdi), %zmm17, %zmm19 {%k1}
# CHECK-NEXT: 1 10 1.00 * vpmadd52huq (%rdi){1to8}, %zmm17, %zmm19 {%k1}
# CHECK-NEXT: 1 4 2.00 vpmadd52huq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 10 1.00 * vpmadd52huq (%rdi), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 10 1.00 * vpmadd52huq (%rdi){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 4 2.00 vpmadd52luq %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 1 10 1.00 * vpmadd52luq (%rdi), %zmm17, %zmm19
# CHECK-NEXT: 1 10 1.00 * vpmadd52luq (%rdi){1to8}, %zmm17, %zmm19
# CHECK-NEXT: 1 4 2.00 vpmadd52luq %zmm16, %zmm17, %zmm19 {%k1}
# CHECK-NEXT: 1 10 1.00 * vpmadd52luq (%rdi), %zmm17, %zmm19 {%k1}
# CHECK-NEXT: 1 10 1.00 * vpmadd52luq (%rdi){1to8}, %zmm17, %zmm19 {%k1}
# CHECK-NEXT: 1 4 2.00 vpmadd52luq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 10 1.00 * vpmadd52luq (%rdi), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 10 1.00 * vpmadd52luq (%rdi){1to8}, %zmm17, %zmm19 {%k1} {z}

# CHECK: Resources:
# CHECK-NEXT: [0] - Zn4AGU0
# CHECK-NEXT: [1] - Zn4AGU1
# CHECK-NEXT: [2] - Zn4AGU2
# CHECK-NEXT: [3] - Zn4ALU0
# CHECK-NEXT: [4] - Zn4ALU1
# CHECK-NEXT: [5] - Zn4ALU2
# CHECK-NEXT: [6] - Zn4ALU3
# CHECK-NEXT: [7] - Zn4BRU1
# CHECK-NEXT: [8] - Zn4FP0
# CHECK-NEXT: [9] - Zn4FP1
# CHECK-NEXT: [10] - Zn4FP2
# CHECK-NEXT: [11] - Zn4FP3
# CHECK-NEXT: [12.0] - Zn4FP45
# CHECK-NEXT: [12.1] - Zn4FP45
# CHECK-NEXT: [13] - Zn4FPSt
# CHECK-NEXT: [14.0] - Zn4LSU
# CHECK-NEXT: [14.1] - Zn4LSU
# CHECK-NEXT: [14.2] - Zn4LSU
# CHECK-NEXT: [15.0] - Zn4Load
# CHECK-NEXT: [15.1] - Zn4Load
# CHECK-NEXT: [15.2] - Zn4Load
# CHECK-NEXT: [16.0] - Zn4Store
# CHECK-NEXT: [16.1] - Zn4Store

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
# CHECK-NEXT: - - - - - - - - 24.00 12.00 - 12.00 6.00 6.00 - 4.00 4.00 4.00 4.00 4.00 4.00 - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpmadd52huq %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52huq (%rdi), %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52huq (%rdi){1to8}, %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpmadd52huq %zmm16, %zmm17, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52huq (%rdi), %zmm17, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52huq (%rdi){1to8}, %zmm17, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpmadd52huq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52huq (%rdi), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52huq (%rdi){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpmadd52luq %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52luq (%rdi), %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52luq (%rdi){1to8}, %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpmadd52luq %zmm16, %zmm17, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52luq (%rdi), %zmm17, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52luq (%rdi){1to8}, %zmm17, %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpmadd52luq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52luq (%rdi), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 1.00 - - 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52luq (%rdi){1to8}, %zmm17, %zmm19 {%k1} {z}
156 changes: 156 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512ifmavl.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver4 -instruction-tables < %s | FileCheck %s

# AVX512-IFMA with AVX512VL: 128-bit (xmm) and 256-bit (ymm) forms.
# Every group walks a register source, a full-width memory source and a
# broadcast memory source: unmasked, then with {%k1}, then with {%k1} {z}.

# vpmadd52huq, xmm operands.
vpmadd52huq     %xmm16,       %xmm17, %xmm19
vpmadd52huq     (%rdi),       %xmm17, %xmm19
vpmadd52huq     (%rdi){1to2}, %xmm17, %xmm19
vpmadd52huq     %xmm16,       %xmm17, %xmm19 {%k1}
vpmadd52huq     (%rdi),       %xmm17, %xmm19 {%k1}
vpmadd52huq     (%rdi){1to2}, %xmm17, %xmm19 {%k1}
vpmadd52huq     %xmm16,       %xmm17, %xmm19 {%k1} {z}
vpmadd52huq     (%rdi),       %xmm17, %xmm19 {%k1} {z}
vpmadd52huq     (%rdi){1to2}, %xmm17, %xmm19 {%k1} {z}

# vpmadd52huq, ymm operands.
vpmadd52huq     %ymm16,       %ymm17, %ymm19
vpmadd52huq     (%rdi),       %ymm17, %ymm19
vpmadd52huq     (%rdi){1to4}, %ymm17, %ymm19
vpmadd52huq     %ymm16,       %ymm17, %ymm19 {%k1}
vpmadd52huq     (%rdi),       %ymm17, %ymm19 {%k1}
vpmadd52huq     (%rdi){1to4}, %ymm17, %ymm19 {%k1}
vpmadd52huq     %ymm16,       %ymm17, %ymm19 {%k1} {z}
vpmadd52huq     (%rdi),       %ymm17, %ymm19 {%k1} {z}
vpmadd52huq     (%rdi){1to4}, %ymm17, %ymm19 {%k1} {z}

# vpmadd52luq, xmm operands.
vpmadd52luq     %xmm16,       %xmm17, %xmm19
vpmadd52luq     (%rdi),       %xmm17, %xmm19
vpmadd52luq     (%rdi){1to2}, %xmm17, %xmm19
vpmadd52luq     %xmm16,       %xmm17, %xmm19 {%k1}
vpmadd52luq     (%rdi),       %xmm17, %xmm19 {%k1}
vpmadd52luq     (%rdi){1to2}, %xmm17, %xmm19 {%k1}
vpmadd52luq     %xmm16,       %xmm17, %xmm19 {%k1} {z}
vpmadd52luq     (%rdi),       %xmm17, %xmm19 {%k1} {z}
vpmadd52luq     (%rdi){1to2}, %xmm17, %xmm19 {%k1} {z}

# vpmadd52luq, ymm operands.
vpmadd52luq     %ymm16,       %ymm17, %ymm19
vpmadd52luq     (%rdi),       %ymm17, %ymm19
vpmadd52luq     (%rdi){1to4}, %ymm17, %ymm19
vpmadd52luq     %ymm16,       %ymm17, %ymm19 {%k1}
vpmadd52luq     (%rdi),       %ymm17, %ymm19 {%k1}
vpmadd52luq     (%rdi){1to4}, %ymm17, %ymm19 {%k1}
vpmadd52luq     %ymm16,       %ymm17, %ymm19 {%k1} {z}
vpmadd52luq     (%rdi),       %ymm17, %ymm19 {%k1} {z}
vpmadd52luq     (%rdi){1to4}, %ymm17, %ymm19 {%k1} {z}

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 4 2.00 vpmadd52huq %xmm16, %xmm17, %xmm19
# CHECK-NEXT: 1 10 0.50 * vpmadd52huq (%rdi), %xmm17, %xmm19
# CHECK-NEXT: 1 10 0.50 * vpmadd52huq (%rdi){1to2}, %xmm17, %xmm19
# CHECK-NEXT: 1 4 2.00 vpmadd52huq %xmm16, %xmm17, %xmm19 {%k1}
# CHECK-NEXT: 1 10 0.50 * vpmadd52huq (%rdi), %xmm17, %xmm19 {%k1}
# CHECK-NEXT: 1 10 0.50 * vpmadd52huq (%rdi){1to2}, %xmm17, %xmm19 {%k1}
# CHECK-NEXT: 1 4 2.00 vpmadd52huq %xmm16, %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: 1 10 0.50 * vpmadd52huq (%rdi), %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: 1 10 0.50 * vpmadd52huq (%rdi){1to2}, %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: 1 4 2.00 vpmadd52huq %ymm16, %ymm17, %ymm19
# CHECK-NEXT: 1 10 0.50 * vpmadd52huq (%rdi), %ymm17, %ymm19
# CHECK-NEXT: 1 10 0.50 * vpmadd52huq (%rdi){1to4}, %ymm17, %ymm19
# CHECK-NEXT: 1 4 2.00 vpmadd52huq %ymm16, %ymm17, %ymm19 {%k1}
# CHECK-NEXT: 1 10 0.50 * vpmadd52huq (%rdi), %ymm17, %ymm19 {%k1}
# CHECK-NEXT: 1 10 0.50 * vpmadd52huq (%rdi){1to4}, %ymm17, %ymm19 {%k1}
# CHECK-NEXT: 1 4 2.00 vpmadd52huq %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 1 10 0.50 * vpmadd52huq (%rdi), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 1 10 0.50 * vpmadd52huq (%rdi){1to4}, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 1 4 2.00 vpmadd52luq %xmm16, %xmm17, %xmm19
# CHECK-NEXT: 1 10 0.50 * vpmadd52luq (%rdi), %xmm17, %xmm19
# CHECK-NEXT: 1 10 0.50 * vpmadd52luq (%rdi){1to2}, %xmm17, %xmm19
# CHECK-NEXT: 1 4 2.00 vpmadd52luq %xmm16, %xmm17, %xmm19 {%k1}
# CHECK-NEXT: 1 10 0.50 * vpmadd52luq (%rdi), %xmm17, %xmm19 {%k1}
# CHECK-NEXT: 1 10 0.50 * vpmadd52luq (%rdi){1to2}, %xmm17, %xmm19 {%k1}
# CHECK-NEXT: 1 4 2.00 vpmadd52luq %xmm16, %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: 1 10 0.50 * vpmadd52luq (%rdi), %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: 1 10 0.50 * vpmadd52luq (%rdi){1to2}, %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: 1 4 2.00 vpmadd52luq %ymm16, %ymm17, %ymm19
# CHECK-NEXT: 1 10 0.50 * vpmadd52luq (%rdi), %ymm17, %ymm19
# CHECK-NEXT: 1 10 0.50 * vpmadd52luq (%rdi){1to4}, %ymm17, %ymm19
# CHECK-NEXT: 1 4 2.00 vpmadd52luq %ymm16, %ymm17, %ymm19 {%k1}
# CHECK-NEXT: 1 10 0.50 * vpmadd52luq (%rdi), %ymm17, %ymm19 {%k1}
# CHECK-NEXT: 1 10 0.50 * vpmadd52luq (%rdi){1to4}, %ymm17, %ymm19 {%k1}
# CHECK-NEXT: 1 4 2.00 vpmadd52luq %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 1 10 0.50 * vpmadd52luq (%rdi), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 1 10 0.50 * vpmadd52luq (%rdi){1to4}, %ymm17, %ymm19 {%k1} {z}

# CHECK: Resources:
# CHECK-NEXT: [0] - Zn4AGU0
# CHECK-NEXT: [1] - Zn4AGU1
# CHECK-NEXT: [2] - Zn4AGU2
# CHECK-NEXT: [3] - Zn4ALU0
# CHECK-NEXT: [4] - Zn4ALU1
# CHECK-NEXT: [5] - Zn4ALU2
# CHECK-NEXT: [6] - Zn4ALU3
# CHECK-NEXT: [7] - Zn4BRU1
# CHECK-NEXT: [8] - Zn4FP0
# CHECK-NEXT: [9] - Zn4FP1
# CHECK-NEXT: [10] - Zn4FP2
# CHECK-NEXT: [11] - Zn4FP3
# CHECK-NEXT: [12.0] - Zn4FP45
# CHECK-NEXT: [12.1] - Zn4FP45
# CHECK-NEXT: [13] - Zn4FPSt
# CHECK-NEXT: [14.0] - Zn4LSU
# CHECK-NEXT: [14.1] - Zn4LSU
# CHECK-NEXT: [14.2] - Zn4LSU
# CHECK-NEXT: [15.0] - Zn4Load
# CHECK-NEXT: [15.1] - Zn4Load
# CHECK-NEXT: [15.2] - Zn4Load
# CHECK-NEXT: [16.0] - Zn4Store
# CHECK-NEXT: [16.1] - Zn4Store

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
# CHECK-NEXT: - - - - - - - - 36.00 24.00 - 12.00 12.00 12.00 - 8.00 8.00 8.00 8.00 8.00 8.00 - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpmadd52huq %xmm16, %xmm17, %xmm19
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52huq (%rdi), %xmm17, %xmm19
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52huq (%rdi){1to2}, %xmm17, %xmm19
# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpmadd52huq %xmm16, %xmm17, %xmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52huq (%rdi), %xmm17, %xmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52huq (%rdi){1to2}, %xmm17, %xmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpmadd52huq %xmm16, %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52huq (%rdi), %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52huq (%rdi){1to2}, %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpmadd52huq %ymm16, %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52huq (%rdi), %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52huq (%rdi){1to4}, %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpmadd52huq %ymm16, %ymm17, %ymm19 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52huq (%rdi), %ymm17, %ymm19 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52huq (%rdi){1to4}, %ymm17, %ymm19 {%k1}
# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpmadd52huq %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52huq (%rdi), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52huq (%rdi){1to4}, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpmadd52luq %xmm16, %xmm17, %xmm19
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52luq (%rdi), %xmm17, %xmm19
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52luq (%rdi){1to2}, %xmm17, %xmm19
# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpmadd52luq %xmm16, %xmm17, %xmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52luq (%rdi), %xmm17, %xmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52luq (%rdi){1to2}, %xmm17, %xmm19 {%k1}
# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpmadd52luq %xmm16, %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52luq (%rdi), %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52luq (%rdi){1to2}, %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpmadd52luq %ymm16, %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52luq (%rdi), %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52luq (%rdi){1to4}, %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpmadd52luq %ymm16, %ymm17, %ymm19 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52luq (%rdi), %ymm17, %ymm19 {%k1}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52luq (%rdi){1to4}, %ymm17, %ymm19 {%k1}
# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpmadd52luq %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52luq (%rdi), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmadd52luq (%rdi){1to4}, %ymm17, %ymm19 {%k1} {z}
72 changes: 72 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vaes.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver4 -instruction-tables < %s | FileCheck %s

# VAES, 512-bit (zmm) forms: one register-source and one memory-source
# variant per mnemonic.

vaesdec         %zmm16, %zmm17, %zmm19
vaesdec         (%rax), %zmm17, %zmm19

vaesdeclast     %zmm16, %zmm17, %zmm19
vaesdeclast     (%rax), %zmm17, %zmm19

vaesenc         %zmm16, %zmm17, %zmm19
vaesenc         (%rax), %zmm17, %zmm19

vaesenclast     %zmm16, %zmm17, %zmm19
vaesenclast     (%rax), %zmm17, %zmm19

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 4 0.50 vaesdec %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 1 11 0.50 * vaesdec (%rax), %zmm17, %zmm19
# CHECK-NEXT: 1 4 0.50 vaesdeclast %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 1 11 0.50 * vaesdeclast (%rax), %zmm17, %zmm19
# CHECK-NEXT: 1 4 0.50 vaesenc %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 1 11 0.50 * vaesenc (%rax), %zmm17, %zmm19
# CHECK-NEXT: 1 4 0.50 vaesenclast %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 1 11 0.50 * vaesenclast (%rax), %zmm17, %zmm19

# CHECK: Resources:
# CHECK-NEXT: [0] - Zn4AGU0
# CHECK-NEXT: [1] - Zn4AGU1
# CHECK-NEXT: [2] - Zn4AGU2
# CHECK-NEXT: [3] - Zn4ALU0
# CHECK-NEXT: [4] - Zn4ALU1
# CHECK-NEXT: [5] - Zn4ALU2
# CHECK-NEXT: [6] - Zn4ALU3
# CHECK-NEXT: [7] - Zn4BRU1
# CHECK-NEXT: [8] - Zn4FP0
# CHECK-NEXT: [9] - Zn4FP1
# CHECK-NEXT: [10] - Zn4FP2
# CHECK-NEXT: [11] - Zn4FP3
# CHECK-NEXT: [12.0] - Zn4FP45
# CHECK-NEXT: [12.1] - Zn4FP45
# CHECK-NEXT: [13] - Zn4FPSt
# CHECK-NEXT: [14.0] - Zn4LSU
# CHECK-NEXT: [14.1] - Zn4LSU
# CHECK-NEXT: [14.2] - Zn4LSU
# CHECK-NEXT: [15.0] - Zn4Load
# CHECK-NEXT: [15.1] - Zn4Load
# CHECK-NEXT: [15.2] - Zn4Load
# CHECK-NEXT: [16.0] - Zn4Store
# CHECK-NEXT: [16.1] - Zn4Store

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 2.00 2.00 - 1.33 1.33 1.33 1.33 1.33 1.33 - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vaesdec %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaesdec (%rax), %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vaesdeclast %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaesdeclast (%rax), %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vaesenc %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaesenc (%rax), %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vaesenclast %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaesenclast (%rax), %zmm17, %zmm19
100 changes: 100 additions & 0 deletions llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vaesvl.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver4 -instruction-tables < %s | FileCheck %s

# Test input: the four AES instructions vaesdec, vaesdeclast, vaesenc and
# vaesenclast, each at xmm and ymm width. Every pair below is the same
# instruction twice: first with a register source, then with a memory source
# read through (%rax). All register operands are numbered 16 or above.
# The expected-output lines later in this file list the instructions in this
# exact order, so regenerate them with the script named in the NOTE line
# after adding, removing or reordering anything here.

# vaesdec, xmm operands.
vaesdec %xmm16, %xmm17, %xmm19
vaesdec (%rax), %xmm17, %xmm19

# vaesdec, ymm operands.
vaesdec %ymm16, %ymm17, %ymm19
vaesdec (%rax), %ymm17, %ymm19

# vaesdeclast, xmm operands.
vaesdeclast %xmm16, %xmm17, %xmm19
vaesdeclast (%rax), %xmm17, %xmm19

# vaesdeclast, ymm operands.
vaesdeclast %ymm16, %ymm17, %ymm19
vaesdeclast (%rax), %ymm17, %ymm19

# vaesenc, xmm operands.
vaesenc %xmm16, %xmm17, %xmm19
vaesenc (%rax), %xmm17, %xmm19

# vaesenc, ymm operands.
vaesenc %ymm16, %ymm17, %ymm19
vaesenc (%rax), %ymm17, %ymm19

# vaesenclast, xmm operands.
vaesenclast %xmm16, %xmm17, %xmm19
vaesenclast (%rax), %xmm17, %xmm19

# vaesenclast, ymm operands.
vaesenclast %ymm16, %ymm17, %ymm19
vaesenclast (%rax), %ymm17, %ymm19

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 4 0.50 vaesdec %xmm16, %xmm17, %xmm19
# CHECK-NEXT: 1 11 0.50 * vaesdec (%rax), %xmm17, %xmm19
# CHECK-NEXT: 1 4 0.50 vaesdec %ymm16, %ymm17, %ymm19
# CHECK-NEXT: 1 11 0.50 * vaesdec (%rax), %ymm17, %ymm19
# CHECK-NEXT: 1 4 0.50 vaesdeclast %xmm16, %xmm17, %xmm19
# CHECK-NEXT: 1 11 0.50 * vaesdeclast (%rax), %xmm17, %xmm19
# CHECK-NEXT: 1 4 0.50 vaesdeclast %ymm16, %ymm17, %ymm19
# CHECK-NEXT: 1 11 0.50 * vaesdeclast (%rax), %ymm17, %ymm19
# CHECK-NEXT: 1 4 0.50 vaesenc %xmm16, %xmm17, %xmm19
# CHECK-NEXT: 1 11 0.50 * vaesenc (%rax), %xmm17, %xmm19
# CHECK-NEXT: 1 4 0.50 vaesenc %ymm16, %ymm17, %ymm19
# CHECK-NEXT: 1 11 0.50 * vaesenc (%rax), %ymm17, %ymm19
# CHECK-NEXT: 1 4 0.50 vaesenclast %xmm16, %xmm17, %xmm19
# CHECK-NEXT: 1 11 0.50 * vaesenclast (%rax), %xmm17, %xmm19
# CHECK-NEXT: 1 4 0.50 vaesenclast %ymm16, %ymm17, %ymm19
# CHECK-NEXT: 1 11 0.50 * vaesenclast (%rax), %ymm17, %ymm19

# CHECK: Resources:
# CHECK-NEXT: [0] - Zn4AGU0
# CHECK-NEXT: [1] - Zn4AGU1
# CHECK-NEXT: [2] - Zn4AGU2
# CHECK-NEXT: [3] - Zn4ALU0
# CHECK-NEXT: [4] - Zn4ALU1
# CHECK-NEXT: [5] - Zn4ALU2
# CHECK-NEXT: [6] - Zn4ALU3
# CHECK-NEXT: [7] - Zn4BRU1
# CHECK-NEXT: [8] - Zn4FP0
# CHECK-NEXT: [9] - Zn4FP1
# CHECK-NEXT: [10] - Zn4FP2
# CHECK-NEXT: [11] - Zn4FP3
# CHECK-NEXT: [12.0] - Zn4FP45
# CHECK-NEXT: [12.1] - Zn4FP45
# CHECK-NEXT: [13] - Zn4FPSt
# CHECK-NEXT: [14.0] - Zn4LSU
# CHECK-NEXT: [14.1] - Zn4LSU
# CHECK-NEXT: [14.2] - Zn4LSU
# CHECK-NEXT: [15.0] - Zn4Load
# CHECK-NEXT: [15.1] - Zn4Load
# CHECK-NEXT: [15.2] - Zn4Load
# CHECK-NEXT: [16.0] - Zn4Store
# CHECK-NEXT: [16.1] - Zn4Store

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
# CHECK-NEXT: - - - - - - - - 8.00 8.00 - - 4.00 4.00 - 2.67 2.67 2.67 2.67 2.67 2.67 - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vaesdec %xmm16, %xmm17, %xmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaesdec (%rax), %xmm17, %xmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vaesdec %ymm16, %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaesdec (%rax), %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vaesdeclast %xmm16, %xmm17, %xmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaesdeclast (%rax), %xmm17, %xmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vaesdeclast %ymm16, %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaesdeclast (%rax), %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vaesenc %xmm16, %xmm17, %xmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaesenc (%rax), %xmm17, %xmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vaesenc %ymm16, %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaesenc (%rax), %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vaesenclast %xmm16, %xmm17, %xmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaesenclast (%rax), %xmm17, %xmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vaesenclast %ymm16, %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaesenclast (%rax), %ymm17, %ymm19
Loading