66 changes: 31 additions & 35 deletions llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=100 -timeline -timeline-max-iterations=1 < %s | FileCheck %s

vmovaps (%rsi), %xmm0
Expand All @@ -9,34 +10,31 @@ vmovaps %xmm0, 32(%rdi)
vmovaps 48(%rsi), %xmm0
vmovaps %xmm0, 48(%rdi)


# CHECK: Iterations: 100
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 800
# CHECK-NEXT: Total Cycles: 408
# CHECK-NEXT: Dispatch Width: 2
# CHECK-NEXT: IPC: 1.96


# CHECK: Instruction Info:
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 5 1.00 * vmovaps (%rsi), %xmm0
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rdi)
# CHECK-NEXT: 1 5 1.00 * vmovaps 16(%rsi), %xmm0
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 16(%rdi)
# CHECK-NEXT: 1 5 1.00 * vmovaps 32(%rsi), %xmm0
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 32(%rdi)
# CHECK-NEXT: 1 5 1.00 * vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 48(%rdi)


# CHECK: Resources:
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 5 1.00 * vmovaps (%rsi), %xmm0
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rdi)
# CHECK-NEXT: 1 5 1.00 * vmovaps 16(%rsi), %xmm0
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 16(%rdi)
# CHECK-NEXT: 1 5 1.00 * vmovaps 32(%rsi), %xmm0
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 32(%rdi)
# CHECK-NEXT: 1 5 1.00 * vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 48(%rdi)

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
# CHECK-NEXT: [2] - JDiv
Expand All @@ -52,12 +50,11 @@ vmovaps %xmm0, 48(%rdi)
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 2.00 2.00 3.97 4.03 4.00 - 4.00 4.00 - - -

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 2.00 2.00 3.97 4.03 4.00 - 4.00 4.00 - - -

# CHECK: Resource pressure by instruction:
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - - 1.00 0.98 0.02 1.00 - - - - - - vmovaps (%rsi), %xmm0
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, (%rdi)
Expand All @@ -68,10 +65,9 @@ vmovaps %xmm0, 48(%rdi)
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, 48(%rdi)


# CHECK: Timeline view:
# CHECK-NEXT: 01
# CHECK-NEXT: Index 0123456789
# CHECK: Timeline view:
# CHECK-NEXT: 01
# CHECK-NEXT: Index 0123456789

# CHECK: [0,0] DeeeeeER .. vmovaps (%rsi), %xmm0
# CHECK-NEXT: [0,1] D=====eER .. vmovaps %xmm0, (%rdi)
Expand All @@ -82,19 +78,19 @@ vmovaps %xmm0, 48(%rdi)
# CHECK-NEXT: [0,6] . DeeeeeER. vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: [0,7] . D=====eER vmovaps %xmm0, 48(%rdi)


# CHECK: Average Wait times (based on the timeline view):
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rsi), %xmm0
# CHECK-NEXT: 1. 1 6.0 0.0 0.0 vmovaps %xmm0, (%rdi)
# CHECK-NEXT: 2. 1 1.0 1.0 0.0 vmovaps 16(%rsi), %xmm0
# CHECK-NEXT: 3. 1 6.0 0.0 0.0 vmovaps %xmm0, 16(%rdi)
# CHECK-NEXT: 4. 1 1.0 1.0 0.0 vmovaps 32(%rsi), %xmm0
# CHECK-NEXT: 5. 1 6.0 0.0 0.0 vmovaps %xmm0, 32(%rdi)
# CHECK-NEXT: 6. 1 1.0 1.0 0.0 vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: 7. 1 6.0 0.0 0.0 vmovaps %xmm0, 48(%rdi)
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rsi), %xmm0
# CHECK-NEXT: 1. 1 6.0 0.0 0.0 vmovaps %xmm0, (%rdi)
# CHECK-NEXT: 2. 1 1.0 1.0 0.0 vmovaps 16(%rsi), %xmm0
# CHECK-NEXT: 3. 1 6.0 0.0 0.0 vmovaps %xmm0, 16(%rdi)
# CHECK-NEXT: 4. 1 1.0 1.0 0.0 vmovaps 32(%rsi), %xmm0
# CHECK-NEXT: 5. 1 6.0 0.0 0.0 vmovaps %xmm0, 32(%rdi)
# CHECK-NEXT: 6. 1 1.0 1.0 0.0 vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: 7. 1 6.0 0.0 0.0 vmovaps %xmm0, 48(%rdi)

3 changes: 2 additions & 1 deletion llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=2 < %s | FileCheck %s

# VALU0/VALU1
Expand Down Expand Up @@ -57,7 +58,7 @@ vsqrtps %ymm0, %ymm2
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 3.00 63.00 6.01 5.99 - - - 1.00 1.00 1.00 3.00

# CHECK: Resource pressure by instruction:
Expand Down
1 change: 1 addition & 0 deletions llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false -retire-stats -iterations=1 < %s | FileCheck %s

vsqrtps %xmm0, %xmm2
Expand Down
13 changes: 6 additions & 7 deletions llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s

# The vmul can start executing 3cy in advance. That is because its first use
# operand (i.e. %xmm1) is a ReadAfterLd. That means the memory operand is
# evaluated before %xmm1.


vaddps %xmm0, %xmm0, %xmm1
vmulps (%rdi), %xmm1, %xmm2


# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 10
# CHECK-NEXT: Dispatch Width: 2

# CHECK-NEXT: IPC: 0.20

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Expand All @@ -27,20 +26,20 @@ vmulps (%rdi), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm0, %xmm1
# CHECK-NEXT: 1 7 1.00 * vmulps (%rdi), %xmm1, %xmm2


# CHECK: Timeline view:

# CHECK: Index 0123456789

# CHECK: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
# CHECK-NEXT: [0,1] DeeeeeeeER vmulps (%rdi), %xmm1, %xmm2


# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1
# CHECK-NEXT: 1. 1 1.0 0.0 0.0 vmulps (%rdi), %xmm1, %xmm2
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1
# CHECK-NEXT: 1. 1 1.0 0.0 0.0 vmulps (%rdi), %xmm1, %xmm2

24 changes: 21 additions & 3 deletions llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -resource-pressure=0 -timeline < %s | FileCheck %s

imull %esi
imull (%rdi)


# The second integer multiply can start at cycle 2 because the implicit reads
# can start after the load operand is evaluated.

# CHECK: Instruction Info:
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 10
# CHECK-NEXT: Dispatch Width: 2
# CHECK-NEXT: IPC: 0.20

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
Expand All @@ -19,8 +25,20 @@
# CHECK-NEXT: 2 3 1.00 imull %esi
# CHECK-NEXT: 2 6 1.00 * imull (%rdi)


# CHECK: Timeline view:

# CHECK: Index 0123456789

# CHECK: [0,0] DeeeER . imull %esi
# CHECK-NEXT: [0,1] .DeeeeeeER imull (%rdi)

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imull %esi
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 imull (%rdi)

1 change: 1 addition & 0 deletions llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -resource-pressure=0 -timeline -dispatch=3 < %s | FileCheck %s

add %rdi, %rsi
Expand Down
54 changes: 48 additions & 6 deletions llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=5 -instruction-info=false -dispatch-stats -register-file-stats -timeline < %s | FileCheck %s

vaddps %xmm0, %xmm0, %xmm0
vmulps %xmm0, %xmm0, %xmm0

# CHECK: Iterations: 5
# CHECK: Iterations: 5
# CHECK-NEXT: Instructions: 10

# CHECK-NEXT: Total Cycles: 28
# CHECK-NEXT: Dispatch Width: 2
# CHECK-NEXT: IPC: 0.36

# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
Expand All @@ -15,10 +18,14 @@ vmulps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0

# CHECK: Dispatch Logic - number of cycles where we saw N instructions dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 23 (82.1%)
# CHECK-NEXT: 2, 5 (17.9%)

# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 10
# CHECK-NEXT: Max number of mappings used: 10
# CHECK-NEXT: Total number of mappings created: 10
# CHECK-NEXT: Max number of mappings used: 10

# CHECK: * Register File #1 -- FpuPRF:
# CHECK-NEXT: Number of physical registers: 72
Expand All @@ -30,9 +37,33 @@ vmulps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: Total number of mappings created: 0
# CHECK-NEXT: Max number of mappings used: 0

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
# CHECK-NEXT: [2] - JDiv
# CHECK-NEXT: [3] - JFPA
# CHECK-NEXT: [4] - JFPM
# CHECK-NEXT: [5] - JFPU0
# CHECK-NEXT: [6] - JFPU1
# CHECK-NEXT: [7] - JLAGU
# CHECK-NEXT: [8] - JMul
# CHECK-NEXT: [9] - JSAGU
# CHECK-NEXT: [10] - JSTC
# CHECK-NEXT: [11] - JVALU0
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vmulps %xmm0, %xmm0, %xmm0

# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 01234567

# CHECK: [0,0] DeeeER . . . . . vaddps %xmm0, %xmm0, %xmm0
Expand All @@ -45,3 +76,14 @@ vmulps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [3,1] . D===============eeER . . vmulps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [4,0] . D================eeeER . vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [4,1] . D===================eeER vmulps %xmm0, %xmm0, %xmm0

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 5 9.0 0.2 0.0 vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: 1. 5 12.0 0.0 0.0 vmulps %xmm0, %xmm0, %xmm0

56 changes: 50 additions & 6 deletions llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s
Original file line number Diff line number Diff line change
@@ -1,24 +1,32 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -register-file-size=5 -iterations=5 -instruction-info=false -dispatch-stats -register-file-stats -timeline < %s | FileCheck %s

vaddps %xmm0, %xmm0, %xmm0
vmulps %xmm0, %xmm0, %xmm0

# CHECK: Iterations: 5
# CHECK: Iterations: 5
# CHECK-NEXT: Instructions: 10
# CHECK-NEXT: Total Cycles: 28
# CHECK-NEXT: Dispatch Width: 2
# CHECK-NEXT: IPC: 0.36


# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 13
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0

# CHECK: Dispatch Logic - number of cycles where we saw N instructions dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 20 (71.4%)
# CHECK-NEXT: 2, 2 (7.1%)
# CHECK-NEXT: 1, 6 (21.4%)

# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 10
# CHECK-NEXT: Max number of mappings used: 5
# CHECK-NEXT: Total number of mappings created: 10
# CHECK-NEXT: Max number of mappings used: 5

# CHECK: * Register File #1 -- FpuPRF:
# CHECK-NEXT: Number of physical registers: 72
Expand All @@ -30,10 +38,35 @@ vmulps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: Total number of mappings created: 0
# CHECK-NEXT: Max number of mappings used: 0

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
# CHECK-NEXT: [2] - JDiv
# CHECK-NEXT: [3] - JFPA
# CHECK-NEXT: [4] - JFPM
# CHECK-NEXT: [5] - JFPU0
# CHECK-NEXT: [6] - JFPU1
# CHECK-NEXT: [7] - JLAGU
# CHECK-NEXT: [8] - JMul
# CHECK-NEXT: [9] - JSAGU
# CHECK-NEXT: [10] - JSTC
# CHECK-NEXT: [11] - JVALU0
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vmulps %xmm0, %xmm0, %xmm0

# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 01234567

# CHECK: [0,0] DeeeER . . . . . vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [0,1] D===eeER . . . . . vmulps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [1,0] .D====eeeER . . . . vaddps %xmm0, %xmm0, %xmm0
Expand All @@ -44,3 +77,14 @@ vmulps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [3,1] . . D========eeER . . vmulps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [4,0] . . . D========eeeER . vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [4,1] . . . D========eeER vmulps %xmm0, %xmm0, %xmm0

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 5 6.6 0.2 0.0 vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: 1. 5 7.8 0.0 0.0 vmulps %xmm0, %xmm0, %xmm0

50 changes: 45 additions & 5 deletions llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -register-file-size=5 -iterations=2 -dispatch-stats -register-file-stats -timeline < %s | FileCheck %s

idiv %eax

# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 55
# CHECK-NEXT: Dispatch Width: 2
# CHECK-NEXT: IPC: 0.04

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Expand All @@ -16,7 +20,6 @@ idiv %eax
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 25 25.00 * idivl %eax


# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 26
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
Expand All @@ -25,10 +28,14 @@ idiv %eax
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0

# CHECK: Dispatch Logic - number of cycles where we saw N instructions dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 53 (96.4%)
# CHECK-NEXT: 1, 2 (3.6%)

# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 6
# CHECK-NEXT: Max number of mappings used: 3
# CHECK-NEXT: Total number of mappings created: 6
# CHECK-NEXT: Max number of mappings used: 3

# CHECK: * Register File #1 -- FpuPRF:
# CHECK-NEXT: Number of physical registers: 72
Expand All @@ -40,10 +47,43 @@ idiv %eax
# CHECK-NEXT: Total number of mappings created: 6
# CHECK-NEXT: Max number of mappings used: 3

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
# CHECK-NEXT: [2] - JDiv
# CHECK-NEXT: [3] - JFPA
# CHECK-NEXT: [4] - JFPM
# CHECK-NEXT: [5] - JFPU0
# CHECK-NEXT: [6] - JFPU1
# CHECK-NEXT: [7] - JLAGU
# CHECK-NEXT: [8] - JMul
# CHECK-NEXT: [9] - JSAGU
# CHECK-NEXT: [10] - JSTC
# CHECK-NEXT: [11] - JVALU0
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - 1.00 25.00 - - - - - - - - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - 1.00 25.00 - - - - - - - - - - - idivl %eax

# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789 01234
# CHECK-NEXT: Index 0123456789 0123456789 0123456789
# CHECK-NEXT: 0123456789 0123456789 01234
# CHECK-NEXT: Index 0123456789 0123456789 0123456789

# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . idivl %eax
# CHECK-NEXT: [1,0] . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeER idivl %eax

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 1.0 1.0 0.0 idivl %eax

27 changes: 22 additions & 5 deletions llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=22 -dispatch-stats -register-file-stats -resource-pressure=false -timeline -timeline-max-iterations=3 < %s | FileCheck %s

idiv %eax

# CHECK: Iterations: 22
# CHECK-NEXT: Instructions: 22
# CHECK-NEXT: Total Cycles: 553
# CHECK-NEXT: Dispatch Width: 2
# CHECK-NEXT: IPC: 0.04

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Expand All @@ -16,7 +20,6 @@ idiv %eax
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 25 25.00 * idivl %eax


# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 6
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
Expand All @@ -25,10 +28,14 @@ idiv %eax
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0

# CHECK: Dispatch Logic - number of cycles where we saw N instructions dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 531 (96.0%)
# CHECK-NEXT: 1, 22 (4.0%)

# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 66
# CHECK-NEXT: Max number of mappings used: 63
# CHECK-NEXT: Total number of mappings created: 66
# CHECK-NEXT: Max number of mappings used: 63

# CHECK: * Register File #1 -- FpuPRF:
# CHECK-NEXT: Number of physical registers: 72
Expand All @@ -40,10 +47,20 @@ idiv %eax
# CHECK-NEXT: Total number of mappings created: 66
# CHECK-NEXT: Max number of mappings used: 63


# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789 0123456789 01234567
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789

# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . . . idivl %eax
# CHECK-NEXT: [1,0] .D========================eeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . idivl %eax
# CHECK-NEXT: [2,0] . D================================================eeeeeeeeeeeeeeeeeeeeeeeeeER idivl %eax

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 25.0 0.3 0.0 idivl %eax

56 changes: 50 additions & 6 deletions llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -resource-pressure=false -instruction-info=false -dispatch-stats -register-file-stats -timeline < %s | FileCheck %s

vdivps %ymm0, %ymm0, %ymm1
Expand Down Expand Up @@ -34,14 +35,12 @@
vaddps %ymm3, %ymm0, %ymm5
vaddps %ymm3, %ymm0, %ymm6


# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 33
# CHECK-NEXT: Total Cycles: 70
# CHECK-NEXT: Dispatch Width: 2
# CHECK-NEXT: IPC: 0.47


# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 8
Expand All @@ -50,10 +49,14 @@
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0

# CHECK: Dispatch Logic - number of cycles where we saw N instructions dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 37 (52.9%)
# CHECK-NEXT: 1, 33 (47.1%)

# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 66
# CHECK-NEXT: Max number of mappings used: 64
# CHECK-NEXT: Total number of mappings created: 66
# CHECK-NEXT: Max number of mappings used: 64

# CHECK: * Register File #1 -- FpuPRF:
# CHECK-NEXT: Number of physical registers: 72
Expand All @@ -65,9 +68,8 @@
# CHECK-NEXT: Total number of mappings created: 0
# CHECK-NEXT: Max number of mappings used: 0


# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789 0123456789
# CHECK-NEXT: 0123456789 0123456789 0123456789
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789

# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . vdivps %ymm0, %ymm0, %ymm1
Expand Down Expand Up @@ -103,3 +105,45 @@
# CHECK-NEXT: [0,30] . . . . . . D==============================eeeER . vaddps %ymm3, %ymm0, %ymm4
# CHECK-NEXT: [0,31] . . . . . . .D===============================eeeER . vaddps %ymm3, %ymm0, %ymm5
# CHECK-NEXT: [0,32] . . . . . . . . D========================eeeER vaddps %ymm3, %ymm0, %ymm6

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vdivps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 1. 1 1.0 1.0 34.0 vaddps %ymm0, %ymm0, %ymm2
# CHECK-NEXT: 2. 1 2.0 2.0 33.0 vaddps %ymm0, %ymm0, %ymm3
# CHECK-NEXT: 3. 1 3.0 3.0 31.0 vaddps %ymm0, %ymm0, %ymm4
# CHECK-NEXT: 4. 1 4.0 4.0 30.0 vaddps %ymm0, %ymm0, %ymm5
# CHECK-NEXT: 5. 1 5.0 5.0 28.0 vaddps %ymm0, %ymm0, %ymm6
# CHECK-NEXT: 6. 1 6.0 6.0 27.0 vaddps %ymm0, %ymm0, %ymm7
# CHECK-NEXT: 7. 1 7.0 7.0 25.0 vaddps %ymm0, %ymm0, %ymm8
# CHECK-NEXT: 8. 1 8.0 8.0 24.0 vaddps %ymm0, %ymm0, %ymm9
# CHECK-NEXT: 9. 1 9.0 9.0 22.0 vaddps %ymm0, %ymm0, %ymm10
# CHECK-NEXT: 10. 1 10.0 10.0 21.0 vaddps %ymm0, %ymm0, %ymm11
# CHECK-NEXT: 11. 1 11.0 11.0 19.0 vaddps %ymm0, %ymm0, %ymm12
# CHECK-NEXT: 12. 1 12.0 12.0 18.0 vaddps %ymm0, %ymm0, %ymm13
# CHECK-NEXT: 13. 1 13.0 13.0 16.0 vaddps %ymm0, %ymm0, %ymm14
# CHECK-NEXT: 14. 1 14.0 14.0 15.0 vaddps %ymm0, %ymm0, %ymm15
# CHECK-NEXT: 15. 1 15.0 15.0 13.0 vaddps %ymm2, %ymm0, %ymm0
# CHECK-NEXT: 16. 1 17.0 0.0 11.0 vaddps %ymm2, %ymm0, %ymm3
# CHECK-NEXT: 17. 1 18.0 2.0 9.0 vaddps %ymm2, %ymm0, %ymm4
# CHECK-NEXT: 18. 1 19.0 4.0 8.0 vaddps %ymm2, %ymm0, %ymm5
# CHECK-NEXT: 19. 1 20.0 6.0 6.0 vaddps %ymm2, %ymm0, %ymm6
# CHECK-NEXT: 20. 1 21.0 8.0 5.0 vaddps %ymm2, %ymm0, %ymm7
# CHECK-NEXT: 21. 1 22.0 10.0 3.0 vaddps %ymm2, %ymm0, %ymm8
# CHECK-NEXT: 22. 1 23.0 12.0 2.0 vaddps %ymm2, %ymm0, %ymm9
# CHECK-NEXT: 23. 1 24.0 14.0 0.0 vaddps %ymm2, %ymm0, %ymm10
# CHECK-NEXT: 24. 1 25.0 16.0 0.0 vaddps %ymm2, %ymm0, %ymm11
# CHECK-NEXT: 25. 1 26.0 18.0 0.0 vaddps %ymm2, %ymm0, %ymm12
# CHECK-NEXT: 26. 1 27.0 20.0 0.0 vaddps %ymm2, %ymm0, %ymm13
# CHECK-NEXT: 27. 1 28.0 22.0 0.0 vaddps %ymm2, %ymm0, %ymm14
# CHECK-NEXT: 28. 1 29.0 24.0 0.0 vaddps %ymm2, %ymm0, %ymm15
# CHECK-NEXT: 29. 1 30.0 23.0 0.0 vaddps %ymm3, %ymm0, %ymm2
# CHECK-NEXT: 30. 1 31.0 25.0 0.0 vaddps %ymm3, %ymm0, %ymm4
# CHECK-NEXT: 31. 1 32.0 27.0 0.0 vaddps %ymm3, %ymm0, %ymm5
# CHECK-NEXT: 32. 1 25.0 25.0 0.0 vaddps %ymm3, %ymm0, %ymm6

28 changes: 28 additions & 0 deletions llvm/test/tools/llvm-mca/X86/BtVer2/resources-aes.s
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s

aesdec %xmm0, %xmm2
Expand All @@ -18,6 +19,28 @@ aesimc (%rax), %xmm2
aeskeygenassist $22, %xmm0, %xmm2
aeskeygenassist $22, (%rax), %xmm2

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 3 1.00 aesdec %xmm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * aesdec (%rax), %xmm2
# CHECK-NEXT: 2 3 1.00 aesdeclast %xmm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * aesdeclast (%rax), %xmm2
# CHECK-NEXT: 2 3 1.00 aesenc %xmm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * aesenc (%rax), %xmm2
# CHECK-NEXT: 2 3 1.00 aesenclast %xmm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * aesenclast (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 aesimc %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * aesimc (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 aeskeygenassist $22, %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * aeskeygenassist $22, (%rax), %xmm2

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
Expand All @@ -34,6 +57,10 @@ aeskeygenassist $22, (%rax), %xmm2
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - - - 12.00 - 6.00 - - - - - 12.00

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - - - 1.00 - - - - - - - 1.00 aesdec %xmm0, %xmm2
Expand All @@ -48,3 +75,4 @@ aeskeygenassist $22, (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - 1.00 - - - - - 1.00 aesimc (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - - - - - - 1.00 aeskeygenassist $22, %xmm0, %xmm2
# CHECK-NEXT: - - - - - 1.00 - 1.00 - - - - - 1.00 aeskeygenassist $22, (%rax), %xmm2

8 changes: 6 additions & 2 deletions llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s

vaddpd %xmm0, %xmm1, %xmm2
Expand Down Expand Up @@ -1008,7 +1009,6 @@ vxorps (%rax), %ymm1, %ymm2
vzeroall
vzeroupper


# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
Expand Down Expand Up @@ -1702,7 +1702,6 @@ vzeroupper
# CHECK-NEXT: 73 90 - * * * vzeroall
# CHECK-NEXT: 37 46 - * * * vzeroupper


# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
Expand All @@ -1719,6 +1718,10 @@ vzeroupper
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: 48.00 2.00 - 355.50 907.50 402.00 398.00 381.00 - 43.00 114.00 116.50 116.50 40.00

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddpd %xmm0, %xmm1, %xmm2
Expand Down Expand Up @@ -2404,3 +2407,4 @@ vzeroupper
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vxorps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - - vzeroall
# CHECK-NEXT: - - - - - - - - - - - - - - vzeroupper

18 changes: 18 additions & 0 deletions llvm/test/tools/llvm-mca/X86/BtVer2/resources-clmul.s
Original file line number Diff line number Diff line change
@@ -1,8 +1,21 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s

pclmulqdq $11, %xmm0, %xmm2
pclmulqdq $11, (%rax), %xmm2

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 2 1.00 pclmulqdq $11, %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * pclmulqdq $11, (%rax), %xmm2

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
Expand All @@ -19,7 +32,12 @@ pclmulqdq $11, (%rax), %xmm2
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - - - 2.00 - 1.00 - - - - - 2.00

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - - - 1.00 - - - - - - - 1.00 pclmulqdq $11, %xmm0, %xmm2
# CHECK-NEXT: - - - - - 1.00 - 1.00 - - - - - 1.00 pclmulqdq $11, (%rax), %xmm2

3 changes: 2 additions & 1 deletion llvm/test/tools/llvm-mca/X86/BtVer2/resources-f16c.s
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 < %s | FileCheck %s

vcvtph2ps %xmm0, %xmm2
Expand Down Expand Up @@ -53,7 +54,7 @@ vcvtps2ph $0, %ymm0, (%rax)
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 2.00 2.00 - 12.00 2.00 - 2.00 12.00 - - -

# CHECK: Resource pressure by instruction:
Expand Down
9 changes: 6 additions & 3 deletions llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse1.s
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s

addps %xmm0, %xmm2
Expand Down Expand Up @@ -134,7 +135,6 @@ unpcklps (%rax), %xmm2
xorps %xmm0, %xmm2
xorps (%rax), %xmm2


# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
Expand Down Expand Up @@ -236,7 +236,6 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 xorps %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * xorps (%rax), %xmm2


# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
Expand All @@ -253,8 +252,11 @@ xorps (%rax), %xmm2
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: 13.00 - - 44.50 183.50 37.50 47.50 42.00 - 7.00 15.00 1.00 1.00 -

# CHECK: Resource pressure by instruction:
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - addps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - addps (%rax), %xmm2
Expand Down Expand Up @@ -347,3 +349,4 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - unpcklps (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - xorps %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - xorps (%rax), %xmm2

271 changes: 270 additions & 1 deletion llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s

Large diffs are not rendered by default.

35 changes: 35 additions & 0 deletions llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse3.s
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s

addsubpd %xmm0, %xmm2
Expand Down Expand Up @@ -29,6 +30,35 @@ movshdup (%rax), %xmm2
movsldup %xmm0, %xmm2
movsldup (%rax), %xmm2

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 addsubpd %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * addsubpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 addsubps %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * addsubps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 haddpd %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * haddpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 haddps %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * haddps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 hsubpd %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * hsubpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 hsubps %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * hsubps (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 * lddqu (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 movddup %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * movddup (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 movshdup %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * movshdup (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 movsldup %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * movsldup (%rax), %xmm2

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
Expand All @@ -45,6 +75,10 @@ movsldup (%rax), %xmm2
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 15.00 3.00 15.50 3.50 10.00 - - - 0.50 0.50 -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - addsubpd %xmm0, %xmm2
Expand All @@ -66,3 +100,4 @@ movsldup (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - movshdup (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - movsldup %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - movsldup (%rax), %xmm2

112 changes: 112 additions & 0 deletions llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse41.s
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s

blendpd $11, %xmm0, %xmm2
Expand Down Expand Up @@ -145,6 +146,112 @@ roundsd $1, (%rax), %xmm2
roundss $1, %xmm0, %xmm2
roundss $1, (%rax), %xmm2

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.50 blendpd $11, %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * blendpd $11, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 blendps $11, %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * blendps $11, (%rax), %xmm2
# CHECK-NEXT: 3 2 2.00 blendvpd %xmm0, %xmm0, %xmm2
# CHECK-NEXT: 3 7 2.00 * blendvpd %xmm0, (%rax), %xmm2
# CHECK-NEXT: 3 2 2.00 blendvps %xmm0, %xmm0, %xmm2
# CHECK-NEXT: 3 7 2.00 * blendvps %xmm0, (%rax), %xmm2
# CHECK-NEXT: 3 9 3.00 dppd $22, %xmm0, %xmm2
# CHECK-NEXT: 3 14 3.00 * dppd $22, (%rax), %xmm2
# CHECK-NEXT: 5 11 3.00 dpps $22, %xmm0, %xmm2
# CHECK-NEXT: 5 16 3.00 * dpps $22, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 extractps $1, %xmm0, %ecx
# CHECK-NEXT: 1 3 1.00 * extractps $1, %xmm0, (%rax)
# CHECK-NEXT: 1 1 0.50 insertps $1, %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * insertps $1, (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 * movntdqa (%rax), %xmm2
# CHECK-NEXT: 1 3 2.00 mpsadbw $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 2.00 * mpsadbw $1, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 packusdw %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * packusdw (%rax), %xmm2
# CHECK-NEXT: 3 2 2.00 pblendvb %xmm0, %xmm0, %xmm2
# CHECK-NEXT: 3 7 2.00 * pblendvb %xmm0, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pblendw $11, %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pblendw $11, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pcmpeqq %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pcmpeqq (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 pextrb $1, %xmm0, %ecx
# CHECK-NEXT: 1 3 1.00 * pextrb $1, %xmm0, (%rax)
# CHECK-NEXT: 1 3 1.00 pextrd $1, %xmm0, %ecx
# CHECK-NEXT: 1 3 1.00 * pextrd $1, %xmm0, (%rax)
# CHECK-NEXT: 1 3 1.00 pextrq $1, %xmm0, %rcx
# CHECK-NEXT: 1 3 1.00 * pextrq $1, %xmm0, (%rax)
# CHECK-NEXT: 1 3 1.00 * pextrw $1, %xmm0, (%rax)
# CHECK-NEXT: 1 2 1.00 phminposuw %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * phminposuw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pinsrb $1, %eax, %xmm1
# CHECK-NEXT: 1 6 1.00 * pinsrb $1, (%rax), %xmm1
# CHECK-NEXT: 1 1 0.50 pinsrd $1, %eax, %xmm1
# CHECK-NEXT: 1 6 1.00 * pinsrd $1, (%rax), %xmm1
# CHECK-NEXT: 1 1 0.50 pinsrq $1, %rax, %xmm1
# CHECK-NEXT: 1 6 1.00 * pinsrq $1, (%rax), %xmm1
# CHECK-NEXT: 1 1 0.50 pmaxsb %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pmaxsb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmaxsd %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pmaxsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmaxud %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pmaxud (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmaxuw %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pmaxuw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pminsb %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pminsb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pminsd %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pminsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pminud %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pminud (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pminuw %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pminuw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovsxbd %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pmovsxbd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovsxbq %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pmovsxbq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovsxbw %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pmovsxbw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovsxdq %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pmovsxdq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovsxwd %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pmovsxwd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovsxwq %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pmovsxwq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovzxbd %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pmovzxbd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovzxbq %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pmovzxbq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovzxbw %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pmovzxbw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovzxdq %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pmovzxdq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovzxwd %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pmovzxwd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovzxwq %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pmovzxwq (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 pmuldq %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * pmuldq (%rax), %xmm2
# CHECK-NEXT: 3 4 2.00 pmulld %xmm0, %xmm2
# CHECK-NEXT: 3 9 2.00 * pmulld (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 ptest %xmm0, %xmm1
# CHECK-NEXT: 1 8 1.00 * ptest (%rax), %xmm1
# CHECK-NEXT: 1 3 1.00 roundpd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * roundpd $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 roundps $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * roundps $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 roundsd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * roundsd $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 roundss $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * roundss $1, (%rax), %xmm2

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
Expand All @@ -161,6 +268,10 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: 6.00 - - 37.00 23.00 57.50 42.50 44.00 - 5.00 5.00 31.50 31.50 12.00

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - blendpd $11, %xmm0, %xmm2
Expand Down Expand Up @@ -259,3 +370,4 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - roundsd $1, (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - roundss $1, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - roundss $1, (%rax), %xmm2

36 changes: 36 additions & 0 deletions llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse42.s
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s

crc32b %al, %ecx
Expand Down Expand Up @@ -30,6 +31,36 @@ pcmpistrm $1, (%rax), %xmm2
pcmpgtq %xmm0, %xmm2
pcmpgtq (%rax), %xmm2

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 3 3 2.00 crc32b %al, %ecx
# CHECK-NEXT: 3 6 2.00 * crc32b (%rax), %ecx
# CHECK-NEXT: 3 3 2.00 crc32l %eax, %ecx
# CHECK-NEXT: 3 6 2.00 * crc32l (%rax), %ecx
# CHECK-NEXT: 3 3 2.00 crc32w %ax, %ecx
# CHECK-NEXT: 3 6 2.00 * crc32w (%rax), %ecx
# CHECK-NEXT: 3 3 2.00 crc32b %al, %rcx
# CHECK-NEXT: 3 6 2.00 * crc32b (%rax), %rcx
# CHECK-NEXT: 3 3 2.00 crc32q %rax, %rcx
# CHECK-NEXT: 3 6 2.00 * crc32q (%rax), %rcx
# CHECK-NEXT: 9 14 5.00 pcmpestri $1, %xmm0, %xmm2
# CHECK-NEXT: 9 19 5.00 * pcmpestri $1, (%rax), %xmm2
# CHECK-NEXT: 9 14 5.00 pcmpestrm $1, %xmm0, %xmm2
# CHECK-NEXT: 9 19 5.00 * pcmpestrm $1, (%rax), %xmm2
# CHECK-NEXT: 3 7 2.00 pcmpistri $1, %xmm0, %xmm2
# CHECK-NEXT: 3 12 2.00 * pcmpistri $1, (%rax), %xmm2
# CHECK-NEXT: 3 8 2.00 pcmpistrm $1, %xmm0, %xmm2
# CHECK-NEXT: 3 13 2.00 * pcmpistrm $1, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pcmpgtq %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * pcmpgtq (%rax), %xmm2

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
Expand All @@ -46,6 +77,10 @@ pcmpgtq (%rax), %xmm2
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: 28.00 20.00 - 8.00 - 1.00 9.00 18.00 - 8.00 - 13.00 37.00 -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: 2.00 2.00 - - - - - - - - - - - - crc32b %al, %ecx
Expand All @@ -68,3 +103,4 @@ pcmpgtq (%rax), %xmm2
# CHECK-NEXT: 1.00 - - 1.00 - - 1.00 1.00 - - - - 2.00 - pcmpistrm $1, (%rax), %xmm2
# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - pcmpgtq %xmm0, %xmm2
# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - pcmpgtq (%rax), %xmm2

22 changes: 22 additions & 0 deletions llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse4a.s
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s

extrq %xmm0, %xmm2
Expand All @@ -9,6 +10,22 @@ insertq $22, $22, %xmm0, %xmm2
movntsd %xmm0, (%rax)
movntss %xmm0, (%rax)

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.50 extrq %xmm0, %xmm2
# CHECK-NEXT: 1 1 0.50 extrq $22, $2, %xmm2
# CHECK-NEXT: 1 2 2.00 insertq %xmm0, %xmm2
# CHECK-NEXT: 1 2 2.00 insertq $22, $22, %xmm0, %xmm2
# CHECK-NEXT: 1 3 1.00 * movntsd %xmm0, (%rax)
# CHECK-NEXT: 1 3 1.00 * movntss %xmm0, (%rax)

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
Expand All @@ -25,6 +42,10 @@ movntss %xmm0, (%rax)
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - - - 2.00 4.00 - - 2.00 2.00 5.00 5.00 -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - extrq %xmm0, %xmm2
Expand All @@ -33,3 +54,4 @@ movntss %xmm0, (%rax)
# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 2.00 2.00 - insertq $22, $22, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - movntsd %xmm0, (%rax)
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - movntss %xmm0, (%rax)

42 changes: 42 additions & 0 deletions llvm/test/tools/llvm-mca/X86/BtVer2/resources-ssse3.s
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s

palignr $1, %xmm0, %xmm2
Expand Down Expand Up @@ -39,6 +40,42 @@ psignd (%rax), %xmm2
psignw %xmm0, %xmm2
psignw (%rax), %xmm2

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.50 palignr $1, %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * palignr $1, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 phaddd %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * phaddd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 phaddsw %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * phaddsw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 phaddw %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * phaddw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 phsubd %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * phsubd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 phsubsw %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * phsubsw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 phsubw %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * phsubw (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 pmaddubsw %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * pmaddubsw (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 pmulhrsw %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * pmulhrsw (%rax), %xmm2
# CHECK-NEXT: 3 2 2.00 pshufb %xmm0, %xmm2
# CHECK-NEXT: 3 7 2.00 * pshufb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 psignb %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * psignb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 psignd %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * psignd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 psignw %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * psignw (%rax), %xmm2

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
Expand All @@ -55,6 +92,10 @@ psignw (%rax), %xmm2
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - - - 15.00 11.00 13.00 - - - 14.00 14.00 4.00

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - palignr $1, %xmm0, %xmm2
Expand Down Expand Up @@ -83,3 +124,4 @@ psignw (%rax), %xmm2
# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - psignd (%rax), %xmm2
# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - psignw %xmm0, %xmm2
# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - psignw (%rax), %xmm2

3 changes: 2 additions & 1 deletion llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -scheduler-stats < %s | FileCheck %s

vmulps (%rsi), %xmm0, %xmm0
Expand Down Expand Up @@ -48,7 +49,7 @@ add %rsi, %rsi
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - 1.00 - - 1.00 - 1.00 1.00 - - - - - -

# CHECK: Resource pressure by instruction:
Expand Down
23 changes: 11 additions & 12 deletions llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s
Original file line number Diff line number Diff line change
@@ -1,27 +1,26 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=100 < %s | FileCheck %s

add %edi, %eax

# CHECK: Iterations: 100
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 100
# CHECK-NEXT: Total Cycles: 103
# CHECK-NEXT: Dispatch Width: 2
# CHECK-NEXT: IPC: 0.97


# CHECK: Instruction Info:
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.50 addl %edi, %eax

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.50 addl %edi, %eax

# CHECK-LABEL: Resources:
# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
# CHECK-NEXT: [2] - JDiv
Expand All @@ -37,11 +36,11 @@ add %edi, %eax
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL


# CHECK: Resource pressure per iteration:
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - addl %edi, %eax
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - addl %edi, %eax

21 changes: 20 additions & 1 deletion llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s

vaddps %xmm0, %xmm0, %xmm1
vandps (%rdi), %xmm1, %xmm2

# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 9
# CHECK-NEXT: Dispatch Width: 2
# CHECK-NEXT: IPC: 0.22

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
Expand All @@ -15,8 +22,20 @@ vandps (%rdi), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm0, %xmm1
# CHECK-NEXT: 1 6 1.00 * vandps (%rdi), %xmm1, %xmm2


# CHECK: Timeline view:

# CHECK: Index 012345678

# CHECK: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
# CHECK-NEXT: [0,1] DeeeeeeER vandps (%rdi), %xmm1, %xmm2

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1
# CHECK-NEXT: 1. 1 1.0 0.0 0.0 vandps (%rdi), %xmm1, %xmm2

21 changes: 20 additions & 1 deletion llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s

vaddps %ymm0, %ymm0, %ymm1
vandps (%rdi), %ymm1, %ymm2

# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 10
# CHECK-NEXT: Dispatch Width: 2
# CHECK-NEXT: IPC: 0.20

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
Expand All @@ -15,8 +22,20 @@ vandps (%rdi), %ymm1, %ymm2
# CHECK-NEXT: 2 3 2.00 vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 2 6 2.00 * vandps (%rdi), %ymm1, %ymm2


# CHECK: Timeline view:

# CHECK: Index 0123456789

# CHECK: [0,0] DeeeER . vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [0,1] .DeeeeeeER vandps (%rdi), %ymm1, %ymm2

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 vandps (%rdi), %ymm1, %ymm2

368 changes: 368 additions & 0 deletions llvm/utils/update_mca_test_checks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,368 @@
#!/usr/bin/env python2.7

"""A test case update script.
This script is a utility to update LLVM 'llvm-mca' based test cases with new
FileCheck patterns.
"""

import argparse
from collections import defaultdict
import difflib
import glob
import os
import sys
import warnings

from UpdateTestChecks import common


# Comment leader of the test files this script rewrites.
COMMENT_CHAR = '#'

# Leading part of the "autogenerated" banner line; it is enough to recognise a
# banner regardless of which script name follows it.
ADVERT_PREFIX = (
    '{} NOTE: Assertions have been autogenerated by '.format(COMMENT_CHAR))

# Complete banner line, ending in this script's own file name.
ADVERT = '{}utils/{}'.format(
    ADVERT_PREFIX,
    os.path.basename(__file__))


class Error(Exception):
  """Fatal, user-facing failure.

  Raised for problems that should be reported as a plain one-line message
  rather than as a Python traceback.
  """


def _warn(msg):
""" Log a user warning to stderr.
"""
warnings.warn(msg, Warning, stacklevel=2)


def _configure_warnings(args):
warnings.resetwarnings()
if args.w:
warnings.simplefilter('ignore')
if args.Werror:
warnings.simplefilter('error')


def _showwarning(message, category, filename, lineno, file=None, line=None):
""" Version of warnings.showwarning that won't attempt to print out the
line at the location of the warning if the line text is not explicitly
specified.
"""
if file is None:
file = sys.stderr
if line is None:
line = ''
file.write(warnings.formatwarning(message, category, filename, lineno, line))


def _parse_args():
  """Parse the command line and apply the requested warning settings.

  Returns:
    The `argparse` namespace with `verbose`, `w`, `Werror`,
    `llvm_mca_binary` and `tests` (one or more test path patterns).

  A warning is issued when the chosen binary's file name is not 'llvm-mca'.
  """
  parser = argparse.ArgumentParser(description=__doc__)
  add = parser.add_argument

  add('-v', '--verbose', action='store_true', help='show verbose output')
  add('-w', action='store_true', help='suppress warnings')
  add('-Werror', action='store_true', help='promote warnings to errors')
  add('--llvm-mca-binary',
      metavar='<path>',
      default='llvm-mca',
      help='the binary to use to generate the test case '
           '(default: llvm-mca)')
  add('tests', metavar='<test-path>', nargs='+')

  parsed = parser.parse_args()

  # Filters must be in place before the first warning can be emitted below.
  _configure_warnings(parsed)

  binary_name = os.path.basename(parsed.llvm_mca_binary)
  if binary_name != 'llvm-mca':
    _warn('unexpected binary name: {}'.format(parsed.llvm_mca_binary))

  return parsed


def _find_run_lines(input_lines, args):
  """Collect the RUN lines of a test, joining backslash continuations.

  Args:
    input_lines: the lines of the test file.
    args: parsed options; only `args.verbose` is read.

  Returns:
    A list of RUN command strings (the text captured by
    `common.RUN_LINE_RE`). A RUN line ending in a backslash is merged with
    the RUN line that follows it, separated by a single space.
  """
  raw_lines = [m.group(1)
               for m in (common.RUN_LINE_RE.match(l) for l in input_lines)
               if m]

  run_lines = []
  for l in raw_lines:
    # '\\' is ONE backslash, the line-continuation marker. The raw literal
    # r'\\' used previously is two backslashes, so ordinary continuations
    # were never joined.
    if run_lines and run_lines[-1].endswith('\\'):
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l
    else:
      run_lines.append(l)

  if args.verbose:
    sys.stderr.write('Found {} RUN line{}:\n'.format(
        len(run_lines), '' if len(run_lines) == 1 else 's'))
    for line in run_lines:
      sys.stderr.write(' RUN: {}\n'.format(line))

  return run_lines


def _get_run_infos(run_lines, args):
  """Turn RUN lines into (check prefixes, tool arguments) pairs.

  Args:
    run_lines: RUN command strings of the form '<tool cmd> | <FileCheck cmd>'.
    args: parsed options; the file name of `args.llvm_mca_binary` is the tool
      name a RUN line must start with.

  Returns:
    A list of `(check_prefixes, tool_cmd_args)` tuples, one per usable RUN
    line. `check_prefixes` is the list of prefixes found by
    `common.CHECK_PREFIX_RE` in the FileCheck command (['CHECK'] when none are
    given); `tool_cmd_args` is the tool command with the tool name and the
    '%s' input placeholder removed.

  RUN lines that have no pipe, do not invoke the tool, or do not pipe into
  FileCheck are skipped with a warning.
  """
  tool_basename = os.path.basename(args.llvm_mca_binary)
  run_infos = []

  for run_line in run_lines:
    commands = [cmd.strip() for cmd in run_line.split('|', 1)]
    if len(commands) != 2:
      _warn('could not split tool and filecheck commands: {}'.format(run_line))
      continue
    tool_cmd, filecheck_cmd = commands

    if not tool_cmd.startswith(tool_basename + ' '):
      _warn('skipping non-{} RUN line: {}'.format(tool_basename, run_line))
      continue

    if not filecheck_cmd.startswith('FileCheck '):
      _warn('skipping non-FileCheck RUN line: {}'.format(run_line))
      continue

    # Drop the tool name, then the input-file placeholder in either spelling.
    tool_cmd_args = tool_cmd[len(tool_basename):].strip()
    tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

    check_prefixes = []
    for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd):
      check_prefixes.extend(m.group(1).split(','))
    if not check_prefixes:
      check_prefixes = ['CHECK']

    run_infos.append((check_prefixes, tool_cmd_args))

  return run_infos


def _get_block_infos(run_infos, test_path, args):  # noqa
  """Run the tool for every RUN line and decide which prefix checks what.

  The tool output is cut into 'blocks': runs of consecutive lines separated
  from each other by a blank line (lines holding only whitespace count as
  blank). Blocks are compared position by position across all RUN lines so
  that identical output can be checked once under a shared prefix.

  Args:
    run_infos: list of `(check_prefixes, tool_args)` pairs.
    test_path: path of the test file handed to the tool.
    args: parsed options; `args.llvm_mca_binary` is the tool to invoke.

  Returns:
    A nested mapping `block number -> block text -> sorted list of prefixes`
    naming the prefix(es) under which each block text should be written. The
    list may be empty when no prefix is left to identify the text.
  """

  def _block_key(tool_args, prefixes):
    """ Build a hashable key from the tool arguments and the prefix list.
    """
    return ' '.join([tool_args] + prefixes)

  all_blocks = {}
  max_block_len = 0

  # Produce the list of blocks for every RUN line.
  for prefixes, tool_args in run_infos:
    key = _block_key(tool_args, prefixes)
    # NOTE(review): invoke_tool is presumably returning the tool's output as
    # text - confirm against UpdateTestChecks.common.
    raw_tool_output = common.invoke_tool(args.llvm_mca_binary,
                                         tool_args,
                                         test_path)

    # Whitespace-only lines become empty so that they act as separators.
    normalised_lines = [line if line.strip() else ''
                        for line in raw_tool_output.splitlines()]
    raw_tool_output = '\n'.join(normalised_lines)

    # Trailing whitespace goes; leading newlines go too, but leading spaces
    # are kept so that table columns still line up.
    blocks = []
    for chunk in raw_tool_output.split('\n\n'):
      blocks.append(chunk.lstrip('\n').rstrip())
    all_blocks[key] = blocks
    max_block_len = max(max_block_len, len(blocks))

  # Pad with empty blocks so every RUN line has the same number of blocks.
  for key in all_blocks:
    missing = max_block_len - len(all_blocks[key])
    all_blocks[key].extend([''] * missing)

  # block number -> block text -> list of prefix sets
  block_infos = defaultdict(lambda: defaultdict(list))
  for prefixes, tool_args in run_infos:
    blocks = all_blocks[_block_key(tool_args, prefixes)]
    for block_num, block_text in enumerate(blocks):
      block_infos[block_num][block_text].append(set(prefixes))

  # Reduce the prefixes of every block to the smallest usable set.
  for block_num in range(len(block_infos)):
    texts = block_infos[block_num]

    # A prefix seen with more than one text at this position cannot be used
    # for any of them.
    # E.g. [ [{ALL,FOO}] , [{ALL,BAR}] ] -> [ [{FOO}] , [{BAR}] ]
    all_sets = list(texts.values())
    pruned_sets = []
    for i, setlist in enumerate(all_sets):
      other_set_values = set()
      for j, other_setlist in enumerate(all_sets):
        if i != j:
          for other_set in other_setlist:
            other_set_values |= other_set
      pruned_sets.append([s - other_set_values for s in setlist])

    for i, block_text in enumerate(list(texts)):
      candidates = pruned_sets[i]

      # Prefer the prefixes shared by every RUN line that produced this text.
      # E.g. [ {ALL,FOO} , {ALL,BAR} ] -> [{ALL}]
      common_values = candidates[0].copy()
      for s in candidates[1:]:
        common_values &= s
      if common_values:
        candidates = [common_values]

      # One prefix per remaining set is enough: take the alphabetically first
      # and warn about the ones that are dropped.
      chosen = set()
      for s in candidates:
        ordered = sorted(s)
        if not ordered:
          continue
        chosen.add(ordered[0])
        if len(ordered) > 1:
          _warn('Multiple prefixes generating same output: {} '
                '(discarding {})'.format(','.join(ordered),
                                         ','.join(ordered[1:])))

      texts[block_text] = sorted(chosen)

  return block_infos


def _write_output(test_path, input_lines, prefix_list, block_infos,  # noqa
                  args):
  """Regenerate the check lines of a test file and write it back.

  Args:
    test_path: file to overwrite.
    input_lines: current lines of the file.
    prefix_list: list of `(check_prefixes, tool_args)` pairs; only the
      prefixes are used.
    block_infos: mapping `block number -> block text -> list of prefixes`
      describing the check lines to emit.
    args: parsed options; only `args.verbose` is read.

  Existing check lines for the known prefixes are dropped and replaced by new
  ones appended at the end of the file. A prefix that has a hand-written
  '<prefix>-NOT:' line is left alone: its lines from that point on are kept
  and no new checks are generated for it. The file is only written when its
  content changes; a one-line summary goes to stderr either way.
  """
  prefix_set = set([prefix for prefixes, _ in prefix_list
                    for prefix in prefixes])
  not_prefix_set = set()

  output_lines = []
  for input_line in input_lines:
    # Drop any existing banner; a fresh one is prepended below when checks
    # are generated.
    if input_line.startswith(ADVERT_PREFIX):
      continue

    if input_line.startswith(COMMENT_CHAR):
      m = common.CHECK_RE.match(input_line)
      prefix = m.group(1) if m else None

      if '{}-NOT:'.format(prefix) in input_line:
        not_prefix_set.add(prefix)

      # Comments that are not checks for one of our prefixes are kept, and so
      # are the checks of a prefix that uses -NOT.
      if prefix not in prefix_set or prefix in not_prefix_set:
        output_lines.append(input_line)
        continue

    if common.should_add_line_to_output(input_line, prefix_set):
      # This input line of the function body will go as-is into the output.
      # Except make leading whitespace uniform: 2 spaces.
      input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)

      # Skip empty lines if the previous output line is also empty, or if
      # nothing has been emitted yet (indexing an empty list would raise).
      if input_line or (output_lines and output_lines[-1]):
        output_lines.append(input_line)

  # Add a blank line before the new checks if required. Nothing to separate
  # when no line was kept at all.
  if output_lines and output_lines[-1]:
    output_lines.append('')

  output_check_lines = []
  for block_num in range(len(block_infos)):
    for block_text in sorted(block_infos[block_num]):
      # Empty blocks are padding only.
      if not block_text:
        continue

      if block_infos[block_num][block_text]:
        lines = block_text.split('\n')
        for prefix in block_infos[block_num][block_text]:
          if prefix in not_prefix_set:
            _warn('not writing for prefix {0} due to presence of "{0}-NOT:" '
                  'in input file.'.format(prefix))
            continue

          # First line of a block opens a new match; the remaining lines must
          # follow it directly.
          output_check_lines.append(
              '{} {}: {}'.format(COMMENT_CHAR, prefix, lines[0]).rstrip())
          for line in lines[1:]:
            output_check_lines.append(
                '{} {}-NEXT: {}'.format(COMMENT_CHAR, prefix, line).rstrip())
          output_check_lines.append('')

  if output_check_lines:
    output_lines.insert(0, ADVERT)
    output_lines.extend(output_check_lines)

  if input_lines == output_lines:
    sys.stderr.write(' [unchanged]\n')
    return

  diff = list(difflib.Differ().compare(input_lines, output_lines))
  sys.stderr.write(
      ' [{} lines total ({} added, {} removed)]\n'.format(
          len(output_lines),
          len([l for l in diff if l[0] == '+']),
          len([l for l in diff if l[0] == '-'])))

  if args.verbose:
    sys.stderr.write(
        'Writing {} lines to {}...\n\n'.format(len(output_lines), test_path))

  # Binary mode so that '\n' is written verbatim.
  with open(test_path, 'wb') as f:
    for line in output_lines:
      f.write('{}\n'.format(line.rstrip()).encode())


def main():
  """Update every test named on the command line.

  Each `tests` argument is expanded as a glob pattern. For every resulting
  path the RUN lines are located, the tool is run for each of them and the
  file is rewritten with the regenerated check lines.

  Returns:
    0 on success.

  Raises:
    Error: if a path is not an existing regular file. This includes an
      argument that matches nothing on disk.
  """
  args = _parse_args()

  test_paths = []
  for pattern in args.tests:
    matches = glob.glob(pattern)
    # An argument that matches nothing is kept as a literal path so that the
    # existence check below reports it; previously it vanished silently and
    # the script exited successfully without doing anything.
    test_paths.extend(matches if matches else [pattern])

  for test_path in test_paths:
    sys.stderr.write('Test: {}\n'.format(test_path))

    # Call this per test. By default each warning will only be written once
    # per source location. Reset the warning filter so that now each warning
    # will be written once per source location per test.
    _configure_warnings(args)

    if args.verbose:
      sys.stderr.write(
          'Scanning for RUN lines in test file: {}\n'.format(test_path))

    if not os.path.isfile(test_path):
      raise Error('could not find test file: {}'.format(test_path))

    with open(test_path) as f:
      input_lines = [l.rstrip() for l in f]

    run_lines = _find_run_lines(input_lines, args)
    run_infos = _get_run_infos(run_lines, args)
    block_infos = _get_block_infos(run_infos, test_path, args)
    _write_output(test_path, input_lines, run_infos, block_infos, args)

  return 0


if __name__ == '__main__':
  # Install the warning printer that leaves out the offending source line.
  warnings.showwarning = _showwarning
  try:
    exit_code = main()
  except Error as err:
    # Expected failures are reported as a single line, without a traceback.
    sys.stdout.write('error: {}\n'.format(err))
    exit_code = 1
  sys.exit(exit_code)