| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,25 +1,41 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
| ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s --check-prefix=CORE2 | ||
| ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=nehalem | FileCheck %s --check-prefix=NEHALEM | ||
| ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 | FileCheck %s --check-prefix=BDVER2 | ||
| ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2 | ||
|
|
||
| declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) | ||
|
|
||
| define void @copy16bytes(i8* nocapture %a, i8* nocapture readonly %b) { | ||
| ; CORE2-LABEL: copy16bytes: | ||
| ; CORE2: ## %bb.0: | ||
| ; CORE2-NEXT: movq (%rsi), %rax | ||
| ; CORE2-NEXT: movq 8(%rsi), %rcx | ||
| ; CORE2-NEXT: movq %rcx, 8(%rdi) | ||
| ; CORE2-NEXT: movq %rax, (%rdi) | ||
| ; CORE2-NEXT: retq | ||
| ; | ||
| ; NEHALEM-LABEL: copy16bytes: | ||
| ; NEHALEM: ## %bb.0: | ||
| ; NEHALEM-NEXT: movups (%rsi), %xmm0 | ||
| ; NEHALEM-NEXT: movups %xmm0, (%rdi) | ||
| ; NEHALEM-NEXT: retq | ||
| ; | ||
| ; BDVER2-LABEL: copy16bytes: | ||
| ; BDVER2: ## %bb.0: | ||
| ; BDVER2-NEXT: movups (%rsi), %xmm0 | ||
| ; BDVER2-NEXT: movups %xmm0, (%rdi) | ||
| ; BDVER2-NEXT: retq | ||
| ; | ||
| ; BTVER2-LABEL: copy16bytes: | ||
| ; BTVER2: ## %bb.0: | ||
| ; BTVER2-NEXT: vmovups (%rsi), %xmm0 | ||
| ; BTVER2-NEXT: vmovups %xmm0, (%rdi) | ||
| ; BTVER2-NEXT: retq | ||
| call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i1 false) | ||
| ret void | ||
|
|
||
| ; CHECK-LABEL: copy16bytes | ||
|
|
||
|
|
||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,95 @@ | ||
| # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1000 -timeline < %s | FileCheck %s | ||
|
|
||
| add %eax, %ecx | ||
| add %esi, %eax | ||
| add %eax, %edx | ||
|
|
||
| # CHECK: Iterations: 1000 | ||
| # CHECK-NEXT: Instructions: 3000 | ||
| # CHECK-NEXT: Total Cycles: 1004 | ||
| # CHECK-NEXT: Total uOps: 3000 | ||
|
|
||
| # CHECK: Dispatch Width: 4 | ||
| # CHECK-NEXT: uOps Per Cycle: 2.99 | ||
| # CHECK-NEXT: IPC: 2.99 | ||
| # CHECK-NEXT: Block RThroughput: 1.0 | ||
|
|
||
| # CHECK: Instruction Info: | ||
| # CHECK-NEXT: [1]: #uOps | ||
| # CHECK-NEXT: [2]: Latency | ||
| # CHECK-NEXT: [3]: RThroughput | ||
| # CHECK-NEXT: [4]: MayLoad | ||
| # CHECK-NEXT: [5]: MayStore | ||
| # CHECK-NEXT: [6]: HasSideEffects (U) | ||
|
|
||
| # CHECK: [1] [2] [3] [4] [5] [6] Instructions: | ||
| # CHECK-NEXT: 1 1 0.33 addl %eax, %ecx | ||
| # CHECK-NEXT: 1 1 0.33 addl %esi, %eax | ||
| # CHECK-NEXT: 1 1 0.33 addl %eax, %edx | ||
|
|
||
| # CHECK: Resources: | ||
| # CHECK-NEXT: [0] - SBDivider | ||
| # CHECK-NEXT: [1] - SBFPDivider | ||
| # CHECK-NEXT: [2] - SBPort0 | ||
| # CHECK-NEXT: [3] - SBPort1 | ||
| # CHECK-NEXT: [4] - SBPort4 | ||
| # CHECK-NEXT: [5] - SBPort5 | ||
| # CHECK-NEXT: [6.0] - SBPort23 | ||
| # CHECK-NEXT: [6.1] - SBPort23 | ||
|
|
||
| # CHECK: Resource pressure per iteration: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] | ||
| # CHECK-NEXT: - - 1.00 1.00 - 1.00 - - | ||
|
|
||
| # CHECK: Resource pressure by instruction: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: | ||
| # CHECK-NEXT: - - - 1.00 - - - - addl %eax, %ecx | ||
| # CHECK-NEXT: - - - - - 1.00 - - addl %esi, %eax | ||
| # CHECK-NEXT: - - 1.00 - - - - - addl %eax, %edx | ||
|
|
||
| # CHECK: Timeline view: | ||
| # CHECK-NEXT: 0123 | ||
| # CHECK-NEXT: Index 0123456789 | ||
|
|
||
| # CHECK: [0,0] DeER . . . addl %eax, %ecx | ||
| # CHECK-NEXT: [0,1] DeER . . . addl %esi, %eax | ||
| # CHECK-NEXT: [0,2] D=eER. . . addl %eax, %edx | ||
| # CHECK-NEXT: [1,0] D=eER. . . addl %eax, %ecx | ||
| # CHECK-NEXT: [1,1] .DeER. . . addl %esi, %eax | ||
| # CHECK-NEXT: [1,2] .D=eER . . addl %eax, %edx | ||
| # CHECK-NEXT: [2,0] .D=eER . . addl %eax, %ecx | ||
| # CHECK-NEXT: [2,1] .D=eER . . addl %esi, %eax | ||
| # CHECK-NEXT: [2,2] . D=eER . . addl %eax, %edx | ||
| # CHECK-NEXT: [3,0] . D=eER . . addl %eax, %ecx | ||
| # CHECK-NEXT: [3,1] . D=eER . . addl %esi, %eax | ||
| # CHECK-NEXT: [3,2] . D==eER . . addl %eax, %edx | ||
| # CHECK-NEXT: [4,0] . D=eER . . addl %eax, %ecx | ||
| # CHECK-NEXT: [4,1] . D=eER . . addl %esi, %eax | ||
| # CHECK-NEXT: [4,2] . D==eER . . addl %eax, %edx | ||
| # CHECK-NEXT: [5,0] . D==eER . . addl %eax, %ecx | ||
| # CHECK-NEXT: [5,1] . D=eER . . addl %esi, %eax | ||
| # CHECK-NEXT: [5,2] . D==eER. . addl %eax, %edx | ||
| # CHECK-NEXT: [6,0] . D==eER. . addl %eax, %ecx | ||
| # CHECK-NEXT: [6,1] . D==eER. . addl %esi, %eax | ||
| # CHECK-NEXT: [6,2] . D==eER . addl %eax, %edx | ||
| # CHECK-NEXT: [7,0] . D==eER . addl %eax, %ecx | ||
| # CHECK-NEXT: [7,1] . D==eER . addl %esi, %eax | ||
| # CHECK-NEXT: [7,2] . D===eER . addl %eax, %edx | ||
| # CHECK-NEXT: [8,0] . .D==eER . addl %eax, %ecx | ||
| # CHECK-NEXT: [8,1] . .D==eER . addl %esi, %eax | ||
| # CHECK-NEXT: [8,2] . .D===eER. addl %eax, %edx | ||
| # CHECK-NEXT: [9,0] . .D===eER. addl %eax, %ecx | ||
| # CHECK-NEXT: [9,1] . . D==eER. addl %esi, %eax | ||
| # CHECK-NEXT: [9,2] . . D===eER addl %eax, %edx | ||
|
|
||
| # CHECK: Average Wait times (based on the timeline view): | ||
| # CHECK-NEXT: [0]: Executions | ||
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue | ||
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready | ||
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage | ||
|
|
||
| # CHECK: [0] [1] [2] [3] | ||
| # CHECK-NEXT: 0. 10 2.5 0.1 0.0 addl %eax, %ecx | ||
| # CHECK-NEXT: 1. 10 2.2 0.1 0.0 addl %esi, %eax | ||
| # CHECK-NEXT: 2. 10 3.0 0.0 0.0 addl %eax, %edx |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,63 @@ | ||
| # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=100 -resource-pressure=false -timeline -timeline-max-iterations=2 < %s | FileCheck %s | ||
|
|
||
| ## Sets register RAX. | ||
| imulq $5, %rcx, %rax | ||
|
|
||
| ## Kills the previous definition of RAX. | ||
| ## The upper portion of RAX is cleared. | ||
| lzcnt %ecx, %eax | ||
|
|
||
| ## The AND can start immediately after the LZCNT. | ||
| ## It doesn't need to wait for the IMUL. | ||
| and %rcx, %rax | ||
| bsf %rax, %rcx | ||
|
|
||
| # CHECK: Iterations: 100 | ||
| # CHECK-NEXT: Instructions: 400 | ||
| # CHECK-NEXT: Total Cycles: 803 | ||
| # CHECK-NEXT: Total uOps: 400 | ||
|
|
||
| # CHECK: Dispatch Width: 4 | ||
| # CHECK-NEXT: uOps Per Cycle: 0.50 | ||
| # CHECK-NEXT: IPC: 0.50 | ||
| # CHECK-NEXT: Block RThroughput: 3.0 | ||
|
|
||
| # CHECK: Instruction Info: | ||
| # CHECK-NEXT: [1]: #uOps | ||
| # CHECK-NEXT: [2]: Latency | ||
| # CHECK-NEXT: [3]: RThroughput | ||
| # CHECK-NEXT: [4]: MayLoad | ||
| # CHECK-NEXT: [5]: MayStore | ||
| # CHECK-NEXT: [6]: HasSideEffects (U) | ||
|
|
||
| # CHECK: [1] [2] [3] [4] [5] [6] Instructions: | ||
| # CHECK-NEXT: 1 3 1.00 imulq $5, %rcx, %rax | ||
| # CHECK-NEXT: 1 3 1.00 lzcntl %ecx, %eax | ||
| # CHECK-NEXT: 1 1 0.33 andq %rcx, %rax | ||
| # CHECK-NEXT: 1 3 1.00 bsfq %rax, %rcx | ||
|
|
||
| # CHECK: Timeline view: | ||
| # CHECK-NEXT: 012345678 | ||
| # CHECK-NEXT: Index 0123456789 | ||
|
|
||
| # CHECK: [0,0] DeeeER . . . imulq $5, %rcx, %rax | ||
| # CHECK-NEXT: [0,1] D=eeeER . . . lzcntl %ecx, %eax | ||
| # CHECK-NEXT: [0,2] D====eER . . . andq %rcx, %rax | ||
| # CHECK-NEXT: [0,3] D=====eeeER . . bsfq %rax, %rcx | ||
| # CHECK-NEXT: [1,0] .D=======eeeER . . imulq $5, %rcx, %rax | ||
| # CHECK-NEXT: [1,1] .D========eeeER. . lzcntl %ecx, %eax | ||
| # CHECK-NEXT: [1,2] .D===========eER . andq %rcx, %rax | ||
| # CHECK-NEXT: [1,3] .D============eeeER bsfq %rax, %rcx | ||
|
|
||
| # CHECK: Average Wait times (based on the timeline view): | ||
| # CHECK-NEXT: [0]: Executions | ||
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue | ||
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready | ||
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage | ||
|
|
||
| # CHECK: [0] [1] [2] [3] | ||
| # CHECK-NEXT: 0. 2 4.5 0.5 0.0 imulq $5, %rcx, %rax | ||
| # CHECK-NEXT: 1. 2 5.5 1.5 0.0 lzcntl %ecx, %eax | ||
| # CHECK-NEXT: 2. 2 8.5 0.0 0.0 andq %rcx, %rax | ||
| # CHECK-NEXT: 3. 2 9.5 0.0 0.0 bsfq %rax, %rcx |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,137 @@ | ||
| # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=100 -resource-pressure=false -timeline -timeline-max-iterations=2 < %s | FileCheck %s | ||
|
|
||
| # In this test, the VDIVPS takes 29 cycles to write to register YMM3. The first | ||
| # VADDPS does not depend on the VDIVPS (the WAW dependency is eliminated at the | ||
| # register renaming stage). So the first VADDPS can be executed in parallel with | ||
| # the VDIVPS. That VADDPS also writes to register XMM3, and the upper half of | ||
| # YMM3 is implicitly cleared. As a consequence, the definition of YMM3 from the | ||
| # VDIVPS is killed, and the subsequent VADDPS instructions don't need to wait | ||
| # for the VDIVPS to complete. | ||
| # The block reciprocal throughput is limited by the VDIVPS reciprocal throughput | ||
| # (which is 28 cycles). The sequence of VADDPS can be executed while the VDIVPS | ||
| # is still in flight; their latency is "hidden" by the long latency of the VDIVPS. | ||
|
|
||
| vdivps %ymm0, %ymm1, %ymm3 | ||
| vaddps %xmm0, %xmm1, %xmm3 | ||
| vaddps %ymm3, %ymm1, %ymm4 | ||
| vaddps %ymm3, %ymm1, %ymm4 | ||
| vaddps %ymm3, %ymm1, %ymm4 | ||
| vaddps %ymm3, %ymm1, %ymm4 | ||
| vaddps %ymm3, %ymm1, %ymm4 | ||
| vaddps %ymm3, %ymm1, %ymm4 | ||
| vaddps %ymm3, %ymm1, %ymm4 | ||
| vaddps %ymm3, %ymm1, %ymm4 | ||
| vaddps %ymm3, %ymm1, %ymm4 | ||
| vaddps %ymm3, %ymm1, %ymm4 | ||
| vaddps %ymm3, %ymm1, %ymm4 | ||
| vaddps %ymm3, %ymm1, %ymm4 | ||
| vaddps %ymm3, %ymm1, %ymm4 | ||
| vaddps %ymm3, %ymm1, %ymm4 | ||
| vaddps %ymm3, %ymm1, %ymm4 | ||
| vandps %xmm4, %xmm1, %xmm0 | ||
|
|
||
| # CHECK: Iterations: 100 | ||
| # CHECK-NEXT: Instructions: 1800 | ||
| # CHECK-NEXT: Total Cycles: 2804 | ||
| # CHECK-NEXT: Total uOps: 2000 | ||
|
|
||
| # CHECK: Dispatch Width: 4 | ||
| # CHECK-NEXT: uOps Per Cycle: 0.71 | ||
| # CHECK-NEXT: IPC: 0.64 | ||
| # CHECK-NEXT: Block RThroughput: 28.0 | ||
|
|
||
| # CHECK: Instruction Info: | ||
| # CHECK-NEXT: [1]: #uOps | ||
| # CHECK-NEXT: [2]: Latency | ||
| # CHECK-NEXT: [3]: RThroughput | ||
| # CHECK-NEXT: [4]: MayLoad | ||
| # CHECK-NEXT: [5]: MayStore | ||
| # CHECK-NEXT: [6]: HasSideEffects (U) | ||
|
|
||
| # CHECK: [1] [2] [3] [4] [5] [6] Instructions: | ||
| # CHECK-NEXT: 3 29 28.00 vdivps %ymm0, %ymm1, %ymm3 | ||
| # CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm3 | ||
| # CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 1 1 1.00 vandps %xmm4, %xmm1, %xmm0 | ||
|
|
||
| # CHECK: Timeline view: | ||
| # CHECK-NEXT: 0123456789 0123456789 0123456789 | ||
| # CHECK-NEXT: Index 0123456789 0123456789 0123456789 | ||
|
|
||
| # CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . vdivps %ymm0, %ymm1, %ymm3 | ||
| # CHECK-NEXT: [0,1] DeeeE--------------------------R . . . . . . vaddps %xmm0, %xmm1, %xmm3 | ||
| # CHECK-NEXT: [0,2] .D==eeeE-----------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [0,3] .D===eeeE----------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [0,4] .D====eeeE---------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [0,5] .D=====eeeE--------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [0,6] . D=====eeeE-------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [0,7] . D======eeeE------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [0,8] . D=======eeeE-----------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [0,9] . D========eeeE----------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [0,10] . D========eeeE---------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [0,11] . D=========eeeE--------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [0,12] . D==========eeeE-------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [0,13] . D===========eeeE------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [0,14] . D===========eeeE-----------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [0,15] . D============eeeE----------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [0,16] . D=============eeeE---------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [0,17] . D================eE--------R . . . . . . vandps %xmm4, %xmm1, %xmm0 | ||
| # CHECK-NEXT: [1,0] . D=======================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeER vdivps %ymm0, %ymm1, %ymm3 | ||
| # CHECK-NEXT: [1,1] . D================eeeE---------------------------------R vaddps %xmm0, %xmm1, %xmm3 | ||
| # CHECK-NEXT: [1,2] . .D==================eeeE------------------------------R vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [1,3] . .D===================eeeE-----------------------------R vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [1,4] . .D====================eeeE----------------------------R vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [1,5] . .D=====================eeeE---------------------------R vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [1,6] . . D=====================eeeE--------------------------R vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [1,7] . . D======================eeeE-------------------------R vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [1,8] . . D=======================eeeE------------------------R vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [1,9] . . D========================eeeE-----------------------R vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [1,10] . . D========================eeeE----------------------R vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [1,11] . . D=========================eeeE---------------------R vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [1,12] . . D==========================eeeE--------------------R vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [1,13] . . D===========================eeeE-------------------R vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [1,14] . . D===========================eeeE------------------R vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [1,15] . . D============================eeeE-----------------R vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [1,16] . . D=============================eeeE----------------R vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: [1,17] . . D================================eE---------------R vandps %xmm4, %xmm1, %xmm0 | ||
|
|
||
| # CHECK: Average Wait times (based on the timeline view): | ||
| # CHECK-NEXT: [0]: Executions | ||
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue | ||
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready | ||
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage | ||
|
|
||
| # CHECK: [0] [1] [2] [3] | ||
| # CHECK-NEXT: 0. 2 12.5 4.0 0.0 vdivps %ymm0, %ymm1, %ymm3 | ||
| # CHECK-NEXT: 1. 2 9.0 0.5 29.5 vaddps %xmm0, %xmm1, %xmm3 | ||
| # CHECK-NEXT: 2. 2 11.0 0.0 26.5 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 3. 2 12.0 1.0 25.5 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 4. 2 13.0 2.0 24.5 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 5. 2 14.0 3.0 23.5 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 6. 2 14.0 4.0 22.5 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 7. 2 15.0 5.0 21.5 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 8. 2 16.0 6.0 20.5 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 9. 2 17.0 7.0 19.5 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 10. 2 17.0 8.0 18.5 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 11. 2 18.0 9.0 17.5 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 12. 2 19.0 10.0 16.5 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 13. 2 20.0 11.0 15.5 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 14. 2 20.0 12.0 14.5 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 15. 2 21.0 13.0 13.5 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 16. 2 22.0 14.0 12.5 vaddps %ymm3, %ymm1, %ymm4 | ||
| # CHECK-NEXT: 17. 2 25.0 0.0 11.5 vandps %xmm4, %xmm1, %xmm0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,72 @@ | ||
| # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s | ||
|
|
||
| # Perf stat reports an IPC of 1.97 for this block of code. | ||
|
|
||
| # The CMP instruction doesn't depend on the value of EAX. It can set the flags | ||
| # without having to read the inputs. | ||
|
|
||
| cmp %eax, %eax | ||
| cmovae %ebx, %eax | ||
|
|
||
| # CHECK: Iterations: 1500 | ||
| # CHECK-NEXT: Instructions: 3000 | ||
| # CHECK-NEXT: Total Cycles: 4503 | ||
| # CHECK-NEXT: Total uOps: 4500 | ||
|
|
||
| # CHECK: Dispatch Width: 4 | ||
| # CHECK-NEXT: uOps Per Cycle: 1.00 | ||
| # CHECK-NEXT: IPC: 0.67 | ||
| # CHECK-NEXT: Block RThroughput: 0.8 | ||
|
|
||
| # CHECK: Instruction Info: | ||
| # CHECK-NEXT: [1]: #uOps | ||
| # CHECK-NEXT: [2]: Latency | ||
| # CHECK-NEXT: [3]: RThroughput | ||
| # CHECK-NEXT: [4]: MayLoad | ||
| # CHECK-NEXT: [5]: MayStore | ||
| # CHECK-NEXT: [6]: HasSideEffects (U) | ||
|
|
||
| # CHECK: [1] [2] [3] [4] [5] [6] Instructions: | ||
| # CHECK-NEXT: 1 1 0.33 cmpl %eax, %eax | ||
| # CHECK-NEXT: 2 2 0.67 cmovael %ebx, %eax | ||
|
|
||
| # CHECK: Resources: | ||
| # CHECK-NEXT: [0] - SBDivider | ||
| # CHECK-NEXT: [1] - SBFPDivider | ||
| # CHECK-NEXT: [2] - SBPort0 | ||
| # CHECK-NEXT: [3] - SBPort1 | ||
| # CHECK-NEXT: [4] - SBPort4 | ||
| # CHECK-NEXT: [5] - SBPort5 | ||
| # CHECK-NEXT: [6.0] - SBPort23 | ||
| # CHECK-NEXT: [6.1] - SBPort23 | ||
|
|
||
| # CHECK: Resource pressure per iteration: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] | ||
| # CHECK-NEXT: - - 1.00 1.00 - 1.00 - - | ||
|
|
||
| # CHECK: Resource pressure by instruction: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: | ||
| # CHECK-NEXT: - - - - - 1.00 - - cmpl %eax, %eax | ||
| # CHECK-NEXT: - - 1.00 1.00 - - - - cmovael %ebx, %eax | ||
|
|
||
| # CHECK: Timeline view: | ||
| # CHECK-NEXT: 01 | ||
| # CHECK-NEXT: Index 0123456789 | ||
|
|
||
| # CHECK: [0,0] DeER . .. cmpl %eax, %eax | ||
| # CHECK-NEXT: [0,1] D=eeER .. cmovael %ebx, %eax | ||
| # CHECK-NEXT: [1,0] D===eER .. cmpl %eax, %eax | ||
| # CHECK-NEXT: [1,1] .D===eeER .. cmovael %ebx, %eax | ||
| # CHECK-NEXT: [2,0] .D=====eER.. cmpl %eax, %eax | ||
| # CHECK-NEXT: [2,1] . D=====eeER cmovael %ebx, %eax | ||
|
|
||
| # CHECK: Average Wait times (based on the timeline view): | ||
| # CHECK-NEXT: [0]: Executions | ||
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue | ||
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready | ||
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage | ||
|
|
||
| # CHECK: [0] [1] [2] [3] | ||
| # CHECK-NEXT: 0. 3 3.7 0.3 0.0 cmpl %eax, %eax | ||
| # CHECK-NEXT: 1. 3 4.0 0.0 0.0 cmovael %ebx, %eax |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s | ||
|
|
||
| # perf stat reports an IPC of 2.00 for this block of code. | ||
|
|
||
| # All of the vector packed compares from this test are dependency-breaking | ||
| # instructions. That means there is no RAW dependency between any of the | ||
| # instructions, and the code can be fully parallelized in hardware. | ||
|
|
||
| vpcmpeqb %xmm0, %xmm0, %xmm1 | ||
| vpcmpeqw %xmm1, %xmm1, %xmm2 | ||
| vpcmpeqd %xmm2, %xmm2, %xmm3 | ||
| vpcmpeqq %xmm3, %xmm3, %xmm0 | ||
|
|
||
| # CHECK: Iterations: 1500 | ||
| # CHECK-NEXT: Instructions: 6000 | ||
| # CHECK-NEXT: Total Cycles: 6003 | ||
| # CHECK-NEXT: Total uOps: 6000 | ||
|
|
||
| # CHECK: Dispatch Width: 4 | ||
| # CHECK-NEXT: uOps Per Cycle: 1.00 | ||
| # CHECK-NEXT: IPC: 1.00 | ||
| # CHECK-NEXT: Block RThroughput: 2.0 | ||
|
|
||
| # CHECK: Instruction Info: | ||
| # CHECK-NEXT: [1]: #uOps | ||
| # CHECK-NEXT: [2]: Latency | ||
| # CHECK-NEXT: [3]: RThroughput | ||
| # CHECK-NEXT: [4]: MayLoad | ||
| # CHECK-NEXT: [5]: MayStore | ||
| # CHECK-NEXT: [6]: HasSideEffects (U) | ||
|
|
||
| # CHECK: [1] [2] [3] [4] [5] [6] Instructions: | ||
| # CHECK-NEXT: 1 1 0.50 vpcmpeqb %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm1, %xmm1, %xmm2 | ||
| # CHECK-NEXT: 1 1 0.50 vpcmpeqd %xmm2, %xmm2, %xmm3 | ||
| # CHECK-NEXT: 1 1 0.50 vpcmpeqq %xmm3, %xmm3, %xmm0 | ||
|
|
||
| # CHECK: Resources: | ||
| # CHECK-NEXT: [0] - SBDivider | ||
| # CHECK-NEXT: [1] - SBFPDivider | ||
| # CHECK-NEXT: [2] - SBPort0 | ||
| # CHECK-NEXT: [3] - SBPort1 | ||
| # CHECK-NEXT: [4] - SBPort4 | ||
| # CHECK-NEXT: [5] - SBPort5 | ||
| # CHECK-NEXT: [6.0] - SBPort23 | ||
| # CHECK-NEXT: [6.1] - SBPort23 | ||
|
|
||
| # CHECK: Resource pressure per iteration: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] | ||
| # CHECK-NEXT: - - - 2.00 - 2.00 - - | ||
|
|
||
| # CHECK: Resource pressure by instruction: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: | ||
| # CHECK-NEXT: - - - - - 1.00 - - vpcmpeqb %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: - - - 1.00 - - - - vpcmpeqw %xmm1, %xmm1, %xmm2 | ||
| # CHECK-NEXT: - - - - - 1.00 - - vpcmpeqd %xmm2, %xmm2, %xmm3 | ||
| # CHECK-NEXT: - - - 1.00 - - - - vpcmpeqq %xmm3, %xmm3, %xmm0 | ||
|
|
||
| # CHECK: Timeline view: | ||
| # CHECK-NEXT: 01234 | ||
| # CHECK-NEXT: Index 0123456789 | ||
|
|
||
| # CHECK: [0,0] DeER . . . vpcmpeqb %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: [0,1] D=eER. . . vpcmpeqw %xmm1, %xmm1, %xmm2 | ||
| # CHECK-NEXT: [0,2] D==eER . . vpcmpeqd %xmm2, %xmm2, %xmm3 | ||
| # CHECK-NEXT: [0,3] D===eER . . vpcmpeqq %xmm3, %xmm3, %xmm0 | ||
| # CHECK-NEXT: [1,0] .D===eER . . vpcmpeqb %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: [1,1] .D====eER . . vpcmpeqw %xmm1, %xmm1, %xmm2 | ||
| # CHECK-NEXT: [1,2] .D=====eER. . vpcmpeqd %xmm2, %xmm2, %xmm3 | ||
| # CHECK-NEXT: [1,3] .D======eER . vpcmpeqq %xmm3, %xmm3, %xmm0 | ||
| # CHECK-NEXT: [2,0] . D======eER . vpcmpeqb %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: [2,1] . D=======eER . vpcmpeqw %xmm1, %xmm1, %xmm2 | ||
| # CHECK-NEXT: [2,2] . D========eER. vpcmpeqd %xmm2, %xmm2, %xmm3 | ||
| # CHECK-NEXT: [2,3] . D=========eER vpcmpeqq %xmm3, %xmm3, %xmm0 | ||
|
|
||
| # CHECK: Average Wait times (based on the timeline view): | ||
| # CHECK-NEXT: [0]: Executions | ||
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue | ||
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready | ||
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage | ||
|
|
||
| # CHECK: [0] [1] [2] [3] | ||
| # CHECK-NEXT: 0. 3 4.0 0.3 0.0 vpcmpeqb %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: 1. 3 5.0 0.0 0.0 vpcmpeqw %xmm1, %xmm1, %xmm2 | ||
| # CHECK-NEXT: 2. 3 6.0 0.0 0.0 vpcmpeqd %xmm2, %xmm2, %xmm3 | ||
| # CHECK-NEXT: 3. 3 7.0 0.0 0.0 vpcmpeqq %xmm3, %xmm3, %xmm0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s | ||
|
|
||
| # perf stat reports an IPC of 2.00 for this block of code. | ||
|
|
||
| # All of the vector packed compares from this test are zero idioms. These zero | ||
| # idioms are all detected and removed by the register renamer. That means no | ||
| # uOp is executed, and there is no RAW dependency for any of the packed | ||
| # compares. | ||
|
|
||
| vpcmpgtb %xmm0, %xmm0, %xmm1 | ||
| vpcmpgtw %xmm1, %xmm1, %xmm2 | ||
| vpcmpgtd %xmm2, %xmm2, %xmm3 | ||
| vpcmpgtq %xmm3, %xmm3, %xmm0 | ||
|
|
||
| # CHECK: Iterations: 1500 | ||
| # CHECK-NEXT: Instructions: 6000 | ||
| # CHECK-NEXT: Total Cycles: 1501 | ||
| # CHECK-NEXT: Total uOps: 6000 | ||
|
|
||
| # CHECK: Dispatch Width: 4 | ||
| # CHECK-NEXT: uOps Per Cycle: 4.00 | ||
| # CHECK-NEXT: IPC: 4.00 | ||
| # CHECK-NEXT: Block RThroughput: 1.0 | ||
|
|
||
| # CHECK: Instruction Info: | ||
| # CHECK-NEXT: [1]: #uOps | ||
| # CHECK-NEXT: [2]: Latency | ||
| # CHECK-NEXT: [3]: RThroughput | ||
| # CHECK-NEXT: [4]: MayLoad | ||
| # CHECK-NEXT: [5]: MayStore | ||
| # CHECK-NEXT: [6]: HasSideEffects (U) | ||
|
|
||
| # CHECK: [1] [2] [3] [4] [5] [6] Instructions: | ||
| # CHECK-NEXT: 1 0 0.25 vpcmpgtb %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: 1 0 0.25 vpcmpgtw %xmm1, %xmm1, %xmm2 | ||
| # CHECK-NEXT: 1 0 0.25 vpcmpgtd %xmm2, %xmm2, %xmm3 | ||
| # CHECK-NEXT: 1 0 0.25 vpcmpgtq %xmm3, %xmm3, %xmm0 | ||
|
|
||
| # CHECK: Resources: | ||
| # CHECK-NEXT: [0] - SBDivider | ||
| # CHECK-NEXT: [1] - SBFPDivider | ||
| # CHECK-NEXT: [2] - SBPort0 | ||
| # CHECK-NEXT: [3] - SBPort1 | ||
| # CHECK-NEXT: [4] - SBPort4 | ||
| # CHECK-NEXT: [5] - SBPort5 | ||
| # CHECK-NEXT: [6.0] - SBPort23 | ||
| # CHECK-NEXT: [6.1] - SBPort23 | ||
|
|
||
| # CHECK: Resource pressure per iteration: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] | ||
| # CHECK-NEXT: - - - - - - - - | ||
|
|
||
| # CHECK: Resource pressure by instruction: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: | ||
| # CHECK-NEXT: - - - - - - - - vpcmpgtb %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: - - - - - - - - vpcmpgtw %xmm1, %xmm1, %xmm2 | ||
| # CHECK-NEXT: - - - - - - - - vpcmpgtd %xmm2, %xmm2, %xmm3 | ||
| # CHECK-NEXT: - - - - - - - - vpcmpgtq %xmm3, %xmm3, %xmm0 | ||
|
|
||
| # CHECK: Timeline view: | ||
| # CHECK-NEXT: Index 0123 | ||
|
|
||
| # CHECK: [0,0] DR . vpcmpgtb %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: [0,1] DR . vpcmpgtw %xmm1, %xmm1, %xmm2 | ||
| # CHECK-NEXT: [0,2] DR . vpcmpgtd %xmm2, %xmm2, %xmm3 | ||
| # CHECK-NEXT: [0,3] DR . vpcmpgtq %xmm3, %xmm3, %xmm0 | ||
| # CHECK-NEXT: [1,0] .DR. vpcmpgtb %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: [1,1] .DR. vpcmpgtw %xmm1, %xmm1, %xmm2 | ||
| # CHECK-NEXT: [1,2] .DR. vpcmpgtd %xmm2, %xmm2, %xmm3 | ||
| # CHECK-NEXT: [1,3] .DR. vpcmpgtq %xmm3, %xmm3, %xmm0 | ||
| # CHECK-NEXT: [2,0] . DR vpcmpgtb %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: [2,1] . DR vpcmpgtw %xmm1, %xmm1, %xmm2 | ||
| # CHECK-NEXT: [2,2] . DR vpcmpgtd %xmm2, %xmm2, %xmm3 | ||
| # CHECK-NEXT: [2,3] . DR vpcmpgtq %xmm3, %xmm3, %xmm0 | ||
|
|
||
| # CHECK: Average Wait times (based on the timeline view): | ||
| # CHECK-NEXT: [0]: Executions | ||
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue | ||
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready | ||
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage | ||
|
|
||
| # CHECK: [0] [1] [2] [3] | ||
| # CHECK-NEXT: 0. 3 0.0 0.0 0.0 vpcmpgtb %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: 1. 3 0.0 0.0 0.0 vpcmpgtw %xmm1, %xmm1, %xmm2 | ||
| # CHECK-NEXT: 2. 3 0.0 0.0 0.0 vpcmpgtd %xmm2, %xmm2, %xmm3 | ||
| # CHECK-NEXT: 3. 3 0.0 0.0 0.0 vpcmpgtq %xmm3, %xmm3, %xmm0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,73 @@ | ||
| # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s | ||
|
|
||
| # perf stat reports an IPC of 1.00 for this code block. | ||
|
|
||
| # Although both SBBs are dependency-breaking instructions, there is still an | ||
| # implicit dependency on EFLAGS which limits the ILP. So, the hardware backend | ||
| # can only execute one instruction per cycle. | ||
|
|
||
| sbb %edx, %edx | ||
| sbb %eax, %eax | ||
|
|
||
| # CHECK: Iterations: 1500 | ||
| # CHECK-NEXT: Instructions: 3000 | ||
| # CHECK-NEXT: Total Cycles: 6003 | ||
| # CHECK-NEXT: Total uOps: 6000 | ||
|
|
||
| # CHECK: Dispatch Width: 4 | ||
| # CHECK-NEXT: uOps Per Cycle: 1.00 | ||
| # CHECK-NEXT: IPC: 0.50 | ||
| # CHECK-NEXT: Block RThroughput: 1.0 | ||
|
|
||
| # CHECK: Instruction Info: | ||
| # CHECK-NEXT: [1]: #uOps | ||
| # CHECK-NEXT: [2]: Latency | ||
| # CHECK-NEXT: [3]: RThroughput | ||
| # CHECK-NEXT: [4]: MayLoad | ||
| # CHECK-NEXT: [5]: MayStore | ||
| # CHECK-NEXT: [6]: HasSideEffects (U) | ||
|
|
||
| # CHECK: [1] [2] [3] [4] [5] [6] Instructions: | ||
| # CHECK-NEXT: 2 2 0.67 sbbl %edx, %edx | ||
| # CHECK-NEXT: 2 2 0.67 sbbl %eax, %eax | ||
|
|
||
| # CHECK: Resources: | ||
| # CHECK-NEXT: [0] - SBDivider | ||
| # CHECK-NEXT: [1] - SBFPDivider | ||
| # CHECK-NEXT: [2] - SBPort0 | ||
| # CHECK-NEXT: [3] - SBPort1 | ||
| # CHECK-NEXT: [4] - SBPort4 | ||
| # CHECK-NEXT: [5] - SBPort5 | ||
| # CHECK-NEXT: [6.0] - SBPort23 | ||
| # CHECK-NEXT: [6.1] - SBPort23 | ||
|
|
||
| # CHECK: Resource pressure per iteration: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] | ||
| # CHECK-NEXT: - - 1.33 1.33 - 1.33 - - | ||
|
|
||
| # CHECK: Resource pressure by instruction: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: | ||
| # CHECK-NEXT: - - 0.67 0.67 - 0.67 - - sbbl %edx, %edx | ||
| # CHECK-NEXT: - - 0.67 0.67 - 0.67 - - sbbl %eax, %eax | ||
|
|
||
| # CHECK: Timeline view: | ||
| # CHECK-NEXT: 01234 | ||
| # CHECK-NEXT: Index 0123456789 | ||
|
|
||
| # CHECK: [0,0] DeeER. . . sbbl %edx, %edx | ||
| # CHECK-NEXT: [0,1] D==eeER . . sbbl %eax, %eax | ||
| # CHECK-NEXT: [1,0] .D===eeER . . sbbl %edx, %edx | ||
| # CHECK-NEXT: [1,1] .D=====eeER . sbbl %eax, %eax | ||
| # CHECK-NEXT: [2,0] . D======eeER . sbbl %edx, %edx | ||
| # CHECK-NEXT: [2,1] . D========eeER sbbl %eax, %eax | ||
|
|
||
| # CHECK: Average Wait times (based on the timeline view): | ||
| # CHECK-NEXT: [0]: Executions | ||
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue | ||
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready | ||
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage | ||
|
|
||
| # CHECK: [0] [1] [2] [3] | ||
| # CHECK-NEXT: 0. 3 4.0 0.3 0.0 sbbl %edx, %edx | ||
| # CHECK-NEXT: 1. 3 6.0 0.0 0.0 sbbl %eax, %eax |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,80 @@ | ||
| # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s | ||
|
|
||
| # perf stat reports a throughput of 1.51 IPC for this block of code. | ||
|
|
||
| # The SBB does not depend on the value of register EAX, so it does not have to | ||
| # wait for the IMUL to write back EAX. However, it still depends on the ADD for | ||
| # EFLAGS. | ||
|
|
||
| # imul writes EAX, add rewrites EFLAGS, and sbb consumes the carry flag while | ||
| # overwriting EAX (both of its register operands are the same register). | ||
| imul %edx, %eax | ||
| add %edx, %edx | ||
| sbb %eax, %eax | ||
|
|
||
| # CHECK: Iterations: 1500 | ||
| # CHECK-NEXT: Instructions: 4500 | ||
| # CHECK-NEXT: Total Cycles: 7503 | ||
| # CHECK-NEXT: Total uOps: 6000 | ||
|
|
||
| # CHECK: Dispatch Width: 4 | ||
| # CHECK-NEXT: uOps Per Cycle: 0.80 | ||
| # CHECK-NEXT: IPC: 0.60 | ||
| # CHECK-NEXT: Block RThroughput: 1.0 | ||
|
|
||
| # CHECK: Instruction Info: | ||
| # CHECK-NEXT: [1]: #uOps | ||
| # CHECK-NEXT: [2]: Latency | ||
| # CHECK-NEXT: [3]: RThroughput | ||
| # CHECK-NEXT: [4]: MayLoad | ||
| # CHECK-NEXT: [5]: MayStore | ||
| # CHECK-NEXT: [6]: HasSideEffects (U) | ||
|
|
||
| # CHECK: [1] [2] [3] [4] [5] [6] Instructions: | ||
| # CHECK-NEXT: 1 3 1.00 imull %edx, %eax | ||
| # CHECK-NEXT: 1 1 0.33 addl %edx, %edx | ||
| # CHECK-NEXT: 2 2 0.67 sbbl %eax, %eax | ||
|
|
||
| # CHECK: Resources: | ||
| # CHECK-NEXT: [0] - SBDivider | ||
| # CHECK-NEXT: [1] - SBFPDivider | ||
| # CHECK-NEXT: [2] - SBPort0 | ||
| # CHECK-NEXT: [3] - SBPort1 | ||
| # CHECK-NEXT: [4] - SBPort4 | ||
| # CHECK-NEXT: [5] - SBPort5 | ||
| # CHECK-NEXT: [6.0] - SBPort23 | ||
| # CHECK-NEXT: [6.1] - SBPort23 | ||
|
|
||
| # CHECK: Resource pressure per iteration: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] | ||
| # CHECK-NEXT: - - 1.33 1.33 - 1.33 - - | ||
|
|
||
| # CHECK: Resource pressure by instruction: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: | ||
| # CHECK-NEXT: - - - 1.00 - - - - imull %edx, %eax | ||
| # CHECK-NEXT: - - 0.33 0.33 - 0.34 - - addl %edx, %edx | ||
| # CHECK-NEXT: - - 1.00 - - 1.00 - - sbbl %eax, %eax | ||
|
|
||
| # CHECK: Timeline view: | ||
| # CHECK-NEXT: 01234567 | ||
| # CHECK-NEXT: Index 0123456789 | ||
|
|
||
| # CHECK: [0,0] DeeeER . . . imull %edx, %eax | ||
| # CHECK-NEXT: [0,1] DeE--R . . . addl %edx, %edx | ||
| # CHECK-NEXT: [0,2] D===eeER . . . sbbl %eax, %eax | ||
| # CHECK-NEXT: [1,0] .D====eeeER . . imull %edx, %eax | ||
| # CHECK-NEXT: [1,1] .DeE------R . . addl %edx, %edx | ||
| # CHECK-NEXT: [1,2] .D=======eeER . . sbbl %eax, %eax | ||
| # CHECK-NEXT: [2,0] . D========eeeER . imull %edx, %eax | ||
| # CHECK-NEXT: [2,1] . DeE----------R . addl %edx, %edx | ||
| # CHECK-NEXT: [2,2] . D===========eeER sbbl %eax, %eax | ||
|
|
||
| # CHECK: Average Wait times (based on the timeline view): | ||
| # CHECK-NEXT: [0]: Executions | ||
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue | ||
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready | ||
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage | ||
|
|
||
| # CHECK: [0] [1] [2] [3] | ||
| # CHECK-NEXT: 0. 3 5.0 0.3 0.0 imull %edx, %eax | ||
| # CHECK-NEXT: 1. 3 1.0 0.3 6.0 addl %edx, %edx | ||
| # CHECK-NEXT: 2. 3 8.0 0.0 0.0 sbbl %eax, %eax |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,95 @@ | ||
| # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=500 -timeline < %s | FileCheck %s | ||
|
|
||
| # vpmuldq feeds the first vpaddd through %xmm1, which in turn feeds the second | ||
| # vpaddd (and the next iteration's vpmuldq) through %xmm0. | ||
| vpmuldq %xmm0, %xmm0, %xmm1 | ||
| vpaddd %xmm1, %xmm1, %xmm0 | ||
| vpaddd %xmm0, %xmm0, %xmm3 | ||
|
|
||
| # CHECK: Iterations: 500 | ||
| # CHECK-NEXT: Instructions: 1500 | ||
| # CHECK-NEXT: Total Cycles: 3004 | ||
| # CHECK-NEXT: Total uOps: 1500 | ||
|
|
||
| # CHECK: Dispatch Width: 4 | ||
| # CHECK-NEXT: uOps Per Cycle: 0.50 | ||
| # CHECK-NEXT: IPC: 0.50 | ||
| # CHECK-NEXT: Block RThroughput: 1.0 | ||
|
|
||
| # CHECK: Instruction Info: | ||
| # CHECK-NEXT: [1]: #uOps | ||
| # CHECK-NEXT: [2]: Latency | ||
| # CHECK-NEXT: [3]: RThroughput | ||
| # CHECK-NEXT: [4]: MayLoad | ||
| # CHECK-NEXT: [5]: MayStore | ||
| # CHECK-NEXT: [6]: HasSideEffects (U) | ||
|
|
||
| # CHECK: [1] [2] [3] [4] [5] [6] Instructions: | ||
| # CHECK-NEXT: 1 5 1.00 vpmuldq %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: 1 1 0.50 vpaddd %xmm1, %xmm1, %xmm0 | ||
| # CHECK-NEXT: 1 1 0.50 vpaddd %xmm0, %xmm0, %xmm3 | ||
|
|
||
| # CHECK: Resources: | ||
| # CHECK-NEXT: [0] - SBDivider | ||
| # CHECK-NEXT: [1] - SBFPDivider | ||
| # CHECK-NEXT: [2] - SBPort0 | ||
| # CHECK-NEXT: [3] - SBPort1 | ||
| # CHECK-NEXT: [4] - SBPort4 | ||
| # CHECK-NEXT: [5] - SBPort5 | ||
| # CHECK-NEXT: [6.0] - SBPort23 | ||
| # CHECK-NEXT: [6.1] - SBPort23 | ||
|
|
||
| # CHECK: Resource pressure per iteration: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] | ||
| # CHECK-NEXT: - - 1.00 1.00 - 1.00 - - | ||
|
|
||
| # CHECK: Resource pressure by instruction: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: | ||
| # CHECK-NEXT: - - 1.00 - - - - - vpmuldq %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: - - - - - 1.00 - - vpaddd %xmm1, %xmm1, %xmm0 | ||
| # CHECK-NEXT: - - - 1.00 - - - - vpaddd %xmm0, %xmm0, %xmm3 | ||
|
|
||
| # CHECK: Timeline view: | ||
| # CHECK-NEXT: 0123456789 0123456789 0123456789 | ||
| # CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123 | ||
|
|
||
| # CHECK: [0,0] DeeeeeER . . . . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: [0,1] D=====eER . . . . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 | ||
| # CHECK-NEXT: [0,2] D======eER. . . . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 | ||
| # CHECK-NEXT: [1,0] D======eeeeeER . . . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: [1,1] .D==========eER. . . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 | ||
| # CHECK-NEXT: [1,2] .D===========eER . . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 | ||
| # CHECK-NEXT: [2,0] .D===========eeeeeER. . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: [2,1] .D================eER . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 | ||
| # CHECK-NEXT: [2,2] . D================eER . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 | ||
| # CHECK-NEXT: [3,0] . D================eeeeeER . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: [3,1] . D=====================eER . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 | ||
| # CHECK-NEXT: [3,2] . D======================eER . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 | ||
| # CHECK-NEXT: [4,0] . D=====================eeeeeER . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: [4,1] . D==========================eER . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 | ||
| # CHECK-NEXT: [4,2] . D===========================eER . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 | ||
| # CHECK-NEXT: [5,0] . D===========================eeeeeER . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: [5,1] . D===============================eER . . . . . . vpaddd %xmm1, %xmm1, %xmm0 | ||
| # CHECK-NEXT: [5,2] . D================================eER. . . . . . vpaddd %xmm0, %xmm0, %xmm3 | ||
| # CHECK-NEXT: [6,0] . D================================eeeeeER . . . . . vpmuldq %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: [6,1] . D=====================================eER. . . . . vpaddd %xmm1, %xmm1, %xmm0 | ||
| # CHECK-NEXT: [6,2] . D=====================================eER . . . . vpaddd %xmm0, %xmm0, %xmm3 | ||
| # CHECK-NEXT: [7,0] . D=====================================eeeeeER. . . . vpmuldq %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: [7,1] . D==========================================eER . . . vpaddd %xmm1, %xmm1, %xmm0 | ||
| # CHECK-NEXT: [7,2] . D===========================================eER . . . vpaddd %xmm0, %xmm0, %xmm3 | ||
| # CHECK-NEXT: [8,0] . .D==========================================eeeeeER . . vpmuldq %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: [8,1] . .D===============================================eER . . vpaddd %xmm1, %xmm1, %xmm0 | ||
| # CHECK-NEXT: [8,2] . .D================================================eER . . vpaddd %xmm0, %xmm0, %xmm3 | ||
| # CHECK-NEXT: [9,0] . .D================================================eeeeeER . vpmuldq %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: [9,1] . . D====================================================eER. vpaddd %xmm1, %xmm1, %xmm0 | ||
| # CHECK-NEXT: [9,2] . . D=====================================================eER vpaddd %xmm0, %xmm0, %xmm3 | ||
|
|
||
| # CHECK: Average Wait times (based on the timeline view): | ||
| # CHECK-NEXT: [0]: Executions | ||
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue | ||
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready | ||
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage | ||
|
|
||
| # CHECK: [0] [1] [2] [3] | ||
| # CHECK-NEXT: 0. 10 25.0 0.1 0.0 vpmuldq %xmm0, %xmm0, %xmm1 | ||
| # CHECK-NEXT: 1. 10 29.7 0.0 0.0 vpaddd %xmm1, %xmm1, %xmm0 | ||
| # CHECK-NEXT: 2. 10 30.5 0.0 0.0 vpaddd %xmm0, %xmm0, %xmm3 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,74 @@ | ||
| # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=300 -timeline -timeline-max-iterations=3 < %s | FileCheck %s | ||
|
|
||
| # Serial chain: vmulps -> vhaddps -> vhaddps, linked through %xmm2 and %xmm3. | ||
| # None of the inputs of vmulps (%xmm0, %xmm1) is written by this block. | ||
| vmulps %xmm0, %xmm1, %xmm2 | ||
| vhaddps %xmm2, %xmm2, %xmm3 | ||
| vhaddps %xmm3, %xmm3, %xmm4 | ||
|
|
||
| # CHECK: Iterations: 300 | ||
| # CHECK-NEXT: Instructions: 900 | ||
| # CHECK-NEXT: Total Cycles: 1211 | ||
| # CHECK-NEXT: Total uOps: 2100 | ||
|
|
||
| # CHECK: Dispatch Width: 4 | ||
| # CHECK-NEXT: uOps Per Cycle: 1.73 | ||
| # CHECK-NEXT: IPC: 0.74 | ||
| # CHECK-NEXT: Block RThroughput: 4.0 | ||
|
|
||
| # CHECK: Instruction Info: | ||
| # CHECK-NEXT: [1]: #uOps | ||
| # CHECK-NEXT: [2]: Latency | ||
| # CHECK-NEXT: [3]: RThroughput | ||
| # CHECK-NEXT: [4]: MayLoad | ||
| # CHECK-NEXT: [5]: MayStore | ||
| # CHECK-NEXT: [6]: HasSideEffects (U) | ||
|
|
||
| # CHECK: [1] [2] [3] [4] [5] [6] Instructions: | ||
| # CHECK-NEXT: 1 5 1.00 vmulps %xmm0, %xmm1, %xmm2 | ||
| # CHECK-NEXT: 3 5 2.00 vhaddps %xmm2, %xmm2, %xmm3 | ||
| # CHECK-NEXT: 3 5 2.00 vhaddps %xmm3, %xmm3, %xmm4 | ||
|
|
||
| # CHECK: Resources: | ||
| # CHECK-NEXT: [0] - SBDivider | ||
| # CHECK-NEXT: [1] - SBFPDivider | ||
| # CHECK-NEXT: [2] - SBPort0 | ||
| # CHECK-NEXT: [3] - SBPort1 | ||
| # CHECK-NEXT: [4] - SBPort4 | ||
| # CHECK-NEXT: [5] - SBPort5 | ||
| # CHECK-NEXT: [6.0] - SBPort23 | ||
| # CHECK-NEXT: [6.1] - SBPort23 | ||
|
|
||
| # CHECK: Resource pressure per iteration: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] | ||
| # CHECK-NEXT: - - 1.00 2.00 - 4.00 - - | ||
|
|
||
| # CHECK: Resource pressure by instruction: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: | ||
| # CHECK-NEXT: - - 1.00 - - - - - vmulps %xmm0, %xmm1, %xmm2 | ||
| # CHECK-NEXT: - - - 1.00 - 2.00 - - vhaddps %xmm2, %xmm2, %xmm3 | ||
| # CHECK-NEXT: - - - 1.00 - 2.00 - - vhaddps %xmm3, %xmm3, %xmm4 | ||
|
|
||
| # CHECK: Timeline view: | ||
| # CHECK-NEXT: 0123456789 | ||
| # CHECK-NEXT: Index 0123456789 012 | ||
|
|
||
| # CHECK: [0,0] DeeeeeER . . . . vmulps %xmm0, %xmm1, %xmm2 | ||
| # CHECK-NEXT: [0,1] D=====eeeeeER . . . vhaddps %xmm2, %xmm2, %xmm3 | ||
| # CHECK-NEXT: [0,2] .D==========eeeeeER . . vhaddps %xmm3, %xmm3, %xmm4 | ||
| # CHECK-NEXT: [1,0] .DeeeeeE----------R . . vmulps %xmm0, %xmm1, %xmm2 | ||
| # CHECK-NEXT: [1,1] . D=====eeeeeE----R . . vhaddps %xmm2, %xmm2, %xmm3 | ||
| # CHECK-NEXT: [1,2] . D==========eeeeeER . vhaddps %xmm3, %xmm3, %xmm4 | ||
| # CHECK-NEXT: [2,0] . DeeeeeE----------R . vmulps %xmm0, %xmm1, %xmm2 | ||
| # CHECK-NEXT: [2,1] . D=====eeeeeE----R . vhaddps %xmm2, %xmm2, %xmm3 | ||
| # CHECK-NEXT: [2,2] . D==========eeeeeER vhaddps %xmm3, %xmm3, %xmm4 | ||
|
|
||
| # CHECK: Average Wait times (based on the timeline view): | ||
| # CHECK-NEXT: [0]: Executions | ||
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue | ||
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready | ||
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage | ||
|
|
||
| # CHECK: [0] [1] [2] [3] | ||
| # CHECK-NEXT: 0. 3 1.0 1.0 6.7 vmulps %xmm0, %xmm1, %xmm2 | ||
| # CHECK-NEXT: 1. 3 6.0 0.7 2.7 vhaddps %xmm2, %xmm2, %xmm3 | ||
| # CHECK-NEXT: 2. 3 11.0 1.0 0.0 vhaddps %xmm3, %xmm3, %xmm4 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s | ||
|
|
||
| # vshufps writes %xmm1, which vhaddps then reads as its register source; the | ||
| # other vhaddps source is loaded from memory at (%rdi). | ||
| vshufps $0, %xmm0, %xmm1, %xmm1 | ||
| vhaddps (%rdi), %xmm1, %xmm2 | ||
|
|
||
| # CHECK: Iterations: 1 | ||
| # CHECK-NEXT: Instructions: 2 | ||
| # CHECK-NEXT: Total Cycles: 15 | ||
| # CHECK-NEXT: Total uOps: 5 | ||
|
|
||
| # CHECK: Dispatch Width: 4 | ||
| # CHECK-NEXT: uOps Per Cycle: 0.33 | ||
| # CHECK-NEXT: IPC: 0.13 | ||
| # CHECK-NEXT: Block RThroughput: 3.0 | ||
|
|
||
| # CHECK: Instruction Info: | ||
| # CHECK-NEXT: [1]: #uOps | ||
| # CHECK-NEXT: [2]: Latency | ||
| # CHECK-NEXT: [3]: RThroughput | ||
| # CHECK-NEXT: [4]: MayLoad | ||
| # CHECK-NEXT: [5]: MayStore | ||
| # CHECK-NEXT: [6]: HasSideEffects (U) | ||
|
|
||
| # CHECK: [1] [2] [3] [4] [5] [6] Instructions: | ||
| # CHECK-NEXT: 1 1 1.00 vshufps $0, %xmm0, %xmm1, %xmm1 | ||
| # CHECK-NEXT: 4 11 2.00 * vhaddps (%rdi), %xmm1, %xmm2 | ||
|
|
||
| # CHECK: Timeline view: | ||
| # CHECK-NEXT: 01234 | ||
| # CHECK-NEXT: Index 0123456789 | ||
|
|
||
| # CHECK: [0,0] DeER . . . vshufps $0, %xmm0, %xmm1, %xmm1 | ||
| # CHECK-NEXT: [0,1] .DeeeeeeeeeeeER vhaddps (%rdi), %xmm1, %xmm2 | ||
|
|
||
| # CHECK: Average Wait times (based on the timeline view): | ||
| # CHECK-NEXT: [0]: Executions | ||
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue | ||
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready | ||
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage | ||
|
|
||
| # CHECK: [0] [1] [2] [3] | ||
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vshufps $0, %xmm0, %xmm1, %xmm1 | ||
| # CHECK-NEXT: 1. 1 1.0 1.0 0.0 vhaddps (%rdi), %xmm1, %xmm2 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s | ||
|
|
||
| # vshufps writes %xmm1, the low half of the %ymm1 register that the 256-bit | ||
| # vhaddps reads; the other vhaddps source is loaded from memory at (%rdi). | ||
| vshufps $0, %xmm0, %xmm1, %xmm1 | ||
| vhaddps (%rdi), %ymm1, %ymm2 | ||
|
|
||
| # CHECK: Iterations: 1 | ||
| # CHECK-NEXT: Instructions: 2 | ||
| # CHECK-NEXT: Total Cycles: 16 | ||
| # CHECK-NEXT: Total uOps: 5 | ||
|
|
||
| # CHECK: Dispatch Width: 4 | ||
| # CHECK-NEXT: uOps Per Cycle: 0.31 | ||
| # CHECK-NEXT: IPC: 0.13 | ||
| # CHECK-NEXT: Block RThroughput: 3.0 | ||
|
|
||
| # CHECK: Instruction Info: | ||
| # CHECK-NEXT: [1]: #uOps | ||
| # CHECK-NEXT: [2]: Latency | ||
| # CHECK-NEXT: [3]: RThroughput | ||
| # CHECK-NEXT: [4]: MayLoad | ||
| # CHECK-NEXT: [5]: MayStore | ||
| # CHECK-NEXT: [6]: HasSideEffects (U) | ||
|
|
||
| # CHECK: [1] [2] [3] [4] [5] [6] Instructions: | ||
| # CHECK-NEXT: 1 1 1.00 vshufps $0, %xmm0, %xmm1, %xmm1 | ||
| # CHECK-NEXT: 4 12 2.00 * vhaddps (%rdi), %ymm1, %ymm2 | ||
|
|
||
| # CHECK: Timeline view: | ||
| # CHECK-NEXT: 012345 | ||
| # CHECK-NEXT: Index 0123456789 | ||
|
|
||
| # CHECK: [0,0] DeER . . . vshufps $0, %xmm0, %xmm1, %xmm1 | ||
| # CHECK-NEXT: [0,1] .DeeeeeeeeeeeeER vhaddps (%rdi), %ymm1, %ymm2 | ||
|
|
||
| # CHECK: Average Wait times (based on the timeline view): | ||
| # CHECK-NEXT: [0]: Executions | ||
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue | ||
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready | ||
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage | ||
|
|
||
| # CHECK: [0] [1] [2] [3] | ||
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vshufps $0, %xmm0, %xmm1, %xmm1 | ||
| # CHECK-NEXT: 1. 1 1.0 1.0 0.0 vhaddps (%rdi), %ymm1, %ymm2 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -instruction-info=true < %s | FileCheck %s --check-prefix=ENABLED | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefix=DISABLED | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -instruction-info < %s | FileCheck %s -check-prefix=ENABLED | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false < %s | FileCheck %s -check-prefix=ENABLED | ||
|
|
||
| # Input block shared by all RUN lines of this test; the RUN lines differ only | ||
| # in how (or whether) -instruction-info is passed and in the check prefix used. | ||
| vmulps %xmm0, %xmm1, %xmm2 | ||
| vhaddps %xmm2, %xmm2, %xmm3 | ||
| vhaddps %xmm3, %xmm3, %xmm4 | ||
|
|
||
| # DISABLED-NOT: Instruction Info: | ||
|
|
||
|
|
||
| # ENABLED: Iterations: 100 | ||
| # ENABLED-NEXT: Instructions: 300 | ||
| # ENABLED-NEXT: Total Cycles: 414 | ||
| # ENABLED-NEXT: Total uOps: 700 | ||
|
|
||
|
|
||
| # ENABLED: Dispatch Width: 4 | ||
| # ENABLED-NEXT: uOps Per Cycle: 1.69 | ||
| # ENABLED-NEXT: IPC: 0.72 | ||
| # ENABLED-NEXT: Block RThroughput: 4.0 | ||
|
|
||
| # ENABLED: Instruction Info: | ||
| # ENABLED-NEXT: [1]: #uOps | ||
| # ENABLED-NEXT: [2]: Latency | ||
| # ENABLED-NEXT: [3]: RThroughput | ||
| # ENABLED-NEXT: [4]: MayLoad | ||
| # ENABLED-NEXT: [5]: MayStore | ||
| # ENABLED-NEXT: [6]: HasSideEffects (U) | ||
|
|
||
| # ENABLED: [1] [2] [3] [4] [5] [6] Instructions: | ||
| # ENABLED-NEXT: 1 5 1.00 vmulps %xmm0, %xmm1, %xmm2 | ||
| # ENABLED-NEXT: 3 5 2.00 vhaddps %xmm2, %xmm2, %xmm3 | ||
| # ENABLED-NEXT: 3 5 2.00 vhaddps %xmm3, %xmm3, %xmm4 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,93 @@ | ||
| # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py | ||
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=100 -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s | ||
|
|
||
| # Four load/store pairs: each pair loads an XMM register from %rsi + offset and | ||
| # stores it to %rdi + the same offset (offsets 0, 16, 32 and 48). Every pair | ||
| # reuses %xmm0 as the temporary. | ||
| vmovaps (%rsi), %xmm0 | ||
| vmovaps %xmm0, (%rdi) | ||
| vmovaps 16(%rsi), %xmm0 | ||
| vmovaps %xmm0, 16(%rdi) | ||
| vmovaps 32(%rsi), %xmm0 | ||
| vmovaps %xmm0, 32(%rdi) | ||
| vmovaps 48(%rsi), %xmm0 | ||
| vmovaps %xmm0, 48(%rdi) | ||
|
|
||
| # CHECK: Iterations: 100 | ||
| # CHECK-NEXT: Instructions: 800 | ||
| # CHECK-NEXT: Total Cycles: 2803 | ||
| # CHECK-NEXT: Total uOps: 800 | ||
|
|
||
| # CHECK: Dispatch Width: 4 | ||
| # CHECK-NEXT: uOps Per Cycle: 0.29 | ||
| # CHECK-NEXT: IPC: 0.29 | ||
| # CHECK-NEXT: Block RThroughput: 4.0 | ||
|
|
||
| # CHECK: Instruction Info: | ||
| # CHECK-NEXT: [1]: #uOps | ||
| # CHECK-NEXT: [2]: Latency | ||
| # CHECK-NEXT: [3]: RThroughput | ||
| # CHECK-NEXT: [4]: MayLoad | ||
| # CHECK-NEXT: [5]: MayStore | ||
| # CHECK-NEXT: [6]: HasSideEffects (U) | ||
|
|
||
| # CHECK: [1] [2] [3] [4] [5] [6] Instructions: | ||
| # CHECK-NEXT: 1 6 0.50 * vmovaps (%rsi), %xmm0 | ||
| # CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rdi) | ||
| # CHECK-NEXT: 1 6 0.50 * vmovaps 16(%rsi), %xmm0 | ||
| # CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 16(%rdi) | ||
| # CHECK-NEXT: 1 6 0.50 * vmovaps 32(%rsi), %xmm0 | ||
| # CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 32(%rdi) | ||
| # CHECK-NEXT: 1 6 0.50 * vmovaps 48(%rsi), %xmm0 | ||
| # CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 48(%rdi) | ||
|
|
||
| # CHECK: Resources: | ||
| # CHECK-NEXT: [0] - SBDivider | ||
| # CHECK-NEXT: [1] - SBFPDivider | ||
| # CHECK-NEXT: [2] - SBPort0 | ||
| # CHECK-NEXT: [3] - SBPort1 | ||
| # CHECK-NEXT: [4] - SBPort4 | ||
| # CHECK-NEXT: [5] - SBPort5 | ||
| # CHECK-NEXT: [6.0] - SBPort23 | ||
| # CHECK-NEXT: [6.1] - SBPort23 | ||
|
|
||
| # CHECK: Resource pressure per iteration: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] | ||
| # CHECK-NEXT: - - - - 4.00 - - 8.00 | ||
|
|
||
| # CHECK: Resource pressure by instruction: | ||
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: | ||
| # CHECK-NEXT: - - - - - - - 1.00 vmovaps (%rsi), %xmm0 | ||
| # CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, (%rdi) | ||
| # CHECK-NEXT: - - - - - - - 1.00 vmovaps 16(%rsi), %xmm0 | ||
| # CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, 16(%rdi) | ||
| # CHECK-NEXT: - - - - - - - 1.00 vmovaps 32(%rsi), %xmm0 | ||
| # CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, 32(%rdi) | ||
| # CHECK-NEXT: - - - - - - - 1.00 vmovaps 48(%rsi), %xmm0 | ||
| # CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, 48(%rdi) | ||
|
|
||
| # CHECK: Timeline view: | ||
| # CHECK-NEXT: 0123456789 0 | ||
| # CHECK-NEXT: Index 0123456789 0123456789 | ||
|
|
||
| # CHECK: [0,0] DeeeeeeER . . . . . vmovaps (%rsi), %xmm0 | ||
| # CHECK-NEXT: [0,1] D======eER. . . . . vmovaps %xmm0, (%rdi) | ||
| # CHECK-NEXT: [0,2] D=======eeeeeeER . . . vmovaps 16(%rsi), %xmm0 | ||
| # CHECK-NEXT: [0,3] D=============eER . . . vmovaps %xmm0, 16(%rdi) | ||
| # CHECK-NEXT: [0,4] .D=============eeeeeeER . . vmovaps 32(%rsi), %xmm0 | ||
| # CHECK-NEXT: [0,5] .D===================eER . . vmovaps %xmm0, 32(%rdi) | ||
| # CHECK-NEXT: [0,6] .D====================eeeeeeER. vmovaps 48(%rsi), %xmm0 | ||
| # CHECK-NEXT: [0,7] .D==========================eER vmovaps %xmm0, 48(%rdi) | ||
|
|
||
| # CHECK: Average Wait times (based on the timeline view): | ||
| # CHECK-NEXT: [0]: Executions | ||
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue | ||
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready | ||
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage | ||
|
|
||
| # CHECK: [0] [1] [2] [3] | ||
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rsi), %xmm0 | ||
| # CHECK-NEXT: 1. 1 7.0 0.0 0.0 vmovaps %xmm0, (%rdi) | ||
| # CHECK-NEXT: 2. 1 8.0 0.0 0.0 vmovaps 16(%rsi), %xmm0 | ||
| # CHECK-NEXT: 3. 1 14.0 0.0 0.0 vmovaps %xmm0, 16(%rdi) | ||
| # CHECK-NEXT: 4. 1 14.0 0.0 0.0 vmovaps 32(%rsi), %xmm0 | ||
| # CHECK-NEXT: 5. 1 20.0 0.0 0.0 vmovaps %xmm0, 32(%rdi) | ||
| # CHECK-NEXT: 6. 1 21.0 0.0 0.0 vmovaps 48(%rsi), %xmm0 | ||
| # CHECK-NEXT: 7. 1 27.0 0.0 0.0 vmovaps %xmm0, 48(%rdi) |