diff --git a/llvm/lib/Target/X86/X86ScheduleZnver3.td b/llvm/lib/Target/X86/X86ScheduleZnver3.td index 86f1b285fec21f..be07c069aae104 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver3.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver3.td @@ -618,30 +618,10 @@ defm : Zn3WriteResIntPair; // Intege defm : Zn3WriteResIntPair; // Integer 16-bit multiplication by register. defm : Zn3WriteResIntPair; // Integer 32-bit multiplication. defm : Zn3WriteResIntPair; // Integer 32-bit Unsigned Multiply Without Affecting Flags. - -def Zn3MULX32rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3Multiplier]> { - let Latency = !add(Znver3Model.LoadLatency, 3); - let ResourceCycles = [1, 1, 2]; - let NumMicroOps = 2; -} -def : InstRW<[Zn3MULX32rm, WriteIMulHLd, - ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, - ReadAfterLd], (instrs MULX32rm)>; - defm : Zn3WriteResIntPair; // Integer 32-bit multiplication by immediate. defm : Zn3WriteResIntPair; // Integer 32-bit multiplication by register. defm : Zn3WriteResIntPair; // Integer 64-bit multiplication. defm : Zn3WriteResIntPair; // Integer 32-bit Unsigned Multiply Without Affecting Flags. - -def Zn3MULX64rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3Multiplier]> { - let Latency = !add(Znver3Model.LoadLatency, 3); - let ResourceCycles = [1, 1, 2]; - let NumMicroOps = 2; -} -def : InstRW<[Zn3MULX64rm, WriteIMulHLd, - ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, - ReadAfterLd], (instrs MULX64rm)>; - defm : Zn3WriteResIntPair; // Integer 64-bit multiplication by immediate. defm : Zn3WriteResIntPair; // Integer 64-bit multiplication by register. defm : Zn3WriteResInt; // Integer multiplication, high part. diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s index b19cf61f3a58c6..12d6f399d42952 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s @@ -17,13 +17,13 @@ add %rax, %rax # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 12 +# CHECK-NEXT: Total Cycles: 11 # CHECK-NEXT: Total uOps: 3 # CHECK: Dispatch Width: 6 -# CHECK-NEXT: uOps Per Cycle: 0.25 -# CHECK-NEXT: IPC: 0.17 -# CHECK-NEXT: Block RThroughput: 2.0 +# CHECK-NEXT: uOps Per Cycle: 0.27 +# CHECK-NEXT: IPC: 0.18 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -34,7 +34,7 @@ add %rax, %rax # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 8 2.00 * mulxl (%rdi), %eax, %ecx +# CHECK-NEXT: 2 8 1.00 * mulxl (%rdi), %eax, %ecx # CHECK-NEXT: 1 1 0.25 addl %eax, %eax # CHECK: Resources: @@ -64,19 +64,19 @@ add %rax, %rax # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: - - 1.00 - 2.00 - 1.00 - - - - - - - - - - 1.00 - - 1.00 - - +# CHECK-NEXT: - - 1.00 - 1.00 - 1.00 - - - - - - - - - - 1.00 - - 1.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: -# CHECK-NEXT: - - 1.00 - 2.00 - - - - - - - - - - - - 1.00 - - 1.00 - - mulxl (%rdi), %eax, %ecx +# CHECK-NEXT: - - 1.00 - 1.00 - - - - - - - - - - - - 1.00 - - 1.00 - - mulxl (%rdi), %eax, %ecx # CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - - - - - - - addl %eax, %eax # CHECK: Timeline view: -# CHECK-NEXT: 01 +# CHECK-NEXT: 0 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER. mulxl (%rdi), %eax, %ecx -# CHECK-NEXT: [0,1] D========eER addl %eax, %eax +# CHECK: [0,0] DeeeeeeeeER mulxl (%rdi), %eax, %ecx +# CHECK-NEXT: [0,1] D=======eER addl %eax, %eax # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -86,20 +86,20 @@ add %rax, %rax # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 mulxl (%rdi), %eax, %ecx -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 addl %eax, %eax -# CHECK-NEXT: 1 5.0 0.5 0.0 +# CHECK-NEXT: 1. 1 8.0 0.0 0.0 addl %eax, %eax +# CHECK-NEXT: 1 4.5 0.5 0.0 # CHECK: [1] Code Region # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 12 +# CHECK-NEXT: Total Cycles: 11 # CHECK-NEXT: Total uOps: 3 # CHECK: Dispatch Width: 6 -# CHECK-NEXT: uOps Per Cycle: 0.25 -# CHECK-NEXT: IPC: 0.17 -# CHECK-NEXT: Block RThroughput: 2.0 +# CHECK-NEXT: uOps Per Cycle: 0.27 +# CHECK-NEXT: IPC: 0.18 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -110,7 +110,7 @@ add %rax, %rax # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 8 2.00 * mulxq (%rdi), %rax, %rcx +# CHECK-NEXT: 2 8 1.00 * mulxq (%rdi), %rax, %rcx # CHECK-NEXT: 1 1 0.25 addq %rax, %rax # CHECK: Resources: @@ -140,19 +140,19 @@ add %rax, %rax # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: - - 1.00 - 2.00 - 1.00 - - - - - - - - - - 1.00 - - 1.00 - - +# CHECK-NEXT: - - 1.00 - 1.00 - 1.00 - - - - - - - - - - 1.00 - - 1.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: -# CHECK-NEXT: - - 1.00 - 2.00 - - - - - - - - - - - - 1.00 - - 1.00 - - mulxq (%rdi), %rax, %rcx +# CHECK-NEXT: - - 1.00 - 1.00 - - - - - - - - - - - - 1.00 - - 1.00 - - mulxq (%rdi), %rax, %rcx # CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - - - - - - - addq %rax, %rax # CHECK: Timeline view: -# CHECK-NEXT: 01 +# CHECK-NEXT: 0 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER. mulxq (%rdi), %rax, %rcx -# CHECK-NEXT: [0,1] D========eER addq %rax, %rax +# CHECK: [0,0] DeeeeeeeeER mulxq (%rdi), %rax, %rcx +# CHECK-NEXT: [0,1] D=======eER addq %rax, %rax # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -162,5 +162,5 @@ add %rax, %rax # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 mulxq (%rdi), %rax, %rcx -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 addq %rax, %rax -# CHECK-NEXT: 1 5.0 0.5 0.0 +# CHECK-NEXT: 1. 1 8.0 0.0 0.0 addq %rax, %rax +# CHECK-NEXT: 1 4.5 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-read-advance.s b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-read-advance.s index bbc5cfa398708c..13ef5bcb11ca96 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-read-advance.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-read-advance.s @@ -15,13 +15,13 @@ mulxq (%rdi), %rax, %rdx # CHECK: Iterations: 2 # CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 14 +# CHECK-NEXT: Total Cycles: 15 # CHECK-NEXT: Total uOps: 4 # CHECK: Dispatch Width: 6 -# CHECK-NEXT: uOps Per Cycle: 0.29 -# CHECK-NEXT: IPC: 0.14 -# CHECK-NEXT: Block RThroughput: 2.0 +# CHECK-NEXT: uOps Per Cycle: 0.27 +# CHECK-NEXT: IPC: 0.13 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -32,7 +32,7 @@ mulxq (%rdi), %rax, %rdx # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 8 2.00 * mulxl (%rdi), %eax, %edx +# CHECK-NEXT: 2 8 1.00 * mulxl (%rdi), %eax, %edx # CHECK: Resources: # CHECK-NEXT: [0] - Zn3AGU0 @@ -61,18 +61,18 @@ mulxq (%rdi), %rax, %rdx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: - 0.50 0.50 - 2.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - - +# CHECK-NEXT: - 0.50 0.50 - 1.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: -# CHECK-NEXT: - 0.50 0.50 - 2.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - - mulxl (%rdi), %eax, %edx +# CHECK-NEXT: - 0.50 0.50 - 1.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - - mulxl (%rdi), %eax, %edx # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 01234 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . mulxl (%rdi), %eax, %edx -# CHECK-NEXT: [1,0] D===eeeeeeeeER mulxl (%rdi), %eax, %edx +# CHECK: [0,0] DeeeeeeeeER . mulxl (%rdi), %eax, %edx +# CHECK-NEXT: [1,0] D====eeeeeeeeER mulxl (%rdi), %eax, %edx # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -81,19 +81,19 @@ mulxq (%rdi), %rax, %rdx # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 2 2.5 0.5 0.0 mulxl (%rdi), %eax, %edx +# CHECK-NEXT: 0. 2 3.0 0.5 0.0 mulxl (%rdi), %eax, %edx # CHECK: [1] Code Region # CHECK: Iterations: 2 # CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 14 +# CHECK-NEXT: Total Cycles: 15 # CHECK-NEXT: Total uOps: 4 # CHECK: Dispatch Width: 6 -# CHECK-NEXT: uOps Per Cycle: 0.29 -# CHECK-NEXT: IPC: 0.14 -# CHECK-NEXT: Block RThroughput: 2.0 +# CHECK-NEXT: uOps Per Cycle: 0.27 +# CHECK-NEXT: IPC: 0.13 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -104,7 +104,7 @@ mulxq (%rdi), %rax, %rdx # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 8 2.00 * mulxq (%rdi), %rax, %rdx +# CHECK-NEXT: 2 8 1.00 * mulxq (%rdi), %rax, %rdx # CHECK: Resources: # CHECK-NEXT: [0] - Zn3AGU0 @@ -133,18 +133,18 @@ mulxq (%rdi), %rax, %rdx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: - 0.50 0.50 - 2.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - - +# CHECK-NEXT: - 0.50 0.50 - 1.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: -# CHECK-NEXT: - 0.50 0.50 - 2.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - - mulxq (%rdi), %rax, %rdx +# CHECK-NEXT: - 0.50 0.50 - 1.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - - mulxq (%rdi), %rax, %rdx # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 01234 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . mulxq (%rdi), %rax, %rdx -# CHECK-NEXT: [1,0] D===eeeeeeeeER mulxq (%rdi), %rax, %rdx +# CHECK: [0,0] DeeeeeeeeER . mulxq (%rdi), %rax, %rdx +# CHECK-NEXT: [1,0] D====eeeeeeeeER mulxq (%rdi), %rax, %rdx # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -153,4 +153,4 @@ mulxq (%rdi), %rax, %rdx # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 2 2.5 0.5 0.0 mulxq (%rdi), %rax, %rdx +# CHECK-NEXT: 0. 2 3.0 0.5 0.0 mulxq (%rdi), %rax, %rdx diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s index 1c2ccfc7cf8347..8d00c99982b070 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s @@ -63,9 +63,9 @@ shrx %rax, (%rbx), %rcx # CHECK-NEXT: 1 1 0.50 bzhiq %rax, %rbx, %rcx # CHECK-NEXT: 2 5 0.50 * bzhiq %rax, (%rbx), %rcx # CHECK-NEXT: 2 4 1.00 mulxl %eax, %ebx, %ecx -# CHECK-NEXT: 2 8 2.00 * mulxl (%rax), %ebx, %ecx +# CHECK-NEXT: 2 8 1.00 * mulxl (%rax), %ebx, %ecx # CHECK-NEXT: 2 4 1.00 mulxq %rax, %rbx, %rcx -# CHECK-NEXT: 2 8 2.00 * mulxq (%rax), %rbx, %rcx +# CHECK-NEXT: 2 8 1.00 * mulxq (%rax), %rbx, %rcx # CHECK-NEXT: 1 3 1.00 pdepl %eax, %ebx, %ecx # CHECK-NEXT: 1 5 0.33 * pdepl (%rax), %ebx, %ecx # CHECK-NEXT: 1 3 1.00 pdepq %rax, %rbx, %rcx @@ -118,7 +118,7 @@ shrx %rax, (%rbx), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: 5.33 5.33 5.33 1.00 21.00 11.00 1.00 - - - - - - - - 5.33 5.33 5.33 5.33 5.33 5.33 - - +# CHECK-NEXT: 5.33 5.33 5.33 1.00 19.00 11.00 1.00 - - - - - - - - 5.33 5.33 5.33 5.33 5.33 5.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -127,9 +127,9 @@ shrx %rax, (%rbx), %rcx # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - bzhiq %rax, %rbx, %rcx # CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - bzhiq %rax, (%rbx), %rcx # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - mulxl %eax, %ebx, %ecx -# CHECK-NEXT: 0.33 0.33 0.33 - 2.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulxl (%rax), %ebx, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulxl (%rax), %ebx, %ecx # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - mulxq %rax, %rbx, %rcx -# CHECK-NEXT: 0.33 0.33 0.33 - 2.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulxq (%rax), %rbx, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulxq (%rax), %rbx, %rcx # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - pdepl %eax, %ebx, %ecx # CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - pdepl (%rax), %ebx, %ecx # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - pdepq %rax, %rbx, %rcx