Skip to content

Commit

Permalink
[X86][BtVer2] Fix latency and resource cycles of AVX 256-bit zero-idioms.
Browse files Browse the repository at this point in the history

This patch introduces a SchedWriteVariant to describe zero-idiom VXORP(S|D)Yrr
and VANDNP(S|D)Yrr.

This is a follow-up to r342555.

On Jaguar, a VXORPSYrr consists of 2 macro opcodes. Only one of them is
eliminated at the register-renaming stage; the other opcode still has to be
executed to set the upper half of the destination YMM register.
The same applies to VANDNP(S|D)Yrr.

Differential Revision: https://reviews.llvm.org/D52347

llvm-svn: 342728
  • Loading branch information
Andrea Di Biagio authored and Andrea Di Biagio committed Sep 21, 2018
1 parent 7cf529c commit 4cd5cf9
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 46 deletions.
11 changes: 11 additions & 0 deletions llvm/lib/Target/X86/X86ScheduleBtVer2.td
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,10 @@ def JWriteZeroLatency : SchedWriteRes<[]> {
let Latency = 0;
}

// Write resource used for 256-bit (YMM) zero idioms.
// Unlike JWriteZeroLatency (which lists no resources and has Latency = 0),
// this write still consumes JFPU01 and JFPX: per the commit rationale, only
// one of the two opcodes is eliminated at register renaming, and the other
// has to be executed to set the upper half of the destination YMM register.
// Latency is not overridden here.
def JWriteZeroIdiomYmm : SchedWriteRes<[JFPU01, JFPX]> {
// The instruction is still accounted for as two micro-ops.
let NumMicroOps = 2;
}

// Certain instructions that use the same register for both source
// operands do not have a real dependency on the previous contents of the
// register, and thus, do not have to wait before completing. They can be
Expand All @@ -619,6 +623,13 @@ def : InstRW<[JWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr, VXORPDrr,
ANDNPSrr, VANDNPSrr,
ANDNPDrr, VANDNPDrr)>;

// Scheduling variant for the 256-bit FP logic instructions bound below.
// When ZeroIdiomPredicate holds, the instruction is modeled with
// JWriteZeroIdiomYmm; otherwise it falls back to the regular WriteFLogicY.
def JWriteFZeroIdiomY : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroIdiomYmm]>,
SchedVar<NoSchedPred, [WriteFLogicY]>
]>;
// Bind the variant to VXORP(S|D)Yrr and VANDNP(S|D)Yrr.
def : InstRW<[JWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr,
VANDNPSYrr, VANDNPDYrr)>;

def JWriteVZeroIdiomLogic : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<NoSchedPred, [WriteVecLogic]>
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/avx-schedule.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5489,10 +5489,10 @@ define void @test_avx256_zero_idioms() {
; BTVER2-LABEL: test_avx256_zero_idioms:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; BTVER2-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:1.00]
; BTVER2-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:1.00]
; BTVER2-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:1.00]
; BTVER2-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:1.00]
; BTVER2-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:0.50]
; BTVER2-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
; BTVER2-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:0.50]
; BTVER2-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:0.50]
; BTVER2-NEXT: #NO_APP
; BTVER2-NEXT: retq # sched: [4:1.00]
;
Expand Down
84 changes: 42 additions & 42 deletions llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms-avx-256.s
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ vandnps %ymm2, %ymm2, %ymm3

# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 300
# CHECK-NEXT: Total Cycles: 306
# CHECK-NEXT: Total Cycles: 304
# CHECK-NEXT: Total uOps: 600

# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 1.96
# CHECK-NEXT: IPC: 0.98
# CHECK-NEXT: uOps Per Cycle: 1.97
# CHECK-NEXT: IPC: 0.99
# CHECK-NEXT: Block RThroughput: 3.0

# CHECK: Instruction Info:
Expand All @@ -53,7 +53,7 @@ vandnps %ymm2, %ymm2, %ymm3

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 3 2.00 vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 2 1 1.00 vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2 1 0.50 vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2 1 1.00 vblendps $2, %ymm1, %ymm2, %ymm3

# CHECK: Resources:
Expand All @@ -74,27 +74,27 @@ vandnps %ymm2, %ymm2, %ymm3

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 3.00 3.00 3.00 3.00 - - - - - - -
# CHECK-NEXT: - - - 3.00 2.00 3.00 2.00 - - - - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: - - - - 2.00 - 2.00 - - - - - - - vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vblendps $2, %ymm1, %ymm2, %ymm3

# CHECK: Timeline view:
# CHECK-NEXT: 012
# CHECK-NEXT: 0123
# CHECK-NEXT: Index 0123456789

# CHECK: [0,0] DeeeER . . vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [0,1] .DeE-R . . vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [0,2] . DeE-R . . vblendps $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [1,0] . D=eeeER. . vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [1,1] . DeE--R. . vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [1,2] . D=eE-R . vblendps $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [2,0] . .DeeeER. vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [2,1] . . D=eER. vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [2,2] . . D=eER vblendps $2, %ymm1, %ymm2, %ymm3
# CHECK: [0,0] DeeeER . . vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [0,1] .DeE-R . . vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [0,2] . DeE-R . . vblendps $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [1,0] . DeeeER . . vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [1,1] . DeE-R . . vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [1,2] . DeE-R. . vblendps $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [2,0] . .D=eeeER. vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [2,1] . . DeE--R. vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [2,2] . . DeE--R vblendps $2, %ymm1, %ymm2, %ymm3

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
Expand All @@ -104,19 +104,19 @@ vandnps %ymm2, %ymm2, %ymm3

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 1.3 1.3 0.0 vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 1. 3 1.3 1.3 1.0 vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2. 3 1.7 0.3 0.7 vblendps $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 1. 3 1.0 1.0 1.3 vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2. 3 1.0 0.0 1.3 vblendps $2, %ymm1, %ymm2, %ymm3

# CHECK: [1] Code Region - ZERO-IDIOM-2

# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 300
# CHECK-NEXT: Total Cycles: 306
# CHECK-NEXT: Total Cycles: 304
# CHECK-NEXT: Total uOps: 600

# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 1.96
# CHECK-NEXT: IPC: 0.98
# CHECK-NEXT: uOps Per Cycle: 1.97
# CHECK-NEXT: IPC: 0.99
# CHECK-NEXT: Block RThroughput: 3.0

# CHECK: Instruction Info:
Expand All @@ -129,7 +129,7 @@ vandnps %ymm2, %ymm2, %ymm3

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 3 2.00 vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 2 1 1.00 vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2 1 0.50 vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2 1 1.00 vblendpd $2, %ymm1, %ymm2, %ymm3

# CHECK: Resources:
Expand All @@ -150,27 +150,27 @@ vandnps %ymm2, %ymm2, %ymm3

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 3.00 3.00 3.00 3.00 - - - - - - -
# CHECK-NEXT: - - - 3.00 2.00 3.00 2.00 - - - - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: - - - - 2.00 - 2.00 - - - - - - - vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vblendpd $2, %ymm1, %ymm2, %ymm3

# CHECK: Timeline view:
# CHECK-NEXT: 012
# CHECK-NEXT: 0123
# CHECK-NEXT: Index 0123456789

# CHECK: [0,0] DeeeER . . vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [0,1] .DeE-R . . vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [0,2] . DeE-R . . vblendpd $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [1,0] . D=eeeER. . vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [1,1] . DeE--R. . vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [1,2] . D=eE-R . vblendpd $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [2,0] . .DeeeER. vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [2,1] . . D=eER. vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [2,2] . . D=eER vblendpd $2, %ymm1, %ymm2, %ymm3
# CHECK: [0,0] DeeeER . . vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [0,1] .DeE-R . . vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [0,2] . DeE-R . . vblendpd $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [1,0] . DeeeER . . vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [1,1] . DeE-R . . vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [1,2] . DeE-R. . vblendpd $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [2,0] . .D=eeeER. vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [2,1] . . DeE--R. vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [2,2] . . DeE--R vblendpd $2, %ymm1, %ymm2, %ymm3

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
Expand All @@ -180,8 +180,8 @@ vandnps %ymm2, %ymm2, %ymm3

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 1.3 1.3 0.0 vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 1. 3 1.3 1.3 1.0 vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2. 3 1.7 0.3 0.7 vblendpd $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 1. 3 1.0 1.0 1.3 vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2. 3 1.0 0.0 1.3 vblendpd $2, %ymm1, %ymm2, %ymm3

# CHECK: [2] Code Region - ZERO-IDIOM-3

Expand All @@ -205,7 +205,7 @@ vandnps %ymm2, %ymm2, %ymm3

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 3 2.00 vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 1 1.00 vandnps %ymm2, %ymm2, %ymm3
# CHECK-NEXT: 2 1 0.50 vandnps %ymm2, %ymm2, %ymm3

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
Expand All @@ -225,12 +225,12 @@ vandnps %ymm2, %ymm2, %ymm3

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 2.00 2.00 2.00 2.00 - - - - - - -
# CHECK-NEXT: - - - 2.00 1.00 2.00 1.00 - - - - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - 2.00 - 2.00 - - - - - - - vandnps %ymm2, %ymm2, %ymm3
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vandnps %ymm2, %ymm2, %ymm3

# CHECK: Timeline view:
# CHECK-NEXT: Index 0123456789
Expand Down Expand Up @@ -274,7 +274,7 @@ vandnps %ymm2, %ymm2, %ymm3

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 3 2.00 vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 1 1.00 vandnps %ymm2, %ymm2, %ymm3
# CHECK-NEXT: 2 1 0.50 vandnps %ymm2, %ymm2, %ymm3

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
Expand All @@ -294,12 +294,12 @@ vandnps %ymm2, %ymm2, %ymm3

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 2.00 2.00 2.00 2.00 - - - - - - -
# CHECK-NEXT: - - - 2.00 1.00 2.00 1.00 - - - - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - 2.00 - 2.00 - - - - - - - vandnps %ymm2, %ymm2, %ymm3
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vandnps %ymm2, %ymm2, %ymm3

# CHECK: Timeline view:
# CHECK-NEXT: Index 0123456789
Expand Down

0 comments on commit 4cd5cf9

Please sign in to comment.