Skip to content

Commit

Permalink
[X86] Update the haswell and broadwell scheduler information for gather instructions
Browse files Browse the repository at this point in the history

Broadwell was missing half the gather instructions. Both models
had some mixups in the resource costs and number of uops.

I've updated them here based on what I think the original IACA source
says, with some cross-checking against the microcode.

I'm not sure about latency, as the IACA source I have doesn't have
that information, so I'm using the latency from uops.info.

I plan to update Skylake models as well, but I'll do that in a
separate patch.

Differential Revision: https://reviews.llvm.org/D73844
  • Loading branch information
topperc committed Feb 4, 2020
1 parent 9a40670 commit c7768ce
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 123 deletions.
42 changes: 15 additions & 27 deletions llvm/lib/Target/X86/X86SchedBroadwell.td
Original file line number Diff line number Diff line change
Expand Up @@ -1480,54 +1480,42 @@ def BWWriteResGroup182 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
def: InstRW<[BWWriteResGroup182], (instregex "DIVR_FI(16|32)m")>;

def BWWriteResGroup183_1 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
let Latency = 22;
let Latency = 17;
let NumMicroOps = 7;
let ResourceCycles = [1,3,2,1];
}
def: InstRW<[BWWriteResGroup183_1], (instrs VGATHERQPDrm)>;
def: InstRW<[BWWriteResGroup183_1], (instrs VGATHERDPDrm, VPGATHERDQrm,
VGATHERQPDrm, VPGATHERQQrm)>;

def BWWriteResGroup183_2 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
let Latency = 23;
let Latency = 18;
let NumMicroOps = 9;
let ResourceCycles = [1,3,4,1];
}
def: InstRW<[BWWriteResGroup183_2], (instrs VGATHERQPDYrm)>;
def: InstRW<[BWWriteResGroup183_2], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
VGATHERQPDYrm, VPGATHERQQYrm)>;

def BWWriteResGroup183_3 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
let Latency = 24;
let Latency = 19;
let NumMicroOps = 9;
let ResourceCycles = [1,5,2,1];
}
def: InstRW<[BWWriteResGroup183_3], (instrs VGATHERQPSYrm)>;
def: InstRW<[BWWriteResGroup183_3], (instrs VGATHERQPSrm, VPGATHERQDrm)>;

def BWWriteResGroup183_4 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
let Latency = 25;
let NumMicroOps = 7;
let ResourceCycles = [1,3,2,1];
let Latency = 19;
let NumMicroOps = 10;
let ResourceCycles = [1,4,4,1];
}
def: InstRW<[BWWriteResGroup183_4], (instrs VGATHERDPDrm,
VGATHERDPSrm)>;
def: InstRW<[BWWriteResGroup183_4], (instrs VGATHERDPSrm, VPGATHERDDrm,
VGATHERQPSYrm, VPGATHERQDYrm)>;

def BWWriteResGroup183_5 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
let Latency = 26;
let NumMicroOps = 9;
let ResourceCycles = [1,5,2,1];
}
def: InstRW<[BWWriteResGroup183_5], (instrs VGATHERDPDYrm)>;

def BWWriteResGroup183_6 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
let Latency = 26;
let Latency = 21;
let NumMicroOps = 14;
let ResourceCycles = [1,4,8,1];
}
def: InstRW<[BWWriteResGroup183_6], (instrs VGATHERDPSYrm)>;

def BWWriteResGroup183_7 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
let Latency = 27;
let NumMicroOps = 9;
let ResourceCycles = [1,5,2,1];
}
def: InstRW<[BWWriteResGroup183_7], (instrs VGATHERQPSrm)>;
def: InstRW<[BWWriteResGroup183_5], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;

def BWWriteResGroup185 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPort0156]> {
let Latency = 29;
Expand Down
74 changes: 27 additions & 47 deletions llvm/lib/Target/X86/X86SchedHaswell.td
Original file line number Diff line number Diff line change
Expand Up @@ -1785,75 +1785,55 @@ def HWWriteResGroup183 : SchedWriteRes<[HWPort0,HWPort1,HWPort4,HWPort5,HWPort6,
}
def: InstRW<[HWWriteResGroup183], (instrs FSTENVm)>;

def HWWriteResGroup184 : SchedWriteRes<[HWPort0, HWPort5, HWPort15, HWPort015, HWPort06, HWPort23]> {
let Latency = 26;
def HWWriteResGroup184 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 14;
let NumMicroOps = 12;
let ResourceCycles = [2,2,1,3,2,2];
}
def: InstRW<[HWWriteResGroup184], (instrs VGATHERDPDrm,
VPGATHERDQrm,
VPGATHERDDrm)>;

def HWWriteResGroup185 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
let Latency = 24;
let NumMicroOps = 22;
let ResourceCycles = [5,3,4,1,5,4];
let ResourceCycles = [2,2,2,1,3,2];
}
def: InstRW<[HWWriteResGroup185], (instrs VGATHERQPDYrm,
VPGATHERQQYrm)>;
def: InstRW<[HWWriteResGroup184], (instrs VGATHERDPDrm, VPGATHERDQrm)>;

def HWWriteResGroup186 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
let Latency = 28;
let NumMicroOps = 22;
let ResourceCycles = [5,3,4,1,5,4];
}
def: InstRW<[HWWriteResGroup186], (instrs VPGATHERQDYrm)>;

def HWWriteResGroup187 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
let Latency = 25;
let NumMicroOps = 22;
let ResourceCycles = [5,3,4,1,5,4];
def HWWriteResGroup185 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 17;
let NumMicroOps = 20;
let ResourceCycles = [3,3,4,1,5,4];
}
def: InstRW<[HWWriteResGroup187], (instrs VPGATHERQDrm)>;
def: InstRW<[HWWriteResGroup185], (instrs VGATHERDPDYrm, VPGATHERDQYrm)>;

def HWWriteResGroup188 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
let Latency = 27;
def HWWriteResGroup186 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 16;
let NumMicroOps = 20;
let ResourceCycles = [3,3,4,1,5,4];
}
def: InstRW<[HWWriteResGroup188], (instrs VGATHERDPDYrm,
VPGATHERDQYrm)>;
def: InstRW<[HWWriteResGroup186], (instrs VGATHERDPSrm, VPGATHERDDrm)>;

def HWWriteResGroup189 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
let Latency = 27;
def HWWriteResGroup187 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 22;
let NumMicroOps = 34;
let ResourceCycles = [5,3,8,1,9,8];
}
def: InstRW<[HWWriteResGroup189], (instrs VGATHERDPSYrm,
VPGATHERDDYrm)>;
def: InstRW<[HWWriteResGroup187], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;

def HWWriteResGroup190 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
let Latency = 23;
def HWWriteResGroup188 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 15;
let NumMicroOps = 14;
let ResourceCycles = [3,3,2,1,3,2];
}
def: InstRW<[HWWriteResGroup190], (instrs VGATHERQPDrm,
VPGATHERQQrm)>;
def: InstRW<[HWWriteResGroup188], (instrs VGATHERQPDrm, VPGATHERQQrm)>;

def HWWriteResGroup191 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
let Latency = 28;
let NumMicroOps = 15;
let ResourceCycles = [3,3,2,1,4,2];
def HWWriteResGroup189 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 17;
let NumMicroOps = 22;
let ResourceCycles = [5,3,4,1,5,4];
}
def: InstRW<[HWWriteResGroup191], (instrs VGATHERQPSYrm)>;
def: InstRW<[HWWriteResGroup189], (instrs VGATHERQPDYrm, VPGATHERQQYrm,
VGATHERQPSYrm, VPGATHERQDYrm)>;

def HWWriteResGroup192 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
let Latency = 25;
def HWWriteResGroup190 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 16;
let NumMicroOps = 15;
let ResourceCycles = [3,3,2,1,4,2];
}
def: InstRW<[HWWriteResGroup192], (instrs VGATHERQPSrm,
VGATHERDPSrm)>;
def: InstRW<[HWWriteResGroup190], (instrs VGATHERQPSrm, VPGATHERQDrm)>;

def: InstRW<[WriteZero], (instrs CLC)>;

Expand Down
56 changes: 28 additions & 28 deletions llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx2.s
Original file line number Diff line number Diff line change
Expand Up @@ -465,14 +465,14 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vbroadcastss %xmm0, %ymm0
# CHECK-NEXT: 1 3 1.00 vextracti128 $1, %ymm0, %xmm2
# CHECK-NEXT: 2 1 1.00 * vextracti128 $1, %ymm0, (%rax)
# CHECK-NEXT: 7 25 3.00 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 9 26 5.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: 7 25 3.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 14 26 4.00 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 7 22 3.00 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 9 23 3.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 9 27 5.00 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 9 24 5.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: 7 17 3.00 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 9 18 3.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: 10 19 4.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 14 21 4.00 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 7 17 3.00 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 9 18 3.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 9 19 5.00 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 10 19 4.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: 1 3 1.00 vinserti128 $1, %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 2 6 0.50 * vinserti128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 6 0.50 * vmovntdqa (%rax), %ymm0
Expand Down Expand Up @@ -568,14 +568,14 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 9 1.00 * vpermps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vpermq $1, %ymm0, %ymm2
# CHECK-NEXT: 2 9 1.00 * vpermq $1, (%rax), %ymm2
# CHECK-NEXT: 1 5 0.50 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 5 0.50 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 1 5 0.50 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 5 0.50 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: 1 5 0.50 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 5 0.50 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: 1 5 0.50 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 5 0.50 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 10 19 4.00 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 14 21 4.00 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 7 17 3.00 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 9 18 3.00 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: 9 19 5.00 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 10 19 4.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: 7 17 3.00 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 9 18 3.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 3 3 2.00 vphaddd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 4 9 2.00 * vphaddd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 3 3 2.00 vphaddsw %ymm0, %ymm1, %ymm2
Expand Down Expand Up @@ -776,7 +776,7 @@ vpxor (%rax), %ymm1, %ymm2

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - - 94.67 58.67 85.67 85.67 13.00 237.67 2.00 1.67
# CHECK-NEXT: - - 96.67 60.67 99.67 99.67 21.00 266.67 4.00 1.67

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
Expand All @@ -786,13 +786,13 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vextracti128 $1, %ymm0, %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextracti128 $1, %ymm0, (%rax)
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 5.25 0.25 - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 3.25 0.25 - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 4.25 0.25 - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 4.00 4.00 1.00 4.25 0.25 - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 3.25 0.25 - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 5.25 0.25 - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 5.25 0.25 - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 4.25 0.25 - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - vinserti128 $1, %xmm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vinserti128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vmovntdqa (%rax), %ymm0
Expand Down Expand Up @@ -888,14 +888,14 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vpermq $1, %ymm0, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq $1, (%rax), %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 4.25 0.25 - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 4.00 4.00 1.00 4.25 0.25 - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 3.25 0.25 - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 5.25 0.25 - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 4.25 0.25 - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 3.25 0.25 - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - - 0.50 - - - 2.50 - - vphaddd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 2.50 - - vphaddd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 - - - 2.50 - - vphaddsw %ymm0, %ymm1, %ymm2
Expand Down
Loading

0 comments on commit c7768ce

Please sign in to comment.