Skip to content

Commit

Permalink
[X86] SkylakeServerModel - conversion instructions don't use Port015
Browse files Browse the repository at this point in the history
Fixes a lot of throughput mismatches: the more complicated conversion instructions use SKXPort5+SKXPort01, not SKXPort5+SKXPort015 (SKXPort015 is mainly used for basic logic and blend ops).

Fixing this should allow us to remove a lot of unnecessary scheduler overrides from SkylakeServerModel

Confirmed against both Agner Fog's instruction tables and uops.info.
  • Loading branch information
RKSimon committed Nov 12, 2022
1 parent b31a5d7 commit 07c8f3d
Show file tree
Hide file tree
Showing 8 changed files with 288 additions and 288 deletions.
24 changes: 12 additions & 12 deletions llvm/lib/Target/X86/X86SchedSkylakeServer.td
Original file line number Diff line number Diff line change
Expand Up @@ -1048,7 +1048,7 @@ def SKXWriteResGroup57 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort0156]> {
}
def: InstRW<[SKXWriteResGroup57], (instregex "LAR(16|32|64)rr")>;

def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort015]> {
def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort01]> {
let Latency = 5;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
Expand Down Expand Up @@ -1093,7 +1093,7 @@ def SKXWriteResGroup63 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06]> {
}
def: InstRW<[SKXWriteResGroup63], (instregex "STR(16|32|64)r")>;

def SKXWriteResGroup65 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort015]> {
def SKXWriteResGroup65 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort01]> {
let Latency = 5;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
Expand Down Expand Up @@ -1221,7 +1221,7 @@ def SKXWriteResGroup81 : SchedWriteRes<[SKXPort23,SKXPort0156]> {
def: InstRW<[SKXWriteResGroup81], (instrs POP16r, POP32r, POP64r)>;
def: InstRW<[SKXWriteResGroup81], (instregex "POP(16|32|64)rmr")>;

def SKXWriteResGroup82 : SchedWriteRes<[SKXPort5,SKXPort015]> {
def SKXWriteResGroup82 : SchedWriteRes<[SKXPort5,SKXPort01]> {
let Latency = 6;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
Expand Down Expand Up @@ -1304,7 +1304,7 @@ def: InstRW<[SKXWriteResGroup92a], (instregex "(V?)PMOV(SX|ZX)BDrm",
"(V?)PMOV(SX|ZX)WDrm",
"(V?)PMOV(SX|ZX)WQrm")>;

def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort015]> {
def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort01]> {
let Latency = 7;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
Expand Down Expand Up @@ -1407,7 +1407,7 @@ def SKXWriteResGroup99 : SchedWriteRes<[SKXPort23,SKXPort0156]> {
def: InstRW<[SKXWriteResGroup99], (instrs LEAVE, LEAVE64,
SCASB, SCASL, SCASQ, SCASW)>;

def SKXWriteResGroup100 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort015]> {
def SKXWriteResGroup100 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort01]> {
let Latency = 7;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
Expand Down Expand Up @@ -1726,7 +1726,7 @@ def: InstRW<[SKXWriteResGroup136_2], (instregex "VCMP(PD|PS)Z128rm(b?)i",
"VPTESTNMQZ128rm(b?)",
"VPTESTNMWZ128rm(b?)")>;

def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort015]> {
def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort01]> {
let Latency = 9;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
Expand Down Expand Up @@ -1793,7 +1793,7 @@ def: InstRW<[SKXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i",
"VPTESTNM(B|D|Q|W)Z256rm(b?)",
"VPTESTNM(B|D|Q|W)Zrm(b?)")>;

def SKXWriteResGroup149 : SchedWriteRes<[SKXPort23,SKXPort015]> {
def SKXWriteResGroup149 : SchedWriteRes<[SKXPort23,SKXPort01]> {
let Latency = 10;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
Expand Down Expand Up @@ -1858,7 +1858,7 @@ def SKXWriteResGroup160 : SchedWriteRes<[SKXPort0,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup160], (instregex "MUL_F(32|64)m")>;

def SKXWriteResGroup161 : SchedWriteRes<[SKXPort23,SKXPort015]> {
def SKXWriteResGroup161 : SchedWriteRes<[SKXPort23,SKXPort01]> {
let Latency = 11;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
Expand Down Expand Up @@ -1899,7 +1899,7 @@ def SKXWriteResGroup164 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup164], (instregex "(V?)CVTDQ2PDrm")>;

def SKXWriteResGroup166 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
def SKXWriteResGroup166 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort01]> {
let Latency = 11;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
Expand Down Expand Up @@ -1959,15 +1959,15 @@ def SKXWriteResGroup175 : SchedWriteRes<[SKXPort5,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup175], (instregex "VPERMWZ128rm(b?)")>;

def SKXWriteResGroup176 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> {
def SKXWriteResGroup176 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort01]> {
let Latency = 12;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup176], (instregex "VCVT(T?)SD2USIZrm(b?)",
"VCVT(T?)SS2USI64Zrm(b?)")>;

def SKXWriteResGroup177 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
def SKXWriteResGroup177 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort01]> {
let Latency = 12;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
Expand Down Expand Up @@ -2006,7 +2006,7 @@ def SKXWriteResGroup187 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup187], (instregex "MUL_FI(16|32)m")>;

def SKXWriteResGroup188 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
def SKXWriteResGroup188 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort01]> {
let Latency = 14;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
Expand Down
46 changes: 23 additions & 23 deletions llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
Original file line number Diff line number Diff line change
Expand Up @@ -1736,7 +1736,7 @@ vzeroupper

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - 126.00 318.58 196.58 173.17 173.17 34.00 337.58 6.25 12.67
# CHECK-NEXT: - 126.00 322.25 200.25 173.17 173.17 34.00 330.25 6.25 12.67

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
Expand Down Expand Up @@ -1829,59 +1829,59 @@ vzeroupper
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtdq2pd %xmm0, %ymm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2ps (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtdq2ps (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2ps %ymm0, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2ps (%rax), %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtdq2ps (%rax), %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %ymm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2dq %ymm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %ymm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psy (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2dq (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %ymm0, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm0, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2dq (%rax), %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2pd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2pd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2pd %xmm0, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2pd (%rax), %ymm2
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %ecx
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %rcx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %ecx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsd2ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtsd2ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sd %ecx, %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sd %rcx, %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsi2sd %ecx, %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsi2sd %rcx, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2sdl (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2sdq (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2ss %ecx, %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vcvtsi2ss %rcx, %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsi2ss %ecx, %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vcvtsi2ss %rcx, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2ssl (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2ssq (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtss2sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtss2sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtss2sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtss2sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - vcvtss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - vcvtss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtss2si (%rax), %ecx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtss2si (%rax), %rcx
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %ymm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvttpd2dq %ymm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttps2dq (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %ymm0, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttps2dq (%rax), %ymm2
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvttsd2si %xmm0, %ecx
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvttsd2si %xmm0, %rcx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvttsd2si (%rax), %ecx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvttsd2si (%rax), %rcx
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvttss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - vcvttss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - vcvttss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvttss2si (%rax), %ecx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvttss2si (%rax), %rcx
# CHECK-NEXT: - 4.00 1.00 - - - - - - - vdivpd %xmm0, %xmm1, %xmm2
Expand Down

0 comments on commit 07c8f3d

Please sign in to comment.