Skip to content

Commit

Permalink
[X86][AVX2] Tag VPMOVSX/VPMOVZX ymm instructions as WriteShuffle256
Browse files Browse the repository at this point in the history
The ymm forms of VPMOVSX/VPMOVZX behave more like cross-lane shuffles than regular shuffles, so they are now tagged as WriteShuffle256 - we already tag the AVX512 equivalents this way.

Differential Revision: https://reviews.llvm.org/D46229

llvm-svn: 331659
  • Loading branch information
RKSimon committed May 7, 2018
1 parent f64f345 commit e480ed0
Show file tree
Hide file tree
Showing 9 changed files with 80 additions and 141 deletions.
17 changes: 10 additions & 7 deletions llvm/lib/Target/X86/X86InstrSSE.td
Expand Up @@ -4881,26 +4881,29 @@ multiclass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
Sched<[sched.Folded]>;
}
// FIXME: YMM cases should use SchedWriteShuffle.YMM.

multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
X86MemOperand MemOp, X86MemOperand MemYOp,
X86SchedWriteWidths sched, Predicate prd> {
defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, sched.XMM>;
Predicate prd> {
defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128,
SchedWriteShuffle.XMM>;
let Predicates = [HasAVX, prd] in
defm V#NAME : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
VR128, VR128, sched.XMM>, VEX, VEX_WIG;
VR128, VR128, SchedWriteShuffle.XMM>,
VEX, VEX_WIG;
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
VR256, VR128, sched.XMM>, VEX, VEX_L, VEX_WIG;
VR256, VR128, WriteShuffle256>,
VEX, VEX_L, VEX_WIG;
}

multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
X86MemOperand MemYOp, Predicate prd> {
defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovsx", OpcodeStr),
MemOp, MemYOp, SchedWriteShuffle, prd>;
MemOp, MemYOp, prd>;
defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10),
!strconcat("pmovzx", OpcodeStr),
MemOp, MemYOp, SchedWriteShuffle, prd>;
MemOp, MemYOp, prd>;
}

defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem, NoVLX_Or_NoBWI>;
Expand Down
14 changes: 1 addition & 13 deletions llvm/lib/Target/X86/X86SchedBroadwell.td
Expand Up @@ -687,19 +687,7 @@ def BWWriteResGroup28 : SchedWriteRes<[BWPort5]> {
let ResourceCycles = [1];
}
def: InstRW<[BWWriteResGroup28], (instregex "VPBROADCASTBrr",
"VPBROADCASTWrr",
"VPMOVSXBDYrr",
"VPMOVSXBQYrr",
"VPMOVSXBWYrr",
"VPMOVSXDQYrr",
"VPMOVSXWDYrr",
"VPMOVSXWQYrr",
"VPMOVZXBDYrr",
"VPMOVZXBQYrr",
"VPMOVZXBWYrr",
"VPMOVZXDQYrr",
"VPMOVZXWDYrr",
"VPMOVZXWQYrr")>;
"VPBROADCASTWrr")>;

def BWWriteResGroup30 : SchedWriteRes<[BWPort0156]> {
let Latency = 2;
Expand Down
25 changes: 1 addition & 24 deletions llvm/lib/Target/X86/X86SchedHaswell.td
Expand Up @@ -1287,19 +1287,7 @@ def HWWriteResGroup51 : SchedWriteRes<[HWPort5]> {
let ResourceCycles = [1];
}
def: InstRW<[HWWriteResGroup51], (instregex "VPBROADCASTBrr",
"VPBROADCASTWrr",
"VPMOVSXBDYrr",
"VPMOVSXBQYrr",
"VPMOVSXBWYrr",
"VPMOVSXDQYrr",
"VPMOVSXWDYrr",
"VPMOVSXWQYrr",
"VPMOVZXBDYrr",
"VPMOVZXBQYrr",
"VPMOVZXBWYrr",
"VPMOVZXDQYrr",
"VPMOVZXWDYrr",
"VPMOVZXWQYrr")>;
"VPBROADCASTWrr")>;

def HWWriteResGroup52 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 9;
Expand All @@ -1320,17 +1308,6 @@ def: InstRW<[HWWriteResGroup52_1], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"VCVTPS2DQYrm",
"VCVTTPS2DQYrm")>;

def HWWriteResGroup53 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 10;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup53], (instregex "VPMOVZXBDYrm",
"VPMOVZXBQYrm",
"VPMOVZXBWYrm",
"VPMOVZXDQYrm",
"VPMOVZXWQYrm")>;

def HWWriteResGroup53_1 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 9;
let NumMicroOps = 2;
Expand Down
21 changes: 2 additions & 19 deletions llvm/lib/Target/X86/X86SchedSkylakeClient.td
Expand Up @@ -731,19 +731,7 @@ def: InstRW<[SKLWriteResGroup30], (instregex "(ADD|SUB|SUBR)_FPrST0",
"(ADD|SUB|SUBR)_FrST0",
"VPBROADCASTBrr",
"VPBROADCASTWrr",
"(V?)PCMPGTQ(Y?)rr",
"VPMOVSXBDYrr",
"VPMOVSXBQYrr",
"VPMOVSXBWYrr",
"VPMOVSXDQYrr",
"VPMOVSXWDYrr",
"VPMOVSXWQYrr",
"VPMOVZXBDYrr",
"VPMOVZXBQYrr",
"VPMOVZXBWYrr",
"VPMOVZXDQYrr",
"VPMOVZXWDYrr",
"VPMOVZXWQYrr")>;
"(V?)PCMPGTQ(Y?)rr")>;

def SKLWriteResGroup31 : SchedWriteRes<[SKLPort0,SKLPort5]> {
let Latency = 3;
Expand Down Expand Up @@ -1558,12 +1546,7 @@ def SKLWriteResGroup133 : SchedWriteRes<[SKLPort5,SKLPort23]> {
}
def: InstRW<[SKLWriteResGroup133], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m",
"VPCMPGTQYrm",
"VPMOVZXBDYrm",
"VPMOVZXBQYrm",
"VPMOVZXBWYrm",
"VPMOVZXDQYrm",
"VPMOVZXWQYrm")>;
"VPCMPGTQYrm")>;

def SKLWriteResGroup134 : SchedWriteRes<[SKLPort01,SKLPort23]> {
let Latency = 10;
Expand Down
17 changes: 0 additions & 17 deletions llvm/lib/Target/X86/X86SchedSkylakeServer.td
Expand Up @@ -1062,18 +1062,6 @@ def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_FPrST0",
"VPMINUQZ128rr",
"VPMINUQZ256rr",
"VPMINUQZrr",
"VPMOVSXBDYrr",
"VPMOVSXBQYrr",
"VPMOVSXBWYrr",
"VPMOVSXDQYrr",
"VPMOVSXWDYrr",
"VPMOVSXWQYrr",
"VPMOVZXBDYrr",
"VPMOVZXBQYrr",
"VPMOVZXBWYrr",
"VPMOVZXDQYrr",
"VPMOVZXWDYrr",
"VPMOVZXWQYrr",
"VPSADBWZrr", // TODO: 512-bit ops require ports 0/1 to be joined.
"VPTESTMBZ128rr",
"VPTESTMBZ256rr",
Expand Down Expand Up @@ -2603,11 +2591,6 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"VPMINSQZrm(b?)",
"VPMINUQZ256rm(b?)",
"VPMINUQZrm(b?)",
"VPMOVZXBDYrm",
"VPMOVZXBQYrm",
"VPMOVZXBWYrm",
"VPMOVZXDQYrm",
"VPMOVZXWQYrm",
"VPTESTMBZ256rm(b?)",
"VPTESTMBZrm(b?)",
"VPTESTMDZ256rm(b?)",
Expand Down
9 changes: 7 additions & 2 deletions llvm/lib/Target/X86/X86ScheduleZnver1.td
Expand Up @@ -940,15 +940,20 @@ def ZnWriteFPU12Y : SchedWriteRes<[ZnFPU12]> {
let NumMicroOps = 2;
}
def ZnWriteFPU12m : SchedWriteRes<[ZnAGU, ZnFPU12]> ;
def ZnWriteFPU12Ym : SchedWriteRes<[ZnAGU, ZnFPU12]> {
let Latency = 8;
let NumMicroOps = 2;
}

def : InstRW<[ZnWriteFPU12], (instregex "MMX_PACKSSDWirr",
"MMX_PACKSSWBirr", "MMX_PACKUSWBirr")>;
def : InstRW<[ZnWriteFPU12m], (instregex "MMX_PACKSSDWirm",
"MMX_PACKSSWBirm", "MMX_PACKUSWBirm")>;

// VPMOVSX/ZX BW BD BQ DW DQ.
// VPMOVSX/ZX BW BD BQ WD WQ DQ.
// y <- x.
def : InstRW<[ZnWriteFPU12Y], (instregex "VPMOV(SX|ZX)(BW|BQ|DW|DQ)Yrr")>;
def : InstRW<[ZnWriteFPU12Y], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrr")>;
def : InstRW<[ZnWriteFPU12Ym], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrm")>;

def ZnWriteFPU013 : SchedWriteRes<[ZnFPU013]> ;
def ZnWriteFPU013Y : SchedWriteRes<[ZnFPU013]> {
Expand Down

0 comments on commit e480ed0

Please sign in to comment.