diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 5eb893a82fcc7..e1fe2b680b96a 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1624,19 +1624,19 @@ multiclass avx512_perm_i_sizes opc, string OpcodeStr, X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo, AVX512VLVectorVTInfo ShuffleMask> { - defm NAME: avx512_perm_i, - avx512_perm_i_mb, EVEX_V512; + defm NAME#Z: avx512_perm_i, + avx512_perm_i_mb, EVEX_V512; let Predicates = [HasVLX] in { - defm NAME#128: avx512_perm_i, - avx512_perm_i_mb, EVEX_V128; - defm NAME#256: avx512_perm_i, - avx512_perm_i_mb, EVEX_V256; + defm NAME#Z128: avx512_perm_i, + avx512_perm_i_mb, EVEX_V128; + defm NAME#Z256: avx512_perm_i, + avx512_perm_i_mb, EVEX_V256; } } @@ -1646,13 +1646,13 @@ multiclass avx512_perm_i_sizes_bw opc, string OpcodeStr, AVX512VLVectorVTInfo Idx, Predicate Prd> { let Predicates = [Prd] in - defm NAME: avx512_perm_i, EVEX_V512; + defm NAME#Z: avx512_perm_i, EVEX_V512; let Predicates = [Prd, HasVLX] in { - defm NAME#128: avx512_perm_i, EVEX_V128; - defm NAME#256: avx512_perm_i, EVEX_V256; + defm NAME#Z128: avx512_perm_i, EVEX_V128; + defm NAME#Z256: avx512_perm_i, EVEX_V256; } } @@ -1702,9 +1702,9 @@ multiclass avx512_perm_i_lowering; -defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>; -defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>; +defm : avx512_perm_i_lowering<"VPERMI2PSZ", v16f32_info, v16i32_info, v8i64_info>; +defm : avx512_perm_i_lowering<"VPERMI2PSZ256", v8f32x_info, v8i32x_info, v4i64x_info>; +defm : avx512_perm_i_lowering<"VPERMI2PSZ128", v4f32x_info, v4i32x_info, v2i64x_info>; // VPERMT2 multiclass avx512_perm_t opc, string OpcodeStr, @@ -1743,19 +1743,19 @@ multiclass avx512_perm_t_sizes opc, string OpcodeStr, X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo, AVX512VLVectorVTInfo ShuffleMask> { - defm NAME: avx512_perm_t, - avx512_perm_t_mb, EVEX_V512; + defm NAME#Z: avx512_perm_t, + avx512_perm_t_mb, EVEX_V512; let Predicates = [HasVLX] in { - defm NAME#128: avx512_perm_t, - avx512_perm_t_mb, EVEX_V128; - defm NAME#256: avx512_perm_t, - avx512_perm_t_mb, EVEX_V256; + defm NAME#Z128: avx512_perm_t, + avx512_perm_t_mb, EVEX_V128; + defm NAME#Z256: avx512_perm_t, + avx512_perm_t_mb, EVEX_V256; } } @@ -1764,13 +1764,13 @@ multiclass avx512_perm_t_sizes_bw opc, string OpcodeStr, AVX512VLVectorVTInfo VTInfo, AVX512VLVectorVTInfo Idx, Predicate Prd> { let Predicates = [Prd] in - defm NAME: avx512_perm_t, EVEX_V512; + defm NAME#Z: avx512_perm_t, EVEX_V512; let Predicates = [Prd, HasVLX] in { - defm NAME#128: avx512_perm_t, EVEX_V128; - defm NAME#256: avx512_perm_t, EVEX_V256; + defm NAME#Z128: avx512_perm_t, EVEX_V128; + defm NAME#Z256: avx512_perm_t, EVEX_V256; } } diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 7b607c6f198c7..bc2d5ed1e17dd 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -2138,45 +2138,45 @@ static void commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1, // commuted. static bool isCommutableVPERMV3Instruction(unsigned Opcode) { #define VPERM_CASES(Suffix) \ - case X86::VPERMI2##Suffix##128rr: \ - case X86::VPERMT2##Suffix##128rr: \ - case X86::VPERMI2##Suffix##256rr: \ - case X86::VPERMT2##Suffix##256rr: \ - case X86::VPERMI2##Suffix##rr: \ - case X86::VPERMT2##Suffix##rr: \ - case X86::VPERMI2##Suffix##128rm: \ - case X86::VPERMT2##Suffix##128rm: \ - case X86::VPERMI2##Suffix##256rm: \ - case X86::VPERMT2##Suffix##256rm: \ - case X86::VPERMI2##Suffix##rm: \ - case X86::VPERMT2##Suffix##rm: \ - case X86::VPERMI2##Suffix##128rrkz: \ - case X86::VPERMT2##Suffix##128rrkz: \ - case X86::VPERMI2##Suffix##256rrkz: \ - case X86::VPERMT2##Suffix##256rrkz: \ - case X86::VPERMI2##Suffix##rrkz: \ - case X86::VPERMT2##Suffix##rrkz: \ - case X86::VPERMI2##Suffix##128rmkz: \ - case X86::VPERMT2##Suffix##128rmkz: \ - case X86::VPERMI2##Suffix##256rmkz: \ - case X86::VPERMT2##Suffix##256rmkz: \ - case X86::VPERMI2##Suffix##rmkz: \ - case X86::VPERMT2##Suffix##rmkz: + case X86::VPERMI2##Suffix##Z128rr: \ + case X86::VPERMT2##Suffix##Z128rr: \ + case X86::VPERMI2##Suffix##Z256rr: \ + case X86::VPERMT2##Suffix##Z256rr: \ + case X86::VPERMI2##Suffix##Zrr: \ + case X86::VPERMT2##Suffix##Zrr: \ + case X86::VPERMI2##Suffix##Z128rm: \ + case X86::VPERMT2##Suffix##Z128rm: \ + case X86::VPERMI2##Suffix##Z256rm: \ + case X86::VPERMT2##Suffix##Z256rm: \ + case X86::VPERMI2##Suffix##Zrm: \ + case X86::VPERMT2##Suffix##Zrm: \ + case X86::VPERMI2##Suffix##Z128rrkz: \ + case X86::VPERMT2##Suffix##Z128rrkz: \ + case X86::VPERMI2##Suffix##Z256rrkz: \ + case X86::VPERMT2##Suffix##Z256rrkz: \ + case X86::VPERMI2##Suffix##Zrrkz: \ + case X86::VPERMT2##Suffix##Zrrkz: \ + case X86::VPERMI2##Suffix##Z128rmkz: \ + case X86::VPERMT2##Suffix##Z128rmkz: \ + case X86::VPERMI2##Suffix##Z256rmkz: \ + case X86::VPERMT2##Suffix##Z256rmkz: \ + case X86::VPERMI2##Suffix##Zrmkz: \ + case X86::VPERMT2##Suffix##Zrmkz: #define VPERM_CASES_BROADCAST(Suffix) \ VPERM_CASES(Suffix) \ - case X86::VPERMI2##Suffix##128rmb: \ - case X86::VPERMT2##Suffix##128rmb: \ - case X86::VPERMI2##Suffix##256rmb: \ - case X86::VPERMT2##Suffix##256rmb: \ - case X86::VPERMI2##Suffix##rmb: \ - case X86::VPERMT2##Suffix##rmb: \ - case X86::VPERMI2##Suffix##128rmbkz: \ - case X86::VPERMT2##Suffix##128rmbkz: \ - case X86::VPERMI2##Suffix##256rmbkz: \ - case X86::VPERMT2##Suffix##256rmbkz: \ - case X86::VPERMI2##Suffix##rmbkz: \ - case X86::VPERMT2##Suffix##rmbkz: + case X86::VPERMI2##Suffix##Z128rmb: \ + case X86::VPERMT2##Suffix##Z128rmb: \ + case X86::VPERMI2##Suffix##Z256rmb: \ + case X86::VPERMT2##Suffix##Z256rmb: \ + case X86::VPERMI2##Suffix##Zrmb: \ + case X86::VPERMT2##Suffix##Zrmb: \ + case X86::VPERMI2##Suffix##Z128rmbkz: \ + case X86::VPERMT2##Suffix##Z128rmbkz: \ + case X86::VPERMI2##Suffix##Z256rmbkz: \ + case X86::VPERMT2##Suffix##Z256rmbkz: \ + case X86::VPERMI2##Suffix##Zrmbkz: \ + case X86::VPERMT2##Suffix##Zrmbkz: switch (Opcode) { default: @@ -2197,45 +2197,45 @@ static bool isCommutableVPERMV3Instruction(unsigned Opcode) { // from the I opcode to the T opcode and vice versa. static unsigned getCommutedVPERMV3Opcode(unsigned Opcode) { #define VPERM_CASES(Orig, New) \ - case X86::Orig##128rr: \ - return X86::New##128rr; \ - case X86::Orig##128rrkz: \ - return X86::New##128rrkz; \ - case X86::Orig##128rm: \ - return X86::New##128rm; \ - case X86::Orig##128rmkz: \ - return X86::New##128rmkz; \ - case X86::Orig##256rr: \ - return X86::New##256rr; \ - case X86::Orig##256rrkz: \ - return X86::New##256rrkz; \ - case X86::Orig##256rm: \ - return X86::New##256rm; \ - case X86::Orig##256rmkz: \ - return X86::New##256rmkz; \ - case X86::Orig##rr: \ - return X86::New##rr; \ - case X86::Orig##rrkz: \ - return X86::New##rrkz; \ - case X86::Orig##rm: \ - return X86::New##rm; \ - case X86::Orig##rmkz: \ - return X86::New##rmkz; + case X86::Orig##Z128rr: \ + return X86::New##Z128rr; \ + case X86::Orig##Z128rrkz: \ + return X86::New##Z128rrkz; \ + case X86::Orig##Z128rm: \ + return X86::New##Z128rm; \ + case X86::Orig##Z128rmkz: \ + return X86::New##Z128rmkz; \ + case X86::Orig##Z256rr: \ + return X86::New##Z256rr; \ + case X86::Orig##Z256rrkz: \ + return X86::New##Z256rrkz; \ + case X86::Orig##Z256rm: \ + return X86::New##Z256rm; \ + case X86::Orig##Z256rmkz: \ + return X86::New##Z256rmkz; \ + case X86::Orig##Zrr: \ + return X86::New##Zrr; \ + case X86::Orig##Zrrkz: \ + return X86::New##Zrrkz; \ + case X86::Orig##Zrm: \ + return X86::New##Zrm; \ + case X86::Orig##Zrmkz: \ + return X86::New##Zrmkz; #define VPERM_CASES_BROADCAST(Orig, New) \ VPERM_CASES(Orig, New) \ - case X86::Orig##128rmb: \ - return X86::New##128rmb; \ - case X86::Orig##128rmbkz: \ - return X86::New##128rmbkz; \ - case X86::Orig##256rmb: \ - return X86::New##256rmb; \ - case X86::Orig##256rmbkz: \ - return X86::New##256rmbkz; \ - case X86::Orig##rmb: \ - return X86::New##rmb; \ - case X86::Orig##rmbkz: \ - return X86::New##rmbkz; + case X86::Orig##Z128rmb: \ + return X86::New##Z128rmb; \ + case X86::Orig##Z128rmbkz: \ + return X86::New##Z128rmbkz; \ + case X86::Orig##Z256rmb: \ + return X86::New##Z256rmb; \ + case X86::Orig##Z256rmbkz: \ + return X86::New##Z256rmbkz; \ + case X86::Orig##Zrmb: \ + return X86::New##Zrmb; \ + case X86::Orig##Zrmbkz: \ + return X86::New##Zrmbkz; switch (Opcode) { VPERM_CASES(VPERMI2B, VPERMT2B) diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td index e27af1433d455..2c660fad2ec7d 100644 --- a/llvm/lib/Target/X86/X86SchedIceLake.td +++ b/llvm/lib/Target/X86/X86SchedIceLake.td @@ -1392,12 +1392,12 @@ def ICXWriteResGroup97 : SchedWriteRes<[ICXPort5,ICXPort015]> { let NumMicroOps = 3; let ReleaseAtCycles = [2,1]; } -def: InstRW<[ICXWriteResGroup97], (instregex "VPERMI2W128rr", - "VPERMI2W256rr", - "VPERMI2Wrr", - "VPERMT2W128rr", - "VPERMT2W256rr", - "VPERMT2Wrr")>; +def: InstRW<[ICXWriteResGroup97], (instregex "VPERMI2WZ128rr", + "VPERMI2WZ256rr", + "VPERMI2WZrr", + "VPERMT2WZ128rr", + "VPERMT2WZ256rr", + "VPERMT2WZrr")>; def ICXWriteResGroup99 : SchedWriteRes<[ICXPort23,ICXPort0156]> { let Latency = 7; @@ -1692,14 +1692,14 @@ def: InstRW<[ICXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i", "VFPCLASSSDZrm(b?)", "VFPCLASSSSZrm(b?)", "(V?)PCMPGTQrm", - "VPERMI2D128rm(b?)", - "VPERMI2PD128rm(b?)", - "VPERMI2PS128rm(b?)", - "VPERMI2Q128rm(b?)", - "VPERMT2D128rm(b?)", - "VPERMT2PD128rm(b?)", - "VPERMT2PS128rm(b?)", - "VPERMT2Q128rm(b?)", + "VPERMI2DZ128rm(b?)", + "VPERMI2PDZ128rm(b?)", + "VPERMI2PSZ128rm(b?)", + "VPERMI2QZ128rm(b?)", + "VPERMT2DZ128rm(b?)", + "VPERMT2PDZ128rm(b?)", + "VPERMT2PSZ128rm(b?)", + "VPERMT2QZ128rm(b?)", "VPMAXSQZ128rm(b?)", "VPMAXUQZ128rm(b?)", "VPMINSQZ128rm(b?)", @@ -2002,8 +2002,8 @@ def ICXWriteResGroup183 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> { let NumMicroOps = 4; let ReleaseAtCycles = [2,1,1]; } -def: InstRW<[ICXWriteResGroup183], (instregex "VPERMI2W128rm(b?)", - "VPERMT2W128rm(b?)")>; +def: InstRW<[ICXWriteResGroup183], (instregex "VPERMI2WZ128rm(b?)", + "VPERMT2WZ128rm(b?)")>; def ICXWriteResGroup187 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> { let Latency = 14; @@ -2029,10 +2029,10 @@ def ICXWriteResGroup189 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> { let NumMicroOps = 4; let ReleaseAtCycles = [2,1,1]; } -def: InstRW<[ICXWriteResGroup189], (instregex "VPERMI2W256rm(b?)", - "VPERMI2Wrm(b?)", - "VPERMT2W256rm(b?)", - "VPERMT2Wrm(b?)")>; +def: InstRW<[ICXWriteResGroup189], (instregex "VPERMI2WZ256rm(b?)", + "VPERMI2WZrm(b?)", + "VPERMT2WZ256rm(b?)", + "VPERMT2WZrm(b?)")>; def ICXWriteResGroup190 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort15,ICXPort0156]> { let Latency = 14; diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td index 4eac53385ae54..bf9e4b7dc6d9a 100644 --- a/llvm/lib/Target/X86/X86SchedSapphireRapids.td +++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td @@ -635,10 +635,10 @@ def : InstRW<[SPRWriteResGroup10, ReadAfterVecXLd], (instregex "^(V?)PACK(S|U)S( "^VPMULTISHIFTQBZ128rm(b?)$")>; def : InstRW<[SPRWriteResGroup10, ReadAfterVecXLd], (instrs VFPCLASSPHZ128rm)>; def : InstRW<[SPRWriteResGroup10, ReadAfterVecYLd], (instregex "^VFPCLASSP(D|H|S)Z((256)?)rm$", - "^VPERM(I|T)2(D|Q|PS)128rm((b|k|bk|kz)?)$", - "^VPERM(I|T)2(D|Q|PS)128rmbkz$", - "^VPERM(I|T)2PD128rm((b|k|bk|kz)?)$", - "^VPERM(I|T)2PD128rmbkz$")>; + "^VPERM(I|T)2(D|Q|PS)Z128rm((b|k|bk|kz)?)$", + "^VPERM(I|T)2(D|Q|PS)Z128rmbkz$", + "^VPERM(I|T)2PDZ128rm((b|k|bk|kz)?)$", + "^VPERM(I|T)2PDZ128rmbkz$")>; def : InstRW<[SPRWriteResGroup10, ReadAfterVecYLd], (instrs VPERMBZ128rm)>; def SPRWriteResGroup11 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { @@ -678,8 +678,8 @@ def : InstRW<[SPRWriteResGroup12], (instregex "^ADD_F(P?)rST0$", "^VPERM(B|D|Q)Zrr$", "^VPERM(D|Q)Z256rr((k|kz)?)$", "^VPERM(D|Q)Zrrk(z?)$", - "^VPERM(I|T)2(D|Q)(128|256)rr((k|kz)?)$", - "^VPERM(I|T)2(D|Q)rr((k|kz)?)$", + "^VPERM(I|T)2(D|Q)Z(128|256)rr((k|kz)?)$", + "^VPERM(I|T)2(D|Q)Zrr((k|kz)?)$", "^VPM(AX|IN)(S|U)QZ(128|256)rr((k|kz)?)$", "^VPMULTISHIFTQBZ(128|256)rr$", "^VPOPCNT(B|D|Q|W)Z(128|256)rr$", @@ -4274,7 +4274,7 @@ def SPRWriteResGroup455 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPo } def : InstRW<[SPRWriteResGroup455], (instregex "^VPCONFLICTQZ128rm((b|k|bk|kz)?)$")>; def : InstRW<[SPRWriteResGroup455], (instrs VPCONFLICTQZ128rmbkz)>; -def : InstRW<[SPRWriteResGroup455, ReadAfterVecYLd], (instregex "^VPERM(I|T)2B128rm$")>; +def : InstRW<[SPRWriteResGroup455, ReadAfterVecYLd], (instregex "^VPERM(I|T)2BZ128rm$")>; def SPRWriteResGroup456 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> { let ReleaseAtCycles = [1, 2]; @@ -4325,127 +4325,127 @@ def SPRWriteResGroup462 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPo let Latency = 13; let NumMicroOps = 4; } -def : InstRW<[SPRWriteResGroup462, ReadAfterVecYLd], (instregex "^VPERM(I|T)2B128rmk(z?)$")>; -def : InstRW<[SPRWriteResGroup462, ReadAfterVecYLd], (instrs VPERMT2W128rm)>; +def : InstRW<[SPRWriteResGroup462, ReadAfterVecYLd], (instregex "^VPERM(I|T)2BZ128rmk(z?)$")>; +def : InstRW<[SPRWriteResGroup462, ReadAfterVecYLd], (instrs VPERMT2WZ128rm)>; def SPRWriteResGroup463 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> { let ReleaseAtCycles = [1, 2]; let Latency = 5; let NumMicroOps = 3; } -def : InstRW<[SPRWriteResGroup463], (instregex "^VPERM(I|T)2B(128|256)rr$")>; +def : InstRW<[SPRWriteResGroup463], (instregex "^VPERM(I|T)2BZ(128|256)rr$")>; def SPRWriteResGroup464 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> { let ReleaseAtCycles = [1, 2]; let Latency = 7; let NumMicroOps = 3; } -def : InstRW<[SPRWriteResGroup464], (instregex "^VPERM(I|T)2B(128|256)rrk(z?)$", - "^VPERM(I|T)2W(128|256)rr$")>; +def : InstRW<[SPRWriteResGroup464], (instregex "^VPERM(I|T)2BZ(128|256)rrk(z?)$", + "^VPERM(I|T)2WZ(128|256)rr$")>; def SPRWriteResGroup465 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 12; let NumMicroOps = 4; } -def : InstRW<[SPRWriteResGroup465, ReadAfterVecYLd], (instregex "^VPERM(I|T)2B256rm$")>; +def : InstRW<[SPRWriteResGroup465, ReadAfterVecYLd], (instregex "^VPERM(I|T)2BZ256rm$")>; def SPRWriteResGroup466 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 14; let NumMicroOps = 4; } -def : InstRW<[SPRWriteResGroup466, ReadAfterVecYLd], (instregex "^VPERM(I|T)2B256rmk(z?)$")>; -def : InstRW<[SPRWriteResGroup466, ReadAfterVecYLd], (instrs VPERMI2W128rm, - VPERMT2W256rm)>; +def : InstRW<[SPRWriteResGroup466, ReadAfterVecYLd], (instregex "^VPERM(I|T)2BZ256rmk(z?)$")>; +def : InstRW<[SPRWriteResGroup466, ReadAfterVecYLd], (instrs VPERMI2WZ128rm, + VPERMT2WZ256rm)>; def SPRWriteResGroup467 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 12; let NumMicroOps = 4; } -def : InstRW<[SPRWriteResGroup467, ReadAfterVecYLd], (instregex "^VPERM(I|T)2Brm$")>; +def : InstRW<[SPRWriteResGroup467, ReadAfterVecYLd], (instregex "^VPERM(I|T)2BZrm$")>; def SPRWriteResGroup468 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 14; let NumMicroOps = 4; } -def : InstRW<[SPRWriteResGroup468, ReadAfterVecYLd], (instregex "^VPERM(I|T)2Brmk(z?)$")>; -def : InstRW<[SPRWriteResGroup468, ReadAfterVecYLd], (instrs VPERMT2Wrm)>; +def : InstRW<[SPRWriteResGroup468, ReadAfterVecYLd], (instregex "^VPERM(I|T)2BZrmk(z?)$")>; +def : InstRW<[SPRWriteResGroup468, ReadAfterVecYLd], (instrs VPERMT2WZrm)>; def SPRWriteResGroup469 : SchedWriteRes<[SPRPort00_05, SPRPort05]> { let ReleaseAtCycles = [1, 2]; let Latency = 5; let NumMicroOps = 3; } -def : InstRW<[SPRWriteResGroup469], (instregex "^VPERM(I|T)2Brr$")>; +def : InstRW<[SPRWriteResGroup469], (instregex "^VPERM(I|T)2BZrr$")>; def SPRWriteResGroup470 : SchedWriteRes<[SPRPort00_05, SPRPort05]> { let ReleaseAtCycles = [1, 2]; let Latency = 7; let NumMicroOps = 3; } -def : InstRW<[SPRWriteResGroup470], (instregex "^VPERM(I|T)2Brrk(z?)$", - "^VPERM(I|T)2Wrr$")>; +def : InstRW<[SPRWriteResGroup470], (instregex "^VPERM(I|T)2BZrrk(z?)$", + "^VPERM(I|T)2WZrr$")>; def SPRWriteResGroup471 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 16; let NumMicroOps = 4; } -def : InstRW<[SPRWriteResGroup471, ReadAfterVecYLd], (instregex "^VPERMI2W128rmk(z?)$", - "^VPERMT2W256rmk(z?)$")>; +def : InstRW<[SPRWriteResGroup471, ReadAfterVecYLd], (instregex "^VPERMI2WZ128rmk(z?)$", + "^VPERMT2WZ256rmk(z?)$")>; def SPRWriteResGroup472 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> { let ReleaseAtCycles = [1, 2]; let Latency = 9; let NumMicroOps = 3; } -def : InstRW<[SPRWriteResGroup472], (instregex "^VPERM(I|T)2W(128|256)rrk(z?)$")>; +def : InstRW<[SPRWriteResGroup472], (instregex "^VPERM(I|T)2WZ(128|256)rrk(z?)$")>; def SPRWriteResGroup473 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 15; let NumMicroOps = 4; } -def : InstRW<[SPRWriteResGroup473, ReadAfterVecYLd], (instregex "^VPERMT2W128rmk(z?)$")>; -def : InstRW<[SPRWriteResGroup473, ReadAfterVecYLd], (instrs VPERMI2W256rm)>; +def : InstRW<[SPRWriteResGroup473, ReadAfterVecYLd], (instregex "^VPERMT2WZ128rmk(z?)$")>; +def : InstRW<[SPRWriteResGroup473, ReadAfterVecYLd], (instrs VPERMI2WZ256rm)>; def SPRWriteResGroup474 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 17; let NumMicroOps = 4; } -def : InstRW<[SPRWriteResGroup474, ReadAfterVecYLd], (instregex "^VPERMI2W256rmk(z?)$")>; +def : InstRW<[SPRWriteResGroup474, ReadAfterVecYLd], (instregex "^VPERMI2WZ256rmk(z?)$")>; def SPRWriteResGroup475 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 15; let NumMicroOps = 4; } -def : InstRW<[SPRWriteResGroup475, ReadAfterVecYLd], (instrs VPERMI2Wrm)>; +def : InstRW<[SPRWriteResGroup475, ReadAfterVecYLd], (instrs VPERMI2WZrm)>; def SPRWriteResGroup476 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 17; let NumMicroOps = 4; } -def : InstRW<[SPRWriteResGroup476, ReadAfterVecYLd], (instregex "^VPERMI2Wrmk(z?)$")>; +def : InstRW<[SPRWriteResGroup476, ReadAfterVecYLd], (instregex "^VPERMI2WZrmk(z?)$")>; def SPRWriteResGroup477 : SchedWriteRes<[SPRPort00_05, SPRPort05]> { let ReleaseAtCycles = [1, 2]; let Latency = 9; let NumMicroOps = 3; } -def : InstRW<[SPRWriteResGroup477], (instregex "^VPERM(I|T)2Wrrk(z?)$")>; +def : InstRW<[SPRWriteResGroup477], (instregex "^VPERM(I|T)2WZrrk(z?)$")>; def SPRWriteResGroup478 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 16; let NumMicroOps = 4; } -def : InstRW<[SPRWriteResGroup478, ReadAfterVecYLd], (instregex "^VPERMT2Wrmk(z?)$")>; +def : InstRW<[SPRWriteResGroup478, ReadAfterVecYLd], (instregex "^VPERMT2WZrmk(z?)$")>; def SPRWriteResGroup479 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { let Latency = 10; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 8194af8a6e1db..3da688cda2c6c 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -1379,12 +1379,12 @@ def SKXWriteResGroup97 : SchedWriteRes<[SKXPort5,SKXPort015]> { let NumMicroOps = 3; let ReleaseAtCycles = [2,1]; } -def: InstRW<[SKXWriteResGroup97], (instregex "VPERMI2W128rr", - "VPERMI2W256rr", - "VPERMI2Wrr", - "VPERMT2W128rr", - "VPERMT2W256rr", - "VPERMT2Wrr")>; +def: InstRW<[SKXWriteResGroup97], (instregex "VPERMI2WZ128rr", + "VPERMI2WZ256rr", + "VPERMI2WZrr", + "VPERMT2WZ128rr", + "VPERMT2WZ256rr", + "VPERMT2WZrr")>; def SKXWriteResGroup99 : SchedWriteRes<[SKXPort23,SKXPort0156]> { let Latency = 7; @@ -1675,14 +1675,14 @@ def: InstRW<[SKXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i", "VFPCLASSSDZrm(b?)", "VFPCLASSSSZrm(b?)", "(V?)PCMPGTQrm", - "VPERMI2D128rm(b?)", - "VPERMI2PD128rm(b?)", - "VPERMI2PS128rm(b?)", - "VPERMI2Q128rm(b?)", - "VPERMT2D128rm(b?)", - "VPERMT2PD128rm(b?)", - "VPERMT2PS128rm(b?)", - "VPERMT2Q128rm(b?)", + "VPERMI2DZ128rm(b?)", + "VPERMI2PDZ128rm(b?)", + "VPERMI2PSZ128rm(b?)", + "VPERMI2QZ128rm(b?)", + "VPERMT2DZ128rm(b?)", + "VPERMT2PDZ128rm(b?)", + "VPERMT2PSZ128rm(b?)", + "VPERMT2QZ128rm(b?)", "VPMAXSQZ128rm(b?)", "VPMAXUQZ128rm(b?)", "VPMINSQZ128rm(b?)", @@ -1983,8 +1983,8 @@ def SKXWriteResGroup183 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { let NumMicroOps = 4; let ReleaseAtCycles = [2,1,1]; } -def: InstRW<[SKXWriteResGroup183], (instregex "VPERMI2W128rm(b?)", - "VPERMT2W128rm(b?)")>; +def: InstRW<[SKXWriteResGroup183], (instregex "VPERMI2WZ128rm(b?)", + "VPERMT2WZ128rm(b?)")>; def SKXWriteResGroup187 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { let Latency = 14; @@ -2010,10 +2010,10 @@ def SKXWriteResGroup189 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { let NumMicroOps = 4; let ReleaseAtCycles = [2,1,1]; } -def: InstRW<[SKXWriteResGroup189], (instregex "VPERMI2W256rm(b?)", - "VPERMI2Wrm(b?)", - "VPERMT2W256rm(b?)", - "VPERMT2Wrm(b?)")>; +def: InstRW<[SKXWriteResGroup189], (instregex "VPERMI2WZ256rm(b?)", + "VPERMI2WZrm(b?)", + "VPERMT2WZ256rm(b?)", + "VPERMT2WZrm(b?)")>; def SKXWriteResGroup190 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort0156]> { let Latency = 14; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td index 405000f0851e9..dac4d8422582a 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver4.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td @@ -1743,8 +1743,8 @@ def Zn4PERMIT2_128: SchedWriteRes<[Zn4FPFMisc12]> { let NumMicroOps = 1; } def : InstRW<[Zn4PERMIT2_128], (instregex - "VPERM(I2|T2)(PS|PD|W)128(rr|rrk|rrkz)", - "VPERM(I2|T2)(B|D|Q)128(rr|rrk|rrkz)" + "VPERM(I2|T2)(PS|PD|W)Z128(rr|rrk|rrkz)", + "VPERM(I2|T2)(B|D|Q)Z128(rr|rrk|rrkz)" )>; def Zn4PERMIT2_128rr:SchedWriteRes<[Zn4FPFMisc12]> { @@ -1763,11 +1763,11 @@ def Zn4PERMIT2_256: SchedWriteRes<[Zn4FPFMisc12]> { let NumMicroOps = 1; } def : InstRW<[Zn4PERMIT2_256], (instregex - "VPERM(I2|T2)(PS|PD|W)256(rr|rrk|rrkz)", + "VPERM(I2|T2)(PS|PD|W)Z256(rr|rrk|rrkz)", "VPERMP(S|D)Z256(rr|rrk|rrkz)", "V(P?)COMPRESS(B|W|D|Q|PD|PS|SD|SQ)Z256(rr|rrk|rrkz)", "VPERM(B|D|Q|W)Z256(rr|rrk|rrkz)", - "VPERM(I2|Q|T2)(B|D|Q)(Z?)256(rr|rrk|rrkz)", + "VPERM(I2|Q|T2)(B|D|Q)Z256(rr|rrk|rrkz)", "VPEXPAND(B|W)Z256(rr|rrk|rrkz)" )>; @@ -1777,12 +1777,12 @@ def Zn4PERMIT2Z: SchedWriteRes<[Zn4FPFMisc12]> { let NumMicroOps = 1; } def : InstRW<[Zn4PERMIT2Z], (instregex - "VPERM(I2|T2)(PS|PD|W)(rr|rrk|rrkz)", + "VPERM(I2|T2)(PS|PD|W)Z(rr|rrk|rrkz)", "VPERM(B|D|W)Z(rr|rrk|rrkz)", - "VPERM(I2|Q|T2)(B|D|Q)(Z?)(rr|rrk|rrkz)", + "VPERM(I2|Q|T2)(B|D|Q)Z(rr|rrk|rrkz)", "V(P?)COMPRESS(B|W|D|Q|PD|PS|SD|SQ)Z(rr|rrk|rrkz)", "VPEXPAND(B|W)Z(rr|rrk|rrkz)", - "VPERMP(S|D)Z(rr|rrk|rrkz)" + "VPERMP(S|D)Z(rr|rrk|rrkz)" )>; // ALU SLOW Misc Instructions diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index b2609f01e86a2..67b5c6cc28891 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -4285,24 +4285,24 @@ static const X86FoldTableEntry Table3[] = { {X86::VPERMBZrrkz, X86::VPERMBZrmkz, 0}, {X86::VPERMDZ256rrkz, X86::VPERMDZ256rmkz, 0}, {X86::VPERMDZrrkz, X86::VPERMDZrmkz, 0}, - {X86::VPERMI2B128rr, X86::VPERMI2B128rm, 0}, - {X86::VPERMI2B256rr, X86::VPERMI2B256rm, 0}, - {X86::VPERMI2Brr, X86::VPERMI2Brm, 0}, - {X86::VPERMI2D128rr, X86::VPERMI2D128rm, 0}, - {X86::VPERMI2D256rr, X86::VPERMI2D256rm, 0}, - {X86::VPERMI2Drr, X86::VPERMI2Drm, 0}, - {X86::VPERMI2PD128rr, X86::VPERMI2PD128rm, 0}, - {X86::VPERMI2PD256rr, X86::VPERMI2PD256rm, 0}, - {X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0}, - {X86::VPERMI2PS128rr, X86::VPERMI2PS128rm, 0}, - {X86::VPERMI2PS256rr, X86::VPERMI2PS256rm, 0}, - {X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0}, - {X86::VPERMI2Q128rr, X86::VPERMI2Q128rm, 0}, - {X86::VPERMI2Q256rr, X86::VPERMI2Q256rm, 0}, - {X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0}, - {X86::VPERMI2W128rr, X86::VPERMI2W128rm, 0}, - {X86::VPERMI2W256rr, X86::VPERMI2W256rm, 0}, - {X86::VPERMI2Wrr, X86::VPERMI2Wrm, 0}, + {X86::VPERMI2BZ128rr, X86::VPERMI2BZ128rm, 0}, + {X86::VPERMI2BZ256rr, X86::VPERMI2BZ256rm, 0}, + {X86::VPERMI2BZrr, X86::VPERMI2BZrm, 0}, + {X86::VPERMI2DZ128rr, X86::VPERMI2DZ128rm, 0}, + {X86::VPERMI2DZ256rr, X86::VPERMI2DZ256rm, 0}, + {X86::VPERMI2DZrr, X86::VPERMI2DZrm, 0}, + {X86::VPERMI2PDZ128rr, X86::VPERMI2PDZ128rm, 0}, + {X86::VPERMI2PDZ256rr, X86::VPERMI2PDZ256rm, 0}, + {X86::VPERMI2PDZrr, X86::VPERMI2PDZrm, 0}, + {X86::VPERMI2PSZ128rr, X86::VPERMI2PSZ128rm, 0}, + {X86::VPERMI2PSZ256rr, X86::VPERMI2PSZ256rm, 0}, + {X86::VPERMI2PSZrr, X86::VPERMI2PSZrm, 0}, + {X86::VPERMI2QZ128rr, X86::VPERMI2QZ128rm, 0}, + {X86::VPERMI2QZ256rr, X86::VPERMI2QZ256rm, 0}, + {X86::VPERMI2QZrr, X86::VPERMI2QZrm, 0}, + {X86::VPERMI2WZ128rr, X86::VPERMI2WZ128rm, 0}, + {X86::VPERMI2WZ256rr, X86::VPERMI2WZ256rm, 0}, + {X86::VPERMI2WZrr, X86::VPERMI2WZrm, 0}, {X86::VPERMIL2PDYrr, X86::VPERMIL2PDYrm, 0}, {X86::VPERMIL2PDrr, X86::VPERMIL2PDrm, 0}, {X86::VPERMIL2PSYrr, X86::VPERMIL2PSYrm, 0}, @@ -4329,24 +4329,24 @@ static const X86FoldTableEntry Table3[] = { {X86::VPERMQZ256rrkz, X86::VPERMQZ256rmkz, 0}, {X86::VPERMQZrik, X86::VPERMQZmik, 0}, {X86::VPERMQZrrkz, X86::VPERMQZrmkz, 0}, - {X86::VPERMT2B128rr, X86::VPERMT2B128rm, 0}, - {X86::VPERMT2B256rr, X86::VPERMT2B256rm, 0}, - {X86::VPERMT2Brr, X86::VPERMT2Brm, 0}, - {X86::VPERMT2D128rr, X86::VPERMT2D128rm, 0}, - {X86::VPERMT2D256rr, X86::VPERMT2D256rm, 0}, - {X86::VPERMT2Drr, X86::VPERMT2Drm, 0}, - {X86::VPERMT2PD128rr, X86::VPERMT2PD128rm, 0}, - {X86::VPERMT2PD256rr, X86::VPERMT2PD256rm, 0}, - {X86::VPERMT2PDrr, X86::VPERMT2PDrm, 0}, - {X86::VPERMT2PS128rr, X86::VPERMT2PS128rm, 0}, - {X86::VPERMT2PS256rr, X86::VPERMT2PS256rm, 0}, - {X86::VPERMT2PSrr, X86::VPERMT2PSrm, 0}, - {X86::VPERMT2Q128rr, X86::VPERMT2Q128rm, 0}, - {X86::VPERMT2Q256rr, X86::VPERMT2Q256rm, 0}, - {X86::VPERMT2Qrr, X86::VPERMT2Qrm, 0}, - {X86::VPERMT2W128rr, X86::VPERMT2W128rm, 0}, - {X86::VPERMT2W256rr, X86::VPERMT2W256rm, 0}, - {X86::VPERMT2Wrr, X86::VPERMT2Wrm, 0}, + {X86::VPERMT2BZ128rr, X86::VPERMT2BZ128rm, 0}, + {X86::VPERMT2BZ256rr, X86::VPERMT2BZ256rm, 0}, + {X86::VPERMT2BZrr, X86::VPERMT2BZrm, 0}, + {X86::VPERMT2DZ128rr, X86::VPERMT2DZ128rm, 0}, + {X86::VPERMT2DZ256rr, X86::VPERMT2DZ256rm, 0}, + {X86::VPERMT2DZrr, X86::VPERMT2DZrm, 0}, + {X86::VPERMT2PDZ128rr, X86::VPERMT2PDZ128rm, 0}, + {X86::VPERMT2PDZ256rr, X86::VPERMT2PDZ256rm, 0}, + {X86::VPERMT2PDZrr, X86::VPERMT2PDZrm, 0}, + {X86::VPERMT2PSZ128rr, X86::VPERMT2PSZ128rm, 0}, + {X86::VPERMT2PSZ256rr, X86::VPERMT2PSZ256rm, 0}, + {X86::VPERMT2PSZrr, X86::VPERMT2PSZrm, 0}, + {X86::VPERMT2QZ128rr, X86::VPERMT2QZ128rm, 0}, + {X86::VPERMT2QZ256rr, X86::VPERMT2QZ256rm, 0}, + {X86::VPERMT2QZrr, X86::VPERMT2QZrm, 0}, + {X86::VPERMT2WZ128rr, X86::VPERMT2WZ128rm, 0}, + {X86::VPERMT2WZ256rr, X86::VPERMT2WZ256rm, 0}, + {X86::VPERMT2WZrr, X86::VPERMT2WZrm, 0}, {X86::VPERMWZ128rrkz, X86::VPERMWZ128rmkz, 0}, {X86::VPERMWZ256rrkz, X86::VPERMWZ256rmkz, 0}, {X86::VPERMWZrrkz, X86::VPERMWZrmkz, 0}, @@ -5561,42 +5561,42 @@ static const X86FoldTableEntry Table4[] = { {X86::VPERMBZrrk, X86::VPERMBZrmk, 0}, {X86::VPERMDZ256rrk, X86::VPERMDZ256rmk, 0}, {X86::VPERMDZrrk, X86::VPERMDZrmk, 0}, - {X86::VPERMI2B128rrk, X86::VPERMI2B128rmk, 0}, - {X86::VPERMI2B128rrkz, X86::VPERMI2B128rmkz, 0}, - {X86::VPERMI2B256rrk, X86::VPERMI2B256rmk, 0}, - {X86::VPERMI2B256rrkz, X86::VPERMI2B256rmkz, 0}, - {X86::VPERMI2Brrk, X86::VPERMI2Brmk, 0}, - {X86::VPERMI2Brrkz, X86::VPERMI2Brmkz, 0}, - {X86::VPERMI2D128rrk, X86::VPERMI2D128rmk, 0}, - {X86::VPERMI2D128rrkz, X86::VPERMI2D128rmkz, 0}, - {X86::VPERMI2D256rrk, X86::VPERMI2D256rmk, 0}, - {X86::VPERMI2D256rrkz, X86::VPERMI2D256rmkz, 0}, - {X86::VPERMI2Drrk, X86::VPERMI2Drmk, 0}, - {X86::VPERMI2Drrkz, X86::VPERMI2Drmkz, 0}, - {X86::VPERMI2PD128rrk, X86::VPERMI2PD128rmk, 0}, - {X86::VPERMI2PD128rrkz, X86::VPERMI2PD128rmkz, 0}, - {X86::VPERMI2PD256rrk, X86::VPERMI2PD256rmk, 0}, - {X86::VPERMI2PD256rrkz, X86::VPERMI2PD256rmkz, 0}, - {X86::VPERMI2PDrrk, X86::VPERMI2PDrmk, 0}, - {X86::VPERMI2PDrrkz, X86::VPERMI2PDrmkz, 0}, - {X86::VPERMI2PS128rrk, X86::VPERMI2PS128rmk, 0}, - {X86::VPERMI2PS128rrkz, X86::VPERMI2PS128rmkz, 0}, - {X86::VPERMI2PS256rrk, X86::VPERMI2PS256rmk, 0}, - {X86::VPERMI2PS256rrkz, X86::VPERMI2PS256rmkz, 0}, - {X86::VPERMI2PSrrk, X86::VPERMI2PSrmk, 0}, - {X86::VPERMI2PSrrkz, X86::VPERMI2PSrmkz, 0}, - {X86::VPERMI2Q128rrk, X86::VPERMI2Q128rmk, 0}, - {X86::VPERMI2Q128rrkz, X86::VPERMI2Q128rmkz, 0}, - {X86::VPERMI2Q256rrk, X86::VPERMI2Q256rmk, 0}, - {X86::VPERMI2Q256rrkz, X86::VPERMI2Q256rmkz, 0}, - {X86::VPERMI2Qrrk, X86::VPERMI2Qrmk, 0}, - {X86::VPERMI2Qrrkz, X86::VPERMI2Qrmkz, 0}, - {X86::VPERMI2W128rrk, X86::VPERMI2W128rmk, 0}, - {X86::VPERMI2W128rrkz, X86::VPERMI2W128rmkz, 0}, - {X86::VPERMI2W256rrk, X86::VPERMI2W256rmk, 0}, - {X86::VPERMI2W256rrkz, X86::VPERMI2W256rmkz, 0}, - {X86::VPERMI2Wrrk, X86::VPERMI2Wrmk, 0}, - {X86::VPERMI2Wrrkz, X86::VPERMI2Wrmkz, 0}, + {X86::VPERMI2BZ128rrk, X86::VPERMI2BZ128rmk, 0}, + {X86::VPERMI2BZ128rrkz, X86::VPERMI2BZ128rmkz, 0}, + {X86::VPERMI2BZ256rrk, X86::VPERMI2BZ256rmk, 0}, + {X86::VPERMI2BZ256rrkz, X86::VPERMI2BZ256rmkz, 0}, + {X86::VPERMI2BZrrk, X86::VPERMI2BZrmk, 0}, + {X86::VPERMI2BZrrkz, X86::VPERMI2BZrmkz, 0}, + {X86::VPERMI2DZ128rrk, X86::VPERMI2DZ128rmk, 0}, + {X86::VPERMI2DZ128rrkz, X86::VPERMI2DZ128rmkz, 0}, + {X86::VPERMI2DZ256rrk, X86::VPERMI2DZ256rmk, 0}, + {X86::VPERMI2DZ256rrkz, X86::VPERMI2DZ256rmkz, 0}, + {X86::VPERMI2DZrrk, X86::VPERMI2DZrmk, 0}, + {X86::VPERMI2DZrrkz, X86::VPERMI2DZrmkz, 0}, + {X86::VPERMI2PDZ128rrk, X86::VPERMI2PDZ128rmk, 0}, + {X86::VPERMI2PDZ128rrkz, X86::VPERMI2PDZ128rmkz, 0}, + {X86::VPERMI2PDZ256rrk, X86::VPERMI2PDZ256rmk, 0}, + {X86::VPERMI2PDZ256rrkz, X86::VPERMI2PDZ256rmkz, 0}, + {X86::VPERMI2PDZrrk, X86::VPERMI2PDZrmk, 0}, + {X86::VPERMI2PDZrrkz, X86::VPERMI2PDZrmkz, 0}, + {X86::VPERMI2PSZ128rrk, X86::VPERMI2PSZ128rmk, 0}, + {X86::VPERMI2PSZ128rrkz, X86::VPERMI2PSZ128rmkz, 0}, + {X86::VPERMI2PSZ256rrk, X86::VPERMI2PSZ256rmk, 0}, + {X86::VPERMI2PSZ256rrkz, X86::VPERMI2PSZ256rmkz, 0}, + {X86::VPERMI2PSZrrk, X86::VPERMI2PSZrmk, 0}, + {X86::VPERMI2PSZrrkz, X86::VPERMI2PSZrmkz, 0}, + {X86::VPERMI2QZ128rrk, X86::VPERMI2QZ128rmk, 0}, + {X86::VPERMI2QZ128rrkz, X86::VPERMI2QZ128rmkz, 0}, + {X86::VPERMI2QZ256rrk, X86::VPERMI2QZ256rmk, 0}, + {X86::VPERMI2QZ256rrkz, X86::VPERMI2QZ256rmkz, 0}, + {X86::VPERMI2QZrrk, X86::VPERMI2QZrmk, 0}, + {X86::VPERMI2QZrrkz, X86::VPERMI2QZrmkz, 0}, + {X86::VPERMI2WZ128rrk, X86::VPERMI2WZ128rmk, 0}, + {X86::VPERMI2WZ128rrkz, X86::VPERMI2WZ128rmkz, 0}, + {X86::VPERMI2WZ256rrk, X86::VPERMI2WZ256rmk, 0}, + {X86::VPERMI2WZ256rrkz, X86::VPERMI2WZ256rmkz, 0}, + {X86::VPERMI2WZrrk, X86::VPERMI2WZrmk, 0}, + {X86::VPERMI2WZrrkz, X86::VPERMI2WZrmkz, 0}, {X86::VPERMILPDZ128rrk, X86::VPERMILPDZ128rmk, 0}, {X86::VPERMILPDZ256rrk, X86::VPERMILPDZ256rmk, 0}, {X86::VPERMILPDZrrk, X86::VPERMILPDZrmk, 0}, @@ -5609,42 +5609,42 @@ static const X86FoldTableEntry Table4[] = { {X86::VPERMPSZrrk, X86::VPERMPSZrmk, 0}, {X86::VPERMQZ256rrk, X86::VPERMQZ256rmk, 0}, {X86::VPERMQZrrk, X86::VPERMQZrmk, 0}, - {X86::VPERMT2B128rrk, X86::VPERMT2B128rmk, 0}, - {X86::VPERMT2B128rrkz, X86::VPERMT2B128rmkz, 0}, - {X86::VPERMT2B256rrk, X86::VPERMT2B256rmk, 0}, - {X86::VPERMT2B256rrkz, X86::VPERMT2B256rmkz, 0}, - {X86::VPERMT2Brrk, X86::VPERMT2Brmk, 0}, - {X86::VPERMT2Brrkz, X86::VPERMT2Brmkz, 0}, - {X86::VPERMT2D128rrk, X86::VPERMT2D128rmk, 0}, - {X86::VPERMT2D128rrkz, X86::VPERMT2D128rmkz, 0}, - {X86::VPERMT2D256rrk, X86::VPERMT2D256rmk, 0}, - {X86::VPERMT2D256rrkz, X86::VPERMT2D256rmkz, 0}, - {X86::VPERMT2Drrk, X86::VPERMT2Drmk, 0}, - {X86::VPERMT2Drrkz, X86::VPERMT2Drmkz, 0}, - {X86::VPERMT2PD128rrk, X86::VPERMT2PD128rmk, 0}, - {X86::VPERMT2PD128rrkz, X86::VPERMT2PD128rmkz, 0}, - {X86::VPERMT2PD256rrk, X86::VPERMT2PD256rmk, 0}, - {X86::VPERMT2PD256rrkz, X86::VPERMT2PD256rmkz, 0}, - {X86::VPERMT2PDrrk, X86::VPERMT2PDrmk, 0}, - {X86::VPERMT2PDrrkz, X86::VPERMT2PDrmkz, 0}, - {X86::VPERMT2PS128rrk, X86::VPERMT2PS128rmk, 0}, - {X86::VPERMT2PS128rrkz, X86::VPERMT2PS128rmkz, 0}, - {X86::VPERMT2PS256rrk, X86::VPERMT2PS256rmk, 0}, - {X86::VPERMT2PS256rrkz, X86::VPERMT2PS256rmkz, 0}, - {X86::VPERMT2PSrrk, X86::VPERMT2PSrmk, 0}, - {X86::VPERMT2PSrrkz, X86::VPERMT2PSrmkz, 0}, - {X86::VPERMT2Q128rrk, X86::VPERMT2Q128rmk, 0}, - {X86::VPERMT2Q128rrkz, X86::VPERMT2Q128rmkz, 0}, - {X86::VPERMT2Q256rrk, X86::VPERMT2Q256rmk, 0}, - {X86::VPERMT2Q256rrkz, X86::VPERMT2Q256rmkz, 0}, - {X86::VPERMT2Qrrk, X86::VPERMT2Qrmk, 0}, - {X86::VPERMT2Qrrkz, X86::VPERMT2Qrmkz, 0}, - {X86::VPERMT2W128rrk, X86::VPERMT2W128rmk, 0}, - {X86::VPERMT2W128rrkz, X86::VPERMT2W128rmkz, 0}, - {X86::VPERMT2W256rrk, X86::VPERMT2W256rmk, 0}, - {X86::VPERMT2W256rrkz, X86::VPERMT2W256rmkz, 0}, - {X86::VPERMT2Wrrk, X86::VPERMT2Wrmk, 0}, - {X86::VPERMT2Wrrkz, X86::VPERMT2Wrmkz, 0}, + {X86::VPERMT2BZ128rrk, X86::VPERMT2BZ128rmk, 0}, + {X86::VPERMT2BZ128rrkz, X86::VPERMT2BZ128rmkz, 0}, + {X86::VPERMT2BZ256rrk, X86::VPERMT2BZ256rmk, 0}, + {X86::VPERMT2BZ256rrkz, X86::VPERMT2BZ256rmkz, 0}, + {X86::VPERMT2BZrrk, X86::VPERMT2BZrmk, 0}, + {X86::VPERMT2BZrrkz, X86::VPERMT2BZrmkz, 0}, + {X86::VPERMT2DZ128rrk, X86::VPERMT2DZ128rmk, 0}, + {X86::VPERMT2DZ128rrkz, X86::VPERMT2DZ128rmkz, 0}, + {X86::VPERMT2DZ256rrk, X86::VPERMT2DZ256rmk, 0}, + {X86::VPERMT2DZ256rrkz, X86::VPERMT2DZ256rmkz, 0}, + {X86::VPERMT2DZrrk, X86::VPERMT2DZrmk, 0}, + {X86::VPERMT2DZrrkz, X86::VPERMT2DZrmkz, 0}, + {X86::VPERMT2PDZ128rrk, X86::VPERMT2PDZ128rmk, 0}, + {X86::VPERMT2PDZ128rrkz, X86::VPERMT2PDZ128rmkz, 0}, + {X86::VPERMT2PDZ256rrk, X86::VPERMT2PDZ256rmk, 0}, + {X86::VPERMT2PDZ256rrkz, X86::VPERMT2PDZ256rmkz, 0}, + {X86::VPERMT2PDZrrk, X86::VPERMT2PDZrmk, 0}, + {X86::VPERMT2PDZrrkz, X86::VPERMT2PDZrmkz, 0}, + {X86::VPERMT2PSZ128rrk, X86::VPERMT2PSZ128rmk, 0}, + {X86::VPERMT2PSZ128rrkz, X86::VPERMT2PSZ128rmkz, 0}, + {X86::VPERMT2PSZ256rrk, X86::VPERMT2PSZ256rmk, 0}, + {X86::VPERMT2PSZ256rrkz, X86::VPERMT2PSZ256rmkz, 0}, + {X86::VPERMT2PSZrrk, X86::VPERMT2PSZrmk, 0}, + {X86::VPERMT2PSZrrkz, X86::VPERMT2PSZrmkz, 0}, + {X86::VPERMT2QZ128rrk, X86::VPERMT2QZ128rmk, 0}, + {X86::VPERMT2QZ128rrkz, X86::VPERMT2QZ128rmkz, 0}, + {X86::VPERMT2QZ256rrk, X86::VPERMT2QZ256rmk, 0}, + {X86::VPERMT2QZ256rrkz, X86::VPERMT2QZ256rmkz, 0}, + {X86::VPERMT2QZrrk, X86::VPERMT2QZrmk, 0}, + {X86::VPERMT2QZrrkz, X86::VPERMT2QZrmkz, 0}, + {X86::VPERMT2WZ128rrk, X86::VPERMT2WZ128rmk, 0}, + {X86::VPERMT2WZ128rrkz, X86::VPERMT2WZ128rmkz, 0}, + {X86::VPERMT2WZ256rrk, X86::VPERMT2WZ256rmk, 0}, + {X86::VPERMT2WZ256rrkz, X86::VPERMT2WZ256rmkz, 0}, + {X86::VPERMT2WZrrk, X86::VPERMT2WZrmk, 0}, + {X86::VPERMT2WZrrkz, X86::VPERMT2WZrmkz, 0}, {X86::VPERMWZ128rrk, X86::VPERMWZ128rmk, 0}, {X86::VPERMWZ256rrk, X86::VPERMWZ256rmk, 0}, {X86::VPERMWZrrk, X86::VPERMWZrmk, 0}, @@ -7523,18 +7523,18 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VPDPWSSDZr, X86::VPDPWSSDZmb, TB_BCAST_D}, {X86::VPERMDZ256rrkz, X86::VPERMDZ256rmbkz, TB_BCAST_D}, {X86::VPERMDZrrkz, X86::VPERMDZrmbkz, TB_BCAST_D}, - {X86::VPERMI2D128rr, X86::VPERMI2D128rmb, TB_BCAST_D}, - {X86::VPERMI2D256rr, X86::VPERMI2D256rmb, TB_BCAST_D}, - {X86::VPERMI2Drr, X86::VPERMI2Drmb, TB_BCAST_D}, - {X86::VPERMI2PD128rr, X86::VPERMI2PD128rmb, TB_BCAST_SD}, - {X86::VPERMI2PD256rr, X86::VPERMI2PD256rmb, TB_BCAST_SD}, - {X86::VPERMI2PDrr, X86::VPERMI2PDrmb, TB_BCAST_SD}, - {X86::VPERMI2PS128rr, X86::VPERMI2PS128rmb, TB_BCAST_SS}, - {X86::VPERMI2PS256rr, X86::VPERMI2PS256rmb, TB_BCAST_SS}, - {X86::VPERMI2PSrr, X86::VPERMI2PSrmb, TB_BCAST_SS}, - {X86::VPERMI2Q128rr, X86::VPERMI2Q128rmb, TB_BCAST_Q}, - {X86::VPERMI2Q256rr, X86::VPERMI2Q256rmb, TB_BCAST_Q}, - {X86::VPERMI2Qrr, X86::VPERMI2Qrmb, TB_BCAST_Q}, + {X86::VPERMI2DZ128rr, X86::VPERMI2DZ128rmb, TB_BCAST_D}, + {X86::VPERMI2DZ256rr, X86::VPERMI2DZ256rmb, TB_BCAST_D}, + {X86::VPERMI2DZrr, X86::VPERMI2DZrmb, TB_BCAST_D}, + {X86::VPERMI2PDZ128rr, X86::VPERMI2PDZ128rmb, TB_BCAST_SD}, + {X86::VPERMI2PDZ256rr, X86::VPERMI2PDZ256rmb, TB_BCAST_SD}, + {X86::VPERMI2PDZrr, X86::VPERMI2PDZrmb, TB_BCAST_SD}, + {X86::VPERMI2PSZ128rr, X86::VPERMI2PSZ128rmb, TB_BCAST_SS}, + {X86::VPERMI2PSZ256rr, X86::VPERMI2PSZ256rmb, TB_BCAST_SS}, + {X86::VPERMI2PSZrr, X86::VPERMI2PSZrmb, TB_BCAST_SS}, + {X86::VPERMI2QZ128rr, X86::VPERMI2QZ128rmb, TB_BCAST_Q}, + {X86::VPERMI2QZ256rr, X86::VPERMI2QZ256rmb, TB_BCAST_Q}, + {X86::VPERMI2QZrr, X86::VPERMI2QZrmb, TB_BCAST_Q}, {X86::VPERMILPDZ128rik, X86::VPERMILPDZ128mbik, TB_BCAST_SD}, {X86::VPERMILPDZ128rrkz, X86::VPERMILPDZ128rmbkz, TB_BCAST_SD}, {X86::VPERMILPDZ256rik, X86::VPERMILPDZ256mbik, TB_BCAST_SD}, @@ -7557,18 +7557,18 @@ static const X86FoldTableEntry BroadcastTable3[] = { {X86::VPERMQZ256rrkz, X86::VPERMQZ256rmbkz, TB_BCAST_Q}, {X86::VPERMQZrik, X86::VPERMQZmbik, TB_BCAST_Q}, {X86::VPERMQZrrkz, X86::VPERMQZrmbkz, TB_BCAST_Q}, - {X86::VPERMT2D128rr, X86::VPERMT2D128rmb, TB_BCAST_D}, - {X86::VPERMT2D256rr, X86::VPERMT2D256rmb, TB_BCAST_D}, - {X86::VPERMT2Drr, X86::VPERMT2Drmb, TB_BCAST_D}, - {X86::VPERMT2PD128rr, X86::VPERMT2PD128rmb, TB_BCAST_SD}, - {X86::VPERMT2PD256rr, X86::VPERMT2PD256rmb, TB_BCAST_SD}, - {X86::VPERMT2PDrr, X86::VPERMT2PDrmb, TB_BCAST_SD}, - {X86::VPERMT2PS128rr, X86::VPERMT2PS128rmb, TB_BCAST_SS}, - {X86::VPERMT2PS256rr, X86::VPERMT2PS256rmb, TB_BCAST_SS}, - {X86::VPERMT2PSrr, X86::VPERMT2PSrmb, TB_BCAST_SS}, - {X86::VPERMT2Q128rr, X86::VPERMT2Q128rmb, TB_BCAST_Q}, - {X86::VPERMT2Q256rr, X86::VPERMT2Q256rmb, TB_BCAST_Q}, - {X86::VPERMT2Qrr, X86::VPERMT2Qrmb, TB_BCAST_Q}, + {X86::VPERMT2DZ128rr, X86::VPERMT2DZ128rmb, TB_BCAST_D}, + {X86::VPERMT2DZ256rr, X86::VPERMT2DZ256rmb, TB_BCAST_D}, + {X86::VPERMT2DZrr, X86::VPERMT2DZrmb, TB_BCAST_D}, + {X86::VPERMT2PDZ128rr, X86::VPERMT2PDZ128rmb, TB_BCAST_SD}, + {X86::VPERMT2PDZ256rr, X86::VPERMT2PDZ256rmb, TB_BCAST_SD}, + {X86::VPERMT2PDZrr, X86::VPERMT2PDZrmb, TB_BCAST_SD}, + {X86::VPERMT2PSZ128rr, X86::VPERMT2PSZ128rmb, TB_BCAST_SS}, + {X86::VPERMT2PSZ256rr, X86::VPERMT2PSZ256rmb, TB_BCAST_SS}, + {X86::VPERMT2PSZrr, X86::VPERMT2PSZrmb, TB_BCAST_SS}, + {X86::VPERMT2QZ128rr, X86::VPERMT2QZ128rmb, TB_BCAST_Q}, + {X86::VPERMT2QZ256rr, X86::VPERMT2QZ256rmb, TB_BCAST_Q}, + {X86::VPERMT2QZrr, X86::VPERMT2QZrmb, TB_BCAST_Q}, {X86::VPLZCNTDZ128rrk, X86::VPLZCNTDZ128rmbk, TB_BCAST_D}, {X86::VPLZCNTDZ256rrk, X86::VPLZCNTDZ256rmbk, TB_BCAST_D}, {X86::VPLZCNTDZrrk, X86::VPLZCNTDZrmbk, TB_BCAST_D}, @@ -8375,30 +8375,30 @@ static const X86FoldTableEntry BroadcastTable4[] = { {X86::VPDPWSSDZrkz, X86::VPDPWSSDZmbkz, TB_BCAST_D}, {X86::VPERMDZ256rrk, X86::VPERMDZ256rmbk, TB_BCAST_D}, {X86::VPERMDZrrk, X86::VPERMDZrmbk, TB_BCAST_D}, - {X86::VPERMI2D128rrk, X86::VPERMI2D128rmbk, TB_BCAST_D}, - {X86::VPERMI2D128rrkz, X86::VPERMI2D128rmbkz, TB_BCAST_D}, - {X86::VPERMI2D256rrk, X86::VPERMI2D256rmbk, TB_BCAST_D}, - {X86::VPERMI2D256rrkz, X86::VPERMI2D256rmbkz, TB_BCAST_D}, - {X86::VPERMI2Drrk, X86::VPERMI2Drmbk, TB_BCAST_D}, - {X86::VPERMI2Drrkz, X86::VPERMI2Drmbkz, TB_BCAST_D}, - {X86::VPERMI2PD128rrk, X86::VPERMI2PD128rmbk, TB_BCAST_SD}, - {X86::VPERMI2PD128rrkz, X86::VPERMI2PD128rmbkz, TB_BCAST_SD}, - {X86::VPERMI2PD256rrk, X86::VPERMI2PD256rmbk, TB_BCAST_SD}, - {X86::VPERMI2PD256rrkz, X86::VPERMI2PD256rmbkz, TB_BCAST_SD}, - {X86::VPERMI2PDrrk, X86::VPERMI2PDrmbk, TB_BCAST_SD}, - {X86::VPERMI2PDrrkz, X86::VPERMI2PDrmbkz, TB_BCAST_SD}, - {X86::VPERMI2PS128rrk, X86::VPERMI2PS128rmbk, TB_BCAST_SS}, - {X86::VPERMI2PS128rrkz, X86::VPERMI2PS128rmbkz, TB_BCAST_SS}, - {X86::VPERMI2PS256rrk, X86::VPERMI2PS256rmbk, TB_BCAST_SS}, - {X86::VPERMI2PS256rrkz, X86::VPERMI2PS256rmbkz, TB_BCAST_SS}, - {X86::VPERMI2PSrrk, X86::VPERMI2PSrmbk, TB_BCAST_SS}, - {X86::VPERMI2PSrrkz, X86::VPERMI2PSrmbkz, TB_BCAST_SS}, - {X86::VPERMI2Q128rrk, X86::VPERMI2Q128rmbk, TB_BCAST_Q}, - {X86::VPERMI2Q128rrkz, X86::VPERMI2Q128rmbkz, TB_BCAST_Q}, - {X86::VPERMI2Q256rrk, X86::VPERMI2Q256rmbk, TB_BCAST_Q}, - {X86::VPERMI2Q256rrkz, X86::VPERMI2Q256rmbkz, TB_BCAST_Q}, - {X86::VPERMI2Qrrk, X86::VPERMI2Qrmbk, TB_BCAST_Q}, - {X86::VPERMI2Qrrkz, X86::VPERMI2Qrmbkz, TB_BCAST_Q}, + {X86::VPERMI2DZ128rrk, X86::VPERMI2DZ128rmbk, TB_BCAST_D}, + {X86::VPERMI2DZ128rrkz, X86::VPERMI2DZ128rmbkz, TB_BCAST_D}, + {X86::VPERMI2DZ256rrk, X86::VPERMI2DZ256rmbk, TB_BCAST_D}, + {X86::VPERMI2DZ256rrkz, X86::VPERMI2DZ256rmbkz, TB_BCAST_D}, + {X86::VPERMI2DZrrk, X86::VPERMI2DZrmbk, TB_BCAST_D}, + {X86::VPERMI2DZrrkz, X86::VPERMI2DZrmbkz, TB_BCAST_D}, + {X86::VPERMI2PDZ128rrk, X86::VPERMI2PDZ128rmbk, TB_BCAST_SD}, + {X86::VPERMI2PDZ128rrkz, X86::VPERMI2PDZ128rmbkz, TB_BCAST_SD}, + {X86::VPERMI2PDZ256rrk, X86::VPERMI2PDZ256rmbk, TB_BCAST_SD}, + {X86::VPERMI2PDZ256rrkz, X86::VPERMI2PDZ256rmbkz, TB_BCAST_SD}, + {X86::VPERMI2PDZrrk, X86::VPERMI2PDZrmbk, TB_BCAST_SD}, + {X86::VPERMI2PDZrrkz, X86::VPERMI2PDZrmbkz, TB_BCAST_SD}, + {X86::VPERMI2PSZ128rrk, X86::VPERMI2PSZ128rmbk, TB_BCAST_SS}, + {X86::VPERMI2PSZ128rrkz, X86::VPERMI2PSZ128rmbkz, TB_BCAST_SS}, + {X86::VPERMI2PSZ256rrk, X86::VPERMI2PSZ256rmbk, TB_BCAST_SS}, + {X86::VPERMI2PSZ256rrkz, X86::VPERMI2PSZ256rmbkz, TB_BCAST_SS}, + {X86::VPERMI2PSZrrk, X86::VPERMI2PSZrmbk, TB_BCAST_SS}, + {X86::VPERMI2PSZrrkz, X86::VPERMI2PSZrmbkz, TB_BCAST_SS}, + {X86::VPERMI2QZ128rrk, X86::VPERMI2QZ128rmbk, TB_BCAST_Q}, + {X86::VPERMI2QZ128rrkz, X86::VPERMI2QZ128rmbkz, TB_BCAST_Q}, + {X86::VPERMI2QZ256rrk, X86::VPERMI2QZ256rmbk, TB_BCAST_Q}, + {X86::VPERMI2QZ256rrkz, X86::VPERMI2QZ256rmbkz, TB_BCAST_Q}, + {X86::VPERMI2QZrrk, X86::VPERMI2QZrmbk, TB_BCAST_Q}, + {X86::VPERMI2QZrrkz, X86::VPERMI2QZrmbkz, TB_BCAST_Q}, {X86::VPERMILPDZ128rrk, X86::VPERMILPDZ128rmbk, TB_BCAST_SD}, {X86::VPERMILPDZ256rrk, X86::VPERMILPDZ256rmbk, TB_BCAST_SD}, {X86::VPERMILPDZrrk, X86::VPERMILPDZrmbk, TB_BCAST_SD}, @@ -8411,30 +8411,30 @@ static const X86FoldTableEntry BroadcastTable4[] = { {X86::VPERMPSZrrk, X86::VPERMPSZrmbk, TB_BCAST_SS}, {X86::VPERMQZ256rrk, X86::VPERMQZ256rmbk, TB_BCAST_Q}, {X86::VPERMQZrrk, X86::VPERMQZrmbk, TB_BCAST_Q}, - {X86::VPERMT2D128rrk, X86::VPERMT2D128rmbk, TB_BCAST_D}, - {X86::VPERMT2D128rrkz, X86::VPERMT2D128rmbkz, TB_BCAST_D}, - {X86::VPERMT2D256rrk, X86::VPERMT2D256rmbk, TB_BCAST_D}, - {X86::VPERMT2D256rrkz, X86::VPERMT2D256rmbkz, TB_BCAST_D}, - {X86::VPERMT2Drrk, X86::VPERMT2Drmbk, TB_BCAST_D}, - {X86::VPERMT2Drrkz, X86::VPERMT2Drmbkz, TB_BCAST_D}, - {X86::VPERMT2PD128rrk, X86::VPERMT2PD128rmbk, TB_BCAST_SD}, - {X86::VPERMT2PD128rrkz, X86::VPERMT2PD128rmbkz, TB_BCAST_SD}, - {X86::VPERMT2PD256rrk, X86::VPERMT2PD256rmbk, TB_BCAST_SD}, - {X86::VPERMT2PD256rrkz, X86::VPERMT2PD256rmbkz, TB_BCAST_SD}, - {X86::VPERMT2PDrrk, X86::VPERMT2PDrmbk, TB_BCAST_SD}, - {X86::VPERMT2PDrrkz, X86::VPERMT2PDrmbkz, TB_BCAST_SD}, - {X86::VPERMT2PS128rrk, X86::VPERMT2PS128rmbk, TB_BCAST_SS}, - {X86::VPERMT2PS128rrkz, X86::VPERMT2PS128rmbkz, TB_BCAST_SS}, - {X86::VPERMT2PS256rrk, X86::VPERMT2PS256rmbk, TB_BCAST_SS}, - {X86::VPERMT2PS256rrkz, X86::VPERMT2PS256rmbkz, TB_BCAST_SS}, - {X86::VPERMT2PSrrk, X86::VPERMT2PSrmbk, TB_BCAST_SS}, - {X86::VPERMT2PSrrkz, X86::VPERMT2PSrmbkz, TB_BCAST_SS}, - {X86::VPERMT2Q128rrk, X86::VPERMT2Q128rmbk, TB_BCAST_Q}, - {X86::VPERMT2Q128rrkz, X86::VPERMT2Q128rmbkz, TB_BCAST_Q}, - {X86::VPERMT2Q256rrk, X86::VPERMT2Q256rmbk, TB_BCAST_Q}, - {X86::VPERMT2Q256rrkz, X86::VPERMT2Q256rmbkz, TB_BCAST_Q}, - {X86::VPERMT2Qrrk, X86::VPERMT2Qrmbk, TB_BCAST_Q}, - {X86::VPERMT2Qrrkz, X86::VPERMT2Qrmbkz, TB_BCAST_Q}, + {X86::VPERMT2DZ128rrk, X86::VPERMT2DZ128rmbk, TB_BCAST_D}, + {X86::VPERMT2DZ128rrkz, X86::VPERMT2DZ128rmbkz, TB_BCAST_D}, + {X86::VPERMT2DZ256rrk, X86::VPERMT2DZ256rmbk, TB_BCAST_D}, + {X86::VPERMT2DZ256rrkz, X86::VPERMT2DZ256rmbkz, TB_BCAST_D}, + {X86::VPERMT2DZrrk, X86::VPERMT2DZrmbk, TB_BCAST_D}, + {X86::VPERMT2DZrrkz, X86::VPERMT2DZrmbkz, TB_BCAST_D}, + {X86::VPERMT2PDZ128rrk, X86::VPERMT2PDZ128rmbk, TB_BCAST_SD}, + {X86::VPERMT2PDZ128rrkz, X86::VPERMT2PDZ128rmbkz, TB_BCAST_SD}, + {X86::VPERMT2PDZ256rrk, X86::VPERMT2PDZ256rmbk, TB_BCAST_SD}, + {X86::VPERMT2PDZ256rrkz, X86::VPERMT2PDZ256rmbkz, TB_BCAST_SD}, + {X86::VPERMT2PDZrrk, X86::VPERMT2PDZrmbk, TB_BCAST_SD}, + {X86::VPERMT2PDZrrkz, X86::VPERMT2PDZrmbkz, TB_BCAST_SD}, + {X86::VPERMT2PSZ128rrk, X86::VPERMT2PSZ128rmbk, TB_BCAST_SS}, + {X86::VPERMT2PSZ128rrkz, X86::VPERMT2PSZ128rmbkz, TB_BCAST_SS}, + {X86::VPERMT2PSZ256rrk, X86::VPERMT2PSZ256rmbk, TB_BCAST_SS}, + {X86::VPERMT2PSZ256rrkz, X86::VPERMT2PSZ256rmbkz, TB_BCAST_SS}, + {X86::VPERMT2PSZrrk, X86::VPERMT2PSZrmbk, TB_BCAST_SS}, + {X86::VPERMT2PSZrrkz, X86::VPERMT2PSZrmbkz, TB_BCAST_SS}, + {X86::VPERMT2QZ128rrk, X86::VPERMT2QZ128rmbk, TB_BCAST_Q}, + {X86::VPERMT2QZ128rrkz, X86::VPERMT2QZ128rmbkz, TB_BCAST_Q}, + {X86::VPERMT2QZ256rrk, X86::VPERMT2QZ256rmbk, TB_BCAST_Q}, + {X86::VPERMT2QZ256rrkz, X86::VPERMT2QZ256rmbkz, TB_BCAST_Q}, + {X86::VPERMT2QZrrk, X86::VPERMT2QZrmbk, TB_BCAST_Q}, + {X86::VPERMT2QZrrkz, X86::VPERMT2QZrmbkz, TB_BCAST_Q}, {X86::VPMADD52HUQZ128rk, X86::VPMADD52HUQZ128mbk, TB_BCAST_Q}, {X86::VPMADD52HUQZ128rkz, X86::VPMADD52HUQZ128mbkz, TB_BCAST_Q}, {X86::VPMADD52HUQZ256rk, X86::VPMADD52HUQZ256mbk, TB_BCAST_Q}, diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp index 5fb6b048542bd..83025205310e8 100644 --- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp +++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp @@ -530,26 +530,17 @@ void X86FoldTablesEmitter::addBroadcastEntry( StringRef MemInstName = MemInst->TheDef->getName(); Record *Domain = RegRec->getValueAsDef("ExeDomain"); bool IsSSEPackedInt = Domain->getName() == "SSEPackedInt"; - // TODO: Rename AVX512 instructions to simplify conditions, e.g. - // D128 -> DZ128 - // D256 -> DZ256 - // VPERMI2Drr -> VPERMI2DZrr - // VPERMI2Drmb -> VPERMI2DZrmb if ((RegInstName.contains("DZ") || RegInstName.contains("DWZ") || - RegInstName.contains("D128") || RegInstName.contains("D256") || RegInstName.contains("Dr") || RegInstName.contains("I32")) && IsSSEPackedInt) { assert((MemInstName.contains("DZ") || RegInstName.contains("DWZ") || - MemInstName.contains("D128") || MemInstName.contains("D256") || MemInstName.contains("Dr") || MemInstName.contains("I32")) && "Unmatched names for broadcast"); Result.BroadcastKind = X86FoldTableEntry::BCAST_D; } else if ((RegInstName.contains("QZ") || RegInstName.contains("QBZ") || - RegInstName.contains("Q128") || RegInstName.contains("Q256") || RegInstName.contains("Qr") || RegInstName.contains("I64")) && IsSSEPackedInt) { assert((MemInstName.contains("QZ") || MemInstName.contains("QBZ") || - MemInstName.contains("Q128") || MemInstName.contains("Q256") || MemInstName.contains("Qr") || MemInstName.contains("I64")) && "Unmatched names for broadcast"); Result.BroadcastKind = X86FoldTableEntry::BCAST_Q;