diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 9f0b5f32df20a..48d3b68b1823a 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -2230,7 +2230,7 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) { unsigned CmpOpcode = (RetVT == MVT::f32) ? X86::VCMPSSrri : X86::VCMPSDrri; unsigned BlendOpcode = - (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr; + (RetVT == MVT::f32) ? X86::VBLENDVPSrrr : X86::VBLENDVPDrrr; Register CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpRHSReg, CC); diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 4a542b7e5a1bb..69d45366a1dbc 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -6266,27 +6266,27 @@ multiclass SS41I_quaternary_avx opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop, ValueType VT, PatFrag mem_frag, SDNode OpNode, X86FoldableSchedWrite sched> { - def rr : Ii8Reg, TA, PD, VEX, VVVV, - Sched<[sched]>; + def rrr : Ii8Reg, TA, PD, VEX, VVVV, + Sched<[sched]>; - def rm : Ii8Reg, TA, PD, VEX, VVVV, - Sched<[sched.Folded, sched.ReadAfterFold, - // x86memop:$src2 - ReadDefault, ReadDefault, ReadDefault, ReadDefault, - ReadDefault, - // RC::$src3 - sched.ReadAfterFold]>; + def rmr : Ii8Reg, TA, PD, VEX, VVVV, + Sched<[sched.Folded, sched.ReadAfterFold, + // x86memop:$src2 + ReadDefault, ReadDefault, ReadDefault, ReadDefault, + ReadDefault, + // RC::$src3 + sched.ReadAfterFold]>; } let Predicates = [HasAVX] in { @@ -6320,16 +6320,16 @@ defm VPBLENDVBY : SS41I_quaternary_avx<0x4C, "vpblendvb", VR256, i256mem, let Predicates = [HasAVX] in { def : Pat<(v4i32 (X86Blendv (v4i32 VR128:$mask), (v4i32 VR128:$src1), (v4i32 VR128:$src2))), - (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>; + (VBLENDVPSrrr VR128:$src2, VR128:$src1, VR128:$mask)>; def : Pat<(v2i64 (X86Blendv (v2i64 VR128:$mask), (v2i64 VR128:$src1), (v2i64 VR128:$src2))), - (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>; + (VBLENDVPDrrr VR128:$src2, VR128:$src1, VR128:$mask)>; def : Pat<(v8i32 (X86Blendv (v8i32 VR256:$mask), (v8i32 VR256:$src1), (v8i32 VR256:$src2))), - (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>; + (VBLENDVPSYrrr VR256:$src2, VR256:$src1, VR256:$mask)>; def : Pat<(v4i64 (X86Blendv (v4i64 VR256:$mask), (v4i64 VR256:$src1), (v4i64 VR256:$src2))), - (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>; + (VBLENDVPDYrrr VR256:$src2, VR256:$src1, VR256:$mask)>; } // Prefer a movss or movsd over a blendps when optimizing for size. these were diff --git a/llvm/lib/Target/X86/X86SchedAlderlakeP.td b/llvm/lib/Target/X86/X86SchedAlderlakeP.td index 4dc5ea3c86112..6f9d2cf7ffdf4 100644 --- a/llvm/lib/Target/X86/X86SchedAlderlakeP.td +++ b/llvm/lib/Target/X86/X86SchedAlderlakeP.td @@ -2158,16 +2158,16 @@ def ADLPWriteResGroup244 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort02_03_11]> { let Latency = 9; let NumMicroOps = 4; } -def : InstRW<[ADLPWriteResGroup244, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)rm$")>; -def : InstRW<[ADLPWriteResGroup244, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBrm)>; +def : InstRW<[ADLPWriteResGroup244, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)rmr$")>; +def : InstRW<[ADLPWriteResGroup244, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBrmr)>; def ADLPWriteResGroup245 : SchedWriteRes<[ADLPPort00_01_05]> { let ReleaseAtCycles = [3]; let Latency = 3; let NumMicroOps = 3; } -def : InstRW<[ADLPWriteResGroup245], (instregex "^VBLENDVP(D|S)rr$")>; -def : InstRW<[ADLPWriteResGroup245], (instrs VPBLENDVBrr)>; +def : InstRW<[ADLPWriteResGroup245], (instregex "^VBLENDVP(D|S)rrr$")>; +def : InstRW<[ADLPWriteResGroup245], (instrs VPBLENDVBrrr)>; def ADLPWriteResGroup246 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_11]> { let ReleaseAtCycles = [6, 7, 18]; diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td index 3c698d2c9f7a0..88bb9ad8f1d74 100644 --- a/llvm/lib/Target/X86/X86SchedSapphireRapids.td +++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td @@ -2673,25 +2673,25 @@ def SPRWriteResGroup259 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> { let Latency = 10; let NumMicroOps = 4; } -def : InstRW<[SPRWriteResGroup259, ReadAfterVecYLd, ReadAfterVecYLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)Yrm$")>; -def : InstRW<[SPRWriteResGroup259, ReadAfterVecYLd, ReadAfterVecYLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBYrm)>; +def : InstRW<[SPRWriteResGroup259, ReadAfterVecYLd, ReadAfterVecYLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)Yrmr$")>; +def : InstRW<[SPRWriteResGroup259, ReadAfterVecYLd, ReadAfterVecYLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBYrmr)>; def SPRWriteResGroup260 : SchedWriteRes<[SPRPort00_01_05]> { let ReleaseAtCycles = [3]; let Latency = 3; let NumMicroOps = 3; } -def : InstRW<[SPRWriteResGroup260], (instregex "^VBLENDVP(S|DY)rr$", - "^VBLENDVP(D|SY)rr$", - "^VPBLENDVB(Y?)rr$")>; +def : InstRW<[SPRWriteResGroup260], (instregex "^VBLENDVP(S|DY)rrr$", + "^VBLENDVP(D|SY)rrr$", + "^VPBLENDVB(Y?)rrr$")>; def SPRWriteResGroup261 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> { let ReleaseAtCycles = [3, 1]; let Latency = 9; let NumMicroOps = 4; } -def : InstRW<[SPRWriteResGroup261, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)rm$")>; -def : InstRW<[SPRWriteResGroup261, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBrm)>; +def : InstRW<[SPRWriteResGroup261, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)rmr$")>; +def : InstRW<[SPRWriteResGroup261, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBrmr)>; def SPRWriteResGroup262 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> { let Latency = 9; diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index e0fccd42e47f7..eea4f87cae9ce 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -2363,10 +2363,10 @@ static const X86FoldTableEntry Table2[] = { {X86::VBLENDPDrri, X86::VBLENDPDrmi, 0}, {X86::VBLENDPSYrri, X86::VBLENDPSYrmi, 0}, {X86::VBLENDPSrri, X86::VBLENDPSrmi, 0}, - {X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, 0}, - {X86::VBLENDVPDrr, X86::VBLENDVPDrm, 0}, - {X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, 0}, - {X86::VBLENDVPSrr, X86::VBLENDVPSrm, 0}, + {X86::VBLENDVPDYrrr, X86::VBLENDVPDYrmr, 0}, + {X86::VBLENDVPDrrr, X86::VBLENDVPDrmr, 0}, + {X86::VBLENDVPSYrrr, X86::VBLENDVPSYrmr, 0}, + {X86::VBLENDVPSrrr, X86::VBLENDVPSrmr, 0}, {X86::VBROADCASTF32X2Z256rrkz, X86::VBROADCASTF32X2Z256rmkz, TB_NO_REVERSE}, {X86::VBROADCASTF32X2Zrrkz, X86::VBROADCASTF32X2Zrmkz, TB_NO_REVERSE}, {X86::VBROADCASTI32X2Z128rrkz, X86::VBROADCASTI32X2Z128rmkz, TB_NO_REVERSE}, @@ -3042,8 +3042,8 @@ static const X86FoldTableEntry Table2[] = { {X86::VPBLENDMWZ128rr, X86::VPBLENDMWZ128rm, 0}, {X86::VPBLENDMWZ256rr, X86::VPBLENDMWZ256rm, 0}, {X86::VPBLENDMWZrr, X86::VPBLENDMWZrm, 0}, - {X86::VPBLENDVBYrr, X86::VPBLENDVBYrm, 0}, - {X86::VPBLENDVBrr, X86::VPBLENDVBrm, 0}, + {X86::VPBLENDVBYrrr, X86::VPBLENDVBYrmr, 0}, + {X86::VPBLENDVBrrr, X86::VPBLENDVBrmr, 0}, {X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0}, {X86::VPBLENDWrri, X86::VPBLENDWrmi, 0}, {X86::VPBROADCASTBZ128rrkz, X86::VPBROADCASTBZ128rmkz, TB_NO_REVERSE},