Skip to content

Commit

Permalink
[X86] Add missing register qualifier to the VBLENDVPD/VBLENDVPS/VPBLENDVB instruction names
Browse files Browse the repository at this point in the history

Matches the SSE variants (which have a 0 qualifier to indicate the xmm0 explicit dependency)
  • Loading branch information
RKSimon committed Mar 11, 2024
1 parent a924da6 commit 0858c90
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 42 deletions.
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/X86FastISel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2230,7 +2230,7 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
unsigned CmpOpcode =
(RetVT == MVT::f32) ? X86::VCMPSSrri : X86::VCMPSDrri;
unsigned BlendOpcode =
(RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
(RetVT == MVT::f32) ? X86::VBLENDVPSrrr : X86::VBLENDVPDrrr;

Register CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpRHSReg,
CC);
Expand Down
48 changes: 24 additions & 24 deletions llvm/lib/Target/X86/X86InstrSSE.td
Original file line number Diff line number Diff line change
Expand Up @@ -6266,27 +6266,27 @@ multiclass SS41I_quaternary_avx<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, ValueType VT,
PatFrag mem_frag, SDNode OpNode,
X86FoldableSchedWrite sched> {
def rr : Ii8Reg<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (OpNode RC:$src3, RC:$src2, RC:$src1)))],
SSEPackedInt>, TA, PD, VEX, VVVV,
Sched<[sched]>;
def rrr : Ii8Reg<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (OpNode RC:$src3, RC:$src2, RC:$src1)))],
SSEPackedInt>, TA, PD, VEX, VVVV,
Sched<[sched]>;

def rm : Ii8Reg<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src3, (mem_frag addr:$src2),
RC:$src1))], SSEPackedInt>, TA, PD, VEX, VVVV,
Sched<[sched.Folded, sched.ReadAfterFold,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC::$src3
sched.ReadAfterFold]>;
def rmr : Ii8Reg<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src3, (mem_frag addr:$src2),
RC:$src1))], SSEPackedInt>, TA, PD, VEX, VVVV,
Sched<[sched.Folded, sched.ReadAfterFold,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC::$src3
sched.ReadAfterFold]>;
}

let Predicates = [HasAVX] in {
Expand Down Expand Up @@ -6320,16 +6320,16 @@ defm VPBLENDVBY : SS41I_quaternary_avx<0x4C, "vpblendvb", VR256, i256mem,
let Predicates = [HasAVX] in {
def : Pat<(v4i32 (X86Blendv (v4i32 VR128:$mask), (v4i32 VR128:$src1),
(v4i32 VR128:$src2))),
(VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
(VBLENDVPSrrr VR128:$src2, VR128:$src1, VR128:$mask)>;
def : Pat<(v2i64 (X86Blendv (v2i64 VR128:$mask), (v2i64 VR128:$src1),
(v2i64 VR128:$src2))),
(VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
(VBLENDVPDrrr VR128:$src2, VR128:$src1, VR128:$mask)>;
def : Pat<(v8i32 (X86Blendv (v8i32 VR256:$mask), (v8i32 VR256:$src1),
(v8i32 VR256:$src2))),
(VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
(VBLENDVPSYrrr VR256:$src2, VR256:$src1, VR256:$mask)>;
def : Pat<(v4i64 (X86Blendv (v4i64 VR256:$mask), (v4i64 VR256:$src1),
(v4i64 VR256:$src2))),
(VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
(VBLENDVPDYrrr VR256:$src2, VR256:$src1, VR256:$mask)>;
}

// Prefer a movss or movsd over a blendps when optimizing for size. these were
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/X86/X86SchedAlderlakeP.td
Original file line number Diff line number Diff line change
Expand Up @@ -2158,16 +2158,16 @@ def ADLPWriteResGroup244 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort02_03_11]> {
let Latency = 9;
let NumMicroOps = 4;
}
def : InstRW<[ADLPWriteResGroup244, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)rm$")>;
def : InstRW<[ADLPWriteResGroup244, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBrm)>;
def : InstRW<[ADLPWriteResGroup244, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)rmr$")>;
def : InstRW<[ADLPWriteResGroup244, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBrmr)>;

def ADLPWriteResGroup245 : SchedWriteRes<[ADLPPort00_01_05]> {
let ReleaseAtCycles = [3];
let Latency = 3;
let NumMicroOps = 3;
}
def : InstRW<[ADLPWriteResGroup245], (instregex "^VBLENDVP(D|S)rr$")>;
def : InstRW<[ADLPWriteResGroup245], (instrs VPBLENDVBrr)>;
def : InstRW<[ADLPWriteResGroup245], (instregex "^VBLENDVP(D|S)rrr$")>;
def : InstRW<[ADLPWriteResGroup245], (instrs VPBLENDVBrrr)>;

def ADLPWriteResGroup246 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_11]> {
let ReleaseAtCycles = [6, 7, 18];
Expand Down
14 changes: 7 additions & 7 deletions llvm/lib/Target/X86/X86SchedSapphireRapids.td
Original file line number Diff line number Diff line change
Expand Up @@ -2673,25 +2673,25 @@ def SPRWriteResGroup259 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> {
let Latency = 10;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup259, ReadAfterVecYLd, ReadAfterVecYLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)Yrm$")>;
def : InstRW<[SPRWriteResGroup259, ReadAfterVecYLd, ReadAfterVecYLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBYrm)>;
def : InstRW<[SPRWriteResGroup259, ReadAfterVecYLd, ReadAfterVecYLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)Yrmr$")>;
def : InstRW<[SPRWriteResGroup259, ReadAfterVecYLd, ReadAfterVecYLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBYrmr)>;

def SPRWriteResGroup260 : SchedWriteRes<[SPRPort00_01_05]> {
let ReleaseAtCycles = [3];
let Latency = 3;
let NumMicroOps = 3;
}
def : InstRW<[SPRWriteResGroup260], (instregex "^VBLENDVP(S|DY)rr$",
"^VBLENDVP(D|SY)rr$",
"^VPBLENDVB(Y?)rr$")>;
def : InstRW<[SPRWriteResGroup260], (instregex "^VBLENDVP(S|DY)rrr$",
"^VBLENDVP(D|SY)rrr$",
"^VPBLENDVB(Y?)rrr$")>;

def SPRWriteResGroup261 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> {
let ReleaseAtCycles = [3, 1];
let Latency = 9;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup261, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)rm$")>;
def : InstRW<[SPRWriteResGroup261, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBrm)>;
def : InstRW<[SPRWriteResGroup261, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)rmr$")>;
def : InstRW<[SPRWriteResGroup261, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBrmr)>;

def SPRWriteResGroup262 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> {
let Latency = 9;
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/TableGen/x86-fold-tables.inc
Original file line number Diff line number Diff line change
Expand Up @@ -2363,10 +2363,10 @@ static const X86FoldTableEntry Table2[] = {
{X86::VBLENDPDrri, X86::VBLENDPDrmi, 0},
{X86::VBLENDPSYrri, X86::VBLENDPSYrmi, 0},
{X86::VBLENDPSrri, X86::VBLENDPSrmi, 0},
{X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, 0},
{X86::VBLENDVPDrr, X86::VBLENDVPDrm, 0},
{X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, 0},
{X86::VBLENDVPSrr, X86::VBLENDVPSrm, 0},
{X86::VBLENDVPDYrrr, X86::VBLENDVPDYrmr, 0},
{X86::VBLENDVPDrrr, X86::VBLENDVPDrmr, 0},
{X86::VBLENDVPSYrrr, X86::VBLENDVPSYrmr, 0},
{X86::VBLENDVPSrrr, X86::VBLENDVPSrmr, 0},
{X86::VBROADCASTF32X2Z256rrkz, X86::VBROADCASTF32X2Z256rmkz, TB_NO_REVERSE},
{X86::VBROADCASTF32X2Zrrkz, X86::VBROADCASTF32X2Zrmkz, TB_NO_REVERSE},
{X86::VBROADCASTI32X2Z128rrkz, X86::VBROADCASTI32X2Z128rmkz, TB_NO_REVERSE},
Expand Down Expand Up @@ -3042,8 +3042,8 @@ static const X86FoldTableEntry Table2[] = {
{X86::VPBLENDMWZ128rr, X86::VPBLENDMWZ128rm, 0},
{X86::VPBLENDMWZ256rr, X86::VPBLENDMWZ256rm, 0},
{X86::VPBLENDMWZrr, X86::VPBLENDMWZrm, 0},
{X86::VPBLENDVBYrr, X86::VPBLENDVBYrm, 0},
{X86::VPBLENDVBrr, X86::VPBLENDVBrm, 0},
{X86::VPBLENDVBYrrr, X86::VPBLENDVBYrmr, 0},
{X86::VPBLENDVBrrr, X86::VPBLENDVBrmr, 0},
{X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0},
{X86::VPBLENDWrri, X86::VPBLENDWrmi, 0},
{X86::VPBROADCASTBZ128rrkz, X86::VPBROADCASTBZ128rmkz, TB_NO_REVERSE},
Expand Down

0 comments on commit 0858c90

Please sign in to comment.