Move the VPERM2F128 section of X86InstrSSE.td (the VPERM2F128rr/rm
instruction definitions, the Perm2XCommuteImm immediate transform, the
vperm2x128_lowering multiclass and its instantiations) from just before the
VZERO section to just before the VINSERTF128 section.  The added and removed
text is identical, so this is pure code motion.

NOTE(review): leading whitespace inside the hunk lines was reconstructed and
should be confirmed against the tree (or apply with --ignore-whitespace).

diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 41fda603d5a9f..70d7cecce31bd 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -7048,6 +7048,50 @@ def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
           (VBROADCASTF128 addr:$src)>;
 }
 
+//===----------------------------------------------------------------------===//
+// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
+//
+
+let ExeDomain = SSEPackedSingle in {
+let isCommutable = 1 in
+def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
+          (ins VR256:$src1, VR256:$src2, u8imm:$src3),
+          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
+          VEX_4V, VEX_L, Sched<[WriteFShuffle256]>;
+def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
+          (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
+          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
+          VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
+}
+
+// Immediate transform to help with commuting.
+def Perm2XCommuteImm : SDNodeXForm<timm, [{
+  return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
+}]>;
+
+multiclass vperm2x128_lowering<string InstrStr, ValueType VT, PatFrag memop_frag> {
+  def : Pat<(VT (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
+            (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR256:$src2, timm:$imm)>;
+  def : Pat<(VT (X86VPerm2x128 VR256:$src1, (memop_frag addr:$src2), (i8 timm:$imm))),
+            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2, timm:$imm)>;
+  // Pattern with load in other operand.
+  def : Pat<(VT (X86VPerm2x128 (memop_frag addr:$src2), VR256:$src1, (i8 timm:$imm))),
+            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
+             (Perm2XCommuteImm timm:$imm))>;
+}
+
+let Predicates = [HasAVX] in {
+  defm : vperm2x128_lowering<"VPERM2F128", v4f64, loadv4f64>;
+  defm : vperm2x128_lowering<"VPERM2F128", v8f32, loadv8f32>;
+}
+
+let Predicates = [HasAVX1Only] in {
+  defm : vperm2x128_lowering<"VPERM2F128", v4i64, loadv4i64>;
+  defm : vperm2x128_lowering<"VPERM2F128", v8i32, loadv8i32>;
+  defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>;
+  defm : vperm2x128_lowering<"VPERM2F128", v32i8, loadv32i8>;
+}
+
 //===----------------------------------------------------------------------===//
 // VINSERTF128 - Insert packed floating-point values
 //
@@ -7296,50 +7340,6 @@ let ExeDomain = SSEPackedDouble in {
                                SchedWriteFVarShuffle.YMM>, VEX_L;
 }
 
-//===----------------------------------------------------------------------===//
-// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
-//
-
-let ExeDomain = SSEPackedSingle in {
-let isCommutable = 1 in
-def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
-          (ins VR256:$src1, VR256:$src2, u8imm:$src3),
-          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
-          VEX_4V, VEX_L, Sched<[WriteFShuffle256]>;
-def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
-          (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
-          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
-          VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
-}
-
-// Immediate transform to help with commuting.
-def Perm2XCommuteImm : SDNodeXForm<timm, [{
-  return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
-}]>;
-
-multiclass vperm2x128_lowering<string InstrStr, ValueType VT, PatFrag memop_frag> {
-  def : Pat<(VT (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
-            (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR256:$src2, timm:$imm)>;
-  def : Pat<(VT (X86VPerm2x128 VR256:$src1, (memop_frag addr:$src2), (i8 timm:$imm))),
-            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2, timm:$imm)>;
-  // Pattern with load in other operand.
-  def : Pat<(VT (X86VPerm2x128 (memop_frag addr:$src2), VR256:$src1, (i8 timm:$imm))),
-            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
-             (Perm2XCommuteImm timm:$imm))>;
-}
-
-let Predicates = [HasAVX] in {
-  defm : vperm2x128_lowering<"VPERM2F128", v4f64, loadv4f64>;
-  defm : vperm2x128_lowering<"VPERM2F128", v8f32, loadv8f32>;
-}
-
-let Predicates = [HasAVX1Only] in {
-  defm : vperm2x128_lowering<"VPERM2F128", v4i64, loadv4i64>;
-  defm : vperm2x128_lowering<"VPERM2F128", v8i32, loadv8i32>;
-  defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>;
-  defm : vperm2x128_lowering<"VPERM2F128", v32i8, loadv32i8>;
-}
-
 //===----------------------------------------------------------------------===//
 // VZERO - Zero YMM registers
 // Note: These instruction do not affect the YMM16-YMM31.