diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index fab74a75a96b7..ad77fb4ef09d2 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -40,7 +40,7 @@ let isCodeGenOnly = 1 in { } /// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class -multiclass sse12_fp_scalar_int opc, string OpcodeStr, +multiclass sse12_fp_scalar_int opc, SDPatternOperator OpNode, RegisterClass RC, ValueType VT, string asm, Operand memopr, PatFrags mem_frags, Domain d, @@ -187,8 +187,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // don't use movss/movsd for copies. //===----------------------------------------------------------------------===// -multiclass sse12_move_rr { let isCommutable = 1 in def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst), @@ -210,7 +209,7 @@ multiclass sse12_move { // AVX let Predicates = [UseAVX, OptForSize] in - defm V#NAME : sse12_move_rr, VEX_4V, VEX_LIG, VEX_WIG; @@ -222,7 +221,7 @@ multiclass sse12_move; } @@ -2266,7 +2265,7 @@ defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64, /// There are no patterns here because isel prefers integer versions for SSE2 /// and later. There are SSE1 v4f32 patterns later. multiclass sse12_fp_packed_logical opc, string OpcodeStr, - SDNode OpNode, X86SchedWriteWidths sched> { + X86SchedWriteWidths sched> { let Predicates = [HasAVX, NoVLX] in { defm V#NAME#PSY : sse12_fp_packed_logical_rm opc, string OpcodeStr, } } -defm AND : sse12_fp_packed_logical<0x54, "and", and, SchedWriteFLogic>; -defm OR : sse12_fp_packed_logical<0x56, "or", or, SchedWriteFLogic>; -defm XOR : sse12_fp_packed_logical<0x57, "xor", xor, SchedWriteFLogic>; +defm AND : sse12_fp_packed_logical<0x54, "and", SchedWriteFLogic>; +defm OR : sse12_fp_packed_logical<0x56, "or", SchedWriteFLogic>; +defm XOR : sse12_fp_packed_logical<0x57, "xor", SchedWriteFLogic>; let isCommutable = 0 in - defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp, SchedWriteFLogic>; + defm ANDN : sse12_fp_packed_logical<0x55, "andn", SchedWriteFLogic>; let Predicates = [HasAVX2, NoVLX] in { def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)), @@ -2643,18 +2642,18 @@ multiclass basic_sse12_fp_binop_s_int opc, string OpcodeStr, SDPatternOperator OpNode, X86SchedWriteSizes sched> { let Uses = [MXCSR], mayRaiseFPException = 1 in { - defm V#NAME#SS : sse12_fp_scalar_int, XS, VEX_4V, VEX_LIG, VEX_WIG; - defm V#NAME#SD : sse12_fp_scalar_int, XD, VEX_4V, VEX_LIG, VEX_WIG; let Constraints = "$src1 = $dst" in { - defm SS : sse12_fp_scalar_int, XS; - defm SD : sse12_fp_scalar_int, XD; } @@ -2790,8 +2789,8 @@ defm : scalar_math_patterns opc, string OpcodeStr, RegisterClass RC, - ValueType ScalarVT, X86MemOperand x86memop, - Operand intmemop, SDPatternOperator OpNode, Domain d, + X86MemOperand x86memop, Operand intmemop, + SDPatternOperator OpNode, Domain d, X86FoldableSchedWrite sched, Predicate target> { let isCodeGenOnly = 1, hasSideEffects = 0 in { def r : I opc, string OpcodeStr, RegisterClass RC, } -multiclass sse_fp_unop_s_intr { +multiclass sse_fp_unop_s_intr { let Predicates = [target] in { // These are unary operations, but they are modeled as having 2 source operands // because the high elements of the destination are unchanged in SSE. @@ -2841,7 +2839,7 @@ multiclass sse_fp_unop_s_intr { let Predicates = [target] in { def : Pat<(Intr VR128:$src), @@ -2972,12 +2970,11 @@ let Predicates = [HasAVX, NoVLX] in { Sched<[sched.XMM.Folded]>; } -multiclass sse1_fp_unop_s_intr opc, string OpcodeStr, SDNode OpNode, - X86SchedWriteWidths sched, Predicate AVXTarget> { - defm SS : sse_fp_unop_s_intr { + defm SS : sse_fp_unop_s_intr("int_x86_sse_"#OpcodeStr#_ss), - UseSSE1, "SS">, XS; - defm V#NAME#SS : avx_fp_unop_s_intr, XS; + defm V#NAME#SS : avx_fp_unop_s_intr("int_x86_sse_"#OpcodeStr#_ss), AVXTarget>, XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable; @@ -2985,7 +2982,7 @@ multiclass sse1_fp_unop_s_intr opc, string OpcodeStr, SDNode OpNode, multiclass sse1_fp_unop_s opc, string OpcodeStr, SDPatternOperator OpNode, X86SchedWriteWidths sched, Predicate AVXTarget> { - defm SS : sse_fp_unop_s, XS; defm V#NAME#SS : avx_fp_unop_s, @@ -2994,7 +2991,7 @@ multiclass sse1_fp_unop_s opc, string OpcodeStr, SDPatternOperator OpNod multiclass sse2_fp_unop_s opc, string OpcodeStr, SDPatternOperator OpNode, X86SchedWriteWidths sched, Predicate AVXTarget> { - defm SD : sse_fp_unop_s, XD; defm V#NAME#SD : avx_fp_unop_s, @@ -3010,10 +3007,10 @@ defm SQRT : sse1_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, UseAVX>, // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>, - sse1_fp_unop_s_intr<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>, + sse1_fp_unop_s_intr<"rsqrt", HasAVX>, sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, [HasAVX]>; defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>, - sse1_fp_unop_s_intr<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>, + sse1_fp_unop_s_intr<"rcp", HasAVX>, sse1_fp_unop_p<0x53, "rcp", X86frcp, SchedWriteFRcp, [HasAVX]>; // There is no f64 version of the reciprocal approximation instructions. @@ -7889,10 +7886,8 @@ let Predicates = [HasAVX2, NoVLX] in { // VGATHER - GATHER Operations // FIXME: Improve scheduling of gather instructions. -multiclass avx2_gather opc, string OpcodeStr, ValueType VTx, - ValueType VTy, RegisterClass RC256, - X86MemOperand memop128, X86MemOperand memop256, - ValueType MTx = VTx, ValueType MTy = VTy> { +multiclass avx2_gather opc, string OpcodeStr, RegisterClass RC256, + X86MemOperand memop128, X86MemOperand memop256> { let mayLoad = 1, hasSideEffects = 0 in { def rm : AVX28I, VEX_W; - defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", v2i64, v4i64, - VR256, vx128mem, vy256mem>, VEX_W; - defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", v4i32, v8i32, - VR256, vx128mem, vy256mem>; - defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", v4i32, v4i32, - VR128, vx64mem, vy128mem>; + defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", + VR256, vx128mem, vx256mem>, VEX_W; + defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", + VR256, vx128mem, vy256mem>, VEX_W; + defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", + VR256, vx128mem, vy256mem>; + defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", + VR128, vx64mem, vy128mem>; let ExeDomain = SSEPackedDouble in { - defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", v2f64, v4f64, - VR256, vx128mem, vx256mem, v2i64, v4i64>, VEX_W; - defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", v2f64, v4f64, - VR256, vx128mem, vy256mem, v2i64, v4i64>, VEX_W; + defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", + VR256, vx128mem, vx256mem>, VEX_W; + defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", + VR256, vx128mem, vy256mem>, VEX_W; } let ExeDomain = SSEPackedSingle in { - defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", v4f32, v8f32, - VR256, vx128mem, vy256mem, v4i32, v8i32>; - defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", v4f32, v4f32, - VR128, vx64mem, vy128mem, v4i32, v4i32>; + defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", + VR256, vx128mem, vy256mem>; + defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", + VR128, vx64mem, vy128mem>; } } }