diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 27841b702dd2b..0cfe8d7457635 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -606,10 +606,12 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( { ISD::MUL, MVT::v4i32, 1 }, // pmulld (Skylake from agner.org) { ISD::MUL, MVT::v8i64, 8 }, // 3*pmuludq/3*shift/2*add + { ISD::FNEG, MVT::v8f64, 1 }, // Skylake from http://www.agner.org/ { ISD::FADD, MVT::v8f64, 1 }, // Skylake from http://www.agner.org/ { ISD::FSUB, MVT::v8f64, 1 }, // Skylake from http://www.agner.org/ { ISD::FMUL, MVT::v8f64, 1 }, // Skylake from http://www.agner.org/ + { ISD::FNEG, MVT::v16f32, 1 }, // Skylake from http://www.agner.org/ { ISD::FADD, MVT::v16f32, 1 }, // Skylake from http://www.agner.org/ { ISD::FSUB, MVT::v16f32, 1 }, // Skylake from http://www.agner.org/ { ISD::FMUL, MVT::v16f32, 1 }, // Skylake from http://www.agner.org/ @@ -777,6 +779,8 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( { ISD::MUL, MVT::v8i32, 2 }, // pmulld (Haswell from agner.org) { ISD::MUL, MVT::v4i64, 8 }, // 3*pmuludq/3*shift/2*add + { ISD::FNEG, MVT::v4f64, 1 }, // Haswell from http://www.agner.org/ + { ISD::FNEG, MVT::v8f32, 1 }, // Haswell from http://www.agner.org/ { ISD::FADD, MVT::v4f64, 1 }, // Haswell from http://www.agner.org/ { ISD::FADD, MVT::v8f32, 1 }, // Haswell from http://www.agner.org/ { ISD::FSUB, MVT::v4f64, 1 }, // Haswell from http://www.agner.org/ @@ -821,6 +825,9 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( { ISD::MUL, MVT::v32i8, 26 }, // extend/pmullw/trunc sequence. + { ISD::FNEG, MVT::v4f64, 2 }, // BTVER2 from http://www.agner.org/ + { ISD::FNEG, MVT::v8f32, 2 }, // BTVER2 from http://www.agner.org/ + { ISD::FDIV, MVT::f32, 14 }, // SNB from http://www.agner.org/ { ISD::FDIV, MVT::v4f32, 14 }, // SNB from http://www.agner.org/ { ISD::FDIV, MVT::v8f32, 28 }, // SNB from http://www.agner.org/ @@ -919,6 +926,11 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( { ISD::FDIV, MVT::f64, 38 }, // Pentium IV from http://www.agner.org/ { ISD::FDIV, MVT::v2f64, 69 }, // Pentium IV from http://www.agner.org/ + { ISD::FNEG, MVT::f32, 1 }, // Pentium IV from http://www.agner.org/ + { ISD::FNEG, MVT::f64, 1 }, // Pentium IV from http://www.agner.org/ + { ISD::FNEG, MVT::v4f32, 1 }, // Pentium IV from http://www.agner.org/ + { ISD::FNEG, MVT::v2f64, 1 }, // Pentium IV from http://www.agner.org/ + { ISD::FADD, MVT::f32, 2 }, // Pentium IV from http://www.agner.org/ { ISD::FADD, MVT::f64, 2 }, // Pentium IV from http://www.agner.org/ @@ -934,6 +946,9 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( { ISD::FDIV, MVT::f32, 17 }, // Pentium III from http://www.agner.org/ { ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/ + { ISD::FNEG, MVT::f32, 2 }, // Pentium III from http://www.agner.org/ + { ISD::FNEG, MVT::v4f32, 2 }, // Pentium III from http://www.agner.org/ + { ISD::FADD, MVT::f32, 1 }, // Pentium III from http://www.agner.org/ { ISD::FADD, MVT::v4f32, 2 }, // Pentium III from http://www.agner.org/ diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp.ll b/llvm/test/Analysis/CostModel/X86/arith-fp.ll index d5d47a99e9199..0d2d94c813dbc 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp.ll @@ -322,10 +322,10 @@ define i32 @fneg_idiom(i32 %arg) { define i32 @fneg(i32 %arg) { ; SSE1-LABEL: 'fneg' -; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fneg float undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fneg <4 x float> undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fneg <8 x float> undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = fneg <16 x float> undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fneg float undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fneg <4 x float> undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fneg <8 x float> undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fneg <16 x float> undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fneg double undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fneg <2 x double> undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fneg <4 x double> undef @@ -333,69 +333,80 @@ define i32 @fneg(i32 %arg) { ; SSE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE2-LABEL: 'fneg' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fneg float undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fneg <4 x float> undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fneg <8 x float> undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = fneg <16 x float> undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fneg double undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fneg <2 x double> undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fneg <4 x double> undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fneg <8 x double> undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fneg <16 x float> undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fneg <8 x double> undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fneg' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fneg float undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fneg <4 x float> undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fneg <8 x float> undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = fneg <16 x float> undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fneg double undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fneg <2 x double> undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fneg <4 x double> undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fneg <8 x double> undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fneg <16 x float> undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fneg <8 x double> undef ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX-LABEL: 'fneg' -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fneg float undef -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fneg <4 x float> undef -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fneg <8 x float> undef -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fneg <16 x float> undef -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fneg double undef -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fneg <2 x double> undef -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fneg <4 x double> undef -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fneg <8 x double> undef -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; AVX1-LABEL: 'fneg' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fneg <16 x float> undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fneg <8 x double> undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX2-LABEL: 'fneg' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fneg <8 x float> undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fneg <16 x float> undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fneg <4 x double> undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fneg <8 x double> undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fneg' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fneg float undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fneg <4 x float> undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fneg <8 x float> undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fneg <16 x float> undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fneg double undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fneg <2 x double> undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fneg <4 x double> undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fneg <8 x double> undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fneg <8 x float> undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fneg <16 x float> undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fneg <4 x double> undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fneg <8 x double> undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'fneg' -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fneg float undef -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fneg <4 x float> undef -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fneg <8 x float> undef -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = fneg <16 x float> undef -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fneg double undef -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fneg <2 x double> undef -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fneg <4 x double> undef -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fneg <8 x double> undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fneg <16 x float> undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fneg <8 x double> undef ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'fneg' -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fneg float undef -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fneg <4 x float> undef -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fneg <8 x float> undef -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = fneg <16 x float> undef -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fneg double undef -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fneg <2 x double> undef -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fneg <4 x double> undef -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fneg <8 x double> undef +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fneg <16 x float> undef +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fneg <8 x double> undef ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F32 = fneg float undef diff --git a/llvm/test/Transforms/LoopVectorize/X86/fneg-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/fneg-cost.ll index 5aedf451ed20a..162fb6c4f87a4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fneg-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fneg-cost.ll @@ -5,9 +5,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" -; CHECK: Found an estimated cost of 4 for VF 1 For instruction: %neg = fneg float %{{.*}} -; CHECK: Found an estimated cost of 4 for VF 2 For instruction: %neg = fneg float %{{.*}} -; CHECK: Found an estimated cost of 4 for VF 4 For instruction: %neg = fneg float %{{.*}} +; CHECK: Found an estimated cost of 1 for VF 1 For instruction: %neg = fneg float %{{.*}} +; CHECK: Found an estimated cost of 1 for VF 2 For instruction: %neg = fneg float %{{.*}} +; CHECK: Found an estimated cost of 1 for VF 4 For instruction: %neg = fneg float %{{.*}} define void @fneg_cost(float* %a, i64 %n) { entry: br label %for.body