From b9de62c2b69e3b6342045ae82e1532d4f70c6d34 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 25 Apr 2020 23:00:57 -0700 Subject: [PATCH] [X86] Fix the cost of v16i1->v16i16 sext/zext on avx targets. Previously we were hitting the scalarization case in the default implementation. --- .../lib/Target/X86/X86TargetTransformInfo.cpp | 4 ++++ llvm/test/Analysis/CostModel/X86/extend.ll | 24 +++++++++---------- .../CostModel/X86/min-legal-vector-width.ll | 8 +++---- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 4d99e3c16d142..fa2c7c5019a57 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1623,6 +1623,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 1 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 1 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 }, + { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 1 }, { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 1 }, @@ -1656,6 +1658,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 4 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 }, + { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 4 }, + { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 4 }, { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 4 }, diff --git a/llvm/test/Analysis/CostModel/X86/extend.ll b/llvm/test/Analysis/CostModel/X86/extend.ll index c51bcac449ab3..219030f015560 100644 --- a/llvm/test/Analysis/CostModel/X86/extend.ll +++ b/llvm/test/Analysis/CostModel/X86/extend.ll @@ -374,8 +374,8 @@ define i32 @zext_vXi1() { ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8> @@ -399,8 +399,8 @@ define i32 @zext_vXi1() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8> @@ -474,8 +474,8 @@ define i32 @zext_vXi1() { ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8> @@ -877,8 +877,8 @@ define i32 @sext_vXi1() { ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8> @@ -902,8 +902,8 @@ define i32 @sext_vXi1() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8> @@ -977,8 +977,8 @@ define i32 @sext_vXi1() { ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8> diff --git a/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll b/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll index a6865cfc28bd1..28dd66d7980bf 100644 --- a/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll +++ b/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll @@ -228,8 +228,8 @@ define i32 @zext256_vXi1() "min-legal-vector-width"="256" { ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16> -; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16> -; AVX-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = zext <8 x i1> undef to <8 x i8> @@ -362,8 +362,8 @@ define i32 @sext256_vXi1() "min-legal-vector-width"="256" { ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16> -; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16> -; AVX-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8>