diff --git a/llvm/test/Analysis/CostModel/X86/arith-costkinds.ll b/llvm/test/Analysis/CostModel/X86/arith-costkinds.ll deleted file mode 100644 index dfb0f8971fef6..0000000000000 --- a/llvm/test/Analysis/CostModel/X86/arith-costkinds.ll +++ /dev/null @@ -1,634 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+sse2 -cost-kind=latency < %s | FileCheck %s --check-prefixes=LATE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+sse2 -cost-kind=code-size < %s | FileCheck %s --check-prefixes=SIZE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+sse2 -cost-kind=size-latency < %s | FileCheck %s --check-prefixes=SIZE_LATE -; -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx -cost-kind=latency < %s | FileCheck %s --check-prefixes=LATE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx -cost-kind=code-size < %s | FileCheck %s --check-prefixes=SIZE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx -cost-kind=size-latency < %s | FileCheck %s --check-prefixes=SIZE_LATE -; -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx2 -cost-kind=latency < %s | FileCheck %s --check-prefixes=LATE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx2 -cost-kind=code-size < %s | FileCheck %s --check-prefixes=SIZE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx2 -cost-kind=size-latency < %s | FileCheck %s --check-prefixes=SIZE_LATE -; -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512vl -cost-kind=latency < %s | FileCheck %s --check-prefixes=LATE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512vl -cost-kind=code-size < %s | FileCheck %s --check-prefixes=SIZE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512vl -cost-kind=size-latency < %s | FileCheck %s --check-prefixes=SIZE_LATE -; -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx512bw,+avx512vl -cost-kind=latency < %s | FileCheck %s --check-prefixes=LATE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx512bw,+avx512vl -cost-kind=code-size < %s | FileCheck %s --check-prefixes=SIZE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx512bw,+avx512vl -cost-kind=size-latency < %s | FileCheck %s --check-prefixes=SIZE_LATE -; -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mcpu=slm -cost-kind=latency < %s | FileCheck %s --check-prefixes=LATE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mcpu=slm -cost-kind=code-size < %s | FileCheck %s --check-prefixes=SIZE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mcpu=slm -cost-kind=size-latency < %s | FileCheck %s --check-prefixes=SIZE_LATE -; -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mcpu=goldmont -cost-kind=latency < %s | FileCheck %s --check-prefixes=LATE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mcpu=goldmont -cost-kind=code-size < %s | FileCheck %s --check-prefixes=SIZE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mcpu=goldmont -cost-kind=size-latency < %s | FileCheck %s --check-prefixes=SIZE_LATE - -define i32 @add(i32 %arg) { -; LATE-LABEL: 'add' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = add i64 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = add <2 x i64> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = add <4 x i64> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = add <8 x i64> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = add i32 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = add <4 x i32> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = add <8 x i32> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = add <16 x i32> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = add i16 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = add <8 x i16> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = add <16 x i16> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = add <32 x i16> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = add i8 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = add <16 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = add <32 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = add <64 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE-LABEL: 'add' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = add i64 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = add <2 x i64> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = add <4 x i64> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = add <8 x i64> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = add i32 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = add <4 x i32> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = add <8 x i32> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = add <16 x i32> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = add i16 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = add <8 x i16> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = add <16 x i16> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = add <32 x i16> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = add i8 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = add <16 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = add <32 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = add <64 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE_LATE-LABEL: 'add' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = add i64 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = add <2 x i64> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = add <4 x i64> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = add <8 x i64> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = add i32 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = add <4 x i32> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = add <8 x i32> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = add <16 x i32> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = add i16 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = add <8 x i16> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = add <16 x i16> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = add <32 x i16> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = add i8 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = add <16 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = add <32 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = add <64 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; - %I64 = add i64 undef, undef - %V2I64 = add <2 x i64> undef, undef - %V4I64 = add <4 x i64> undef, undef - %V8I64 = add <8 x i64> undef, undef - - %I32 = add i32 undef, undef - %V4I32 = add <4 x i32> undef, undef - %V8I32 = add <8 x i32> undef, undef - %V16I32 = add <16 x i32> undef, undef - - %I16 = add i16 undef, undef - %V8I16 = add <8 x i16> undef, undef - %V16I16 = add <16 x i16> undef, undef - %V32I16 = add <32 x i16> undef, undef - - %I8 = add i8 undef, undef - %V16I8 = add <16 x i8> undef, undef - %V32I8 = add <32 x i8> undef, undef - %V64I8 = add <64 x i8> undef, undef - - ret i32 undef -} - -define i32 @sub(i32 %arg) { -; LATE-LABEL: 'sub' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sub i64 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = sub <2 x i64> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = sub <4 x i64> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = sub <8 x i64> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sub i32 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = sub <4 x i32> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = sub <8 x i32> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = sub <16 x i32> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sub i16 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = sub <8 x i16> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = sub <16 x i16> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = sub <32 x i16> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sub i8 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = sub <16 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = sub <32 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = sub <64 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE-LABEL: 'sub' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sub i64 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = sub <2 x i64> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = sub <4 x i64> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = sub <8 x i64> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sub i32 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = sub <4 x i32> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = sub <8 x i32> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = sub <16 x i32> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sub i16 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = sub <8 x i16> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = sub <16 x i16> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = sub <32 x i16> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sub i8 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = sub <16 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = sub <32 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = sub <64 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE_LATE-LABEL: 'sub' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sub i64 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = sub <2 x i64> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = sub <4 x i64> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = sub <8 x i64> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sub i32 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = sub <4 x i32> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = sub <8 x i32> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = sub <16 x i32> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sub i16 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = sub <8 x i16> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = sub <16 x i16> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = sub <32 x i16> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sub i8 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = sub <16 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = sub <32 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = sub <64 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; - %I64 = sub i64 undef, undef - %V2I64 = sub <2 x i64> undef, undef - %V4I64 = sub <4 x i64> undef, undef - %V8I64 = sub <8 x i64> undef, undef - - %I32 = sub i32 undef, undef - %V4I32 = sub <4 x i32> undef, undef - %V8I32 = sub <8 x i32> undef, undef - %V16I32 = sub <16 x i32> undef, undef - - %I16 = sub i16 undef, undef - %V8I16 = sub <8 x i16> undef, undef - %V16I16 = sub <16 x i16> undef, undef - %V32I16 = sub <32 x i16> undef, undef - - %I8 = sub i8 undef, undef - %V16I8 = sub <16 x i8> undef, undef - %V32I8 = sub <32 x i8> undef, undef - %V64I8 = sub <64 x i8> undef, undef - - ret i32 undef -} - -define i32 @or(i32 %arg) { -; LATE-LABEL: 'or' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = or <4 x i64> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = or <8 x i64> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = or <8 x i32> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = or <16 x i32> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = or <16 x i16> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = or <32 x i16> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = or <32 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = or <64 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = or <32 x i1> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = or <64 x i1> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE-LABEL: 'or' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = or <4 x i64> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = or <8 x i64> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = or <8 x i32> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = or <16 x i32> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = or <16 x i16> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = or <32 x i16> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = or <32 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = or <64 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = or <32 x i1> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = or <64 x i1> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE_LATE-LABEL: 'or' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = or <4 x i64> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = or <8 x i64> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = or <8 x i32> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = or <16 x i32> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = or <16 x i16> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = or <32 x i16> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = or <32 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = or <64 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = or <32 x i1> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = or <64 x i1> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; - %I64 = or i64 undef, undef - %V2I64 = or <2 x i64> undef, undef - %V4I64 = or <4 x i64> undef, undef - %V8I64 = or <8 x i64> undef, undef - - %I32 = or i32 undef, undef - %V4I32 = or <4 x i32> undef, undef - %V8I32 = or <8 x i32> undef, undef - %V16I32 = or <16 x i32> undef, undef - - %I16 = or i16 undef, undef - %V8I16 = or <8 x i16> undef, undef - %V16I16 = or <16 x i16> undef, undef - %V32I16 = or <32 x i16> undef, undef - - %I8 = or i8 undef, undef - %V16I8 = or <16 x i8> undef, undef - %V32I8 = or <32 x i8> undef, undef - %V64I8 = or <64 x i8> undef, undef - - %I1 = or i1 undef, undef - %V2I1 = or <2 x i1> undef, undef - %V4I1 = or <4 x i1> undef, undef - %V8I1 = or <8 x i1> undef, undef - %V16I1 = or <16 x i1> undef, undef - %V32I1 = or <32 x i1> undef, undef - %V64I1 = or <64 x i1> undef, undef - - ret i32 undef -} - -define i32 @xor(i32 %arg) { -; LATE-LABEL: 'xor' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = xor <4 x i64> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = xor <8 x i64> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = xor <8 x i32> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = xor <16 x i32> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = xor <16 x i16> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = xor <32 x i16> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = xor <32 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = xor <64 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = xor <32 x i1> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = xor <64 x i1> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE-LABEL: 'xor' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = xor <4 x i64> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = xor <8 x i64> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = xor <8 x i32> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = xor <16 x i32> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = xor <16 x i16> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = xor <32 x i16> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = xor <32 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = xor <64 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = xor <32 x i1> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = xor <64 x i1> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE_LATE-LABEL: 'xor' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = xor <4 x i64> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = xor <8 x i64> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = xor <8 x i32> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = xor <16 x i32> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = xor <16 x i16> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = xor <32 x i16> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = xor <32 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = xor <64 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = xor <32 x i1> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = xor <64 x i1> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; - %I64 = xor i64 undef, undef - %V2I64 = xor <2 x i64> undef, undef - %V4I64 = xor <4 x i64> undef, undef - %V8I64 = xor <8 x i64> undef, undef - - %I32 = xor i32 undef, undef - %V4I32 = xor <4 x i32> undef, undef - %V8I32 = xor <8 x i32> undef, undef - %V16I32 = xor <16 x i32> undef, undef - - %I16 = xor i16 undef, undef - %V8I16 = xor <8 x i16> undef, undef - %V16I16 = xor <16 x i16> undef, undef - %V32I16 = xor <32 x i16> undef, undef - - %I8 = xor i8 undef, undef - %V16I8 = xor <16 x i8> undef, undef - %V32I8 = xor <32 x i8> undef, undef - %V64I8 = xor <64 x i8> undef, undef - - %I1 = xor i1 undef, undef - %V2I1 = xor <2 x i1> undef, undef - %V4I1 = xor <4 x i1> undef, undef - %V8I1 = xor <8 x i1> undef, undef - %V16I1 = xor <16 x i1> undef, undef - %V32I1 = xor <32 x i1> undef, undef - %V64I1 = xor <64 x i1> undef, undef - - ret i32 undef -} - -define i32 @and(i32 %arg) { -; LATE-LABEL: 'and' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = and <4 x i64> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = and <8 x i64> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = and <8 x i32> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = and <16 x i32> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = and <16 x i16> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = and <32 x i16> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = and <32 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = and <64 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = and <32 x i1> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = and <64 x i1> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE-LABEL: 'and' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = and <4 x i64> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = and <8 x i64> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = and <8 x i32> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = and <16 x i32> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = and <16 x i16> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = and <32 x i16> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = and <32 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = and <64 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = and <32 x i1> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = and <64 x i1> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE_LATE-LABEL: 'and' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = and <4 x i64> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = and <8 x i64> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = and <8 x i32> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = and <16 x i32> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = and <16 x i16> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = and <32 x i16> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = and <32 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = and <64 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = and <32 x i1> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = and <64 x i1> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; - %I64 = and i64 undef, undef - %V2I64 = and <2 x i64> undef, undef - %V4I64 = and <4 x i64> undef, undef - %V8I64 = and <8 x i64> undef, undef - - %I32 = and i32 undef, undef - %V4I32 = and <4 x i32> undef, undef - %V8I32 = and <8 x i32> undef, undef - %V16I32 = and <16 x i32> undef, undef - - %I16 = and i16 undef, undef - %V8I16 = and <8 x i16> undef, undef - %V16I16 = and <16 x i16> undef, undef - %V32I16 = and <32 x i16> undef, undef - - %I8 = and i8 undef, undef - %V16I8 = and <16 x i8> undef, undef - %V32I8 = and <32 x i8> undef, undef - %V64I8 = and <64 x i8> undef, undef - - %I1 = and i1 undef, undef - %V2I1 = and <2 x i1> undef, undef - %V4I1 = and <4 x i1> undef, undef - %V8I1 = and <8 x i1> undef, undef - %V16I1 = and <16 x i1> undef, undef - %V32I1 = and <32 x i1> undef, undef - %V64I1 = and <64 x i1> undef, undef - - ret i32 undef -} - -define i32 @mul(i32 %arg) { -; LATE-LABEL: 'mul' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = mul <2 x i64> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = mul <4 x i64> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = mul <8 x i64> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = mul <4 x i32> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = mul <8 x i32> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = mul <16 x i32> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = mul <8 x i16> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = mul <16 x i16> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = mul <32 x i16> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = mul <2 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = mul <16 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = mul <32 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = mul <64 x i8> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE-LABEL: 'mul' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = mul <2 x i64> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = mul <4 x i64> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = mul <8 x i64> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = mul <4 x i32> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = mul <8 x i32> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = mul <16 x i32> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = mul <8 x i16> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = mul <16 x i16> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = mul <32 x i16> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = mul <2 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = mul <16 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = mul <32 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = mul <64 x i8> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE_LATE-LABEL: 'mul' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = mul <2 x i64> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = mul <4 x i64> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = mul <8 x i64> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = mul <4 x i32> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = mul <8 x i32> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = mul <16 x i32> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = mul <8 x i16> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = mul <16 x i16> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = mul <32 x i16> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = mul <2 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = mul <16 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = mul <32 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = mul <64 x i8> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; - %I64 = mul i64 undef, undef - %V2I64 = mul <2 x i64> undef, undef - %V4I64 = mul <4 x i64> undef, undef - %V8I64 = mul <8 x i64> undef, undef - - %I32 = mul i32 undef, undef - %V4I32 = mul <4 x i32> undef, undef - %V8I32 = mul <8 x i32> undef, undef - %V16I32 = mul <16 x i32> undef, undef - - %I16 = mul i16 undef, undef - %V8I16 = mul <8 x i16> undef, undef - %V16I16 = mul <16 x i16> undef, undef - %V32I16 = mul <32 x i16> undef, undef - - %I8 = mul i8 undef, undef - %V2I8 = mul <2 x i8> undef, undef - %V4I8 = mul <4 x i8> undef, undef - %V8I8 = mul <8 x i8> undef, undef - %V16I8 = mul <16 x i8> undef, undef - %V32I8 = mul <32 x i8> undef, undef - %V64I8 = mul <64 x i8> undef, undef - - ret i32 undef -} - -; A <2 x i32> gets expanded to a <2 x i64> vector. -; A <2 x i64> vector multiply is implemented using -; 3 PMULUDQ and 2 PADDS and 4 shifts. -define void @mul_2i32() { -; LATE-LABEL: 'mul_2i32' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A0 = mul <2 x i32> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE-LABEL: 'mul_2i32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A0 = mul <2 x i32> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE_LATE-LABEL: 'mul_2i32' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A0 = mul <2 x i32> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; - %A0 = mul <2 x i32> undef, undef - - ret void -} diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll new file mode 100644 index 0000000000000..5dccd4d593733 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll @@ -0,0 +1,602 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=code-size -mtriple=x86_64-- -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,SSE1 +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=code-size -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2 +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=code-size -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42 +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=code-size -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=code-size -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=code-size -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=code-size -mtriple=x86_64-- -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512 +; +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=code-size -mtriple=x86_64-- -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=code-size -mtriple=x86_64-- -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=code-size -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 + +define i32 @fadd(i32 %arg) { +; CHECK-LABEL: 'fadd' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fadd <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fadd <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fadd <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fadd <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = fadd float undef, undef + %V4F32 = fadd <4 x float> undef, undef + %V8F32 = fadd <8 x float> undef, undef + %V16F32 = fadd <16 x float> undef, undef + + %F64 = fadd double undef, undef + %V2F64 = fadd <2 x double> undef, undef + %V4F64 = fadd <4 x double> undef, undef + %V8F64 = fadd <8 x double> undef, undef + + ret i32 undef +} + +define i32 @fsub(i32 %arg) { +; CHECK-LABEL: 'fsub' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = fsub float undef, undef + %V4F32 = fsub <4 x float> undef, undef + %V8F32 = fsub <8 x float> undef, undef + %V16F32 = fsub <16 x float> undef, undef + + %F64 = fsub double undef, undef + %V2F64 = fsub <2 x double> undef, undef + %V4F64 = fsub <4 x double> undef, undef + %V8F64 = fsub <8 x double> undef, undef + + ret i32 undef +} + +define i32 @fneg_idiom(i32 %arg) { +; CHECK-LABEL: 'fneg_idiom' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = fsub float -0.0, undef + %V4F32 = fsub <4 x float> , undef + %V8F32 = fsub <8 x float> , undef + %V16F32 = fsub <16 x float> , undef + + %F64 = fsub double -0.0, undef + %V2F64 = fsub <2 x double> , undef + %V4F64 = fsub <4 x double> , undef + %V8F64 = fsub <8 x double> , undef + + ret i32 undef +} + +define i32 @fneg(i32 %arg) { +; CHECK-LABEL: 'fneg' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fneg <8 x float> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fneg <16 x float> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fneg <4 x double> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fneg <8 x double> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = fneg float undef + %V4F32 = fneg <4 x float> undef + %V8F32 = fneg <8 x float> undef + %V16F32 = fneg <16 x float> undef + + %F64 = fneg double undef + %V2F64 = fneg <2 x double> undef + %V4F64 = fneg <4 x double> undef + %V8F64 = fneg <8 x double> undef + + ret i32 undef +} + +define i32 @fmul(i32 %arg) { +; CHECK-LABEL: 'fmul' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fmul <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fmul <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fmul <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fmul <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = fmul float undef, undef + %V4F32 = fmul <4 x float> undef, undef + %V8F32 = fmul <8 x float> undef, undef + %V16F32 = fmul <16 x float> undef, undef + + %F64 = fmul double undef, undef + %V2F64 = fmul <2 x double> undef, undef + %V4F64 = fmul <4 x double> undef, undef + %V8F64 = fmul <8 x double> undef, undef + + ret i32 undef +} + +define i32 @fdiv(i32 %arg) { +; CHECK-LABEL: 'fdiv' +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fdiv float undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fdiv <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fdiv double undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fdiv <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = fdiv float undef, undef + %V4F32 = fdiv <4 x float> undef, undef + %V8F32 = fdiv <8 x float> undef, undef + %V16F32 = fdiv <16 x float> undef, undef + + %F64 = fdiv double undef, undef + %V2F64 = fdiv <2 x double> undef, undef + %V4F64 = fdiv <4 x double> undef, undef + %V8F64 = fdiv <8 x double> undef, undef + + ret i32 undef +} + +define i32 @frem(i32 %arg) { +; CHECK-LABEL: 'frem' +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = frem float undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = frem <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = frem <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = frem <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = frem double undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = frem <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = frem <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = frem <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = frem float undef, undef + %V4F32 = frem <4 x float> undef, undef + %V8F32 = frem <8 x float> undef, undef + %V16F32 = frem <16 x float> undef, undef + + %F64 = frem double undef, undef + %V2F64 = frem <2 x double> undef, undef + %V4F64 = frem <4 x double> undef, undef + %V8F64 = frem <8 x double> undef, undef + + ret i32 undef +} + +define i32 @fsqrt(i32 %arg) { +; SSE1-LABEL: 'fsqrt' +; SSE1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE2-LABEL: 'fsqrt' +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE42-LABEL: 'fsqrt' +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX1-LABEL: 'fsqrt' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX2-LABEL: 'fsqrt' +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX512-LABEL: 'fsqrt' +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLM-LABEL: 'fsqrt' +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; GLM-LABEL: 'fsqrt' +; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.sqrt.f32(float undef) + %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) + %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) + %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) + + %F64 = call double @llvm.sqrt.f64(double undef) + %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) + %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) + %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) + + ret i32 undef +} + +define i32 @fabs(i32 %arg) { +; SSE1-LABEL: 'fabs' +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE2-LABEL: 'fabs' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE42-LABEL: 'fabs' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX-LABEL: 'fabs' +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX512-LABEL: 'fabs' +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLM-LABEL: 'fabs' +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; GLM-LABEL: 'fabs' +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.fabs.f32(float undef) + %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) + %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) + %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) + + %F64 = call double @llvm.fabs.f64(double undef) + %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) + %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) + %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) + + ret i32 undef +} + +define i32 @fcopysign(i32 %arg) { +; SSE1-LABEL: 'fcopysign' +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE2-LABEL: 'fcopysign' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE42-LABEL: 'fcopysign' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX-LABEL: 'fcopysign' +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX512-LABEL: 'fcopysign' +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLM-LABEL: 'fcopysign' +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; GLM-LABEL: 'fcopysign' +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.copysign.f32(float undef, float undef) + %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) + %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) + %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) + + %F64 = call double @llvm.copysign.f64(double undef, double undef) + %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) + %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) + %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) + + ret i32 undef +} + +define i32 @fma(i32 %arg) { +; SSE1-LABEL: 'fma' +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE2-LABEL: 'fma' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE42-LABEL: 'fma' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX-LABEL: 'fma' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX512-LABEL: 'fma' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLM-LABEL: 'fma' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; GLM-LABEL: 'fma' +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) + %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) + %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) + %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) + + %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) + %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) + %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) + %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) + + ret i32 undef +} + +declare float @llvm.sqrt.f32(float) +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) +declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) +declare <16 x float> @llvm.sqrt.v16f32(<16 x float>) + +declare double @llvm.sqrt.f64(double) +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) +declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) +declare <8 x double> @llvm.sqrt.v8f64(<8 x double>) + +declare float @llvm.fabs.f32(float) +declare <4 x float> @llvm.fabs.v4f32(<4 x float>) +declare <8 x float> @llvm.fabs.v8f32(<8 x float>) +declare <16 x float> @llvm.fabs.v16f32(<16 x float>) + +declare double @llvm.fabs.f64(double) +declare <2 x double> @llvm.fabs.v2f64(<2 x double>) +declare <4 x double> @llvm.fabs.v4f64(<4 x double>) +declare <8 x double> @llvm.fabs.v8f64(<8 x double>) + +declare float @llvm.copysign.f32(float, float) +declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) +declare <8 x float> @llvm.copysign.v8f32(<8 x float>, <8 x float>) +declare <16 x float> @llvm.copysign.v16f32(<16 x float>, <16 x float>) + +declare double @llvm.copysign.f64(double, double) +declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) +declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) +declare <8 x double> @llvm.copysign.v8f64(<8 x double>, <8 x double>) + +declare float @llvm.fma.f32(float, float, float) +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) +declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) +declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>) + +declare double @llvm.fma.f64(double, double, double) +declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) +declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) +declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>) diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-costkinds.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-costkinds.ll deleted file mode 100644 index ce29e233684da..0000000000000 --- a/llvm/test/Analysis/CostModel/X86/arith-fp-costkinds.ll +++ /dev/null @@ -1,904 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+sse2 -cost-kind=latency < %s | FileCheck %s --check-prefixes=CHECK,LATE,SSE-LATE,SSE2-LATE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+sse2 -cost-kind=code-size < %s | FileCheck %s --check-prefixes=CHECK,SIZE,SSE-SIZE,SSE2-SIZE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+sse2 -cost-kind=size-latency < %s | FileCheck %s --check-prefixes=CHECK,SIZE_LATE,SSE-SIZE_LATE,SSE2-SIZE_LATE -; -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx -cost-kind=latency < %s | FileCheck %s --check-prefixes=CHECK,LATE,AVX-LATE,AVX1-LATE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx -cost-kind=code-size < %s | FileCheck %s --check-prefixes=CHECK,SIZE,AVX-SIZE,AVX1-SIZE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx -cost-kind=size-latency < %s | FileCheck %s --check-prefixes=CHECK,SIZE_LATE,AVX-SIZE_LATE,AVX1-SIZE_LATE -; -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx2 -cost-kind=latency < %s | FileCheck %s --check-prefixes=CHECK,LATE,AVX-LATE,AVX2-LATE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx2 -cost-kind=code-size < %s | FileCheck %s --check-prefixes=CHECK,SIZE,AVX-SIZE,AVX2-SIZE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx2 -cost-kind=size-latency < %s | FileCheck %s --check-prefixes=CHECK,SIZE_LATE,AVX-SIZE_LATE,AVX2-SIZE_LATE -; -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512vl -cost-kind=latency < %s | FileCheck %s --check-prefixes=CHECK,LATE,AVX512-LATE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512vl -cost-kind=code-size < %s | FileCheck %s --check-prefixes=CHECK,SIZE,AVX512-SIZE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512vl -cost-kind=size-latency < %s | FileCheck %s --check-prefixes=CHECK,SIZE_LATE,AVX512-SIZE_LATE -; -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx512bw,+avx512vl -cost-kind=latency < %s | FileCheck %s --check-prefixes=CHECK,LATE,AVX512-LATE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx512bw,+avx512vl -cost-kind=code-size < %s | FileCheck %s --check-prefixes=CHECK,SIZE,AVX512-SIZE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx512bw,+avx512vl -cost-kind=size-latency < %s | FileCheck %s --check-prefixes=CHECK,SIZE_LATE,AVX512-SIZE_LATE -; -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mcpu=slm -cost-kind=latency < %s | FileCheck %s --check-prefixes=CHECK,LATE,SSE-LATE,SLM-LATE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mcpu=slm -cost-kind=code-size < %s | FileCheck %s --check-prefixes=CHECK,SIZE,SSE-SIZE,SLM-SIZE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mcpu=slm -cost-kind=size-latency < %s | FileCheck %s --check-prefixes=CHECK,SIZE_LATE,SSE-SIZE_LATE,SLM-SIZE_LATE -; -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mcpu=goldmont -cost-kind=latency < %s | FileCheck %s --check-prefixes=CHECK,LATE,SSE-LATE,GLM-LATE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mcpu=goldmont -cost-kind=code-size < %s | FileCheck %s --check-prefixes=CHECK,SIZE,SSE-SIZE,GLM-SIZE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mcpu=goldmont -cost-kind=size-latency < %s | FileCheck %s --check-prefixes=CHECK,SIZE_LATE,SSE-SIZE_LATE,GLM-SIZE_LATE - -define i32 @fadd(i32 %arg) { -; LATE-LABEL: 'fadd' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fadd float undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fadd <4 x float> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = fadd <8 x float> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = fadd <16 x float> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fadd double undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fadd <2 x double> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = fadd <4 x double> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = fadd <8 x double> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE-LABEL: 'fadd' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fadd <8 x float> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fadd <16 x float> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fadd <4 x double> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fadd <8 x double> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE_LATE-LABEL: 'fadd' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fadd <8 x float> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fadd <16 x float> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fadd <4 x double> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fadd <8 x double> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; - %F32 = fadd float undef, undef - %V4F32 = fadd <4 x float> undef, undef - %V8F32 = fadd <8 x float> undef, undef - %V16F32 = fadd <16 x float> undef, undef - - %F64 = fadd double undef, undef - %V2F64 = fadd <2 x double> undef, undef - %V4F64 = fadd <4 x double> undef, undef - %V8F64 = fadd <8 x double> undef, undef - - ret i32 undef -} - -define i32 @fsub(i32 %arg) { -; LATE-LABEL: 'fsub' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = fsub <8 x float> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = fsub <16 x float> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = fsub <4 x double> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = fsub <8 x double> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE-LABEL: 'fsub' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE_LATE-LABEL: 'fsub' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; - %F32 = fsub float undef, undef - %V4F32 = fsub <4 x float> undef, undef - %V8F32 = fsub <8 x float> undef, undef - %V16F32 = fsub <16 x float> undef, undef - - %F64 = fsub double undef, undef - %V2F64 = fsub <2 x double> undef, undef - %V4F64 = fsub <4 x double> undef, undef - %V8F64 = fsub <8 x double> undef, undef - - ret i32 undef -} - -define i32 @fneg_idiom(i32 %arg) { -; LATE-LABEL: 'fneg_idiom' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> , undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = fsub <8 x float> , undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = fsub <16 x float> , undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> , undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = fsub <4 x double> , undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = fsub <8 x double> , undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE-LABEL: 'fneg_idiom' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> , undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> , undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> , undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> , undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE_LATE-LABEL: 'fneg_idiom' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> , undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> , undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> , undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> , undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; - %F32 = fsub float -0.0, undef - %V4F32 = fsub <4 x float> , undef - %V8F32 = fsub <8 x float> , undef - %V16F32 = fsub <16 x float> , undef - - %F64 = fsub double -0.0, undef - %V2F64 = fsub <2 x double> , undef - %V4F64 = fsub <4 x double> , undef - %V8F64 = fsub <8 x double> , undef - - ret i32 undef -} - -define i32 @fneg(i32 %arg) { -; LATE-LABEL: 'fneg' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fneg float undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fneg <4 x float> undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = fneg <8 x float> undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = fneg <16 x float> undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fneg double undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fneg <2 x double> undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = fneg <4 x double> undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = fneg <8 x double> undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE-LABEL: 'fneg' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fneg <8 x float> undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fneg <16 x float> undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fneg <4 x double> undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fneg <8 x double> undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE_LATE-LABEL: 'fneg' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fneg <8 x float> undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fneg <16 x float> undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fneg <4 x double> undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fneg <8 x double> undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; - %F32 = fneg float undef - %V4F32 = fneg <4 x float> undef - %V8F32 = fneg <8 x float> undef - %V16F32 = fneg <16 x float> undef - - %F64 = fneg double undef - %V2F64 = fneg <2 x double> undef - %V4F64 = fneg <4 x double> undef - %V8F64 = fneg <8 x double> undef - - ret i32 undef -} - -define i32 @fmul(i32 %arg) { -; LATE-LABEL: 'fmul' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fmul float undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fmul <4 x float> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = fmul <8 x float> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = fmul <16 x float> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fmul double undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fmul <2 x double> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = fmul <4 x double> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = fmul <8 x double> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE-LABEL: 'fmul' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fmul <8 x float> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fmul <16 x float> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fmul <4 x double> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fmul <8 x double> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE_LATE-LABEL: 'fmul' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fmul <8 x float> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fmul <16 x float> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fmul <4 x double> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fmul <8 x double> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; - %F32 = fmul float undef, undef - %V4F32 = fmul <4 x float> undef, undef - %V8F32 = fmul <8 x float> undef, undef - %V16F32 = fmul <16 x float> undef, undef - - %F64 = fmul double undef, undef - %V2F64 = fmul <2 x double> undef, undef - %V4F64 = fmul <4 x double> undef, undef - %V8F64 = fmul <8 x double> undef, undef - - ret i32 undef -} - -define i32 @fdiv(i32 %arg) { -; CHECK-LABEL: 'fdiv' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fdiv float undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fdiv <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fdiv double undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fdiv <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; - %F32 = fdiv float undef, undef - %V4F32 = fdiv <4 x float> undef, undef - %V8F32 = fdiv <8 x float> undef, undef - %V16F32 = fdiv <16 x float> undef, undef - - %F64 = fdiv double undef, undef - %V2F64 = fdiv <2 x double> undef, undef - %V4F64 = fdiv <4 x double> undef, undef - %V8F64 = fdiv <8 x double> undef, undef - - ret i32 undef -} - -define i32 @frem(i32 %arg) { -; CHECK-LABEL: 'frem' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = frem float undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = frem <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = frem <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = frem <16 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = frem double undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = frem <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = frem <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = frem <8 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; - %F32 = frem float undef, undef - %V4F32 = frem <4 x float> undef, undef - %V8F32 = frem <8 x float> undef, undef - %V16F32 = frem <16 x float> undef, undef - - %F64 = frem double undef, undef - %V2F64 = frem <2 x double> undef, undef - %V4F64 = frem <4 x double> undef, undef - %V8F64 = frem <8 x double> undef, undef - - ret i32 undef -} - -define i32 @fsqrt(i32 %arg) { -; SSE2-LATE-LABEL: 'fsqrt' -; SSE2-LATE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; SSE2-LATE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; SSE2-LATE-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; SSE2-LATE-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; SSE2-LATE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; SSE2-LATE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; SSE2-LATE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; SSE2-LATE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; SSE2-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SSE2-SIZE-LABEL: 'fsqrt' -; SSE2-SIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; SSE2-SIZE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; SSE2-SIZE-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; SSE2-SIZE-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; SSE2-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; SSE2-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; SSE2-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; SSE2-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; SSE2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SSE2-SIZE_LATE-LABEL: 'fsqrt' -; SSE2-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; SSE2-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; SSE2-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; SSE2-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; SSE2-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; SSE2-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; SSE2-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; SSE2-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; SSE2-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX1-LATE-LABEL: 'fsqrt' -; AVX1-LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; AVX1-LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; AVX1-LATE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; AVX1-LATE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; AVX1-LATE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; AVX1-LATE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; AVX1-LATE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; AVX1-LATE-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; AVX1-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX1-SIZE-LABEL: 'fsqrt' -; AVX1-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; AVX1-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; AVX1-SIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; AVX1-SIZE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; AVX1-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; AVX1-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; AVX1-SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; AVX1-SIZE-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; AVX1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX1-SIZE_LATE-LABEL: 'fsqrt' -; AVX1-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; AVX1-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; AVX1-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; AVX1-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; AVX1-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; AVX1-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; AVX1-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; AVX1-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; AVX1-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX2-LATE-LABEL: 'fsqrt' -; AVX2-LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; AVX2-LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; AVX2-LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; AVX2-LATE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; AVX2-LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; AVX2-LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; AVX2-LATE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; AVX2-LATE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; AVX2-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX2-SIZE-LABEL: 'fsqrt' -; AVX2-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; AVX2-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; AVX2-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; AVX2-SIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; AVX2-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; AVX2-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; AVX2-SIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; AVX2-SIZE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; AVX2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX2-SIZE_LATE-LABEL: 'fsqrt' -; AVX2-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; AVX2-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; AVX2-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; AVX2-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; AVX2-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; AVX2-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; AVX2-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; AVX2-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; AVX2-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX512-LATE-LABEL: 'fsqrt' -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX512-SIZE-LABEL: 'fsqrt' -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX512-SIZE_LATE-LABEL: 'fsqrt' -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SLM-LATE-LABEL: 'fsqrt' -; SLM-LATE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; SLM-LATE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; SLM-LATE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; SLM-LATE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; SLM-LATE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; SLM-LATE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; SLM-LATE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; SLM-LATE-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; SLM-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SLM-SIZE-LABEL: 'fsqrt' -; SLM-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; SLM-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; SLM-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; SLM-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; SLM-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; SLM-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; SLM-SIZE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; SLM-SIZE-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; SLM-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SLM-SIZE_LATE-LABEL: 'fsqrt' -; SLM-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; SLM-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; SLM-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; SLM-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; SLM-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; SLM-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; SLM-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; SLM-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; SLM-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; GLM-LATE-LABEL: 'fsqrt' -; GLM-LATE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; GLM-LATE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; GLM-LATE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; GLM-LATE-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; GLM-LATE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; GLM-LATE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; GLM-LATE-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; GLM-LATE-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; GLM-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; GLM-SIZE-LABEL: 'fsqrt' -; GLM-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; GLM-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; GLM-SIZE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; GLM-SIZE-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; GLM-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; GLM-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; GLM-SIZE-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; GLM-SIZE-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; GLM-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; GLM-SIZE_LATE-LABEL: 'fsqrt' -; GLM-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; GLM-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; GLM-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; GLM-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; GLM-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; GLM-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; GLM-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; GLM-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; GLM-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; - %F32 = call float @llvm.sqrt.f32(float undef) - %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) - %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) - %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) - - %F64 = call double @llvm.sqrt.f64(double undef) - %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) - %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) - %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) - - ret i32 undef -} - -define i32 @fabs(i32 %arg) { -; SSE-LATE-LABEL: 'fabs' -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SSE-SIZE-LABEL: 'fabs' -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SSE-SIZE_LATE-LABEL: 'fabs' -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX-LATE-LABEL: 'fabs' -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX-SIZE-LABEL: 'fabs' -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX-SIZE_LATE-LABEL: 'fabs' -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX512-LATE-LABEL: 'fabs' -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX512-SIZE-LABEL: 'fabs' -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX512-SIZE_LATE-LABEL: 'fabs' -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; - %F32 = call float @llvm.fabs.f32(float undef) - %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) - %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) - %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) - - %F64 = call double @llvm.fabs.f64(double undef) - %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) - %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) - %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) - - ret i32 undef -} - -define i32 @fcopysign(i32 %arg) { -; SSE-LATE-LABEL: 'fcopysign' -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SSE-SIZE-LABEL: 'fcopysign' -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SSE-SIZE_LATE-LABEL: 'fcopysign' -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX-LATE-LABEL: 'fcopysign' -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX-SIZE-LABEL: 'fcopysign' -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX-SIZE_LATE-LABEL: 'fcopysign' -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX512-LATE-LABEL: 'fcopysign' -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX512-SIZE-LABEL: 'fcopysign' -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX512-SIZE_LATE-LABEL: 'fcopysign' -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; - %F32 = call float @llvm.copysign.f32(float undef, float undef) - %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) - %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) - %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) - - %F64 = call double @llvm.copysign.f64(double undef, double undef) - %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) - %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) - %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) - - ret i32 undef -} - -define i32 @fma(i32 %arg) { -; SSE-LATE-LABEL: 'fma' -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) -; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SSE-SIZE-LABEL: 'fma' -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) -; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SSE-SIZE_LATE-LABEL: 'fma' -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) -; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX-LATE-LABEL: 'fma' -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) -; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX-SIZE-LABEL: 'fma' -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) -; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX-SIZE_LATE-LABEL: 'fma' -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) -; AVX-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX512-LATE-LABEL: 'fma' -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) -; AVX512-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX512-SIZE-LABEL: 'fma' -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) -; AVX512-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; AVX512-SIZE_LATE-LABEL: 'fma' -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) -; AVX512-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; - %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) - %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) - %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) - %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) - - %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) - %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) - %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) - %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) - - ret i32 undef -} - -declare float @llvm.sqrt.f32(float) -declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) -declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) -declare <16 x float> @llvm.sqrt.v16f32(<16 x float>) - -declare double @llvm.sqrt.f64(double) -declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) -declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) -declare <8 x double> @llvm.sqrt.v8f64(<8 x double>) - -declare float @llvm.fabs.f32(float) -declare <4 x float> @llvm.fabs.v4f32(<4 x float>) -declare <8 x float> @llvm.fabs.v8f32(<8 x float>) -declare <16 x float> @llvm.fabs.v16f32(<16 x float>) - -declare double @llvm.fabs.f64(double) -declare <2 x double> @llvm.fabs.v2f64(<2 x double>) -declare <4 x double> @llvm.fabs.v4f64(<4 x double>) -declare <8 x double> @llvm.fabs.v8f64(<8 x double>) - -declare float @llvm.copysign.f32(float, float) -declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) -declare <8 x float> @llvm.copysign.v8f32(<8 x float>, <8 x float>) -declare <16 x float> @llvm.copysign.v16f32(<16 x float>, <16 x float>) - -declare double @llvm.copysign.f64(double, double) -declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) -declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) -declare <8 x double> @llvm.copysign.v8f64(<8 x double>, <8 x double>) - -declare float @llvm.fma.f32(float, float, float) -declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) -declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) -declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>) - -declare double @llvm.fma.f64(double, double, double) -declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) -declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) -declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>) diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll new file mode 100644 index 0000000000000..461283653f54e --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll @@ -0,0 +1,602 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,SSE1 +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2 +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42 +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512 +; +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 + +define i32 @fadd(i32 %arg) { +; CHECK-LABEL: 'fadd' +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fadd float undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fadd <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = fadd <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = fadd <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fadd double undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fadd <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = fadd <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = fadd <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = fadd float undef, undef + %V4F32 = fadd <4 x float> undef, undef + %V8F32 = fadd <8 x float> undef, undef + %V16F32 = fadd <16 x float> undef, undef + + %F64 = fadd double undef, undef + %V2F64 = fadd <2 x double> undef, undef + %V4F64 = fadd <4 x double> undef, undef + %V8F64 = fadd <8 x double> undef, undef + + ret i32 undef +} + +define i32 @fsub(i32 %arg) { +; CHECK-LABEL: 'fsub' +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = fsub <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = fsub <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = fsub <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = fsub <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = fsub float undef, undef + %V4F32 = fsub <4 x float> undef, undef + %V8F32 = fsub <8 x float> undef, undef + %V16F32 = fsub <16 x float> undef, undef + + %F64 = fsub double undef, undef + %V2F64 = fsub <2 x double> undef, undef + %V4F64 = fsub <4 x double> undef, undef + %V8F64 = fsub <8 x double> undef, undef + + ret i32 undef +} + +define i32 @fneg_idiom(i32 %arg) { +; CHECK-LABEL: 'fneg_idiom' +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = fsub <8 x float> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = fsub <16 x float> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = fsub <4 x double> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = fsub <8 x double> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = fsub float -0.0, undef + %V4F32 = fsub <4 x float> , undef + %V8F32 = fsub <8 x float> , undef + %V16F32 = fsub <16 x float> , undef + + %F64 = fsub double -0.0, undef + %V2F64 = fsub <2 x double> , undef + %V4F64 = fsub <4 x double> , undef + %V8F64 = fsub <8 x double> , undef + + ret i32 undef +} + +define i32 @fneg(i32 %arg) { +; CHECK-LABEL: 'fneg' +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fneg float undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fneg <4 x float> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = fneg <8 x float> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = fneg <16 x float> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fneg double undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fneg <2 x double> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = fneg <4 x double> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = fneg <8 x double> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = fneg float undef + %V4F32 = fneg <4 x float> undef + %V8F32 = fneg <8 x float> undef + %V16F32 = fneg <16 x float> undef + + %F64 = fneg double undef + %V2F64 = fneg <2 x double> undef + %V4F64 = fneg <4 x double> undef + %V8F64 = fneg <8 x double> undef + + ret i32 undef +} + +define i32 @fmul(i32 %arg) { +; CHECK-LABEL: 'fmul' +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fmul float undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fmul <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = fmul <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = fmul <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fmul double undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fmul <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = fmul <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = fmul <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = fmul float undef, undef + %V4F32 = fmul <4 x float> undef, undef + %V8F32 = fmul <8 x float> undef, undef + %V16F32 = fmul <16 x float> undef, undef + + %F64 = fmul double undef, undef + %V2F64 = fmul <2 x double> undef, undef + %V4F64 = fmul <4 x double> undef, undef + %V8F64 = fmul <8 x double> undef, undef + + ret i32 undef +} + +define i32 @fdiv(i32 %arg) { +; CHECK-LABEL: 'fdiv' +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fdiv float undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fdiv <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fdiv double undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fdiv <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = fdiv float undef, undef + %V4F32 = fdiv <4 x float> undef, undef + %V8F32 = fdiv <8 x float> undef, undef + %V16F32 = fdiv <16 x float> undef, undef + + %F64 = fdiv double undef, undef + %V2F64 = fdiv <2 x double> undef, undef + %V4F64 = fdiv <4 x double> undef, undef + %V8F64 = fdiv <8 x double> undef, undef + + ret i32 undef +} + +define i32 @frem(i32 %arg) { +; CHECK-LABEL: 'frem' +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = frem float undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = frem <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = frem <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = frem <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = frem double undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = frem <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = frem <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = frem <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = frem float undef, undef + %V4F32 = frem <4 x float> undef, undef + %V8F32 = frem <8 x float> undef, undef + %V16F32 = frem <16 x float> undef, undef + + %F64 = frem double undef, undef + %V2F64 = frem <2 x double> undef, undef + %V4F64 = frem <4 x double> undef, undef + %V8F64 = frem <8 x double> undef, undef + + ret i32 undef +} + +define i32 @fsqrt(i32 %arg) { +; SSE1-LABEL: 'fsqrt' +; SSE1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE2-LABEL: 'fsqrt' +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE42-LABEL: 'fsqrt' +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX1-LABEL: 'fsqrt' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX2-LABEL: 'fsqrt' +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX512-LABEL: 'fsqrt' +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLM-LABEL: 'fsqrt' +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; GLM-LABEL: 'fsqrt' +; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.sqrt.f32(float undef) + %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) + %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) + %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) + + %F64 = call double @llvm.sqrt.f64(double undef) + %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) + %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) + %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) + + ret i32 undef +} + +define i32 @fabs(i32 %arg) { +; SSE1-LABEL: 'fabs' +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE2-LABEL: 'fabs' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE42-LABEL: 'fabs' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX-LABEL: 'fabs' +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX512-LABEL: 'fabs' +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLM-LABEL: 'fabs' +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; GLM-LABEL: 'fabs' +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.fabs.f32(float undef) + %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) + %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) + %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) + + %F64 = call double @llvm.fabs.f64(double undef) + %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) + %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) + %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) + + ret i32 undef +} + +define i32 @fcopysign(i32 %arg) { +; SSE1-LABEL: 'fcopysign' +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE2-LABEL: 'fcopysign' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE42-LABEL: 'fcopysign' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX-LABEL: 'fcopysign' +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX512-LABEL: 'fcopysign' +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLM-LABEL: 'fcopysign' +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; GLM-LABEL: 'fcopysign' +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.copysign.f32(float undef, float undef) + %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) + %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) + %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) + + %F64 = call double @llvm.copysign.f64(double undef, double undef) + %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) + %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) + %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) + + ret i32 undef +} + +define i32 @fma(i32 %arg) { +; SSE1-LABEL: 'fma' +; SSE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE2-LABEL: 'fma' +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE42-LABEL: 'fma' +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX-LABEL: 'fma' +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX512-LABEL: 'fma' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLM-LABEL: 'fma' +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; GLM-LABEL: 'fma' +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) + %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) + %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) + %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) + + %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) + %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) + %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) + %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) + + ret i32 undef +} + +declare float @llvm.sqrt.f32(float) +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) +declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) +declare <16 x float> @llvm.sqrt.v16f32(<16 x float>) + +declare double @llvm.sqrt.f64(double) +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) +declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) +declare <8 x double> @llvm.sqrt.v8f64(<8 x double>) + +declare float @llvm.fabs.f32(float) +declare <4 x float> @llvm.fabs.v4f32(<4 x float>) +declare <8 x float> @llvm.fabs.v8f32(<8 x float>) +declare <16 x float> @llvm.fabs.v16f32(<16 x float>) + +declare double @llvm.fabs.f64(double) +declare <2 x double> @llvm.fabs.v2f64(<2 x double>) +declare <4 x double> @llvm.fabs.v4f64(<4 x double>) +declare <8 x double> @llvm.fabs.v8f64(<8 x double>) + +declare float @llvm.copysign.f32(float, float) +declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) +declare <8 x float> @llvm.copysign.v8f32(<8 x float>, <8 x float>) +declare <16 x float> @llvm.copysign.v16f32(<16 x float>, <16 x float>) + +declare double @llvm.copysign.f64(double, double) +declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) +declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) +declare <8 x double> @llvm.copysign.v8f64(<8 x double>, <8 x double>) + +declare float @llvm.fma.f32(float, float, float) +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) +declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) +declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>) + +declare double @llvm.fma.f64(double, double, double) +declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) +declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) +declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>) diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll new file mode 100644 index 0000000000000..bbeff8a8f8528 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll @@ -0,0 +1,602 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,SSE1 +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2 +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42 +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512 +; +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM +; RUN: opt < %s -enable-no-nans-fp-math -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 + +define i32 @fadd(i32 %arg) { +; CHECK-LABEL: 'fadd' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fadd <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fadd <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fadd <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fadd <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = fadd float undef, undef + %V4F32 = fadd <4 x float> undef, undef + %V8F32 = fadd <8 x float> undef, undef + %V16F32 = fadd <16 x float> undef, undef + + %F64 = fadd double undef, undef + %V2F64 = fadd <2 x double> undef, undef + %V4F64 = fadd <4 x double> undef, undef + %V8F64 = fadd <8 x double> undef, undef + + ret i32 undef +} + +define i32 @fsub(i32 %arg) { +; CHECK-LABEL: 'fsub' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = fsub float undef, undef + %V4F32 = fsub <4 x float> undef, undef + %V8F32 = fsub <8 x float> undef, undef + %V16F32 = fsub <16 x float> undef, undef + + %F64 = fsub double undef, undef + %V2F64 = fsub <2 x double> undef, undef + %V4F64 = fsub <4 x double> undef, undef + %V8F64 = fsub <8 x double> undef, undef + + ret i32 undef +} + +define i32 @fneg_idiom(i32 %arg) { +; CHECK-LABEL: 'fneg_idiom' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = fsub float -0.0, undef + %V4F32 = fsub <4 x float> , undef + %V8F32 = fsub <8 x float> , undef + %V16F32 = fsub <16 x float> , undef + + %F64 = fsub double -0.0, undef + %V2F64 = fsub <2 x double> , undef + %V4F64 = fsub <4 x double> , undef + %V8F64 = fsub <8 x double> , undef + + ret i32 undef +} + +define i32 @fneg(i32 %arg) { +; CHECK-LABEL: 'fneg' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fneg <8 x float> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fneg <16 x float> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fneg <4 x double> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fneg <8 x double> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = fneg float undef + %V4F32 = fneg <4 x float> undef + %V8F32 = fneg <8 x float> undef + %V16F32 = fneg <16 x float> undef + + %F64 = fneg double undef + %V2F64 = fneg <2 x double> undef + %V4F64 = fneg <4 x double> undef + %V8F64 = fneg <8 x double> undef + + ret i32 undef +} + +define i32 @fmul(i32 %arg) { +; CHECK-LABEL: 'fmul' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fmul <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fmul <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fmul <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fmul <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = fmul float undef, undef + %V4F32 = fmul <4 x float> undef, undef + %V8F32 = fmul <8 x float> undef, undef + %V16F32 = fmul <16 x float> undef, undef + + %F64 = fmul double undef, undef + %V2F64 = fmul <2 x double> undef, undef + %V4F64 = fmul <4 x double> undef, undef + %V8F64 = fmul <8 x double> undef, undef + + ret i32 undef +} + +define i32 @fdiv(i32 %arg) { +; CHECK-LABEL: 'fdiv' +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fdiv float undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fdiv <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fdiv double undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fdiv <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = fdiv float undef, undef + %V4F32 = fdiv <4 x float> undef, undef + %V8F32 = fdiv <8 x float> undef, undef + %V16F32 = fdiv <16 x float> undef, undef + + %F64 = fdiv double undef, undef + %V2F64 = fdiv <2 x double> undef, undef + %V4F64 = fdiv <4 x double> undef, undef + %V8F64 = fdiv <8 x double> undef, undef + + ret i32 undef +} + +define i32 @frem(i32 %arg) { +; CHECK-LABEL: 'frem' +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = frem float undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = frem <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = frem <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = frem <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = frem double undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = frem <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = frem <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = frem <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = frem float undef, undef + %V4F32 = frem <4 x float> undef, undef + %V8F32 = frem <8 x float> undef, undef + %V16F32 = frem <16 x float> undef, undef + + %F64 = frem double undef, undef + %V2F64 = frem <2 x double> undef, undef + %V4F64 = frem <4 x double> undef, undef + %V8F64 = frem <8 x double> undef, undef + + ret i32 undef +} + +define i32 @fsqrt(i32 %arg) { +; SSE1-LABEL: 'fsqrt' +; SSE1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE2-LABEL: 'fsqrt' +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE42-LABEL: 'fsqrt' +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX1-LABEL: 'fsqrt' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX2-LABEL: 'fsqrt' +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX512-LABEL: 'fsqrt' +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLM-LABEL: 'fsqrt' +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; GLM-LABEL: 'fsqrt' +; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.sqrt.f32(float undef) + %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) + %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) + %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) + + %F64 = call double @llvm.sqrt.f64(double undef) + %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) + %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) + %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) + + ret i32 undef +} + +define i32 @fabs(i32 %arg) { +; SSE1-LABEL: 'fabs' +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE2-LABEL: 'fabs' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE42-LABEL: 'fabs' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX-LABEL: 'fabs' +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX512-LABEL: 'fabs' +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLM-LABEL: 'fabs' +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; GLM-LABEL: 'fabs' +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.fabs.f32(float undef) + %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) + %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) + %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) + + %F64 = call double @llvm.fabs.f64(double undef) + %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) + %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) + %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) + + ret i32 undef +} + +define i32 @fcopysign(i32 %arg) { +; SSE1-LABEL: 'fcopysign' +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE2-LABEL: 'fcopysign' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE42-LABEL: 'fcopysign' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX-LABEL: 'fcopysign' +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX512-LABEL: 'fcopysign' +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLM-LABEL: 'fcopysign' +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; GLM-LABEL: 'fcopysign' +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.copysign.f32(float undef, float undef) + %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) + %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) + %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) + + %F64 = call double @llvm.copysign.f64(double undef, double undef) + %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) + %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) + %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) + + ret i32 undef +} + +define i32 @fma(i32 %arg) { +; SSE1-LABEL: 'fma' +; SSE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE2-LABEL: 'fma' +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE42-LABEL: 'fma' +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX-LABEL: 'fma' +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX512-LABEL: 'fma' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLM-LABEL: 'fma' +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; GLM-LABEL: 'fma' +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) + %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) + %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) + %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) + + %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) + %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) + %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) + %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) + + ret i32 undef +} + +declare float @llvm.sqrt.f32(float) +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) +declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) +declare <16 x float> @llvm.sqrt.v16f32(<16 x float>) + +declare double @llvm.sqrt.f64(double) +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) +declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) +declare <8 x double> @llvm.sqrt.v8f64(<8 x double>) + +declare float @llvm.fabs.f32(float) +declare <4 x float> @llvm.fabs.v4f32(<4 x float>) +declare <8 x float> @llvm.fabs.v8f32(<8 x float>) +declare <16 x float> @llvm.fabs.v16f32(<16 x float>) + +declare double @llvm.fabs.f64(double) +declare <2 x double> @llvm.fabs.v2f64(<2 x double>) +declare <4 x double> @llvm.fabs.v4f64(<4 x double>) +declare <8 x double> @llvm.fabs.v8f64(<8 x double>) + +declare float @llvm.copysign.f32(float, float) +declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) +declare <8 x float> @llvm.copysign.v8f32(<8 x float>, <8 x float>) +declare <16 x float> @llvm.copysign.v16f32(<16 x float>, <16 x float>) + +declare double @llvm.copysign.f64(double, double) +declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) +declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) +declare <8 x double> @llvm.copysign.v8f64(<8 x double>, <8 x double>) + +declare float @llvm.fma.f32(float, float, float) +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) +declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) +declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>) + +declare double @llvm.fma.f64(double, double, double) +declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) +declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) +declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>) diff --git a/llvm/test/Analysis/CostModel/X86/arith-int-codesize.ll b/llvm/test/Analysis/CostModel/X86/arith-int-codesize.ll new file mode 100644 index 0000000000000..a8e506c1c8534 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/arith-int-codesize.ll @@ -0,0 +1,334 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=code-size -mtriple=x86_64-- -mattr=+ssse3 | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=code-size -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=code-size -mtriple=x86_64-- -mattr=+avx | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=code-size -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=code-size -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=code-size -mtriple=x86_64-- -mattr=+avx512f,+avx512bw | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=code-size -mtriple=x86_64-- -mattr=+avx512f,+avx512dq | FileCheck %s +; +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=code-size -mtriple=x86_64-- -mcpu=slm | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=code-size -mtriple=x86_64-- -mcpu=goldmont | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=code-size -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s + +define i32 @add(i32 %arg) { +; CHECK-LABEL: 'add' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = add i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = add <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = add <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = add <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = add i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = add <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = add <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = add <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = add i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = add <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = add <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = add <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = add i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = add <16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = add <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = add <64 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = add i64 undef, undef + %V2I64 = add <2 x i64> undef, undef + %V4I64 = add <4 x i64> undef, undef + %V8I64 = add <8 x i64> undef, undef + + %I32 = add i32 undef, undef + %V4I32 = add <4 x i32> undef, undef + %V8I32 = add <8 x i32> undef, undef + %V16I32 = add <16 x i32> undef, undef + + %I16 = add i16 undef, undef + %V8I16 = add <8 x i16> undef, undef + %V16I16 = add <16 x i16> undef, undef + %V32I16 = add <32 x i16> undef, undef + + %I8 = add i8 undef, undef + %V16I8 = add <16 x i8> undef, undef + %V32I8 = add <32 x i8> undef, undef + %V64I8 = add <64 x i8> undef, undef + + ret i32 undef +} + +define i32 @sub(i32 %arg) { +; CHECK-LABEL: 'sub' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sub i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = sub <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = sub <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = sub <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sub i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = sub <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = sub <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = sub <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sub i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = sub <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = sub <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = sub <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sub i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = sub <16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = sub <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = sub <64 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = sub i64 undef, undef + %V2I64 = sub <2 x i64> undef, undef + %V4I64 = sub <4 x i64> undef, undef + %V8I64 = sub <8 x i64> undef, undef + + %I32 = sub i32 undef, undef + %V4I32 = sub <4 x i32> undef, undef + %V8I32 = sub <8 x i32> undef, undef + %V16I32 = sub <16 x i32> undef, undef + + %I16 = sub i16 undef, undef + %V8I16 = sub <8 x i16> undef, undef + %V16I16 = sub <16 x i16> undef, undef + %V32I16 = sub <32 x i16> undef, undef + + %I8 = sub i8 undef, undef + %V16I8 = sub <16 x i8> undef, undef + %V32I8 = sub <32 x i8> undef, undef + %V64I8 = sub <64 x i8> undef, undef + + ret i32 undef +} + +define i32 @or(i32 %arg) { +; CHECK-LABEL: 'or' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = or <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = or <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = or <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = or <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = or <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = or <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = or <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = or <64 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = or <32 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = or <64 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = or i64 undef, undef + %V2I64 = or <2 x i64> undef, undef + %V4I64 = or <4 x i64> undef, undef + %V8I64 = or <8 x i64> undef, undef + + %I32 = or i32 undef, undef + %V4I32 = or <4 x i32> undef, undef + %V8I32 = or <8 x i32> undef, undef + %V16I32 = or <16 x i32> undef, undef + + %I16 = or i16 undef, undef + %V8I16 = or <8 x i16> undef, undef + %V16I16 = or <16 x i16> undef, undef + %V32I16 = or <32 x i16> undef, undef + + %I8 = or i8 undef, undef + %V16I8 = or <16 x i8> undef, undef + %V32I8 = or <32 x i8> undef, undef + %V64I8 = or <64 x i8> undef, undef + + %I1 = or i1 undef, undef + %V2I1 = or <2 x i1> undef, undef + %V4I1 = or <4 x i1> undef, undef + %V8I1 = or <8 x i1> undef, undef + %V16I1 = or <16 x i1> undef, undef + %V32I1 = or <32 x i1> undef, undef + %V64I1 = or <64 x i1> undef, undef + + ret i32 undef +} + +define i32 @xor(i32 %arg) { +; CHECK-LABEL: 'xor' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = xor <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = xor <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = xor <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = xor <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = xor <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = xor <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = xor <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = xor <64 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = xor <32 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = xor <64 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = xor i64 undef, undef + %V2I64 = xor <2 x i64> undef, undef + %V4I64 = xor <4 x i64> undef, undef + %V8I64 = xor <8 x i64> undef, undef + + %I32 = xor i32 undef, undef + %V4I32 = xor <4 x i32> undef, undef + %V8I32 = xor <8 x i32> undef, undef + %V16I32 = xor <16 x i32> undef, undef + + %I16 = xor i16 undef, undef + %V8I16 = xor <8 x i16> undef, undef + %V16I16 = xor <16 x i16> undef, undef + %V32I16 = xor <32 x i16> undef, undef + + %I8 = xor i8 undef, undef + %V16I8 = xor <16 x i8> undef, undef + %V32I8 = xor <32 x i8> undef, undef + %V64I8 = xor <64 x i8> undef, undef + + %I1 = xor i1 undef, undef + %V2I1 = xor <2 x i1> undef, undef + %V4I1 = xor <4 x i1> undef, undef + %V8I1 = xor <8 x i1> undef, undef + %V16I1 = xor <16 x i1> undef, undef + %V32I1 = xor <32 x i1> undef, undef + %V64I1 = xor <64 x i1> undef, undef + + ret i32 undef +} + +define i32 @and(i32 %arg) { +; CHECK-LABEL: 'and' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = and <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = and <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = and <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = and <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = and <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = and <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = and <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = and <64 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = and <32 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = and <64 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = and i64 undef, undef + %V2I64 = and <2 x i64> undef, undef + %V4I64 = and <4 x i64> undef, undef + %V8I64 = and <8 x i64> undef, undef + + %I32 = and i32 undef, undef + %V4I32 = and <4 x i32> undef, undef + %V8I32 = and <8 x i32> undef, undef + %V16I32 = and <16 x i32> undef, undef + + %I16 = and i16 undef, undef + %V8I16 = and <8 x i16> undef, undef + %V16I16 = and <16 x i16> undef, undef + %V32I16 = and <32 x i16> undef, undef + + %I8 = and i8 undef, undef + %V16I8 = and <16 x i8> undef, undef + %V32I8 = and <32 x i8> undef, undef + %V64I8 = and <64 x i8> undef, undef + + %I1 = and i1 undef, undef + %V2I1 = and <2 x i1> undef, undef + %V4I1 = and <4 x i1> undef, undef + %V8I1 = and <8 x i1> undef, undef + %V16I1 = and <16 x i1> undef, undef + %V32I1 = and <32 x i1> undef, undef + %V64I1 = and <64 x i1> undef, undef + + ret i32 undef +} + +define i32 @mul(i32 %arg) { +; CHECK-LABEL: 'mul' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = mul <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = mul <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = mul <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = mul <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = mul <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = mul <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = mul <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = mul <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = mul <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = mul <2 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = mul <16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = mul <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = mul <64 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = mul i64 undef, undef + %V2I64 = mul <2 x i64> undef, undef + %V4I64 = mul <4 x i64> undef, undef + %V8I64 = mul <8 x i64> undef, undef + + %I32 = mul i32 undef, undef + %V4I32 = mul <4 x i32> undef, undef + %V8I32 = mul <8 x i32> undef, undef + %V16I32 = mul <16 x i32> undef, undef + + %I16 = mul i16 undef, undef + %V8I16 = mul <8 x i16> undef, undef + %V16I16 = mul <16 x i16> undef, undef + %V32I16 = mul <32 x i16> undef, undef + + %I8 = mul i8 undef, undef + %V2I8 = mul <2 x i8> undef, undef + %V4I8 = mul <4 x i8> undef, undef + %V8I8 = mul <8 x i8> undef, undef + %V16I8 = mul <16 x i8> undef, undef + %V32I8 = mul <32 x i8> undef, undef + %V64I8 = mul <64 x i8> undef, undef + + ret i32 undef +} + +; A <2 x i32> gets expanded to a <2 x i64> vector. +; A <2 x i64> vector multiply is implemented using +; 3 PMULUDQ and 2 PADDS and 4 shifts. +define void @mul_2i32() { +; CHECK-LABEL: 'mul_2i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A0 = mul <2 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %A0 = mul <2 x i32> undef, undef + + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/arith-int-latency.ll b/llvm/test/Analysis/CostModel/X86/arith-int-latency.ll new file mode 100644 index 0000000000000..6d59a0745e223 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/arith-int-latency.ll @@ -0,0 +1,334 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+ssse3 | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx512f,+avx512bw | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx512f,+avx512dq | FileCheck %s +; +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mcpu=slm | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mcpu=goldmont | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s + +define i32 @add(i32 %arg) { +; CHECK-LABEL: 'add' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = add i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = add <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = add <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = add <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = add i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = add <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = add <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = add <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = add i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = add <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = add <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = add <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = add i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = add <16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = add <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = add <64 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = add i64 undef, undef + %V2I64 = add <2 x i64> undef, undef + %V4I64 = add <4 x i64> undef, undef + %V8I64 = add <8 x i64> undef, undef + + %I32 = add i32 undef, undef + %V4I32 = add <4 x i32> undef, undef + %V8I32 = add <8 x i32> undef, undef + %V16I32 = add <16 x i32> undef, undef + + %I16 = add i16 undef, undef + %V8I16 = add <8 x i16> undef, undef + %V16I16 = add <16 x i16> undef, undef + %V32I16 = add <32 x i16> undef, undef + + %I8 = add i8 undef, undef + %V16I8 = add <16 x i8> undef, undef + %V32I8 = add <32 x i8> undef, undef + %V64I8 = add <64 x i8> undef, undef + + ret i32 undef +} + +define i32 @sub(i32 %arg) { +; CHECK-LABEL: 'sub' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sub i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = sub <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = sub <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = sub <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sub i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = sub <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = sub <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = sub <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sub i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = sub <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = sub <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = sub <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sub i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = sub <16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = sub <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = sub <64 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = sub i64 undef, undef + %V2I64 = sub <2 x i64> undef, undef + %V4I64 = sub <4 x i64> undef, undef + %V8I64 = sub <8 x i64> undef, undef + + %I32 = sub i32 undef, undef + %V4I32 = sub <4 x i32> undef, undef + %V8I32 = sub <8 x i32> undef, undef + %V16I32 = sub <16 x i32> undef, undef + + %I16 = sub i16 undef, undef + %V8I16 = sub <8 x i16> undef, undef + %V16I16 = sub <16 x i16> undef, undef + %V32I16 = sub <32 x i16> undef, undef + + %I8 = sub i8 undef, undef + %V16I8 = sub <16 x i8> undef, undef + %V32I8 = sub <32 x i8> undef, undef + %V64I8 = sub <64 x i8> undef, undef + + ret i32 undef +} + +define i32 @or(i32 %arg) { +; CHECK-LABEL: 'or' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = or <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = or <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = or <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = or <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = or <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = or <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = or <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = or <64 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = or <32 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = or <64 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = or i64 undef, undef + %V2I64 = or <2 x i64> undef, undef + %V4I64 = or <4 x i64> undef, undef + %V8I64 = or <8 x i64> undef, undef + + %I32 = or i32 undef, undef + %V4I32 = or <4 x i32> undef, undef + %V8I32 = or <8 x i32> undef, undef + %V16I32 = or <16 x i32> undef, undef + + %I16 = or i16 undef, undef + %V8I16 = or <8 x i16> undef, undef + %V16I16 = or <16 x i16> undef, undef + %V32I16 = or <32 x i16> undef, undef + + %I8 = or i8 undef, undef + %V16I8 = or <16 x i8> undef, undef + %V32I8 = or <32 x i8> undef, undef + %V64I8 = or <64 x i8> undef, undef + + %I1 = or i1 undef, undef + %V2I1 = or <2 x i1> undef, undef + %V4I1 = or <4 x i1> undef, undef + %V8I1 = or <8 x i1> undef, undef + %V16I1 = or <16 x i1> undef, undef + %V32I1 = or <32 x i1> undef, undef + %V64I1 = or <64 x i1> undef, undef + + ret i32 undef +} + +define i32 @xor(i32 %arg) { +; CHECK-LABEL: 'xor' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = xor <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = xor <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = xor <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = xor <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = xor <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = xor <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = xor <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = xor <64 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = xor <32 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = xor <64 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = xor i64 undef, undef + %V2I64 = xor <2 x i64> undef, undef + %V4I64 = xor <4 x i64> undef, undef + %V8I64 = xor <8 x i64> undef, undef + + %I32 = xor i32 undef, undef + %V4I32 = xor <4 x i32> undef, undef + %V8I32 = xor <8 x i32> undef, undef + %V16I32 = xor <16 x i32> undef, undef + + %I16 = xor i16 undef, undef + %V8I16 = xor <8 x i16> undef, undef + %V16I16 = xor <16 x i16> undef, undef + %V32I16 = xor <32 x i16> undef, undef + + %I8 = xor i8 undef, undef + %V16I8 = xor <16 x i8> undef, undef + %V32I8 = xor <32 x i8> undef, undef + %V64I8 = xor <64 x i8> undef, undef + + %I1 = xor i1 undef, undef + %V2I1 = xor <2 x i1> undef, undef + %V4I1 = xor <4 x i1> undef, undef + %V8I1 = xor <8 x i1> undef, undef + %V16I1 = xor <16 x i1> undef, undef + %V32I1 = xor <32 x i1> undef, undef + %V64I1 = xor <64 x i1> undef, undef + + ret i32 undef +} + +define i32 @and(i32 %arg) { +; CHECK-LABEL: 'and' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = and <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = and <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = and <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = and <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = and <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = and <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = and <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = and <64 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = and <32 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = and <64 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = and i64 undef, undef + %V2I64 = and <2 x i64> undef, undef + %V4I64 = and <4 x i64> undef, undef + %V8I64 = and <8 x i64> undef, undef + + %I32 = and i32 undef, undef + %V4I32 = and <4 x i32> undef, undef + %V8I32 = and <8 x i32> undef, undef + %V16I32 = and <16 x i32> undef, undef + + %I16 = and i16 undef, undef + %V8I16 = and <8 x i16> undef, undef + %V16I16 = and <16 x i16> undef, undef + %V32I16 = and <32 x i16> undef, undef + + %I8 = and i8 undef, undef + %V16I8 = and <16 x i8> undef, undef + %V32I8 = and <32 x i8> undef, undef + %V64I8 = and <64 x i8> undef, undef + + %I1 = and i1 undef, undef + %V2I1 = and <2 x i1> undef, undef + %V4I1 = and <4 x i1> undef, undef + %V8I1 = and <8 x i1> undef, undef + %V16I1 = and <16 x i1> undef, undef + %V32I1 = and <32 x i1> undef, undef + %V64I1 = and <64 x i1> undef, undef + + ret i32 undef +} + +define i32 @mul(i32 %arg) { +; CHECK-LABEL: 'mul' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = mul <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = mul <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = mul <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = mul <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = mul <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = mul <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = mul <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = mul <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = mul <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = mul <2 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = mul <16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = mul <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = mul <64 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = mul i64 undef, undef + %V2I64 = mul <2 x i64> undef, undef + %V4I64 = mul <4 x i64> undef, undef + %V8I64 = mul <8 x i64> undef, undef + + %I32 = mul i32 undef, undef + %V4I32 = mul <4 x i32> undef, undef + %V8I32 = mul <8 x i32> undef, undef + %V16I32 = mul <16 x i32> undef, undef + + %I16 = mul i16 undef, undef + %V8I16 = mul <8 x i16> undef, undef + %V16I16 = mul <16 x i16> undef, undef + %V32I16 = mul <32 x i16> undef, undef + + %I8 = mul i8 undef, undef + %V2I8 = mul <2 x i8> undef, undef + %V4I8 = mul <4 x i8> undef, undef + %V8I8 = mul <8 x i8> undef, undef + %V16I8 = mul <16 x i8> undef, undef + %V32I8 = mul <32 x i8> undef, undef + %V64I8 = mul <64 x i8> undef, undef + + ret i32 undef +} + +; A <2 x i32> gets expanded to a <2 x i64> vector. +; A <2 x i64> vector multiply is implemented using +; 3 PMULUDQ and 2 PADDS and 4 shifts. +define void @mul_2i32() { +; CHECK-LABEL: 'mul_2i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A0 = mul <2 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %A0 = mul <2 x i32> undef, undef + + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/arith-int-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/arith-int-sizelatency.ll new file mode 100644 index 0000000000000..e963fa0c2cbec --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/arith-int-sizelatency.ll @@ -0,0 +1,334 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+ssse3 | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx512f,+avx512bw | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx512f,+avx512dq | FileCheck %s +; +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=slm | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=goldmont | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s + +define i32 @add(i32 %arg) { +; CHECK-LABEL: 'add' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = add i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = add <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = add <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = add <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = add i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = add <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = add <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = add <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = add i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = add <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = add <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = add <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = add i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = add <16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = add <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = add <64 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = add i64 undef, undef + %V2I64 = add <2 x i64> undef, undef + %V4I64 = add <4 x i64> undef, undef + %V8I64 = add <8 x i64> undef, undef + + %I32 = add i32 undef, undef + %V4I32 = add <4 x i32> undef, undef + %V8I32 = add <8 x i32> undef, undef + %V16I32 = add <16 x i32> undef, undef + + %I16 = add i16 undef, undef + %V8I16 = add <8 x i16> undef, undef + %V16I16 = add <16 x i16> undef, undef + %V32I16 = add <32 x i16> undef, undef + + %I8 = add i8 undef, undef + %V16I8 = add <16 x i8> undef, undef + %V32I8 = add <32 x i8> undef, undef + %V64I8 = add <64 x i8> undef, undef + + ret i32 undef +} + +define i32 @sub(i32 %arg) { +; CHECK-LABEL: 'sub' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sub i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = sub <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = sub <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = sub <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sub i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = sub <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = sub <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = sub <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sub i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = sub <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = sub <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = sub <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sub i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = sub <16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = sub <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = sub <64 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = sub i64 undef, undef + %V2I64 = sub <2 x i64> undef, undef + %V4I64 = sub <4 x i64> undef, undef + %V8I64 = sub <8 x i64> undef, undef + + %I32 = sub i32 undef, undef + %V4I32 = sub <4 x i32> undef, undef + %V8I32 = sub <8 x i32> undef, undef + %V16I32 = sub <16 x i32> undef, undef + + %I16 = sub i16 undef, undef + %V8I16 = sub <8 x i16> undef, undef + %V16I16 = sub <16 x i16> undef, undef + %V32I16 = sub <32 x i16> undef, undef + + %I8 = sub i8 undef, undef + %V16I8 = sub <16 x i8> undef, undef + %V32I8 = sub <32 x i8> undef, undef + %V64I8 = sub <64 x i8> undef, undef + + ret i32 undef +} + +define i32 @or(i32 %arg) { +; CHECK-LABEL: 'or' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = or <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = or <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = or <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = or <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = or <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = or <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = or <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = or <64 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = or <32 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = or <64 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = or i64 undef, undef + %V2I64 = or <2 x i64> undef, undef + %V4I64 = or <4 x i64> undef, undef + %V8I64 = or <8 x i64> undef, undef + + %I32 = or i32 undef, undef + %V4I32 = or <4 x i32> undef, undef + %V8I32 = or <8 x i32> undef, undef + %V16I32 = or <16 x i32> undef, undef + + %I16 = or i16 undef, undef + %V8I16 = or <8 x i16> undef, undef + %V16I16 = or <16 x i16> undef, undef + %V32I16 = or <32 x i16> undef, undef + + %I8 = or i8 undef, undef + %V16I8 = or <16 x i8> undef, undef + %V32I8 = or <32 x i8> undef, undef + %V64I8 = or <64 x i8> undef, undef + + %I1 = or i1 undef, undef + %V2I1 = or <2 x i1> undef, undef + %V4I1 = or <4 x i1> undef, undef + %V8I1 = or <8 x i1> undef, undef + %V16I1 = or <16 x i1> undef, undef + %V32I1 = or <32 x i1> undef, undef + %V64I1 = or <64 x i1> undef, undef + + ret i32 undef +} + +define i32 @xor(i32 %arg) { +; CHECK-LABEL: 'xor' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = xor <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = xor <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = xor <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = xor <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = xor <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = xor <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = xor <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = xor <64 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = xor <32 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = xor <64 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = xor i64 undef, undef + %V2I64 = xor <2 x i64> undef, undef + %V4I64 = xor <4 x i64> undef, undef + %V8I64 = xor <8 x i64> undef, undef + + %I32 = xor i32 undef, undef + %V4I32 = xor <4 x i32> undef, undef + %V8I32 = xor <8 x i32> undef, undef + %V16I32 = xor <16 x i32> undef, undef + + %I16 = xor i16 undef, undef + %V8I16 = xor <8 x i16> undef, undef + %V16I16 = xor <16 x i16> undef, undef + %V32I16 = xor <32 x i16> undef, undef + + %I8 = xor i8 undef, undef + %V16I8 = xor <16 x i8> undef, undef + %V32I8 = xor <32 x i8> undef, undef + %V64I8 = xor <64 x i8> undef, undef + + %I1 = xor i1 undef, undef + %V2I1 = xor <2 x i1> undef, undef + %V4I1 = xor <4 x i1> undef, undef + %V8I1 = xor <8 x i1> undef, undef + %V16I1 = xor <16 x i1> undef, undef + %V32I1 = xor <32 x i1> undef, undef + %V64I1 = xor <64 x i1> undef, undef + + ret i32 undef +} + +define i32 @and(i32 %arg) { +; CHECK-LABEL: 'and' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = and <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = and <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = and <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = and <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = and <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = and <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = and <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = and <64 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = and <32 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = and <64 x i1> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = and i64 undef, undef + %V2I64 = and <2 x i64> undef, undef + %V4I64 = and <4 x i64> undef, undef + %V8I64 = and <8 x i64> undef, undef + + %I32 = and i32 undef, undef + %V4I32 = and <4 x i32> undef, undef + %V8I32 = and <8 x i32> undef, undef + %V16I32 = and <16 x i32> undef, undef + + %I16 = and i16 undef, undef + %V8I16 = and <8 x i16> undef, undef + %V16I16 = and <16 x i16> undef, undef + %V32I16 = and <32 x i16> undef, undef + + %I8 = and i8 undef, undef + %V16I8 = and <16 x i8> undef, undef + %V32I8 = and <32 x i8> undef, undef + %V64I8 = and <64 x i8> undef, undef + + %I1 = and i1 undef, undef + %V2I1 = and <2 x i1> undef, undef + %V4I1 = and <4 x i1> undef, undef + %V8I1 = and <8 x i1> undef, undef + %V16I1 = and <16 x i1> undef, undef + %V32I1 = and <32 x i1> undef, undef + %V64I1 = and <64 x i1> undef, undef + + ret i32 undef +} + +define i32 @mul(i32 %arg) { +; CHECK-LABEL: 'mul' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = mul <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = mul <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = mul <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = mul <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = mul <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = mul <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = mul <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = mul <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = mul <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = mul <2 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = mul <16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = mul <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = mul <64 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = mul i64 undef, undef + %V2I64 = mul <2 x i64> undef, undef + %V4I64 = mul <4 x i64> undef, undef + %V8I64 = mul <8 x i64> undef, undef + + %I32 = mul i32 undef, undef + %V4I32 = mul <4 x i32> undef, undef + %V8I32 = mul <8 x i32> undef, undef + %V16I32 = mul <16 x i32> undef, undef + + %I16 = mul i16 undef, undef + %V8I16 = mul <8 x i16> undef, undef + %V16I16 = mul <16 x i16> undef, undef + %V32I16 = mul <32 x i16> undef, undef + + %I8 = mul i8 undef, undef + %V2I8 = mul <2 x i8> undef, undef + %V4I8 = mul <4 x i8> undef, undef + %V8I8 = mul <8 x i8> undef, undef + %V16I8 = mul <16 x i8> undef, undef + %V32I8 = mul <32 x i8> undef, undef + %V64I8 = mul <64 x i8> undef, undef + + ret i32 undef +} + +; A <2 x i32> gets expanded to a <2 x i64> vector. +; A <2 x i64> vector multiply is implemented using +; 3 PMULUDQ and 2 PADDS and 4 shifts. +define void @mul_2i32() { +; CHECK-LABEL: 'mul_2i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A0 = mul <2 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %A0 = mul <2 x i32> undef, undef + + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/arith.ll b/llvm/test/Analysis/CostModel/X86/arith-int.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/arith.ll rename to llvm/test/Analysis/CostModel/X86/arith-int.ll