137 changes: 137 additions & 0 deletions libcxx/test/std/time/time.syn/formatter.sys_info.pass.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
//===----------------------------------------------------------------------===//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11, c++14, c++17
// UNSUPPORTED: no-localization

// TODO FMT This test should not require std::to_chars(floating-point)
// XFAIL: availability-fp_to_chars-missing

// XFAIL: libcpp-has-no-incomplete-tzdb

// REQUIRES: locale.fr_FR.UTF-8
// REQUIRES: locale.ja_JP.UTF-8

// <chrono>
//
// template<class charT> struct formatter<chrono::sys_info, charT>;

#include <chrono>
#include <format>

#include <cassert>
#include <concepts>
#include <locale>
#include <iostream>
#include <type_traits>

#include "formatter_tests.h"
#include "make_string.h"
#include "platform_support.h" // locale name macros
#include "test_macros.h"

template <class CharT>
static void test_no_chrono_specs() {
// This test libc++ specific due to
// [time.zone.info.sys]/7
// Effects: Streams out the sys_info object r in an unspecified format.
#ifdef _LIBCPP_VERSION
using namespace std::literals::chrono_literals;
namespace tz = std::chrono;

std::locale::global(std::locale(LOCALE_fr_FR_UTF_8));

// Non localized output

check(SV("[-10484-10-16 15:30:08, 14423-03-17 15:30:07) 00:00:00 0min \"TZ\""),
SV("{}"),
tz::sys_info{tz::sys_seconds::min(), tz::sys_seconds::max(), 0s, 0min, "TZ"});

check(SV("[1970-01-01 00:00:00, 2038-12-31 00:00:00) 12:23:45 -67min \"DMY\""),
SV("{}"),
tz::sys_info{static_cast<tz::sys_days>(tz::year_month_day{1970y, tz::January, 1d}),
static_cast<tz::sys_days>(tz::year_month_day{2038y, tz::December, 31d}),
12h + 23min + 45s,
-67min,
"DMY"});

std::locale::global(std::locale::classic());
#endif // _LIBCPP_VERSION
}

template <class CharT>
static void test_valid_values() {
using namespace std::literals::chrono_literals;

constexpr std::basic_string_view<CharT> fmt = SV("{:%%z='%z'%t%%Ez='%Ez'%t%%Oz='%Oz'%t%%Z='%Z'%n}");
constexpr std::basic_string_view<CharT> lfmt = SV("{:L%%z='%z'%t%%Ez='%Ez'%t%%Oz='%Oz'%t%%Z='%Z'%n}");

const std::locale loc(LOCALE_ja_JP_UTF_8);
std::locale::global(std::locale(LOCALE_fr_FR_UTF_8));

// Non localized output using C-locale
check(SV("%z='-0200'\t%Ez='-02:00'\t%Oz='-02:00'\t%Z='NEG'\n"),
fmt,
std::chrono::sys_info{std::chrono::sys_seconds{0s}, std::chrono::sys_seconds{0s}, -2h, 0min, "NEG"});

check(SV("%z='+0000'\t%Ez='+00:00'\t%Oz='+00:00'\t%Z='ZERO'\n"),
fmt,
std::chrono::sys_info{std::chrono::sys_seconds{0s}, std::chrono::sys_seconds{0s}, 0s, 0min, "ZERO"});

check(SV("%z='+1115'\t%Ez='+11:15'\t%Oz='+11:15'\t%Z='POS'\n"),
fmt,
std::chrono::sys_info{std::chrono::sys_seconds{0s}, std::chrono::sys_seconds{0s}, 11h + 15min, 0min, "POS"});

// Use the global locale (fr_FR)
check(SV("%z='-0200'\t%Ez='-02:00'\t%Oz='-02:00'\t%Z='NEG'\n"),
lfmt,
std::chrono::sys_info{std::chrono::sys_seconds{0s}, std::chrono::sys_seconds{0s}, -2h, 0min, "NEG"});

check(SV("%z='+0000'\t%Ez='+00:00'\t%Oz='+00:00'\t%Z='ZERO'\n"),
lfmt,
std::chrono::sys_info{std::chrono::sys_seconds{0s}, std::chrono::sys_seconds{0s}, 0s, 0min, "ZERO"});

check(SV("%z='+1115'\t%Ez='+11:15'\t%Oz='+11:15'\t%Z='POS'\n"),
lfmt,
std::chrono::sys_info{std::chrono::sys_seconds{0s}, std::chrono::sys_seconds{0s}, 11h + 15min, 0min, "POS"});

// Use supplied locale (ja_JP).
check(loc,
SV("%z='-0200'\t%Ez='-02:00'\t%Oz='-02:00'\t%Z='NEG'\n"),
lfmt,
std::chrono::sys_info{std::chrono::sys_seconds{0s}, std::chrono::sys_seconds{0s}, -2h, 0min, "NEG"});

check(loc,
SV("%z='+0000'\t%Ez='+00:00'\t%Oz='+00:00'\t%Z='ZERO'\n"),
lfmt,
std::chrono::sys_info{std::chrono::sys_seconds{0s}, std::chrono::sys_seconds{0s}, 0s, 0min, "ZERO"});

check(loc,
SV("%z='+1115'\t%Ez='+11:15'\t%Oz='+11:15'\t%Z='POS'\n"),
lfmt,
std::chrono::sys_info{std::chrono::sys_seconds{0s}, std::chrono::sys_seconds{0s}, 11h + 15min, 0min, "POS"});

std::locale::global(std::locale::classic());
}

template <class CharT>
static void test() {
test_no_chrono_specs<CharT>();
test_valid_values<CharT>();

check_invalid_types<CharT>({SV("z"), SV("Z"), SV("Ez"), SV("Oz")}, std::chrono::sys_info{});
}

int main(int, char**) {
test<char>();

#ifndef TEST_HAS_NO_WIDE_CHARACTERS
test<wchar_t>();
#endif

return 0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11, c++14, c++17
// UNSUPPORTED: no-localization

// TODO FMT This test should not require std::to_chars(floating-point)
// XFAIL: availability-fp_to_chars-missing

// XFAIL: libcpp-has-no-incomplete-tzdb

// <chrono>

// template<class charT, class traits>
// basic_ostream<charT, traits>&
// operator<<(basic_ostream<charT, traits>& os, const sys_info& r);

// [time.zone.info.sys]
// 7 Effects: Streams out the sys_info object r in an unspecified format.
// 8 Returns: os.
//
// There is a private libc++ test that validates the exact output.

#include <cassert>
#include <chrono>
#include <memory>
#include <sstream>

#include "test_macros.h"

template <class CharT>
static void test() {
using namespace std::literals::chrono_literals;
std::chrono::sys_info s{std::chrono::sys_seconds{0s}, std::chrono::sys_seconds{0s}, 0h, 0min, ""};
std::basic_ostringstream<CharT> os;
std::basic_ostream<CharT>& result = std::chrono::operator<<(os, s);
assert(std::addressof(result) == std::addressof(os));
}

int main(int, const char**) {
test<char>();
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
test<wchar_t>();
#endif

return 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,12 @@ void test_P1361() {

assert_is_formattable<std::chrono::hh_mm_ss<std::chrono::microseconds>, CharT>();

//assert_is_formattable<std::chrono::sys_info, CharT>();
# if !defined(TEST_HAS_NO_INCOMPLETE_TZDB)
assert_is_formattable<std::chrono::sys_info, CharT>();
//assert_is_formattable<std::chrono::local_info, CharT>();

//assert_is_formattable<std::chrono::zoned_time, CharT>();
# endif // !defined(TEST_HAS_NO_INCOMPLETE_TZDB)

#endif // TEST_HAS_NO_LOCALIZATION
}
Expand Down
8 changes: 8 additions & 0 deletions libcxx/test/support/test_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,14 @@ inline Tp const& DoNotOptimize(Tp const& value) {
# define TEST_HAS_NO_RANDOM_DEVICE
#endif

#if defined(_LIBCPP_HAS_NO_INCOMPLETE_TZDB)
# define TEST_HAS_NO_INCOMPLETE_TZDB
#endif

#if defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE)
# define TEST_HAS_NO_TIME_ZONE_DATABASE
#endif

#if defined(TEST_COMPILER_CLANG)
# define TEST_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push")
# define TEST_DIAGNOSTIC_POP _Pragma("clang diagnostic pop")
Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -2805,6 +2805,12 @@ class TargetLoweringBase {
Type *Ty, unsigned AddrSpace,
Instruction *I = nullptr) const;

/// Returns true if the targets addressing mode can target thread local
/// storage (TLS).
virtual bool addressingModeSupportsTLS(const GlobalValue &) const {
return false;
}

/// Return the prefered common base offset.
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
int64_t MaxOffset) const {
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/Support/TargetOpcodes.def
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,9 @@ HANDLE_TARGET_OPCODE(G_INTRINSIC_ROUND)
/// INTRINSIC round to integer intrinsic.
HANDLE_TARGET_OPCODE(G_INTRINSIC_LRINT)

/// INTRINSIC long round to integer intrinsic.
HANDLE_TARGET_OPCODE(G_INTRINSIC_LLRINT)

/// INTRINSIC roundeven intrinsic.
HANDLE_TARGET_OPCODE(G_INTRINSIC_ROUNDEVEN)

Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/Target/GenericOpcodes.td
Original file line number Diff line number Diff line change
Expand Up @@ -1089,6 +1089,12 @@ def G_INTRINSIC_LRINT : GenericInstruction {
let hasSideEffects = false;
}

def G_INTRINSIC_LLRINT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
let hasSideEffects = false;
}

def G_INTRINSIC_ROUNDEVEN : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ def : GINodeEquiv<G_INTRINSIC_TRUNC, ftrunc>;
def : GINodeEquiv<G_INTRINSIC_ROUND, fround>;
def : GINodeEquiv<G_INTRINSIC_ROUNDEVEN, froundeven>;
def : GINodeEquiv<G_INTRINSIC_LRINT, lrint>;
def : GINodeEquiv<G_INTRINSIC_LLRINT, llrint>;
def : GINodeEquiv<G_FCOPYSIGN, fcopysign>;
def : GINodeEquiv<G_SMIN, smin>;
def : GINodeEquiv<G_SMAX, smax>;
Expand Down
29 changes: 25 additions & 4 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5082,6 +5082,15 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
}
return true;
}
case Instruction::Call:
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
if (TLI.addressingModeSupportsTLS(GV))
return matchAddr(AddrInst->getOperand(0), Depth);
}
}
break;
}
return false;
}
Expand Down Expand Up @@ -5620,11 +5629,16 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
return Modified;
}

if (AddrMode.BaseGV) {
GlobalValue *BaseGV = AddrMode.BaseGV;
if (BaseGV != nullptr) {
if (ResultPtr)
return Modified;

ResultPtr = AddrMode.BaseGV;
if (BaseGV->isThreadLocal()) {
ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
} else {
ResultPtr = BaseGV;
}
}

// If the real base value actually came from an inttoptr, then the matcher
Expand Down Expand Up @@ -5789,8 +5803,15 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
}

// Add in the BaseGV if present.
if (AddrMode.BaseGV) {
Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
GlobalValue *BaseGV = AddrMode.BaseGV;
if (BaseGV != nullptr) {
Value *BaseGVPtr;
if (BaseGV->isThreadLocal()) {
BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
} else {
BaseGVPtr = BaseGV;
}
Value *V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy, "sunkaddr");
if (Result)
Result = Builder.CreateAdd(Result, V, "sunkaddr");
else
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1955,6 +1955,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_PTRMASK;
case Intrinsic::lrint:
return TargetOpcode::G_INTRINSIC_LRINT;
case Intrinsic::llrint:
return TargetOpcode::G_INTRINSIC_LLRINT;
// FADD/FMUL require checking the FMF, so are handled elsewhere.
case Intrinsic::vector_reduce_fmin:
return TargetOpcode::G_VECREDUCE_FMIN;
Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(ROUNDEVEN_F);
case TargetOpcode::G_INTRINSIC_LRINT:
RTLIBCASE(LRINT_F);
case TargetOpcode::G_INTRINSIC_LLRINT:
RTLIBCASE(LLRINT_F);
}
llvm_unreachable("Unknown libcall function");
}
Expand Down Expand Up @@ -1061,7 +1063,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
return Status;
break;
}
case TargetOpcode::G_INTRINSIC_LRINT: {
case TargetOpcode::G_INTRINSIC_LRINT:
case TargetOpcode::G_INTRINSIC_LLRINT: {
LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
unsigned Size = LLTy.getSizeInBits();
Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
Expand Down Expand Up @@ -2661,6 +2664,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
case TargetOpcode::G_INTRINSIC_LRINT:
case TargetOpcode::G_INTRINSIC_LLRINT:
case TargetOpcode::G_IS_FPCLASS:
Observer.changingInstr(MI);

Expand Down
145 changes: 75 additions & 70 deletions llvm/lib/Target/AArch64/AArch64SchedA510.td

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.minScalar(0, s32)
.scalarize(0);

getActionDefinitionsBuilder(G_INTRINSIC_LRINT)
getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
.legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
.libcallFor({{s64, s128}})
.minScalarOrElt(1, MinFPScalar);
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -793,6 +793,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
case TargetOpcode::G_INTRINSIC_LRINT:
case TargetOpcode::G_INTRINSIC_LLRINT:
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
break;
OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16062,7 +16062,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
if (HasSystemScope)
return AtomicExpansionKind::CmpXChg;

if ((AS == AMDGPUAS::GLOBAL_ADDRESS ||
if ((AMDGPU::isExtendedGlobalAddrSpace(AS) ||
AS == AMDGPUAS::BUFFER_FAT_POINTER) &&
Ty->isFloatTy()) {
// global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx940, gfx11+.
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -3785,8 +3785,7 @@ class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>

def Callseq_Start :
NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
"\\{ // callseq $amt1, $amt2\n"
"\t.reg .b32 temp_param_reg;",
"\\{ // callseq $amt1, $amt2",
[(callseq_start timm:$amt1, timm:$amt2)]>;
def Callseq_End :
NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
Expand Down
7 changes: 3 additions & 4 deletions llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2927,11 +2927,10 @@ bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
}

bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
// Cannot use 32 bit constants to reference objects in kernel code model.
// Cannot use 32 bit constants to reference objects in large PIC mode since
// GOTOFF is 64 bits.
// Cannot use 32 bit constants to reference objects in kernel/large code
// model.
if (TM.getCodeModel() == CodeModel::Kernel ||
(TM.getCodeModel() == CodeModel::Large && TM.isPositionIndependent()))
TM.getCodeModel() == CodeModel::Large)
return false;

// In static codegen with small code model, we can get the address of a label
Expand Down
24 changes: 24 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18920,6 +18920,30 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("TLS not implemented for this target.");
}

bool X86TargetLowering::addressingModeSupportsTLS(const GlobalValue &GV) const {
if (Subtarget.is64Bit() && Subtarget.isTargetELF()) {
const TargetMachine &TM = getTargetMachine();
TLSModel::Model Model = TM.getTLSModel(&GV);
switch (Model) {
case TLSModel::LocalExec:
case TLSModel::InitialExec:
// We can include the %fs segment register in addressing modes.
return true;
case TLSModel::LocalDynamic:
case TLSModel::GeneralDynamic:
// These models do not result in %fs relative addresses unless
// TLS descriptior are used.
//
// Even in the case of TLS descriptors we currently have no way to model
// the difference between %fs access and the computations needed for the
// offset and returning `true` for TLS-desc currently duplicates both
// which is detrimental :-/
return false;
}
}
return false;
}

/// Lower SRA_PARTS and friends, which return two i32 values
/// and take a 2 x i32 value to shift plus a shift amount.
/// TODO: Can this be moved to general expansion code?
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1323,6 +1323,8 @@ namespace llvm {
Type *Ty, unsigned AS,
Instruction *I = nullptr) const override;

bool addressingModeSupportsTLS(const GlobalValue &GV) const override;

/// Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can
/// compare a register against the immediate without having to materialize
Expand Down
41 changes: 34 additions & 7 deletions llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3402,6 +3402,9 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::ROTR, MVT::v32i16, { 1, 1, 1, 1 } },
{ ISD::ROTR, MVT::v16i16, { 1, 1, 1, 1 } },
{ ISD::ROTR, MVT::v8i16, { 1, 1, 1, 1 } },
{ X86ISD::VROTLI, MVT::v32i16, { 1, 1, 1, 1 } },
{ X86ISD::VROTLI, MVT::v16i16, { 1, 1, 1, 1 } },
{ X86ISD::VROTLI, MVT::v8i16, { 1, 1, 1, 1 } },
};
static const CostKindTblEntry AVX512BITALGCostTbl[] = {
{ ISD::CTPOP, MVT::v32i16, { 1, 1, 1, 1 } },
Expand Down Expand Up @@ -3498,6 +3501,12 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::ROTR, MVT::v64i8, { 5, 6, 12, 14 } },
{ ISD::ROTR, MVT::v32i8, { 5, 14, 6, 9 } },
{ ISD::ROTR, MVT::v16i8, { 5, 14, 6, 9 } },
{ X86ISD::VROTLI, MVT::v32i16, { 2, 5, 3, 3 } },
{ X86ISD::VROTLI, MVT::v16i16, { 1, 5, 3, 3 } },
{ X86ISD::VROTLI, MVT::v8i16, { 1, 5, 3, 3 } },
{ X86ISD::VROTLI, MVT::v64i8, { 2, 9, 3, 4 } },
{ X86ISD::VROTLI, MVT::v32i8, { 1, 9, 3, 4 } },
{ X86ISD::VROTLI, MVT::v16i8, { 1, 8, 3, 4 } },
{ ISD::SADDSAT, MVT::v32i16, { 1 } },
{ ISD::SADDSAT, MVT::v64i8, { 1 } },
{ ISD::SMAX, MVT::v32i16, { 1, 1, 1, 1 } },
Expand Down Expand Up @@ -3556,6 +3565,12 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::ROTR, MVT::v16i32, { 1, 1, 1, 1 } },
{ ISD::ROTR, MVT::v8i32, { 1, 1, 1, 1 } },
{ ISD::ROTR, MVT::v4i32, { 1, 1, 1, 1 } },
{ X86ISD::VROTLI, MVT::v8i64, { 1, 1, 1, 1 } },
{ X86ISD::VROTLI, MVT::v4i64, { 1, 1, 1, 1 } },
{ X86ISD::VROTLI, MVT::v2i64, { 1, 1, 1, 1 } },
{ X86ISD::VROTLI, MVT::v16i32, { 1, 1, 1, 1 } },
{ X86ISD::VROTLI, MVT::v8i32, { 1, 1, 1, 1 } },
{ X86ISD::VROTLI, MVT::v4i32, { 1, 1, 1, 1 } },
{ ISD::SMAX, MVT::v8i64, { 1, 3, 1, 1 } },
{ ISD::SMAX, MVT::v16i32, { 1, 1, 1, 1 } },
{ ISD::SMAX, MVT::v32i16, { 3, 7, 5, 5 } },
Expand Down Expand Up @@ -3642,7 +3657,15 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::ROTR, MVT::v2i64, { 1, 3, 3, 3 } },
{ ISD::ROTR, MVT::v4i32, { 1, 3, 3, 3 } },
{ ISD::ROTR, MVT::v8i16, { 1, 3, 3, 3 } },
{ ISD::ROTR, MVT::v16i8, { 1, 3, 3, 3 } }
{ ISD::ROTR, MVT::v16i8, { 1, 3, 3, 3 } },
{ X86ISD::VROTLI, MVT::v4i64, { 4, 7, 5, 6 } },
{ X86ISD::VROTLI, MVT::v8i32, { 4, 7, 5, 6 } },
{ X86ISD::VROTLI, MVT::v16i16, { 4, 7, 5, 6 } },
{ X86ISD::VROTLI, MVT::v32i8, { 4, 7, 5, 6 } },
{ X86ISD::VROTLI, MVT::v2i64, { 1, 3, 1, 1 } },
{ X86ISD::VROTLI, MVT::v4i32, { 1, 3, 1, 1 } },
{ X86ISD::VROTLI, MVT::v8i16, { 1, 3, 1, 1 } },
{ X86ISD::VROTLI, MVT::v16i8, { 1, 3, 1, 1 } },
};
static const CostKindTblEntry AVX2CostTbl[] = {
{ ISD::ABS, MVT::v2i64, { 2, 4, 3, 5 } }, // VBLENDVPD(X,VPSUBQ(0,X),X)
Expand Down Expand Up @@ -4096,9 +4119,11 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
if (Args[0] == Args[1]) {
ISD = ISD::ROTL;
// Handle scalar constant rotation amounts.
// TODO: Handle vector + funnel-shift cases.
if (isa_and_nonnull<ConstantInt>(Args[2]))
// Handle uniform constant rotation amounts.
// TODO: Handle funnel-shift cases.
const APInt *Amt;
if (Args[2] &&
PatternMatch::match(Args[2], PatternMatch::m_APIntAllowUndef(Amt)))
ISD = X86ISD::VROTLI;
}
}
Expand All @@ -4109,10 +4134,12 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
if (!ICA.isTypeBasedOnly()) {
const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
if (Args[0] == Args[1]) {
// Handle scalar constant rotation amount.
// TODO: Handle vector + funnel-shift cases.
ISD = ISD::ROTR;
if (isa_and_nonnull<ConstantInt>(Args[2]))
// Handle uniform constant rotation amount.
// TODO: Handle funnel-shift cases.
const APInt *Amt;
if (Args[2] &&
PatternMatch::match(Args[2], PatternMatch::m_APIntAllowUndef(Amt)))
ISD = X86ISD::VROTLI;
}
}
Expand Down
29 changes: 12 additions & 17 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3012,20 +3012,15 @@ class BoUpSLP {
if (!isConstant(V)) {
auto *I = dyn_cast<CastInst>(V);
AllConstsOrCasts &= I && I->getType()->isIntegerTy();
ValueToGatherNodes.try_emplace(V).first->getSecond().insert(Last);
if (GatheredLoadsEntriesFirst == NoGatheredLoads ||
Last->Idx < GatheredLoadsEntriesFirst || UserTreeIdx.UserTE ||
S.getOpcode() != Instruction::Load)
ValueToGatherNodes.try_emplace(V).first->getSecond().insert(Last);
}
if (AllConstsOrCasts)
CastMaxMinBWSizes =
std::make_pair(std::numeric_limits<unsigned>::max(), 1);
MustGather.insert(VL.begin(), VL.end());
if (GatheredLoadsEntriesFirst == NoGatheredLoads ||
Last->Idx < GatheredLoadsEntriesFirst || UserTreeIdx.UserTE ||
S.getOpcode() != Instruction::Load) {
// Build a map for gathered scalars to the nodes where they are used.
for (Value *V : VL)
if (!isConstant(V))
ValueToGatherNodes.try_emplace(V).first->getSecond().insert(Last);
}
}

if (UserTreeIdx.UserTE) {
Expand Down Expand Up @@ -5618,8 +5613,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
if (CurrentEntry->getOpcode() == Instruction::Call) {
auto *CI = cast<CallInst>(CurrentEntry->getMainOp());
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
if (I >= CI->arg_size() ||
isVectorIntrinsicWithScalarOpAtArg(ID, I))
if (I >= CI->arg_size() || isVectorIntrinsicWithScalarOpAtArg(ID, I))
continue;
}
const TreeEntry *Op = getOperandEntry(CurrentEntry, I);
Expand Down Expand Up @@ -6066,9 +6060,10 @@ void BoUpSLP::tryToVectorizeGatheredLoads() {
return L1.second > L2.second;
};

auto GetVectorizedRanges = [this](ArrayRef<LoadInst *> Loads,
BoUpSLP::ValueSet &VectorizedLoads,
SmallVectorImpl<LoadInst *> &NonVectorized) {
auto GetVectorizedRanges = [this](
ArrayRef<LoadInst *> Loads,
BoUpSLP::ValueSet &VectorizedLoads,
SmallVectorImpl<LoadInst *> &NonVectorized) {
SmallVector<std::pair<ArrayRef<Value *>, LoadsState>> Results;
unsigned StartIdx = 0;
SmallVector<int> CandidateVFs;
Expand Down Expand Up @@ -13634,9 +13629,9 @@ Value *BoUpSLP::vectorizeTree(
// gathered loads.
for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
if (GatheredLoadsEntriesFirst != NoGatheredLoads &&
TE->Idx >= GatheredLoadsEntriesFirst &&
(TE->State != TreeEntry::NeedToGather ||
!TE->UserTreeIndices.empty())) {
TE->Idx >= GatheredLoadsEntriesFirst &&
(TE->State != TreeEntry::NeedToGather ||
!TE->UserTreeIndices.empty())) {
assert((!TE->UserTreeIndices.empty() ||
(TE->getOpcode() == Instruction::Load &&
TE->State != TreeEntry::NeedToGather)) &&
Expand Down
620 changes: 620 additions & 0 deletions llvm/test/Analysis/CostModel/AArch64/shuffle-load.ll

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions llvm/test/Analysis/CostModel/X86/fshl-codesize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2744,9 +2744,9 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
;
; AVX512BW-LABEL: 'splatconstant_rotate_i16'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'splatconstant_rotate_i16'
Expand Down Expand Up @@ -2829,9 +2829,9 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
;
; AVX512BW-LABEL: 'splatconstant_rotate_i8'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'splatconstant_rotate_i8'
Expand All @@ -2843,9 +2843,9 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
;
; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8'
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SLM-LABEL: 'splatconstant_rotate_i8'
Expand All @@ -2871,9 +2871,9 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
;
; AVX512GFNI-LABEL: 'splatconstant_rotate_i8'
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/Analysis/CostModel/X86/fshl-latency.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2696,9 +2696,9 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
;
; AVX512BW-LABEL: 'splatconstant_rotate_i16'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'splatconstant_rotate_i16'
Expand Down Expand Up @@ -2781,9 +2781,9 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
;
; AVX512BW-LABEL: 'splatconstant_rotate_i8'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'splatconstant_rotate_i8'
Expand All @@ -2795,9 +2795,9 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
;
; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8'
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SLM-LABEL: 'splatconstant_rotate_i8'
Expand All @@ -2823,9 +2823,9 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
;
; AVX512GFNI-LABEL: 'splatconstant_rotate_i8'
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/Analysis/CostModel/X86/fshl-sizelatency.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2984,9 +2984,9 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
;
; AVX512BW-LABEL: 'splatconstant_rotate_i16'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'splatconstant_rotate_i16'
Expand Down Expand Up @@ -3069,9 +3069,9 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
;
; AVX512BW-LABEL: 'splatconstant_rotate_i8'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'splatconstant_rotate_i8'
Expand All @@ -3083,9 +3083,9 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
;
; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8'
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SLM-LABEL: 'splatconstant_rotate_i8'
Expand All @@ -3111,9 +3111,9 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
;
; AVX512GFNI-LABEL: 'splatconstant_rotate_i8'
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
Expand Down
22 changes: 11 additions & 11 deletions llvm/test/Analysis/CostModel/X86/fshl.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2682,8 +2682,8 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
;
; AVX512BW-LABEL: 'splatconstant_rotate_i16'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
Expand Down Expand Up @@ -2767,9 +2767,9 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
;
; AVX512BW-LABEL: 'splatconstant_rotate_i8'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512DQ-LABEL: 'splatconstant_rotate_i8'
Expand All @@ -2781,9 +2781,9 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
;
; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8'
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SLM-LABEL: 'splatconstant_rotate_i8'
Expand All @@ -2809,9 +2809,9 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
;
; AVX512GFNI-LABEL: 'splatconstant_rotate_i8'
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
Expand Down
48 changes: 24 additions & 24 deletions llvm/test/Analysis/CostModel/X86/fshr-codesize.ll

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions llvm/test/Analysis/CostModel/X86/fshr-latency.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2696,9 +2696,9 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
;
; AVX512BW-LABEL: 'splatconstant_rotate_i16'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'splatconstant_rotate_i16'
Expand Down Expand Up @@ -2781,9 +2781,9 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
;
; AVX512BW-LABEL: 'splatconstant_rotate_i8'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'splatconstant_rotate_i8'
Expand All @@ -2795,9 +2795,9 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
;
; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8'
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SLM-LABEL: 'splatconstant_rotate_i8'
Expand All @@ -2823,9 +2823,9 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
;
; AVX512GFNI-LABEL: 'splatconstant_rotate_i8'
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
Expand Down
48 changes: 24 additions & 24 deletions llvm/test/Analysis/CostModel/X86/fshr-sizelatency.ll

Large diffs are not rendered by default.

22 changes: 11 additions & 11 deletions llvm/test/Analysis/CostModel/X86/fshr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2682,8 +2682,8 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
;
; AVX512BW-LABEL: 'splatconstant_rotate_i16'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
Expand Down Expand Up @@ -2767,9 +2767,9 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
;
; AVX512BW-LABEL: 'splatconstant_rotate_i8'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512DQ-LABEL: 'splatconstant_rotate_i8'
Expand All @@ -2781,9 +2781,9 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
;
; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8'
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SLM-LABEL: 'splatconstant_rotate_i8'
Expand All @@ -2809,9 +2809,9 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
;
; AVX512GFNI-LABEL: 'splatconstant_rotate_i8'
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
Expand Down
10 changes: 10 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1385,6 +1385,16 @@ define i32 @test_intrinsic_lrint(float %a) {
ret i32 %res
}

declare i32 @llvm.llrint.i32.f32(float)
define i32 @test_intrinsic_llrint(float %a) {
; CHECK-LABEL: name: test_intrinsic_llrint
; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $s0
; CHECK: [[RES:%[0-9]+]]:_(s32) = G_INTRINSIC_LLRINT [[A]]
; CHECK: $w0 = COPY [[RES]]
%res = call i32 @llvm.llrint.i32.f32(float %a)
ret i32 %res
}

declare i32 @llvm.ctlz.i32(i32, i1)
define i32 @test_ctlz_intrinsic_zero_not_undef(i32 %a) {
; CHECK-LABEL: name: test_ctlz_intrinsic_zero_not_undef
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,10 @@ define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_0]
; GISEL-NEXT: sub v0.8h, v0.8h, v1.8h
; GISEL-NEXT: usra v1.8h, v0.8h, #1
; GISEL-NEXT: ldr q0, [x8, :lo12:.LCPI3_0]
; GISEL-NEXT: neg v0.8h, v0.8h
; GISEL-NEXT: neg v0.8h, v2.8h
; GISEL-NEXT: ushl v0.8h, v1.8h, v0.8h
; GISEL-NEXT: ret
%1 = udiv <8 x i16> %x, <i16 7, i16 23, i16 25, i16 27, i16 31, i16 47, i16 63, i16 127>
Expand All @@ -170,13 +170,13 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
; GISEL-LABEL: combine_vec_udiv_nonuniform4:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, .LCPI4_2
; GISEL-NEXT: adrp x9, .LCPI4_0
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_2]
; GISEL-NEXT: adrp x8, .LCPI4_1
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_0]
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_1]
; GISEL-NEXT: adrp x8, .LCPI4_0
; GISEL-NEXT: umull2 v2.8h, v0.16b, v1.16b
; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b
; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI4_0]
; GISEL-NEXT: uzp2 v1.16b, v1.16b, v2.16b
; GISEL-NEXT: neg v2.16b, v3.16b
; GISEL-NEXT: shl v3.16b, v4.16b, #7
Expand Down
98 changes: 98 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-llrint.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -run-pass=legalizer %s -o - | FileCheck %s
---
name: testmsws
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$s0' }
body: |
bb.1:
liveins: $s0

; CHECK-LABEL: name: testmsws
; CHECK: liveins: $s0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: [[INTRINSIC_LLRINT:%[0-9]+]]:_(s64) = G_INTRINSIC_LLRINT [[COPY]](s32)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[INTRINSIC_LLRINT]](s64)
; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:_(s32) = COPY $s0
%1:_(s64) = G_INTRINSIC_LLRINT %0(s32)
%2:_(s32) = G_TRUNC %1(s64)
$w0 = COPY %2(s32)
RET_ReallyLR implicit $w0

...
---
name: testmsxs
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$s0' }
body: |
bb.1:
liveins: $s0

; CHECK-LABEL: name: testmsxs
; CHECK: liveins: $s0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: [[INTRINSIC_LLRINT:%[0-9]+]]:_(s64) = G_INTRINSIC_LLRINT [[COPY]](s32)
; CHECK-NEXT: $x0 = COPY [[INTRINSIC_LLRINT]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s32) = COPY $s0
%1:_(s64) = G_INTRINSIC_LLRINT %0(s32)
$x0 = COPY %1(s64)
RET_ReallyLR implicit $x0

...
---
name: testmswd
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$d0' }
body: |
bb.1:
liveins: $d0

; CHECK-LABEL: name: testmswd
; CHECK: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
; CHECK-NEXT: [[INTRINSIC_LLRINT:%[0-9]+]]:_(s64) = G_INTRINSIC_LLRINT [[COPY]](s64)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[INTRINSIC_LLRINT]](s64)
; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:_(s64) = COPY $d0
%1:_(s64) = G_INTRINSIC_LLRINT %0(s64)
%2:_(s32) = G_TRUNC %1(s64)
$w0 = COPY %2(s32)
RET_ReallyLR implicit $w0

...
---
name: testmsxd
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$d0' }
body: |
bb.1:
liveins: $d0

; CHECK-LABEL: name: testmsxd
; CHECK: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
; CHECK-NEXT: [[INTRINSIC_LLRINT:%[0-9]+]]:_(s64) = G_INTRINSIC_LLRINT [[COPY]](s64)
; CHECK-NEXT: $x0 = COPY [[INTRINSIC_LLRINT]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $d0
%1:_(s64) = G_INTRINSIC_LLRINT %0(s64)
$x0 = COPY %1(s64)
RET_ReallyLR implicit $x0

...
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,10 @@
# DEBUG-NEXT: G_INTRINSIC_LRINT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_INTRINSIC_LLRINT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_INTRINSIC_ROUNDEVEN (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AArch64/aarch64-addv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,12 @@ define i32 @oversized_ADDV_256(ptr noalias nocapture readonly %arg1, ptr noalias
; GISEL-NEXT: ushll v2.8h, v2.8b, #0
; GISEL-NEXT: usubl v3.4s, v1.4h, v2.4h
; GISEL-NEXT: usubl2 v1.4s, v1.8h, v2.8h
; GISEL-NEXT: neg v2.4s, v3.4s
; GISEL-NEXT: neg v4.4s, v1.4s
; GISEL-NEXT: cmgt v5.4s, v0.4s, v3.4s
; GISEL-NEXT: cmgt v2.4s, v0.4s, v3.4s
; GISEL-NEXT: cmgt v0.4s, v0.4s, v1.4s
; GISEL-NEXT: bif v2.16b, v3.16b, v5.16b
; GISEL-NEXT: bsl v0.16b, v4.16b, v1.16b
; GISEL-NEXT: neg v4.4s, v3.4s
; GISEL-NEXT: neg v5.4s, v1.4s
; GISEL-NEXT: bsl v2.16b, v4.16b, v3.16b
; GISEL-NEXT: bsl v0.16b, v5.16b, v1.16b
; GISEL-NEXT: add v0.4s, v2.4s, v0.4s
; GISEL-NEXT: addv s0, v0.4s
; GISEL-NEXT: fmov w0, s0
Expand Down
40 changes: 20 additions & 20 deletions llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
define <vscale x 2 x i16> @dupsext_v2i8_v2i16(i8 %src, <vscale x 2 x i16> %b) {
; CHECK-LABEL: dupsext_v2i8_v2i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
Expand All @@ -20,8 +20,8 @@ entry:
define <vscale x 4 x i16> @dupsext_v4i8_v4i16(i8 %src, <vscale x 4 x i16> %b) {
; CHECK-LABEL: dupsext_v4i8_v4i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
Expand All @@ -36,8 +36,8 @@ entry:
define <vscale x 8 x i16> @dupsext_v8i8_v8i16(i8 %src, <vscale x 8 x i16> %b) {
; CHECK-LABEL: dupsext_v8i8_v8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
Expand All @@ -52,8 +52,8 @@ entry:
define <vscale x 2 x i32> @dupsext_v2i8_v2i32(i8 %src, <vscale x 2 x i32> %b) {
; CHECK-LABEL: dupsext_v2i8_v2i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
Expand All @@ -68,8 +68,8 @@ entry:
define <vscale x 4 x i32> @dupsext_v4i8_v4i32(i8 %src, <vscale x 4 x i32> %b) {
; CHECK-LABEL: dupsext_v4i8_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
Expand All @@ -84,9 +84,9 @@ entry:
define <vscale x 2 x i64> @dupsext_v2i8_v2i64(i8 %src, <vscale x 2 x i64> %b) {
; CHECK-LABEL: dupsext_v2i8_v2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtb x8, w0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
Expand All @@ -101,8 +101,8 @@ entry:
define <vscale x 2 x i32> @dupsext_v2i16_v2i32(i16 %src, <vscale x 2 x i32> %b) {
; CHECK-LABEL: dupsext_v2i16_v2i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
Expand All @@ -117,8 +117,8 @@ entry:
define <vscale x 4 x i32> @dupsext_v4i16_v4i32(i16 %src, <vscale x 4 x i32> %b) {
; CHECK-LABEL: dupsext_v4i16_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
Expand All @@ -133,9 +133,9 @@ entry:
define <vscale x 2 x i64> @dupsext_v2i16_v2i64(i16 %src, <vscale x 2 x i64> %b) {
; CHECK-LABEL: dupsext_v2i16_v2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxth x8, w0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
Expand All @@ -150,9 +150,9 @@ entry:
define <vscale x 2 x i64> @dupsext_v2i32_v2i64(i32 %src, <vscale x 2 x i64> %b) {
; CHECK-LABEL: dupsext_v2i32_v2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
Expand All @@ -167,8 +167,8 @@ entry:
define <vscale x 2 x i16> @dupzext_v2i8_v2i16(i8 %src, <vscale x 2 x i16> %b) {
; CHECK-LABEL: dupzext_v2i8_v2i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
Expand All @@ -183,8 +183,8 @@ entry:
define <vscale x 4 x i16> @dupzext_v4i8_v4i16(i8 %src, <vscale x 4 x i16> %b) {
; CHECK-LABEL: dupzext_v4i8_v4i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
Expand All @@ -199,8 +199,8 @@ entry:
define <vscale x 8 x i16> @dupzext_v8i8_v8i16(i8 %src, <vscale x 8 x i16> %b) {
; CHECK-LABEL: dupzext_v8i8_v8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
Expand All @@ -215,8 +215,8 @@ entry:
define <vscale x 2 x i32> @dupzext_v2i8_v2i32(i8 %src, <vscale x 2 x i32> %b) {
; CHECK-LABEL: dupzext_v2i8_v2i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
Expand All @@ -231,8 +231,8 @@ entry:
define <vscale x 4 x i32> @dupzext_v4i8_v4i32(i8 %src, <vscale x 4 x i32> %b) {
; CHECK-LABEL: dupzext_v4i8_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
Expand All @@ -247,9 +247,9 @@ entry:
define <vscale x 2 x i64> @dupzext_v2i8_v2i64(i8 %src, <vscale x 2 x i64> %b) {
; CHECK-LABEL: dupzext_v2i8_v2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: and x8, x0, #0xff
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
Expand All @@ -264,8 +264,8 @@ entry:
define <vscale x 2 x i32> @dupzext_v2i16_v2i32(i16 %src, <vscale x 2 x i32> %b) {
; CHECK-LABEL: dupzext_v2i16_v2i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
Expand All @@ -280,8 +280,8 @@ entry:
define <vscale x 4 x i32> @dupzext_v4i16_v4i32(i16 %src, <vscale x 4 x i32> %b) {
; CHECK-LABEL: dupzext_v4i16_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
Expand All @@ -296,9 +296,9 @@ entry:
define <vscale x 2 x i64> @dupzext_v2i16_v2i64(i16 %src, <vscale x 2 x i64> %b) {
; CHECK-LABEL: dupzext_v2i16_v2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: and x8, x0, #0xffff
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
Expand All @@ -313,8 +313,8 @@ entry:
define <vscale x 2 x i64> @dupzext_v2i32_v2i64(i32 %src, <vscale x 2 x i64> %b) {
; CHECK-LABEL: dupzext_v2i32_v2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/aarch64-smull.ll
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,8 @@ define <2 x i64> @smull_zext_v2i32_v2i64(ptr %A, ptr %B) nounwind {
; CHECK-SVE-LABEL: smull_zext_v2i32_v2i64:
; CHECK-SVE: // %bb.0:
; CHECK-SVE-NEXT: ldrh w8, [x0]
; CHECK-SVE-NEXT: ptrue p0.d, vl2
; CHECK-SVE-NEXT: ldrh w9, [x0, #2]
; CHECK-SVE-NEXT: ptrue p0.d, vl2
; CHECK-SVE-NEXT: ldr d0, [x1]
; CHECK-SVE-NEXT: fmov d1, x8
; CHECK-SVE-NEXT: sshll v0.2d, v0.2s, #0
Expand Down
Loading