8 changes: 4 additions & 4 deletions clang/test/CodeGen/ext-int-cc.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
// RUN: %clang_cc1 -no-enable-noundef-analysis -triple systemz -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=SYSTEMZ
// RUN: %clang_cc1 -no-enable-noundef-analysis -triple ppc64 -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=PPC64
// RUN: %clang_cc1 -no-enable-noundef-analysis -triple ppc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=PPC32
// RUN: %clang_cc1 -no-enable-noundef-analysis -triple aarch64 -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=AARCH64
// RUN: %clang_cc1 -no-enable-noundef-analysis -triple aarch64 -O3 -disable-llvm-passes -fexperimental-max-bitint-width=1024 -emit-llvm -o - %s | FileCheck %s --check-prefixes=AARCH64
// RUN: %clang_cc1 -no-enable-noundef-analysis -triple aarch64 -target-abi darwinpcs -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=AARCH64DARWIN
// RUN: %clang_cc1 -no-enable-noundef-analysis -triple arm64_32-apple-ios -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=AARCH64
// RUN: %clang_cc1 -no-enable-noundef-analysis -triple arm64_32-apple-ios -O3 -disable-llvm-passes -fexperimental-max-bitint-width=1024 -emit-llvm -o - %s | FileCheck %s --check-prefixes=AARCH64
// RUN: %clang_cc1 -no-enable-noundef-analysis -triple arm64_32-apple-ios -target-abi darwinpcs -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=AARCH64DARWIN
// RUN: %clang_cc1 -no-enable-noundef-analysis -triple arm -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ARM
// RUN: %clang_cc1 -no-enable-noundef-analysis -triple loongarch64 -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=LA64
Expand Down Expand Up @@ -135,6 +135,7 @@ void ParamPassing4(_BitInt(129) a) {}
// WIN64: define dso_local void @ParamPassing4(ptr %{{.+}})
// LIN32: define{{.*}} void @ParamPassing4(ptr %{{.+}})
// WIN32: define dso_local void @ParamPassing4(ptr %{{.+}})
// AARCH64: define{{.*}} void @ParamPassing4(ptr %{{.+}})
// NACL-NOT: define{{.*}} void @ParamPassing4(ptr byval(i129) align 8 %{{.+}})
// NVPTX64-NOT: define{{.*}} void @ParamPassing4(ptr byval(i129) align 8 %{{.+}})
// NVPTX-NOT: define{{.*}} void @ParamPassing4(ptr byval(i129) align 8 %{{.+}})
Expand All @@ -155,7 +156,6 @@ void ParamPassing4(_BitInt(129) a) {}
// SYSTEMZ-NOT: define{{.*}} void @ParamPassing4(ptr %{{.+}})
// PPC64-NOT: define{{.*}} void @ParamPassing4(ptr byval(i129) align 8 %{{.+}})
// PPC32-NOT: define{{.*}} void @ParamPassing4(ptr byval(i129) align 8 %{{.+}})
// AARCH64-NOT: define{{.*}} void @ParamPassing4(ptr byval(i129) align 8 %{{.+}})
// AARCH64DARWIN-NOT: define{{.*}} void @ParamPassing4(ptr byval(i129) align 8 %{{.+}})
// ARM-NOT: define{{.*}} arm_aapcscc void @ParamPassing4(ptr byval(i129) align 8 %{{.+}})
// LA64-NOT: define{{.*}} void @ParamPassing4(ptr %{{.+}})
Expand Down Expand Up @@ -294,6 +294,7 @@ _BitInt(129) ReturnPassing5(void){}
// WIN64: define dso_local void @ReturnPassing5(ptr dead_on_unwind noalias writable sret
// LIN32: define{{.*}} void @ReturnPassing5(ptr dead_on_unwind noalias writable sret
// WIN32: define dso_local void @ReturnPassing5(ptr dead_on_unwind noalias writable sret
// AARCH64: define{{.*}} void @ReturnPassing5(ptr dead_on_unwind noalias writable sret
// NACL-NOT: define{{.*}} void @ReturnPassing5(ptr dead_on_unwind noalias writable sret
// NVPTX64-NOT: define{{.*}} i129 @ReturnPassing5(
// NVPTX-NOT: define{{.*}} i129 @ReturnPassing5(
Expand All @@ -314,7 +315,6 @@ _BitInt(129) ReturnPassing5(void){}
// SYSTEMZ-NOT: define{{.*}} void @ReturnPassing5(ptr dead_on_unwind noalias writable sret
// PPC64-NOT: define{{.*}} void @ReturnPassing5(ptr dead_on_unwind noalias writable sret
// PPC32-NOT: define{{.*}} void @ReturnPassing5(ptr dead_on_unwind noalias writable sret
// AARCH64-NOT: define{{.*}} void @ReturnPassing5(ptr dead_on_unwind noalias writable sret
// AARCH64DARWIN-NOT: define{{.*}} void @ReturnPassing5(ptr dead_on_unwind noalias writable sret
// ARM-NOT: define{{.*}} arm_aapcscc void @ReturnPassing5(ptr dead_on_unwind noalias writable sret
// LA64-NOT: define{{.*}} void @ReturnPassing5(ptr dead_on_unwind noalias writable sret
Expand Down
6 changes: 5 additions & 1 deletion flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@ static bool isDummyArgument(mlir::Value v) {
if (!blockArg)
return false;

return blockArg.getOwner()->isEntryBlock();
mlir::Block *owner = blockArg.getOwner();
if (!owner->isEntryBlock() ||
!mlir::isa<mlir::FunctionOpInterface>(owner->getParentOp()))
return false;
return true;
}

/// Temporary function to skip through all the no op operations
Expand Down
1 change: 1 addition & 0 deletions flang/lib/Optimizer/Transforms/AddAliasTags.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ static std::string getFuncArgName(mlir::Value arg) {
"arg is a function argument");
mlir::FunctionOpInterface func = mlir::dyn_cast<mlir::FunctionOpInterface>(
blockArg.getOwner()->getParentOp());
assert(func && "This is not a function argument");
mlir::StringAttr attr = func.getArgAttrOfType<mlir::StringAttr>(
blockArg.getArgNumber(), "fir.bindc_name");
if (!attr)
Expand Down
37 changes: 37 additions & 0 deletions flang/test/Transforms/tbaa.fir
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,40 @@
// CHECK: fir.store %[[VAL_8]] to %[[VAL_12]] : !fir.ref<i32>
// CHECK: return
// CHECK: }

// -----

// Make sure we don't mistake other block arguments for dummy arguments:

omp.declare_reduction @add_reduction_i32 : i32 init {
^bb0(%arg0: i32):
%c0_i32 = arith.constant 0 : i32
omp.yield(%c0_i32 : i32)
} combiner {
^bb0(%arg0: i32, %arg1: i32):
%0 = arith.addi %arg0, %arg1 : i32
omp.yield(%0 : i32)
}

func.func @_QQmain() attributes {fir.bindc_name = "reduce"} {
%c10_i32 = arith.constant 10 : i32
%c6_i32 = arith.constant 6 : i32
%c-1_i32 = arith.constant -1 : i32
%0 = fir.address_of(@_QFEi) : !fir.ref<i32>
%1 = fir.declare %0 {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
omp.parallel reduction(@add_reduction_i32 %1 -> %arg0 : !fir.ref<i32>) {
// CHECK: omp.parallel reduction({{.*}}) {
%8 = fir.declare %arg0 {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
// CHECK-NEXT: %[[DECL:.*]] = fir.declare
fir.store %c-1_i32 to %8 : !fir.ref<i32>
// CHECK-NOT: fir.store %{{.*}} to %[[DECL]] {tbaa = %{{.*}}} : !fir.ref<i32>
// CHECK: fir.store %{{.*}} to %[[DECL]] : !fir.ref<i32>
omp.terminator
}
return
}

fir.global internal @_QFEi : i32 {
%c0_i32 = arith.constant 0 : i32
fir.has_value %c0_i32 : i32
}
7 changes: 6 additions & 1 deletion lldb/source/Target/UnwindLLDB.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,12 @@ UnwindLLDB::CursorSP UnwindLLDB::GetOneMoreFrame(ABI *abi) {
cur_idx < 100 ? cur_idx : 100, "", cur_idx);
return nullptr;
}
if (abi && !abi->CodeAddressIsValid(cursor_sp->start_pc)) {

// Invalid code addresses should not appear on the stack *unless* we're
// directly below a trap handler frame (in this case, the invalid address is
// likely the cause of the trap).
if (abi && !abi->CodeAddressIsValid(cursor_sp->start_pc) &&
!prev_frame->reg_ctx_lldb_sp->IsTrapHandlerFrame()) {
// If the RegisterContextUnwind has a fallback UnwindPlan, it will switch to
// that and return true. Subsequent calls to TryFallbackUnwindPlan() will
// return false.
Expand Down
21 changes: 21 additions & 0 deletions lldb/test/Shell/Unwind/Inputs/unaligned-pc-sigbus.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include <signal.h>
#include <stdint.h>
#include <unistd.h>

// SIGBUS handler installed by main(): ends the process immediately by calling
// _exit with status 47. The signal number argument is not used.
void sigbus_handler(int signo) { _exit(47); }

// Function whose address main() modifies before calling through it.
// Simply returns 47.
int target_function() { return 47; }

// Installs sigbus_handler for SIGBUS, then calls target_function through a
// function pointer whose lowest address bit has been set.
int main() {
signal(SIGBUS, sigbus_handler);

// Generate a SIGBUS by deliberately calling through an unaligned function
// pointer.
// The union exposes the same storage as a function pointer (t) and as an
// integer (p), so the address can be modified with integer operations.
union {
int (*t)();
uintptr_t p;
} u;
u.t = target_function;
// Set the lowest bit of the address to produce the unaligned call target.
u.p |= 1;
return u.t();
}
31 changes: 31 additions & 0 deletions lldb/test/Shell/Unwind/unaligned-pc-sigbus.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# REQUIRES: target-aarch64 && native
# UNSUPPORTED: system-windows
# llvm.org/pr91610, rdar://128031075
# XFAIL: system-darwin

# RUN: %clang_host %S/Inputs/unaligned-pc-sigbus.c -o %t
# RUN: %lldb -s %s -o exit %t | FileCheck %s

# Convert EXC_BAD_ACCESS into SIGBUS on darwin.
settings set platform.plugin.darwin.ignored-exceptions EXC_BAD_ACCESS

breakpoint set -n sigbus_handler
# CHECK: Breakpoint 1: where = {{.*}}`sigbus_handler

run
# CHECK: thread #1, {{.*}} stop reason = signal SIGBUS

thread backtrace
# CHECK: (lldb) thread backtrace
# CHECK: frame #0: [[TARGET:0x[0-9a-fA-F]*]] {{.*}}`target_function

continue
# CHECK: thread #1, {{.*}} stop reason = breakpoint 1


thread backtrace
# CHECK: (lldb) thread backtrace
# CHECK: frame #0: {{.*}}`sigbus_handler
# Unknown number of signal trampoline frames
# CHECK: frame #{{[0-9]+}}: [[TARGET]] {{.*}}`target_function

71 changes: 32 additions & 39 deletions llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1161,52 +1161,45 @@ void AArch64AsmPrinter::emitFunctionEntryLabel() {
TS->emitDirectiveVariantPCS(CurrentFnSym);
}

AsmPrinter::emitFunctionEntryLabel();

if (TM.getTargetTriple().isWindowsArm64EC() &&
!MF->getFunction().hasLocalLinkage()) {
// For ARM64EC targets, a function definition's name is mangled differently
// from the normal symbol. We emit the alias from the unmangled symbol to
// mangled symbol name here.
if (MDNode *Unmangled =
MF->getFunction().getMetadata("arm64ec_unmangled_name")) {
AsmPrinter::emitFunctionEntryLabel();

if (MDNode *ECMangled =
MF->getFunction().getMetadata("arm64ec_ecmangled_name")) {
StringRef UnmangledStr =
cast<MDString>(Unmangled->getOperand(0))->getString();
MCSymbol *UnmangledSym =
MMI->getContext().getOrCreateSymbol(UnmangledStr);
StringRef ECMangledStr =
cast<MDString>(ECMangled->getOperand(0))->getString();
MCSymbol *ECMangledSym =
MMI->getContext().getOrCreateSymbol(ECMangledStr);
OutStreamer->emitSymbolAttribute(UnmangledSym, MCSA_WeakAntiDep);
OutStreamer->emitAssignment(
UnmangledSym,
MCSymbolRefExpr::create(ECMangledSym, MCSymbolRefExpr::VK_WEAKREF,
MMI->getContext()));
OutStreamer->emitSymbolAttribute(ECMangledSym, MCSA_WeakAntiDep);
OutStreamer->emitAssignment(
ECMangledSym,
MCSymbolRefExpr::create(CurrentFnSym, MCSymbolRefExpr::VK_WEAKREF,
MMI->getContext()));
return;
// from the normal symbol, emit required aliases here.
auto emitFunctionAlias = [&](MCSymbol *Src, MCSymbol *Dst) {
OutStreamer->emitSymbolAttribute(Src, MCSA_WeakAntiDep);
OutStreamer->emitAssignment(
Src, MCSymbolRefExpr::create(Dst, MCSymbolRefExpr::VK_WEAKREF,
MMI->getContext()));
};

auto getSymbolFromMetadata = [&](StringRef Name) {
MCSymbol *Sym = nullptr;
if (MDNode *Node = MF->getFunction().getMetadata(Name)) {
StringRef NameStr = cast<MDString>(Node->getOperand(0))->getString();
Sym = MMI->getContext().getOrCreateSymbol(NameStr);
}
return Sym;
};

if (MCSymbol *UnmangledSym =
getSymbolFromMetadata("arm64ec_unmangled_name")) {
MCSymbol *ECMangledSym = getSymbolFromMetadata("arm64ec_ecmangled_name");

if (ECMangledSym) {
// An external function, emit the alias from the unmangled symbol to
// mangled symbol name and the alias from the mangled symbol to guest
// exit thunk.
emitFunctionAlias(UnmangledSym, ECMangledSym);
emitFunctionAlias(ECMangledSym, CurrentFnSym);
} else {
StringRef UnmangledStr =
cast<MDString>(Unmangled->getOperand(0))->getString();
MCSymbol *UnmangledSym =
MMI->getContext().getOrCreateSymbol(UnmangledStr);
OutStreamer->emitSymbolAttribute(UnmangledSym, MCSA_WeakAntiDep);
OutStreamer->emitAssignment(
UnmangledSym,
MCSymbolRefExpr::create(CurrentFnSym, MCSymbolRefExpr::VK_WEAKREF,
MMI->getContext()));
return;
// A function implementation, emit the alias from the unmangled symbol
// to mangled symbol name.
emitFunctionAlias(UnmangledSym, CurrentFnSym);
}
}
}

return AsmPrinter::emitFunctionEntryLabel();
}

/// Small jump tables contain an unsigned byte or half, representing the offset
Expand Down
35 changes: 18 additions & 17 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1506,24 +1506,25 @@ void VPBlendRecipe::execute(VPTransformState &State) {
// Note that Mask0 is never used: lanes for which no path reaches this phi and
// are essentially undef are taken from In0.
VectorParts Entry(State.UF);
for (unsigned In = 0; In < NumIncoming; ++In) {
for (unsigned Part = 0; Part < State.UF; ++Part) {
// We might have single edge PHIs (blocks) - use an identity
// 'select' for the first PHI operand.
Value *In0 = State.get(getIncomingValue(In), Part);
if (In == 0)
Entry[Part] = In0; // Initialize with the first incoming value.
else {
// Select between the current value and the previous incoming edge
// based on the incoming mask.
Value *Cond = State.get(getMask(In), Part);
Entry[Part] =
State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
}
}
}
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
for (unsigned In = 0; In < NumIncoming; ++In) {
for (unsigned Part = 0; Part < State.UF; ++Part) {
// We might have single edge PHIs (blocks) - use an identity
// 'select' for the first PHI operand.
Value *In0 = State.get(getIncomingValue(In), Part, OnlyFirstLaneUsed);
if (In == 0)
Entry[Part] = In0; // Initialize with the first incoming value.
else {
// Select between the current value and the previous incoming edge
// based on the incoming mask.
Value *Cond = State.get(getMask(In), Part, OnlyFirstLaneUsed);
Entry[Part] =
State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
}
}
}
for (unsigned Part = 0; Part < State.UF; ++Part)
State.set(this, Entry[Part], Part);
State.set(this, Entry[Part], Part, OnlyFirstLaneUsed);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = icmp ule <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp ule <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
; CHECK-NEXT: [[TMP15:%.*]] = select <vscale x 8 x i1> [[TMP13]], <vscale x 8 x i1> [[TMP14]], <vscale x 8 x i1> zeroinitializer
Expand All @@ -52,9 +50,9 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64
; CHECK-NEXT: [[TMP20:%.*]] = xor <vscale x 8 x i1> [[TMP14]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP21:%.*]] = select <vscale x 8 x i1> [[TMP13]], <vscale x 8 x i1> [[TMP20]], <vscale x 8 x i1> zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = or <vscale x 8 x i1> [[TMP19]], [[TMP21]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 8 x i1> [[TMP19]], <vscale x 8 x i64> [[BROADCAST_SPLAT6]], <vscale x 8 x i64> shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 poison, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <vscale x 8 x i64> [[PREDPHI]], i32 0
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[TMP23]]
; CHECK-NEXT: [[EXT:%.+]] = extractelement <vscale x 8 x i1> [[TMP19]], i32 0
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[EXT]], i64 [[TMP12]], i64 poison
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[PREDPHI]]
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i16, ptr [[TMP24]], i32 0
; CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> zeroinitializer, ptr [[TMP25]], i32 2, <vscale x 8 x i1> [[TMP22]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
Expand Down
20 changes: 4 additions & 16 deletions llvm/test/Transforms/LoopVectorize/uniform-blend.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,12 @@

define void @blend_uniform_iv_trunc(i1 %c) {
; CHECK-LABEL: @blend_uniform_iv_trunc(
; CHECK: vector.ph:
; CHECK-NEXT: [[MASK0:%.*]] = insertelement <4 x i1> poison, i1 %c, i64 0
; CHECK-NEXT: [[MASK1:%.*]] = shufflevector <4 x i1> [[MASK0]], <4 x i1> poison, <4 x i32> zeroinitializer

; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDEX]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[TMP1]], 0
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT1]], <4 x i16> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[MASK1]], <4 x i16> [[BROADCAST_SPLAT2]], <4 x i16> undef
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i16> [[PREDPHI]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i16 [[TMP4]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 %c, i16 [[TMP2]], i16 undef
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i16 [[PREDPHI]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP6]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
Expand Down Expand Up @@ -49,17 +42,12 @@ exit: ; preds = %loop.latch
define void @blend_uniform_iv(i1 %c) {
; CHECK-LABEL: @blend_uniform_iv(
; CHECK: vector.ph:
; CHECK-NEXT: [[MASK0:%.*]] = insertelement <4 x i1> poison, i1 %c, i64 0
; CHECK-NEXT: [[MASK1:%.*]] = shufflevector <4 x i1> [[MASK0]], <4 x i1> poison, <4 x i32> zeroinitializer

; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[MASK1]], <4 x i64> [[BROADCAST_SPLAT2]], <4 x i64> undef
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP2]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 %c, i64 [[TMP0]], i64 undef
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[PREDPHI]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 0
; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP4]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ define void @test_not_first_lane_only_constant(ptr %A, ptr noalias %B) {
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[A]], i16 [[TMP0]]
; CHECK-NEXT: [[TMP13:%.*]] = load i16, ptr %B, align 2
; CHECK-NEXT: [[TMP13:%.*]] = load i16, ptr [[B]], align 2
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i16> poison, i16 [[TMP13]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT5]], <4 x i16> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
Expand Down Expand Up @@ -86,8 +86,6 @@ define void @test_not_first_lane_only_wide_compare(ptr %A, ptr noalias %B, i16 %
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x ptr> poison, ptr [[B]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT3]], <4 x ptr> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -97,8 +95,8 @@ define void @test_not_first_lane_only_wide_compare(ptr %A, ptr noalias %B, i16 %
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult <4 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT2]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x ptr> poison, <4 x ptr> [[BROADCAST_SPLAT4]]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x ptr> [[PREDPHI]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP4]], ptr poison, ptr [[B]]
; CHECK-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 2
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i16> poison, i16 [[TMP13]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT5]], <4 x i16> poison, <4 x i32> zeroinitializer
Expand Down Expand Up @@ -162,6 +160,101 @@ loop.latch:
%c.2 = icmp eq i16 %iv.next, 1000
br i1 %c.2, label %exit, label %loop.header

exit:
ret void
}

define void @test_not_first_lane_only_wide_compare_incoming_order_swapped(ptr %A, ptr noalias %B, i16 %x, i16 %y) {
; CHECK-LABEL: define void @test_not_first_lane_only_wide_compare_incoming_order_swapped(
; CHECK-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i16> poison, i16 [[Y]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT1]], <4 x i16> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[A]], i16 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult <4 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT2]]
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP11]], ptr [[B]], ptr poison
; CHECK-NEXT: [[TMP12:%.*]] = load i16, ptr [[PREDPHI]], align 2
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP12]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT3]], <4 x i16> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: store <4 x i16> [[BROADCAST_SPLAT4]], ptr [[TMP2]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: loop.header:
; CHECK-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i16, ptr [[A]], i16 [[IV]]
; CHECK-NEXT: [[L_0:%.*]] = load i16, ptr [[GEP_A]], align 2
; CHECK-NEXT: [[C_0:%.*]] = icmp ult i16 [[L_0]], [[X]]
; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[ELSE_1:%.*]]
; CHECK: else.1:
; CHECK-NEXT: [[C_1:%.*]] = icmp ult i16 [[L_0]], [[Y]]
; CHECK-NEXT: br i1 [[C_1]], label [[THEN_2:%.*]], label [[ELSE_2:%.*]]
; CHECK: then.2:
; CHECK-NEXT: br label [[ELSE_2]]
; CHECK: else.2:
; CHECK-NEXT: br label [[LOOP_LATCH]]
; CHECK: loop.latch:
; CHECK-NEXT: [[MERGE:%.*]] = phi ptr [ poison, [[LOOP_HEADER]] ], [ [[B]], [[ELSE_2]] ]
; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[MERGE]], align 2
; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; CHECK-NEXT: store i16 [[L]], ptr [[GEP_A]], align 2
; CHECK-NEXT: [[C_2:%.*]] = icmp eq i16 [[IV_NEXT]], 1000
; CHECK-NEXT: br i1 [[C_2]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop.header

loop.header:
%iv = phi i16 [ 0, %entry ], [ %iv.next, %loop.latch ]
%gep.A = getelementptr inbounds i16, ptr %A, i16 %iv
%l.0 = load i16, ptr %gep.A
%c.0 = icmp ult i16 %l.0, %x
br i1 %c.0, label %loop.latch, label %else.1

else.1:
%c.1 = icmp ult i16 %l.0, %y
br i1 %c.1, label %then.2, label %else.2

then.2:
br label %else.2

else.2:
br label %loop.latch

loop.latch:
%merge = phi ptr [ poison, %loop.header ], [ %B, %else.2 ]
%l = load i16, ptr %merge, align 2
%iv.next = add i16 %iv, 1
store i16 %l, ptr %gep.A
%c.2 = icmp eq i16 %iv.next, 1000
br i1 %c.2, label %exit, label %loop.header

exit:
ret void
}
Expand All @@ -172,4 +265,6 @@ exit:
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
;.