Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
"""
Make sure that the concurrent vfork() from multiple threads works correctly.
"""

import lldb
import lldbsuite.test.lldbutil as lldbutil
from lldbsuite.test.lldbtest import *
from lldbsuite.test.decorators import *


class TestConcurrentVFork(TestBase):
    """Test that concurrent vfork()/fork() from multiple threads is handled
    correctly under both follow-fork-mode settings."""

    NO_DEBUG_INFO_TESTCASE = True

    def build_run_to_breakpoint(self, use_fork, call_exec):
        """Build the inferior, launch it with the requested flags, and run to
        the source breakpoint in main.cpp.

        use_fork:  launch with --fork (use fork() instead of vfork()).
        call_exec: launch with --exec (children exec this program again).
        Returns the (target, process, thread, breakpoint) tuple from
        lldbutil.run_to_source_breakpoint.
        """
        self.build()

        # Translate the two booleans into the inferior's command-line flags.
        args = [
            flag
            for flag, enabled in (("--fork", use_fork), ("--exec", call_exec))
            if enabled
        ]
        launch_info = lldb.SBLaunchInfo(args)
        launch_info.SetWorkingDirectory(self.getBuildDir())

        # BUG FIX: launch_info was previously constructed but never used, so
        # the --fork/--exec flags never reached the inferior.  Pass it through
        # so the launch actually honors the requested arguments.
        return lldbutil.run_to_source_breakpoint(
            self, "// break here", lldb.SBFileSpec("main.cpp"), launch_info
        )

    def follow_parent_helper(self, use_fork, call_exec):
        """Continue with follow-fork-mode=parent and expect the parent to
        detach every child and exit cleanly."""
        (target, process, thread, bkpt) = self.build_run_to_breakpoint(
            use_fork, call_exec
        )

        parent_pid = target.FindFirstGlobalVariable("g_pid").GetValueAsUnsigned()
        self.runCmd("settings set target.process.follow-fork-mode parent")
        self.runCmd("settings set target.process.stop-on-exec False", check=False)
        self.expect(
            "continue", substrs=[f"Process {parent_pid} exited with status = 0"]
        )

    def follow_child_helper(self, use_fork, call_exec):
        """Continue with follow-fork-mode=child and expect one child to exit
        with its distinguishing exit code."""
        self.build_run_to_breakpoint(use_fork, call_exec)

        self.runCmd("settings set target.process.follow-fork-mode child")
        self.runCmd("settings set target.process.stop-on-exec False", check=False)
        # Child process exits with code "index + 10" since index is [0-4]
        # so the exit code should be 1[0-4]
        self.expect("continue", patterns=[r"exited with status = 1[0-4]"])

    @skipUnlessPlatform(["linux"])
    def test_follow_parent_vfork_no_exec(self):
        """
        Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-parent.
        And follow-parent successfully detach all child processes and exit debugger without calling exec.
        """
        self.follow_parent_helper(use_fork=False, call_exec=False)

    @skipUnlessPlatform(["linux"])
    def test_follow_parent_fork_no_exec(self):
        """
        Make sure that debugging concurrent fork() from multiple threads won't crash lldb during follow-parent.
        And follow-parent successfully detach all child processes and exit debugger without calling exec
        """
        self.follow_parent_helper(use_fork=True, call_exec=False)

    @skipUnlessPlatform(["linux"])
    def test_follow_parent_vfork_call_exec(self):
        """
        Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-parent.
        And follow-parent successfully detach all child processes and exit debugger after calling exec.
        """
        self.follow_parent_helper(use_fork=False, call_exec=True)

    @skipUnlessPlatform(["linux"])
    def test_follow_parent_fork_call_exec(self):
        """
        Make sure that debugging concurrent fork() from multiple threads won't crash lldb during follow-parent.
        And follow-parent successfully detach all child processes and exit debugger after calling exec.
        """
        self.follow_parent_helper(use_fork=True, call_exec=True)

    @skipUnlessPlatform(["linux"])
    def test_follow_child_vfork_no_exec(self):
        """
        Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-child.
        And follow-child successfully detach parent process and exit child process with correct exit code without calling exec.
        """
        self.follow_child_helper(use_fork=False, call_exec=False)

    @skipUnlessPlatform(["linux"])
    def test_follow_child_fork_no_exec(self):
        """
        Make sure that debugging concurrent fork() from multiple threads won't crash lldb during follow-child.
        And follow-child successfully detach parent process and exit child process with correct exit code without calling exec.
        """
        self.follow_child_helper(use_fork=True, call_exec=False)

    @skipUnlessPlatform(["linux"])
    def test_follow_child_vfork_call_exec(self):
        """
        Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-child.
        And follow-child successfully detach parent process and exit child process with correct exit code after calling exec.
        """
        self.follow_child_helper(use_fork=False, call_exec=True)

    @skipUnlessPlatform(["linux"])
    def test_follow_child_fork_call_exec(self):
        """
        Make sure that debugging concurrent fork() from multiple threads won't crash lldb during follow-child.
        And follow-child successfully detach parent process and exit child process with correct exit code after calling exec.
        """
        self.follow_child_helper(use_fork=True, call_exec=True)
105 changes: 105 additions & 0 deletions lldb/test/API/functionalities/fork/concurrent_vfork/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>

#include <iostream>
#include <mutex>
#include <string>
#include <thread>
#include <vector>

pid_t g_pid = 0;
std::mutex g_child_pids_mutex;
std::vector<pid_t> g_child_pids;

const char *g_program = nullptr;
bool g_use_vfork = true; // Use vfork by default.
bool g_call_exec = false; // Does not call exec by default.

int call_vfork(int index) {
pid_t child_pid = 0;
if (g_use_vfork) {
child_pid = vfork();
} else {
child_pid = fork();
}

if (child_pid == -1) {
// Error handling
perror("vfork");
return 1;
} else if (child_pid == 0) {
// This code is executed by the child process
g_pid = getpid();
printf("Child process: %d\n", g_pid);

if (g_call_exec) {
std::string child_exit_code = std::to_string(index + 10);
execl(g_program, g_program, "--child", child_exit_code.c_str(), NULL);
} else {
_exit(index + 10);
}
} else {
// This code is executed by the parent process
printf("[Parent] Forked process id: %d\n", child_pid);
}
return 0;
}

void wait_all_children_to_exit() {
std::lock_guard<std::mutex> Lock(g_child_pids_mutex);
for (pid_t child_pid : g_child_pids) {
int child_status = 0;
pid_t pid = waitpid(child_pid, &child_status, 0);
if (child_status != 0) {
int exit_code = WEXITSTATUS(child_status);
if (exit_code > 15 || exit_code < 10) {
printf("Error: child process exits with unexpected code %d\n",
exit_code);
_exit(1); // This will let our program know that some child processes
// didn't exist with an expected exit status.
}
}
if (pid != child_pid)
_exit(2); // This will let our program know it didn't succeed
}
}

// Spawn `num_threads` threads that each fork/vfork one child, join them all,
// then wait for every child to exit.
void create_threads(int num_threads) {
  std::vector<std::thread> workers;
  workers.reserve(num_threads);
  for (int index = 0; index < num_threads; ++index)
    workers.emplace_back(call_vfork, index);
  printf("Created %d threads, joining...\n",
         num_threads); // end_of_create_threads
  for (std::thread &worker : workers)
    worker.join();
  wait_all_children_to_exit();
}

// Can be called in various ways:
// 1. [program]: use vfork and not call exec
// 2. [program] --fork: use fork and not call exec
// 3. [program] --fork --exec: use fork and call exec
// 4. [program] --exec: use vfork and call exec
// 5. [program] --child [exit_code]: child process
int main(int argc, char *argv[]) {
  g_pid = getpid();
  g_program = argv[0];

  // Parse the command line (see the modes documented above).
  for (int arg_index = 1; arg_index < argc; ++arg_index) {
    const char *arg = argv[arg_index];
    if (strcmp(arg, "--child") == 0) {
      // Re-exec'd child: report and exit with the requested code.
      assert(arg_index + 1 < argc);
      int child_exit_code = std::stoi(argv[arg_index + 1]);
      printf("Child process: %d, exiting with code %d\n", g_pid,
             child_exit_code);
      _exit(child_exit_code);
    }
    if (strcmp(arg, "--fork") == 0)
      g_use_vfork = false;
    else if (strcmp(arg, "--exec") == 0)
      g_call_exec = true;
  }

  int num_threads = 5; // break here
  create_threads(num_threads);
  return 0;
}
3 changes: 3 additions & 0 deletions lldb/test/API/python_api/process/address-masks/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
C_SOURCES := main.c

include Makefile.rules
134 changes: 134 additions & 0 deletions lldb/test/API/python_api/process/address-masks/TestAddressMasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
"""Test Python APIs for setting, getting, and using address masks."""

import os
import lldb
from lldbsuite.test.decorators import *
from lldbsuite.test.lldbtest import *
from lldbsuite.test import lldbutil


class AddressMasksTestCase(TestBase):
    """Exercise the SBProcess address-mask APIs and the user-override
    settings, checking FixAddress results against known values."""

    NO_DEBUG_INFO_TESTCASE = True

    def reset_all_masks(self, process):
        """Clear every mask type over both address ranges and zero the
        user-facing settings so one check cannot leak state into the next."""
        process.SetAddressMask(
            lldb.eAddressMaskTypeAll,
            lldb.LLDB_INVALID_ADDRESS_MASK,
            lldb.eAddressMaskRangeAll,
        )
        self.runCmd("settings set target.process.virtual-addressable-bits 0")
        self.runCmd("settings set target.process.highmem-virtual-addressable-bits 0")

    @skipIf(archs=["arm"])  # 32-bit arm ABI hardcodes Code mask, is 32-bit
    def test_address_masks(self):
        self.build()
        (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint(
            self, "break here", lldb.SBFileSpec("main.c")
        )

        # With 42 addressable bits, everything above bit 41 is stripped.
        process.SetAddressableBits(lldb.eAddressMaskTypeAll, 42)
        self.assertEqual(0x0000029500003F94, process.FixAddress(0x00265E9500003F94))
        self.reset_all_masks(process)

        # Setting the mask directly is equivalent:
        # ~((1ULL<<42)-1) == 0xfffffc0000000000
        process.SetAddressMask(lldb.eAddressMaskTypeAll, 0xFFFFFC0000000000)
        self.assertEqual(0x0000029500003F94, process.FixAddress(0x00265E9500003F94))
        self.reset_all_masks(process)

        # Check that all bits can pass through unmodified.
        process.SetAddressableBits(lldb.eAddressMaskTypeAll, 64)
        self.assertEqual(0x00265E9500003F94, process.FixAddress(0x00265E9500003F94))
        self.reset_all_masks(process)

        # 42 bits applied to both the low- and high-memory ranges.
        process.SetAddressableBits(
            lldb.eAddressMaskTypeAll, 42, lldb.eAddressMaskRangeAll
        )
        self.assertEqual(0x000002950001F694, process.FixAddress(0x00265E950001F694))
        self.assertEqual(0xFFFFFE950000F694, process.FixAddress(0xFFA65E950000F694))
        self.reset_all_masks(process)

        # Set a eAddressMaskTypeCode which has the low 3 bits marked as
        # non-address bits; confirm that they're cleared by FixAddress.
        process.SetAddressableBits(
            lldb.eAddressMaskTypeAll, 42, lldb.eAddressMaskRangeAll
        )
        mask = process.GetAddressMask(lldb.eAddressMaskTypeAny)
        process.SetAddressMask(lldb.eAddressMaskTypeCode, mask | 0x3)
        self.assertEqual(0x000002950001F697, process.FixAddress(0x00265E950001F697))
        self.assertEqual(0xFFFFFE950000F697, process.FixAddress(0xFFA65E950000F697))
        self.assertEqual(
            0x000002950001F697,
            process.FixAddress(0x00265E950001F697, lldb.eAddressMaskTypeData),
        )
        self.assertEqual(
            0x000002950001F694,
            process.FixAddress(0x00265E950001F697, lldb.eAddressMaskTypeCode),
        )
        self.reset_all_masks(process)

        # The user can override whatever settings the Process thinks should
        # be used.
        process.SetAddressableBits(
            lldb.eAddressMaskTypeAll, 42, lldb.eAddressMaskRangeLow
        )
        self.runCmd("settings set target.process.virtual-addressable-bits 15")
        self.assertEqual(0x0000000000007694, process.FixAddress(0x00265E950001F694))
        self.assertEqual(0xFFFFFFFFFFFFF694, process.FixAddress(0xFFA65E950000F694))
        self.runCmd("settings set target.process.virtual-addressable-bits 0")
        self.assertEqual(0x000002950001F694, process.FixAddress(0x00265E950001F694))
        self.reset_all_masks(process)

    # AArch64 can have different address masks for high and low memory, when
    # different page tables are set up.
    @skipIf(archs=no_match(["arm64", "arm64e", "aarch64"]))
    @skipIf(archs=["arm"])  # 32-bit arm ABI hardcodes Code mask, is 32-bit
    def test_address_masks_target_supports_highmem_tests(self):
        self.build()
        (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint(
            self, "break here", lldb.SBFileSpec("main.c")
        )

        # Different widths for low memory (42 bits) and high memory (15 bits).
        process.SetAddressableBits(
            lldb.eAddressMaskTypeAll, 42, lldb.eAddressMaskRangeLow
        )
        process.SetAddressableBits(
            lldb.eAddressMaskTypeAll, 15, lldb.eAddressMaskRangeHigh
        )
        self.assertEqual(0x000002950001F694, process.FixAddress(0x00265E950001F694))
        self.assertEqual(0xFFFFFFFFFFFFF694, process.FixAddress(0xFFA65E950000F694))
        self.reset_all_masks(process)

        # The user can override whatever settings the Process thinks should
        # be used.
        process.SetAddressableBits(
            lldb.eAddressMaskTypeAll, 42, lldb.eAddressMaskRangeAll
        )
        self.runCmd("settings set target.process.virtual-addressable-bits 15")
        self.runCmd("settings set target.process.highmem-virtual-addressable-bits 15")
        self.assertEqual(0x0000000000007694, process.FixAddress(0x00265E950001F694))
        self.assertEqual(0xFFFFFFFFFFFFF694, process.FixAddress(0xFFA65E950000F694))
        self.runCmd("settings set target.process.virtual-addressable-bits 0")
        self.runCmd("settings set target.process.highmem-virtual-addressable-bits 0")
        self.assertEqual(0x000002950001F694, process.FixAddress(0x00265E950001F694))
        self.reset_all_masks(process)

    # On most targets where we have a single mask for all address range,
    # confirm that the high memory masks are ignored.
    @skipIf(archs=["arm64", "arm64e", "aarch64"])
    @skipIf(archs=["arm"])  # 32-bit arm ABI hardcodes Code mask, is 32-bit
    def test_address_masks_target_no_highmem(self):
        self.build()
        (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint(
            self, "break here", lldb.SBFileSpec("main.c")
        )

        process.SetAddressableBits(
            lldb.eAddressMaskTypeAll, 42, lldb.eAddressMaskRangeLow
        )
        process.SetAddressableBits(
            lldb.eAddressMaskTypeAll, 15, lldb.eAddressMaskRangeHigh
        )
        self.assertEqual(0x000002950001F694, process.FixAddress(0x00265E950001F694))
        self.assertEqual(0xFFFFFE950000F694, process.FixAddress(0xFFA65E950000F694))
        self.runCmd("settings set target.process.virtual-addressable-bits 15")
        self.runCmd("settings set target.process.highmem-virtual-addressable-bits 42")
        self.assertEqual(0x0000000000007694, process.FixAddress(0x00265E950001F694))
        self.assertEqual(0xFFFFFFFFFFFFF694, process.FixAddress(0xFFA65E950000F694))
5 changes: 5 additions & 0 deletions lldb/test/API/python_api/process/address-masks/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#include <stdio.h>

/* Trivial inferior for the address-mask tests; the debugger stops on the
   "break here" line before the process exits.  Explicit return added so the
   exit status does not rely on C99's implicit return from main. */
int main(int argc, char const *argv[]) {
  puts("Hello address masking world"); // break here
  return 0;
}
14 changes: 14 additions & 0 deletions lldb/test/Shell/SymbolFile/DWARF/x86/dwp-separate-debug-file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,20 @@
// RUN: -o "target variable a" \
// RUN: -b %t | FileCheck %s

// Now move the .debug and .dwp file into another directory so that we can use
// the target.debug-file-search-paths setting to search for the files.
// RUN: mkdir -p %t-debug-info-dir
// RUN: mv %t.dwp %t-debug-info-dir
// RUN: mv %t.debug %t-debug-info-dir
// RUN: %lldb \
// RUN: -O "log enable dwarf split" \
// RUN: -O "setting set target.debug-file-search-paths '%t-debug-info-dir'" \
// RUN: -o "target variable a" \
// RUN: -b %t | FileCheck %s
// Check that the DWP file is located via the debug-file-search-paths
// setting and that the variable can still be read from the split debug info.
// CHECK: Searching for DWP using:
// CHECK: Found DWP file:
// CHECK: (A) a = (x = 47)
Expand Down
19 changes: 19 additions & 0 deletions llvm/docs/AMDGPUUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1534,6 +1534,25 @@ The AMDGPU backend supports the following calling conventions:

=============================== ==========================================================

AMDGPU MCExpr
-------------

As part of the AMDGPU MC layer, AMDGPU provides the following target-specific
``MCExpr``\s.

.. table:: AMDGPU MCExpr types:
:name: amdgpu-mcexpr-table

=================== ================= ========================================================
MCExpr Operands Return value
=================== ================= ========================================================
``max(arg, ...)`` 1 or more Variadic signed operation that returns the maximum
value of all its arguments.

``or(arg, ...)`` 1 or more Variadic signed operation that returns the bitwise-or
result of all its arguments.

=================== ================= ========================================================

.. _amdgpu-elf-code-object:

Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/CodeGen/TargetInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1446,7 +1446,7 @@ class TargetInstrInfo : public MCInstrInfo {
/// abstraction that supports negative offsets.
virtual bool getMemOperandsWithOffsetWidth(
const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
const TargetRegisterInfo *TRI) const {
return false;
}
Expand Down
93 changes: 60 additions & 33 deletions llvm/lib/Analysis/ScalarEvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10577,6 +10577,25 @@ static bool HasSameValue(const SCEV *A, const SCEV *B) {
return false;
}

static bool MatchBinarySub(const SCEV *S, const SCEV *&LHS, const SCEV *&RHS) {
const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S);
if (!Add || Add->getNumOperands() != 2)
return false;
if (auto *ME = dyn_cast<SCEVMulExpr>(Add->getOperand(0));
ME && ME->getNumOperands() == 2 && ME->getOperand(0)->isAllOnesValue()) {
LHS = Add->getOperand(1);
RHS = ME->getOperand(1);
return true;
}
if (auto *ME = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
ME && ME->getNumOperands() == 2 && ME->getOperand(0)->isAllOnesValue()) {
LHS = Add->getOperand(0);
RHS = ME->getOperand(1);
return true;
}
return false;
}

bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
const SCEV *&LHS, const SCEV *&RHS,
unsigned Depth) {
Expand Down Expand Up @@ -10652,19 +10671,10 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_NE:
// Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
if (!RA)
if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
if (const SCEVMulExpr *ME =
dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
ME->getOperand(0)->isAllOnesValue()) {
RHS = AE->getOperand(1);
LHS = ME->getOperand(1);
Changed = true;
}
if (RA.isZero() && MatchBinarySub(LHS, LHS, RHS))
Changed = true;
break;


// The "Should have been caught earlier!" messages refer to the fact
// that the ExactCR.isFullSet() or ExactCR.isEmptySet() check above
// should have fired on the corresponding cases, and canonicalized the
Expand Down Expand Up @@ -13469,6 +13479,14 @@ bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L));
}

/// When printing a top-level SCEV for trip counts, it's helpful to include
/// a type for constants which are otherwise hard to disambiguate.
static void PrintSCEVWithTypeHint(raw_ostream &OS, const SCEV *S) {
  // Only constants need the hint; other SCEVs carry enough context.
  const bool NeedsTypeHint = isa<SCEVConstant>(S);
  if (NeedsTypeHint)
    OS << *S->getType() << " ";
  OS << *S;
}

static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
const Loop *L) {
// Print all inner loops first
Expand All @@ -13484,15 +13502,19 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
if (ExitingBlocks.size() != 1)
OS << "<multiple exits> ";

if (SE->hasLoopInvariantBackedgeTakenCount(L))
OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L) << "\n";
else
OS << "Unpredictable backedge-taken count.\n";
auto *BTC = SE->getBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(BTC)) {
OS << "backedge-taken count is ";
PrintSCEVWithTypeHint(OS, BTC);
} else
OS << "Unpredictable backedge-taken count.";
OS << "\n";

if (ExitingBlocks.size() > 1)
for (BasicBlock *ExitingBlock : ExitingBlocks) {
OS << " exit count for " << ExitingBlock->getName() << ": "
<< *SE->getExitCount(L, ExitingBlock) << "\n";
OS << " exit count for " << ExitingBlock->getName() << ": ";
PrintSCEVWithTypeHint(OS, SE->getExitCount(L, ExitingBlock));
OS << "\n";
}

OS << "Loop ";
Expand All @@ -13501,8 +13523,8 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,

auto *ConstantBTC = SE->getConstantMaxBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(ConstantBTC)) {
OS << "constant max backedge-taken count is "
<< *ConstantBTC->getType() << " " << *ConstantBTC;
OS << "constant max backedge-taken count is ";
PrintSCEVWithTypeHint(OS, ConstantBTC);
if (SE->isBackedgeTakenCountMaxOrZero(L))
OS << ", actual taken count either this or zero.";
} else {
Expand All @@ -13516,34 +13538,39 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,

auto *SymbolicBTC = SE->getSymbolicMaxBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(SymbolicBTC)) {
OS << "symbolic max backedge-taken count is " << *SymbolicBTC;
OS << "symbolic max backedge-taken count is ";
PrintSCEVWithTypeHint(OS, SymbolicBTC);
if (SE->isBackedgeTakenCountMaxOrZero(L))
OS << ", actual taken count either this or zero.";
} else {
OS << "Unpredictable symbolic max backedge-taken count. ";
}

OS << "\n";

if (ExitingBlocks.size() > 1)
for (BasicBlock *ExitingBlock : ExitingBlocks) {
OS << " symbolic max exit count for " << ExitingBlock->getName() << ": "
<< *SE->getExitCount(L, ExitingBlock, ScalarEvolution::SymbolicMaximum)
<< "\n";
OS << " symbolic max exit count for " << ExitingBlock->getName() << ": ";
auto *ExitBTC = SE->getExitCount(L, ExitingBlock,
ScalarEvolution::SymbolicMaximum);
PrintSCEVWithTypeHint(OS, ExitBTC);
OS << "\n";
}

OS << "Loop ";
L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": ";

SmallVector<const SCEVPredicate *, 4> Preds;
auto PBT = SE->getPredicatedBackedgeTakenCount(L, Preds);
if (!isa<SCEVCouldNotCompute>(PBT)) {
OS << "Predicated backedge-taken count is " << *PBT << "\n";
auto *PBT = SE->getPredicatedBackedgeTakenCount(L, Preds);
if (PBT != BTC || !Preds.empty()) {
OS << "Loop ";
L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": ";
if (!isa<SCEVCouldNotCompute>(PBT)) {
OS << "Predicated backedge-taken count is ";
PrintSCEVWithTypeHint(OS, PBT);
} else
OS << "Unpredictable predicated backedge-taken count.";
OS << "\n";
OS << " Predicates:\n";
for (const auto *P : Preds)
P->print(OS, 4);
} else {
OS << "Unpredictable predicated backedge-taken count.\n";
}

if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/Analysis/ValueTracking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,9 +294,11 @@ bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
if (auto *CI = dyn_cast<ConstantInt>(V))
return CI->getValue().isStrictlyPositive();

// TODO: We'd doing two recursive queries here. We should factor this such
// that only a single query is needed.
return isKnownNonNegative(V, SQ, Depth) && ::isKnownNonZero(V, Depth, SQ);
// If `isKnownNonNegative` ever becomes more sophisticated, make sure to keep
// this updated.
KnownBits Known = computeKnownBits(V, Depth, SQ);
return Known.isNonNegative() &&
(Known.isNonZero() || ::isKnownNonZero(V, Depth, SQ));
}

bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
Expand Down
13 changes: 7 additions & 6 deletions llvm/lib/CodeGen/MachineScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1729,11 +1729,11 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
SUnit *SU;
SmallVector<const MachineOperand *, 4> BaseOps;
int64_t Offset;
unsigned Width;
LocationSize Width;
bool OffsetIsScalable;

MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
int64_t Offset, bool OffsetIsScalable, unsigned Width)
int64_t Offset, bool OffsetIsScalable, LocationSize Width)
: SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
Width(Width), OffsetIsScalable(OffsetIsScalable) {}

Expand Down Expand Up @@ -1866,11 +1866,12 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(

auto MemOpb = MemOpRecords[NextIdx];
unsigned ClusterLength = 2;
unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width;
unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() +
MemOpb.Width.getValue().getKnownMinValue();
if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
CurrentClusterBytes =
SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
CurrentClusterBytes = SUnit2ClusterInfo[MemOpa.SU->NodeNum].second +
MemOpb.Width.getValue().getKnownMinValue();
}

if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
Expand Down Expand Up @@ -1940,7 +1941,7 @@ void BaseMemOpClusterMutation::collectMemOpRecords(
SmallVector<const MachineOperand *, 4> BaseOps;
int64_t Offset;
bool OffsetIsScalable;
unsigned Width;
LocationSize Width = 0;
if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
OffsetIsScalable, Width, TRI)) {
MemOpRecords.push_back(
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/TargetInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1365,7 +1365,7 @@ bool TargetInstrInfo::getMemOperandWithOffset(
const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset,
bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const {
SmallVector<const MachineOperand *, 4> BaseOps;
unsigned Width;
LocationSize Width = 0;
if (!getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, OffsetIsScalable,
Width, TRI) ||
BaseOps.size() != 1)
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2675,7 +2675,7 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {

bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
const TargetRegisterInfo *TRI) const {
if (!LdSt.mayLoadOrStore())
return false;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64InstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {

bool getMemOperandsWithOffsetWidth(
const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
const TargetRegisterInfo *TRI) const override;

/// If \p OffsetIsScalable is set to 'true', the offset is scaled by `vscale`.
Expand Down
12 changes: 11 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,12 @@ def FeatureDot10Insts : SubtargetFeature<"dot10-insts",
"Has v_dot2_f32_f16 instruction"
>;

def FeatureDot11Insts : SubtargetFeature<"dot11-insts",
"HasDot11Insts",
"true",
"Has v_dot4_f32_fp8_fp8, v_dot4_f32_fp8_bf8, v_dot4_f32_bf8_fp8, v_dot4_f32_bf8_bf8 instructions"
>;

def FeatureMAIInsts : SubtargetFeature<"mai-insts",
"HasMAIInsts",
"true",
Expand Down Expand Up @@ -1521,6 +1527,7 @@ def FeatureISAVersion12 : FeatureSet<
FeatureDot8Insts,
FeatureDot9Insts,
FeatureDot10Insts,
FeatureDot11Insts,
FeatureNSAEncoding,
FeaturePartialNSAEncoding,
FeatureWavefrontSize32,
Expand Down Expand Up @@ -1874,7 +1881,7 @@ def D16PreservesUnusedBits :
def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">;
def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;

def HasExpOrExportInsts : Predicate<"Subtarget->hasExpOrExportInsts()">,
def HasExportInsts : Predicate<"Subtarget->hasExportInsts()">,
AssemblerPredicate<(all_of (not FeatureGFX90AInsts))>;

def HasInterpInsts : Predicate<"Subtarget->hasInterpInsts()">,
Expand Down Expand Up @@ -2029,6 +2036,9 @@ def HasDot9Insts : Predicate<"Subtarget->hasDot9Insts()">,
def HasDot10Insts : Predicate<"Subtarget->hasDot10Insts()">,
AssemblerPredicate<(all_of FeatureDot10Insts)>;

def HasDot11Insts : Predicate<"Subtarget->hasDot11Insts()">,
AssemblerPredicate<(all_of FeatureDot11Insts)>;

def HasGetWaveIdInst : Predicate<"Subtarget->hasGetWaveIdInst()">,
AssemblerPredicate<(all_of FeatureGetWaveIdInst)>;

Expand Down
55 changes: 55 additions & 0 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
Expand Down Expand Up @@ -1816,6 +1817,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {

public:
void onBeginOfFile() override;
bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;

ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);

Expand Down Expand Up @@ -8277,6 +8279,59 @@ void AMDGPUAsmParser::onBeginOfFile() {
getTargetStreamer().EmitDirectiveAMDGCNTarget();
}

/// Parse AMDGPU specific expressions.
///
/// expr ::= or(expr, ...) |
/// max(expr, ...)
///
bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
using AGVK = AMDGPUVariadicMCExpr::VariadicKind;

if (isToken(AsmToken::Identifier)) {
StringRef TokenId = getTokenStr();
AGVK VK = StringSwitch<AGVK>(TokenId)
.Case("max", AGVK::AGVK_Max)
.Case("or", AGVK::AGVK_Or)
.Default(AGVK::AGVK_None);

if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
SmallVector<const MCExpr *, 4> Exprs;
uint64_t CommaCount = 0;
lex(); // Eat 'max'/'or'
lex(); // Eat '('
while (true) {
if (trySkipToken(AsmToken::RParen)) {
if (Exprs.empty()) {
Error(getToken().getLoc(),
"empty " + Twine(TokenId) + " expression");
return true;
}
if (CommaCount + 1 != Exprs.size()) {
Error(getToken().getLoc(),
"mismatch of commas in " + Twine(TokenId) + " expression");
return true;
}
Res = AMDGPUVariadicMCExpr::create(VK, Exprs, getContext());
return false;
}
const MCExpr *Expr;
if (getParser().parseExpression(Expr, EndLoc))
return true;
Exprs.push_back(Expr);
bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
if (LastTokenWasComma)
CommaCount++;
if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
Error(getToken().getLoc(),
"unexpected token in " + Twine(TokenId) + " expression");
return true;
}
}
}
}
return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
}

ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
StringRef Name = getTokenStr();
if (Name == "mul") {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/DSInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,7 @@ def DS_SUB_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_sub_gs_reg_rtn", VReg_64, VGPR_32>;

let SubtargetPredicate = isGFX11Plus in {

let OtherPredicates = [HasImageInsts] in
def DS_BVH_STACK_RTN_B32 : DS_BVH_STACK<"ds_bvh_stack_rtn_b32">;

} // let SubtargetPredicate = isGFX11Plus
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/EXPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,12 @@ class EXP_Real_Row<string pseudo, int subtarget, string name = "exp", EXP_Pseudo

// DONE variants have mayLoad = 1.
// ROW variants have an implicit use of M0.
let SubtargetPredicate = HasExpOrExportInsts in {
let SubtargetPredicate = HasExportInsts in {
def EXP : EXP_Pseudo<0, 0>;
def EXP_DONE : EXP_Pseudo<0, 1>;
def EXP_ROW : EXP_Pseudo<1, 0>;
def EXP_ROW_DONE : EXP_Pseudo<1, 1>;
} // let SubtargetPredicate = HasExpOrExportInsts
} // let SubtargetPredicate = HasExportInsts

//===----------------------------------------------------------------------===//
// SI, VI, GFX10.
Expand Down Expand Up @@ -117,7 +117,7 @@ multiclass EXP_Real_gfx11 {
multiclass VEXPORT_Real_gfx12 {
defvar ps = !cast<EXP_Pseudo>(NAME);
def _gfx12 : EXP_Real_Row<NAME, SIEncodingFamily.GFX12, "export">,
EXPe_Row, MnemonicAlias<"exp", "export">, Requires<[isGFX12Plus, HasExpOrExportInsts]> {
EXPe_Row, MnemonicAlias<"exp", "export">, Requires<[isGFX12Plus, HasExportInsts]> {
let AssemblerPredicate = isGFX12Only;
let DecoderNamespace = "GFX12";
let row = ps.row;
Expand Down
7 changes: 6 additions & 1 deletion llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasDot8Insts = false;
bool HasDot9Insts = false;
bool HasDot10Insts = false;
bool HasDot11Insts = false;
bool HasMAIInsts = false;
bool HasFP8Insts = false;
bool HasFP8ConversionInsts = false;
Expand Down Expand Up @@ -647,7 +648,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// BUFFER/FLAT/GLOBAL_ATOMIC_ADD/MIN/MAX_F64
bool hasBufferFlatGlobalAtomicsF64() const { return hasGFX90AInsts(); }

bool hasExpOrExportInsts() const {
bool hasExportInsts() const {
return !hasGFX940Insts();
}

Expand Down Expand Up @@ -793,6 +794,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return HasDot10Insts;
}

bool hasDot11Insts() const {
return HasDot11Insts;
}

bool hasMAIInsts() const {
return HasMAIInsts;
}
Expand Down
94 changes: 94 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
//===- AMDGPUMCExpr.cpp - AMDGPU specific MC expression classes -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

using namespace llvm;

const AMDGPUVariadicMCExpr *
AMDGPUVariadicMCExpr::create(VariadicKind Kind, ArrayRef<const MCExpr *> Args,
                             MCContext &Ctx) {
  // Allocate the node in the MCContext arena (placement new); the context
  // owns the expression's lifetime, so callers never delete it.
  auto *Expr = new (Ctx) AMDGPUVariadicMCExpr(Kind, Args);
  return Expr;
}

const MCExpr *AMDGPUVariadicMCExpr::getSubExpr(size_t Index) const {
  // Bounds-checked access to the Index'th operand of the variadic expression.
  bool InBounds = Index < Args.size();
  assert(InBounds && "Indexing out of bounds AMDGPUVariadicMCExpr sub-expr");
  (void)InBounds;
  return Args[Index];
}

void AMDGPUVariadicMCExpr::printImpl(raw_ostream &OS,
                                     const MCAsmInfo *MAI) const {
  // Emit the operation name followed by a parenthesized, comma-separated
  // operand list, e.g. "max(a, b, c)".
  switch (Kind) {
  case AGVK_Or:
    OS << "or(";
    break;
  case AGVK_Max:
    OS << "max(";
    break;
  default:
    llvm_unreachable("Unknown AMDGPUVariadicMCExpr kind.");
  }
  for (size_t I = 0, E = Args.size(); I != E; ++I) {
    // Separator goes before every operand except the first; output is
    // identical to printing it after every operand except the last.
    if (I != 0)
      OS << ", ";
    Args[I]->print(OS, MAI, /*InParens=*/false);
  }
  OS << ')';
}

// Apply the scalar binary operation corresponding to Kind to two already
// evaluated absolute operands.
static int64_t op(AMDGPUVariadicMCExpr::VariadicKind Kind, int64_t Arg1,
                  int64_t Arg2) {
  if (Kind == AMDGPUVariadicMCExpr::AGVK_Max)
    return std::max(Arg1, Arg2);
  if (Kind == AMDGPUVariadicMCExpr::AGVK_Or)
    return Arg1 | Arg2;
  llvm_unreachable("Unknown AMDGPUVariadicMCExpr kind.");
}

// Try to fold the variadic expression to an absolute constant.
//
// Succeeds only if every operand evaluates to an absolute MCValue; the
// operand constants are then folded left-to-right with `op`. Returns false
// (leaving Res untouched) as soon as any operand is non-absolute or fails to
// evaluate.
bool AMDGPUVariadicMCExpr::evaluateAsRelocatableImpl(
    MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const {
  std::optional<int64_t> Total;

  for (const MCExpr *Arg : Args) {
    MCValue ArgRes;
    if (!Arg->evaluateAsRelocatable(ArgRes, Layout, Fixup) ||
        !ArgRes.isAbsolute())
      return false;

    if (!Total.has_value())
      Total = ArgRes.getConstant();
    else
      // Bug fix: previously the first operand was seeded into Total and then
      // immediately op-combined with itself again. That was only harmless
      // because max/or are idempotent; it would silently miscompute for any
      // future non-idempotent kind (e.g. add). Fold each operand exactly once.
      Total = op(Kind, *Total, ArgRes.getConstant());
  }

  // Args is guaranteed non-empty by the constructor, so Total is set here.
  Res = MCValue::get(*Total);
  return true;
}

void AMDGPUVariadicMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
  // Forward the visitor to each operand so the streamer sees every
  // sub-expression this node uses.
  for (size_t I = 0, E = Args.size(); I != E; ++I)
    Streamer.visitUsedExpr(*Args[I]);
}

// Return the associated fragment of the first operand that has one, or
// nullptr if no operand is fragment-associated.
MCFragment *AMDGPUVariadicMCExpr::findAssociatedFragment() const {
  for (const MCExpr *Arg : Args) {
    // Call findAssociatedFragment() once per operand and reuse the result;
    // the original called it twice (once to test, once to return).
    if (MCFragment *Frag = Arg->findAssociatedFragment())
      return Frag;
  }
  return nullptr;
}
74 changes: 74 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
//===- AMDGPUMCExpr.h - AMDGPU specific MC expression classes ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCEXPR_H
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCEXPR_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCExpr.h"

namespace llvm {

/// AMDGPU target specific variadic MCExpr operations.
///
/// Takes in a minimum of 1 argument to be used with an operation. The supported
/// operations are:
/// - (bitwise) or
/// - max
///
/// \note If the 'or'/'max' operations are provided only a single argument, the
/// operation will act as a no-op and simply resolve as the provided argument.
///
class AMDGPUVariadicMCExpr : public MCTargetExpr {
public:
  // AGVK_None is a sentinel; constructing an expression with it is rejected.
  enum VariadicKind { AGVK_None, AGVK_Or, AGVK_Max };

private:
  // Which variadic operation this node represents.
  VariadicKind Kind;
  // The operands; the constructor guarantees at least one.
  SmallVector<const MCExpr *, 2> Args;

  // Private: instances must be created through create()/createOr()/createMax()
  // so they are allocated in an MCContext arena.
  AMDGPUVariadicMCExpr(VariadicKind Kind, ArrayRef<const MCExpr *> Args)
      : Kind(Kind), Args(Args) {
    assert(Args.size() >= 1 && "Needs a minimum of one expression.");
    assert(Kind != AGVK_None &&
           "Cannot construct AMDGPUVariadicMCExpr of kind none.");
  }

public:
  // Allocate a new node of the given Kind over Args in Ctx's arena.
  static const AMDGPUVariadicMCExpr *
  create(VariadicKind Kind, ArrayRef<const MCExpr *> Args, MCContext &Ctx);

  // Convenience wrapper for create(AGVK_Or, ...).
  static const AMDGPUVariadicMCExpr *createOr(ArrayRef<const MCExpr *> Args,
                                              MCContext &Ctx) {
    return create(VariadicKind::AGVK_Or, Args, Ctx);
  }

  // Convenience wrapper for create(AGVK_Max, ...).
  static const AMDGPUVariadicMCExpr *createMax(ArrayRef<const MCExpr *> Args,
                                               MCContext &Ctx) {
    return create(VariadicKind::AGVK_Max, Args, Ctx);
  }

  VariadicKind getKind() const { return Kind; }
  // Bounds-asserted access to the Index'th operand.
  const MCExpr *getSubExpr(size_t Index) const;

  // MCTargetExpr interface.
  void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
  bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
                                 const MCFixup *Fixup) const override;
  void visitUsedExpr(MCStreamer &Streamer) const override;
  MCFragment *findAssociatedFragment() const override;
  // No TLS-related fixups apply to these expressions.
  void fixELFSymbolsInTLSFixups(MCAssembler &) const override{};

  static bool classof(const MCExpr *E) {
    return E->getKind() == MCExpr::Target;
  }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCEXPR_H
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ add_llvm_component_library(LLVMAMDGPUDesc
AMDGPUInstPrinter.cpp
AMDGPUMCAsmInfo.cpp
AMDGPUMCCodeEmitter.cpp
AMDGPUMCExpr.cpp
AMDGPUMCTargetDesc.cpp
AMDGPUTargetStreamer.cpp
R600InstPrinter.cpp
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ class SIInsertHardClauses : public MachineFunctionPass {

int64_t Dummy1;
bool Dummy2;
unsigned Dummy3;
LocationSize Dummy3 = 0;
SmallVector<const MachineOperand *, 4> BaseOps;
if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ static bool isStride64(unsigned Opc) {

bool SIInstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
const TargetRegisterInfo *TRI) const {
if (!LdSt.mayLoadOrStore())
return false;
Expand Down Expand Up @@ -424,7 +424,7 @@ bool SIInstrInfo::getMemOperandsWithOffsetWidth(
DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
Width = getOpSize(LdSt, DataOpIdx);
DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
Width += getOpSize(LdSt, DataOpIdx);
Width = Width.getValue() + getOpSize(LdSt, DataOpIdx);
} else {
Width = getOpSize(LdSt, DataOpIdx);
}
Expand Down Expand Up @@ -3647,7 +3647,7 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
const MachineInstr &MIb) const {
SmallVector<const MachineOperand *, 4> BaseOps0, BaseOps1;
int64_t Offset0, Offset1;
unsigned Dummy0, Dummy1;
LocationSize Dummy0 = 0, Dummy1 = 0;
bool Offset0IsScalable, Offset1IsScalable;
if (!getMemOperandsWithOffsetWidth(MIa, BaseOps0, Offset0, Offset0IsScalable,
Dummy0, &RI) ||
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
bool getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt,
SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
bool &OffsetIsScalable, unsigned &Width,
bool &OffsetIsScalable, LocationSize &Width,
const TargetRegisterInfo *TRI) const final;

bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/SOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1705,7 +1705,7 @@ let SubtargetPredicate = isGFX10Plus in {
} // End SubtargetPredicate = isGFX10Plus

let SubtargetPredicate = isGFX11Plus in {
let OtherPredicates = [HasExpOrExportInsts] in
let OtherPredicates = [HasExportInsts] in
def S_WAIT_EVENT : SOPP_Pseudo<"s_wait_event", (ins s16imm:$simm16),
"$simm16"> {
let hasSideEffects = 1;
Expand Down Expand Up @@ -1738,7 +1738,7 @@ let OtherPredicates = [HasImageInsts] in {
SOPP_Pseudo<"s_wait_bvhcnt", (ins s16imm:$simm16), "$simm16",
[(int_amdgcn_s_wait_bvhcnt timm:$simm16)]>;
} // End OtherPredicates = [HasImageInsts].
let OtherPredicates = [HasExpOrExportInsts] in
let OtherPredicates = [HasExportInsts] in
def S_WAIT_EXPCNT :
SOPP_Pseudo<"s_wait_expcnt", (ins s16imm:$simm16), "$simm16",
[(int_amdgcn_s_wait_expcnt timm:$simm16)]>;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -480,10 +480,12 @@ multiclass VOP3PDOTF8Inst <string OpName, SDPatternOperator intrinsic_node> {
i32:$src2_modifiers, f32:$src2)>;
}

let OtherPredicates = [HasDot11Insts] in {
defm V_DOT4_F32_FP8_BF8 : VOP3PDOTF8Inst<"v_dot4_f32_fp8_bf8", int_amdgcn_dot4_f32_fp8_bf8>;
defm V_DOT4_F32_BF8_FP8 : VOP3PDOTF8Inst<"v_dot4_f32_bf8_fp8", int_amdgcn_dot4_f32_bf8_fp8>;
defm V_DOT4_F32_FP8_FP8 : VOP3PDOTF8Inst<"v_dot4_f32_fp8_fp8", int_amdgcn_dot4_f32_fp8_fp8>;
defm V_DOT4_F32_BF8_BF8 : VOP3PDOTF8Inst<"v_dot4_f32_bf8_bf8", int_amdgcn_dot4_f32_bf8_bf8>;
}

def : UDot2Pat<V_DOT2_U32_U16>;
def : SDot2Pat<V_DOT2_I32_I16>;
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3070,7 +3070,7 @@ bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1,
/// Get the base register and byte offset of a load/store instr.
bool HexagonInstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
const TargetRegisterInfo *TRI) const {
OffsetIsScalable = false;
const MachineOperand *BaseOp = getBaseAndOffset(LdSt, Offset, Width);
Expand Down Expand Up @@ -3286,9 +3286,9 @@ unsigned HexagonInstrInfo::getAddrMode(const MachineInstr &MI) const {
// returned in Offset and the access size is returned in AccessSize.
// If the base operand has a subregister or the offset field does not contain
// an immediate value, return nullptr.
MachineOperand *HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI,
int64_t &Offset,
unsigned &AccessSize) const {
MachineOperand *
HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI, int64_t &Offset,
LocationSize &AccessSize) const {
// Return if it is not a base+offset type instruction or a MemOp.
if (getAddrMode(MI) != HexagonII::BaseImmOffset &&
getAddrMode(MI) != HexagonII::BaseLongOffset &&
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/Hexagon/HexagonInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ class HexagonInstrInfo : public HexagonGenInstrInfo {
bool getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt,
SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
bool &OffsetIsScalable, unsigned &Width,
bool &OffsetIsScalable, LocationSize &Width,
const TargetRegisterInfo *TRI) const override;

/// Reverses the branch condition of the specified condition list,
Expand Down Expand Up @@ -437,7 +437,7 @@ class HexagonInstrInfo : public HexagonGenInstrInfo {

unsigned getAddrMode(const MachineInstr &MI) const;
MachineOperand *getBaseAndOffset(const MachineInstr &MI, int64_t &Offset,
unsigned &AccessSize) const;
LocationSize &AccessSize) const;
SmallVector<MachineInstr*,2> getBranchingInstrs(MachineBasicBlock& MBB) const;
unsigned getCExtOpNum(const MachineInstr &MI) const;
HexagonII::CompoundGroup
Expand Down
11 changes: 6 additions & 5 deletions llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,10 +395,11 @@ void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) {
HII.getAddrMode(L0) != HexagonII::BaseImmOffset)
continue;
int64_t Offset0;
unsigned Size0;
LocationSize Size0 = 0;
MachineOperand *BaseOp0 = HII.getBaseAndOffset(L0, Offset0, Size0);
// If the access size is longer than the L1 cache line, skip the check.
if (BaseOp0 == nullptr || !BaseOp0->isReg() || Size0 >= 32)
if (BaseOp0 == nullptr || !BaseOp0->isReg() || !Size0.hasValue() ||
Size0.getValue() >= 32)
continue;
// Scan only up to 32 instructions ahead (to avoid n^2 complexity).
for (unsigned j = i+1, m = std::min(i+32, e); j != m; ++j) {
Expand All @@ -408,10 +409,10 @@ void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) {
HII.getAddrMode(L1) != HexagonII::BaseImmOffset)
continue;
int64_t Offset1;
unsigned Size1;
LocationSize Size1 = 0;
MachineOperand *BaseOp1 = HII.getBaseAndOffset(L1, Offset1, Size1);
if (BaseOp1 == nullptr || !BaseOp1->isReg() || Size1 >= 32 ||
BaseOp0->getReg() != BaseOp1->getReg())
if (BaseOp1 == nullptr || !BaseOp1->isReg() || !Size0.hasValue() ||
Size1.getValue() >= 32 || BaseOp0->getReg() != BaseOp1->getReg())
continue;
// Check bits 3 and 4 of the offset: if they differ, a bank conflict
// is unlikely.
Expand Down
11 changes: 6 additions & 5 deletions llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,14 +102,15 @@ bool LanaiInstrInfo::areMemAccessesTriviallyDisjoint(
const TargetRegisterInfo *TRI = &getRegisterInfo();
const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
unsigned int WidthA = 0, WidthB = 0;
LocationSize WidthA = 0, WidthB = 0;
if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
if (BaseOpA->isIdenticalTo(*BaseOpB)) {
int LowOffset = std::min(OffsetA, OffsetB);
int HighOffset = std::max(OffsetA, OffsetB);
int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
if (LowOffset + LowWidth <= HighOffset)
LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
if (LowWidth.hasValue() &&
LowOffset + (int)LowWidth.getValue() <= HighOffset)
return true;
}
}
Expand Down Expand Up @@ -752,7 +753,7 @@ Register LanaiInstrInfo::isStoreToStackSlot(const MachineInstr &MI,

bool LanaiInstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
unsigned &Width, const TargetRegisterInfo * /*TRI*/) const {
LocationSize &Width, const TargetRegisterInfo * /*TRI*/) const {
// Handle only loads/stores with base register followed by immediate offset
// and with add as ALU op.
if (LdSt.getNumOperands() != 4)
Expand Down Expand Up @@ -793,7 +794,7 @@ bool LanaiInstrInfo::getMemOperandWithOffsetWidth(

bool LanaiInstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
const TargetRegisterInfo *TRI) const {
switch (LdSt.getOpcode()) {
default:
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/Lanai/LanaiInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,12 @@ class LanaiInstrInfo : public LanaiGenInstrInfo {
bool getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt,
SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
bool &OffsetIsScalable, unsigned &Width,
bool &OffsetIsScalable, LocationSize &Width,
const TargetRegisterInfo *TRI) const override;

bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
const MachineOperand *&BaseOp,
int64_t &Offset, unsigned &Width,
int64_t &Offset, LocationSize &Width,
const TargetRegisterInfo *TRI) const;

std::pair<unsigned, unsigned>
Expand Down
15 changes: 8 additions & 7 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2821,7 +2821,7 @@ bool PPCInstrInfo::optimizeCmpPostRA(MachineInstr &CmpMI) const {

bool PPCInstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
const TargetRegisterInfo *TRI) const {
const MachineOperand *BaseOp;
OffsetIsScalable = false;
Expand Down Expand Up @@ -2913,7 +2913,7 @@ bool PPCInstrInfo::shouldClusterMemOps(
return false;

int64_t Offset1 = 0, Offset2 = 0;
unsigned Width1 = 0, Width2 = 0;
LocationSize Width1 = 0, Width2 = 0;
const MachineOperand *Base1 = nullptr, *Base2 = nullptr;
if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) ||
!getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) ||
Expand All @@ -2924,7 +2924,7 @@ bool PPCInstrInfo::shouldClusterMemOps(
"getMemOperandWithOffsetWidth return incorrect base op");
// The caller should already have ordered FirstMemOp/SecondMemOp by offset.
assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
return Offset1 + Width1 == Offset2;
return Offset1 + (int64_t)Width1.getValue() == Offset2;
}

/// GetInstSize - Return the number of bytes of code the specified
Expand Down Expand Up @@ -5504,7 +5504,7 @@ MachineInstr *PPCInstrInfo::findLoopInstr(
// memory width. Width is the size of memory that is being loaded/stored.
bool PPCInstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
unsigned &Width, const TargetRegisterInfo *TRI) const {
LocationSize &Width, const TargetRegisterInfo *TRI) const {
if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3)
return false;

Expand Down Expand Up @@ -5542,14 +5542,15 @@ bool PPCInstrInfo::areMemAccessesTriviallyDisjoint(
const TargetRegisterInfo *TRI = &getRegisterInfo();
const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
unsigned int WidthA = 0, WidthB = 0;
LocationSize WidthA = 0, WidthB = 0;
if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
if (BaseOpA->isIdenticalTo(*BaseOpB)) {
int LowOffset = std::min(OffsetA, OffsetB);
int HighOffset = std::max(OffsetA, OffsetB);
int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
if (LowOffset + LowWidth <= HighOffset)
LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
if (LowWidth.hasValue() &&
LowOffset + (int)LowWidth.getValue() <= HighOffset)
return true;
}
}
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
/// loaded/stored (e.g. 1, 2, 4, 8).
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
const MachineOperand *&BaseOp,
int64_t &Offset, unsigned &Width,
int64_t &Offset, LocationSize &Width,
const TargetRegisterInfo *TRI) const;

bool optimizeCmpPostRA(MachineInstr &MI) const;
Expand All @@ -553,7 +553,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
bool getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt,
SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
bool &OffsetIsScalable, unsigned &Width,
bool &OffsetIsScalable, LocationSize &Width,
const TargetRegisterInfo *TRI) const override;

/// Returns true if the two given memory operations should be scheduled
Expand Down
11 changes: 6 additions & 5 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2195,7 +2195,7 @@ MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI,

bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
const TargetRegisterInfo *TRI) const {
if (!LdSt.mayLoadOrStore())
return false;
Expand Down Expand Up @@ -2300,7 +2300,7 @@ bool RISCVInstrInfo::shouldClusterMemOps(
// function) and set it as appropriate.
bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
unsigned &Width, const TargetRegisterInfo *TRI) const {
LocationSize &Width, const TargetRegisterInfo *TRI) const {
if (!LdSt.mayLoadOrStore())
return false;

Expand Down Expand Up @@ -2339,14 +2339,15 @@ bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint(
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
unsigned int WidthA = 0, WidthB = 0;
LocationSize WidthA = 0, WidthB = 0;
if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
if (BaseOpA->isIdenticalTo(*BaseOpB)) {
int LowOffset = std::min(OffsetA, OffsetB);
int HighOffset = std::max(OffsetA, OffsetB);
int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
if (LowOffset + LowWidth <= HighOffset)
LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
if (LowWidth.hasValue() &&
LowOffset + (int)LowWidth.getValue() <= HighOffset)
return true;
}
}
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {

bool getMemOperandsWithOffsetWidth(
const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
const TargetRegisterInfo *TRI) const override;

bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
Expand All @@ -168,7 +168,7 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {

bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
const MachineOperand *&BaseOp,
int64_t &Offset, unsigned &Width,
int64_t &Offset, LocationSize &Width,
const TargetRegisterInfo *TRI) const;

bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/X86InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4519,7 +4519,7 @@ bool X86InstrInfo::preservesZeroValueInReg(

bool X86InstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &MemOp, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
const TargetRegisterInfo *TRI) const {
const MCInstrDesc &Desc = MemOp.getDesc();
int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/X86InstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ class X86InstrInfo final : public X86GenInstrInfo {
bool getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt,
SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
bool &OffsetIsScalable, unsigned &Width,
bool &OffsetIsScalable, LocationSize &Width,
const TargetRegisterInfo *TRI) const override;
bool analyzeBranchPredicate(MachineBasicBlock &MBB,
TargetInstrInfo::MachineBranchPredicate &MBP,
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/TargetParser/TargetParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["dot8-insts"] = true;
Features["dot9-insts"] = true;
Features["dot10-insts"] = true;
Features["dot11-insts"] = true;
Features["dl-insts"] = true;
Features["atomic-ds-pk-add-16-insts"] = true;
Features["atomic-flat-pk-add-16-insts"] = true;
Expand Down
63 changes: 6 additions & 57 deletions llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1867,64 +1867,10 @@ Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) {

// Check for (fadd double (sitofp x), y), see if we can merge this into an
// integer add followed by a promotion.
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
Value *LHSIntVal = LHSConv->getOperand(0);
Type *FPType = LHSConv->getType();

// TODO: This check is overly conservative. In many cases known bits
// analysis can tell us that the result of the addition has less significant
// bits than the integer type can hold.
auto IsValidPromotion = [](Type *FTy, Type *ITy) {
Type *FScalarTy = FTy->getScalarType();
Type *IScalarTy = ITy->getScalarType();

// Do we have enough bits in the significand to represent the result of
// the integer addition?
unsigned MaxRepresentableBits =
APFloat::semanticsPrecision(FScalarTy->getFltSemantics());
return IScalarTy->getIntegerBitWidth() <= MaxRepresentableBits;
};

// (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
// ... if the constant fits in the integer value. This is useful for things
// like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
// requires a constant pool load, and generally allows the add to be better
// instcombined.
if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
if (IsValidPromotion(FPType, LHSIntVal->getType())) {
Constant *CI = ConstantFoldCastOperand(Instruction::FPToSI, CFP,
LHSIntVal->getType(), DL);
if (LHSConv->hasOneUse() &&
ConstantFoldCastOperand(Instruction::SIToFP, CI, I.getType(), DL) ==
CFP &&
willNotOverflowSignedAdd(LHSIntVal, CI, I)) {
// Insert the new integer add.
Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, CI, "addconv");
return new SIToFPInst(NewAdd, I.getType());
}
}

// (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
Value *RHSIntVal = RHSConv->getOperand(0);
// It's enough to check LHS types only because we require int types to
// be the same for this transform.
if (IsValidPromotion(FPType, LHSIntVal->getType())) {
// Only do this if x/y have the same type, if at least one of them has a
// single use (so we don't increase the number of int->fp conversions),
// and if the integer add will not overflow.
if (LHSIntVal->getType() == RHSIntVal->getType() &&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
willNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) {
// Insert the new integer add.
Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, RHSIntVal, "addconv");
return new SIToFPInst(NewAdd, I.getType());
}
}
}
}
if (Instruction *R = foldFBinOpOfIntCasts(I))
return R;

Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
// Handle specials cases for FAdd with selects feeding the operation
if (Value *V = SimplifySelectsFeedingBinaryOp(I, LHS, RHS))
return replaceInstUsesWith(I, V);
Expand Down Expand Up @@ -2847,6 +2793,9 @@ Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) {
if (Instruction *X = foldFNegIntoConstant(I, DL))
return X;

if (Instruction *R = foldFBinOpOfIntCasts(I))
return R;

Value *X, *Y;
Constant *C;

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
Instruction *foldBitcastExtElt(ExtractElementInst &ExtElt);
Instruction *foldCastedBitwiseLogic(BinaryOperator &I);
Instruction *foldFBinOpOfIntCasts(BinaryOperator &I);
Instruction *foldBinopOfSextBoolToSelect(BinaryOperator &I);
Instruction *narrowBinOp(TruncInst &Trunc);
Instruction *narrowMaskedBinOp(BinaryOperator &And);
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -769,6 +769,9 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
if (Instruction *R = foldFPSignBitOps(I))
return R;

if (Instruction *R = foldFBinOpOfIntCasts(I))
return R;

// X * -1.0 --> -X
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (match(Op1, m_SpecificFP(-1.0)))
Expand Down
173 changes: 173 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1401,6 +1401,179 @@ Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
return nullptr;
}

// Try to fold:
// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
// -> ({s|u}itofp (int_binop x, y))
// 2) (fp_binop ({s|u}itofp x), FpC)
// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
Value *IntOps[2] = {nullptr, nullptr};
Constant *Op1FpC = nullptr;

// Check for:
// 1) (binop ({s|u}itofp x), ({s|u}itofp y))
// 2) (binop ({s|u}itofp x), FpC)
if (!match(BO.getOperand(0), m_SIToFP(m_Value(IntOps[0]))) &&
!match(BO.getOperand(0), m_UIToFP(m_Value(IntOps[0]))))
return nullptr;

if (!match(BO.getOperand(1), m_Constant(Op1FpC)) &&
!match(BO.getOperand(1), m_SIToFP(m_Value(IntOps[1]))) &&
!match(BO.getOperand(1), m_UIToFP(m_Value(IntOps[1]))))
return nullptr;

Type *FPTy = BO.getType();
Type *IntTy = IntOps[0]->getType();

// Do we have signed casts?
bool OpsFromSigned = isa<SIToFPInst>(BO.getOperand(0));

unsigned IntSz = IntTy->getScalarSizeInBits();
// This is the maximum number of bits the integer may use for the int -> fp
// casts to be exact.
unsigned MaxRepresentableBits =
APFloat::semanticsPrecision(FPTy->getScalarType()->getFltSemantics());

// Cache KnownBits a bit to potentially save some analysis.
WithCache<const Value *> OpsKnown[2] = {IntOps[0], IntOps[1]};

// Preserve known number of leading bits. This can allow us to trivial nsw/nuw
// checks later on.
unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};

auto IsNonZero = [&](unsigned OpNo) -> bool {
if (OpsKnown[OpNo].hasKnownBits() &&
OpsKnown[OpNo].getKnownBits(SQ).isNonZero())
return true;
return isKnownNonZero(IntOps[OpNo], SQ.DL);
};

auto IsNonNeg = [&](unsigned OpNo) -> bool {
if (OpsKnown[OpNo].hasKnownBits() &&
OpsKnown[OpNo].getKnownBits(SQ).isNonNegative())
return true;
return isKnownNonNegative(IntOps[OpNo], SQ);
};

// Check if we know for certain that ({s|u}itofp op) is exact.
auto IsValidPromotion = [&](unsigned OpNo) -> bool {
// If fp precision >= bitwidth(op) then its exact.
// NB: This is slightly conservative for `sitofp`. For signed conversion, we
// can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
// handled specially. We can't, however, increase the bound arbitrarily for
// `sitofp` as for larger sizes, it won't sign extend.
if (MaxRepresentableBits < IntSz) {
// Otherwise if its signed cast check that fp precisions >= bitwidth(op) -
// numSignBits(op).
// TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change
// `IntOps[OpNo]` arguments to `KnownOps[OpNo]`.
if (OpsFromSigned)
NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(IntOps[OpNo]);
// Finally for unsigned check that fp precision >= bitwidth(op) -
// numLeadingZeros(op).
else {
NumUsedLeadingBits[OpNo] =
IntSz - OpsKnown[OpNo].getKnownBits(SQ).countMinLeadingZeros();
}
}
// NB: We could also check if op is known to be a power of 2 or zero (which
// will always be representable). Its unlikely, however, that is we are
// unable to bound op in any way we will be able to pass the overflow checks
// later on.

if (MaxRepresentableBits < NumUsedLeadingBits[OpNo])
return false;
// Signed + Mul also requires that op is non-zero to avoid -0 cases.
return !OpsFromSigned || BO.getOpcode() != Instruction::FMul ||
IsNonZero(OpNo);
};

// If we have a constant rhs, see if we can losslessly convert it to an int.
if (Op1FpC != nullptr) {
Constant *Op1IntC = ConstantFoldCastOperand(
OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, Op1FpC,
IntTy, DL);
if (Op1IntC == nullptr)
return nullptr;
if (ConstantFoldCastOperand(OpsFromSigned ? Instruction::SIToFP
: Instruction::UIToFP,
Op1IntC, FPTy, DL) != Op1FpC)
return nullptr;

// First try to keep sign of cast the same.
IntOps[1] = Op1IntC;
}

// Ensure lhs/rhs integer types match.
if (IntTy != IntOps[1]->getType())
return nullptr;

if (Op1FpC == nullptr) {
if (OpsFromSigned != isa<SIToFPInst>(BO.getOperand(1))) {
// If we have a signed + unsigned, see if we can treat both as signed
// (uitofp nneg x) == (sitofp nneg x).
if (OpsFromSigned ? !IsNonNeg(1) : !IsNonNeg(0))
return nullptr;
OpsFromSigned = true;
}
if (!IsValidPromotion(1))
return nullptr;
}
if (!IsValidPromotion(0))
return nullptr;

// Final we check if the integer version of the binop will not overflow.
BinaryOperator::BinaryOps IntOpc;
// Because of the precision check, we can often rule out overflows.
bool NeedsOverflowCheck = true;
// Try to conservatively rule out overflow based on the already done precision
// checks.
unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1;
unsigned OverflowMaxCurBits =
std::max(NumUsedLeadingBits[0], NumUsedLeadingBits[1]);
bool OutputSigned = OpsFromSigned;
switch (BO.getOpcode()) {
case Instruction::FAdd:
IntOpc = Instruction::Add;
OverflowMaxOutputBits += OverflowMaxCurBits;
break;
case Instruction::FSub:
IntOpc = Instruction::Sub;
OverflowMaxOutputBits += OverflowMaxCurBits;
break;
case Instruction::FMul:
IntOpc = Instruction::Mul;
OverflowMaxOutputBits += OverflowMaxCurBits * 2;
break;
default:
llvm_unreachable("Unsupported binop");
}
// The precision check may have already ruled out overflow.
if (OverflowMaxOutputBits < IntSz) {
NeedsOverflowCheck = false;
// We can bound unsigned overflow from sub to in range signed value (this is
// what allows us to avoid the overflow check for sub).
if (IntOpc == Instruction::Sub)
OutputSigned = true;
}

// Precision check did not rule out overflow, so need to check.
// TODO: If we add support for `WithCache` in `willNotOverflow`, change
// `IntOps[...]` arguments to `KnownOps[...]`.
if (NeedsOverflowCheck &&
!willNotOverflow(IntOpc, IntOps[0], IntOps[1], BO, OutputSigned))
return nullptr;

Value *IntBinOp = Builder.CreateBinOp(IntOpc, IntOps[0], IntOps[1]);
if (auto *IntBO = dyn_cast<BinaryOperator>(IntBinOp)) {
IntBO->setHasNoSignedWrap(OutputSigned);
IntBO->setHasNoUnsignedWrap(!OutputSigned);
}
if (OutputSigned)
return new SIToFPInst(IntBinOp, FPTy);
return new UIToFPInst(IntBinOp, FPTy);
}

/// A binop with a constant operand and a sign-extended boolean operand may be
/// converted into a select of constants by applying the binary operation to
/// the constant with the two possible values of the extended boolean (0 or -1).
Expand Down
57 changes: 38 additions & 19 deletions llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RandomNumberGenerator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
Expand All @@ -61,6 +62,7 @@
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <optional>
#include <random>

using namespace llvm;

Expand Down Expand Up @@ -181,16 +183,23 @@ static cl::opt<bool> ClWithTls(
cl::Hidden, cl::init(true));

static cl::opt<bool>
CSkipHotCode("hwasan-skip-hot-code",
cl::desc("Do not instument hot functions based on FDO."),
cl::Hidden, cl::init(false));
CSelectiveInstrumentation("hwasan-selective-instrumentation",
cl::desc("Use selective instrumentation"),
cl::Hidden, cl::init(false));

static cl::opt<int> HotPercentileCutoff("hwasan-percentile-cutoff-hot",
cl::init(0));
static cl::opt<int> HotPercentileCutoff(
"hwasan-percentile-cutoff-hot", cl::init(0),
cl::desc("Alternative hot percentile cuttoff."
"By default `-profile-summary-cutoff-hot` is used."));

STATISTIC(NumTotalFuncs, "Number of total funcs HWASAN");
STATISTIC(NumInstrumentedFuncs, "Number of HWASAN instrumented funcs");
STATISTIC(NumNoProfileSummaryFuncs, "Number of HWASAN funcs without PS");
static cl::opt<float>
RandomSkipRate("hwasan-random-skip-rate", cl::init(0),
cl::desc("Probability value in the range [0.0, 1.0] "
"to skip instrumentation of a function."));

STATISTIC(NumTotalFuncs, "Number of total funcs");
STATISTIC(NumInstrumentedFuncs, "Number of instrumented funcs");
STATISTIC(NumNoProfileSummaryFuncs, "Number of funcs without PS");

// Mode for selecting how to insert frame record info into the stack ring
// buffer.
Expand Down Expand Up @@ -291,6 +300,8 @@ class HWAddressSanitizer {
this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0
? ClEnableKhwasan
: CompileKernel;
this->Rng =
RandomSkipRate.getNumOccurrences() ? M.createRNG("hwasan") : nullptr;

initializeModule();
}
Expand Down Expand Up @@ -372,6 +383,7 @@ class HWAddressSanitizer {
Module &M;
const StackSafetyGlobalInfo *SSI;
Triple TargetTriple;
std::unique_ptr<RandomNumberGenerator> Rng;

/// This struct defines the shadow mapping using the rule:
/// shadow = (mem >> Scale) + Offset.
Expand Down Expand Up @@ -1526,19 +1538,26 @@ void HWAddressSanitizer::sanitizeFunction(Function &F,
return;

NumTotalFuncs++;
if (CSkipHotCode) {
auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
ProfileSummaryInfo *PSI =
MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
if (PSI && PSI->hasProfileSummary()) {
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
if ((HotPercentileCutoff.getNumOccurrences() && HotPercentileCutoff >= 0)
? PSI->isFunctionHotInCallGraphNthPercentile(HotPercentileCutoff,
&F, BFI)
: PSI->isFunctionHotInCallGraph(&F, BFI))
if (CSelectiveInstrumentation) {
if (RandomSkipRate.getNumOccurrences()) {
std::bernoulli_distribution D(RandomSkipRate);
if (D(*Rng))
return;
} else {
++NumNoProfileSummaryFuncs;
auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
ProfileSummaryInfo *PSI =
MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
if (PSI && PSI->hasProfileSummary()) {
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
if ((HotPercentileCutoff.getNumOccurrences() &&
HotPercentileCutoff >= 0)
? PSI->isFunctionHotInCallGraphNthPercentile(
HotPercentileCutoff, &F, BFI)
: PSI->isFunctionHotInCallGraph(&F, BFI))
return;
} else {
++NumNoProfileSummaryFuncs;
}
}
}
NumInstrumentedFuncs++;
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1908,15 +1908,15 @@ struct DSEState {
Malloc->getArgOperand(0), IRB, TLI);
if (!Calloc)
return false;

MemorySSAUpdater Updater(&MSSA);
auto *NewAccess =
Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), nullptr,
MallocDef);
auto *NewAccessMD = cast<MemoryDef>(NewAccess);
Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
Updater.removeMemoryAccess(Malloc);
Malloc->replaceAllUsesWith(Calloc);
Malloc->eraseFromParent();
deleteDeadInstruction(Malloc);
return true;
}

Expand Down
8 changes: 3 additions & 5 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -2992,13 +2992,11 @@ class VPlan {
Value2VPValue[V] = VPV;
}

/// Returns the VPValue for \p V. \p OverrideAllowed can be used to disable
/// /// checking whether it is safe to query VPValues using IR Values.
VPValue *getVPValue(Value *V, bool OverrideAllowed = false) {
/// Returns the VPValue for \p V.
VPValue *getVPValue(Value *V) {
assert(V && "Trying to get the VPValue of a null Value");
assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
assert((Value2VPValueEnabled || OverrideAllowed ||
Value2VPValue[V]->isLiveIn()) &&
assert((Value2VPValueEnabled || Value2VPValue[V]->isLiveIn()) &&
"Value2VPValue mapping may be out of date!");
return Value2VPValue[V];
}
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,12 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
// Get the original loop tripcount.
Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));

// If this part of the active lane mask is scalar, generate the CMP directly
// to avoid unnecessary extracts.
if (State.VF.isScalar())
return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
Name);

auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
auto *PredTy = VectorType::get(Int1Ty, State.VF);
return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
Expand Down
11 changes: 9 additions & 2 deletions llvm/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" -scalar-evolution-max-iterations=0 2>&1 | FileCheck %s
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" -scalar-evolution-max-iterations=0 -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s
; PR1533

@array = weak global [101 x i32] zeroinitializer, align 32 ; <ptr> [#uses=1]

; CHECK: Loop %bb: backedge-taken count is 100

define void @loop(i32 %x) {
; CHECK-LABEL: 'loop'
; CHECK-NEXT: Determining loop execution counts for: @loop
; CHECK-NEXT: Loop %bb: backedge-taken count is i32 100
; CHECK-NEXT: Loop %bb: constant max backedge-taken count is i32 100
; CHECK-NEXT: Loop %bb: symbolic max backedge-taken count is i32 100
; CHECK-NEXT: Loop %bb: Trip multiple is 101
;
entry:
br label %bb

Expand Down
11 changes: 9 additions & 2 deletions llvm/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" -scalar-evolution-max-iterations=0 2>&1 | FileCheck %s
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" -scalar-evolution-max-iterations=0 -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s
; PR1706

; CHECK: backedge-taken count is 13

define i32 @f() {
; CHECK-LABEL: 'f'
; CHECK-NEXT: Determining loop execution counts for: @f
; CHECK-NEXT: Loop %bb5: backedge-taken count is i32 13
; CHECK-NEXT: Loop %bb5: constant max backedge-taken count is i32 13
; CHECK-NEXT: Loop %bb5: symbolic max backedge-taken count is i32 13
; CHECK-NEXT: Loop %bb5: Trip multiple is 14
;
entry:
br label %bb5

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" -scalar-evolution-max-iterations=0 2>&1 | FileCheck %s
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" -scalar-evolution-max-iterations=0 -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s
; PR2364

; CHECK: backedge-taken count is 61

define i32 @func_6() nounwind {
; CHECK-LABEL: 'func_6'
; CHECK-NEXT: Determining loop execution counts for: @func_6
; CHECK-NEXT: Loop %bb5: backedge-taken count is i8 61
; CHECK-NEXT: Loop %bb5: constant max backedge-taken count is i8 61
; CHECK-NEXT: Loop %bb5: symbolic max backedge-taken count is i8 61
; CHECK-NEXT: Loop %bb5: Trip multiple is 62
;
entry:
br label %bb5

Expand Down
11 changes: 9 additions & 2 deletions llvm/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" -scalar-evolution-max-iterations=0 2>&1 | FileCheck %s
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" -scalar-evolution-max-iterations=0 -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s
; PR2088

; CHECK: backedge-taken count is 113

define void @fun() {
; CHECK-LABEL: 'fun'
; CHECK-NEXT: Determining loop execution counts for: @fun
; CHECK-NEXT: Loop %loop: backedge-taken count is i8 113
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i8 113
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is i8 113
; CHECK-NEXT: Loop %loop: Trip multiple is 114
;
entry:
br label %loop
loop:
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ define i32 @f(i32 %x) nounwind readnone {
; CHECK-NEXT: Loop %bb: backedge-taken count is ((-5 + %x) /u 3)
; CHECK-NEXT: Loop %bb: constant max backedge-taken count is i32 1431655764
; CHECK-NEXT: Loop %bb: symbolic max backedge-taken count is ((-5 + %x) /u 3)
; CHECK-NEXT: Loop %bb: Predicated backedge-taken count is ((-5 + %x) /u 3)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Loop %bb: Trip multiple is 1
;
entry:
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ define i32 @f(i32 %x) nounwind readnone {
; CHECK-NEXT: Loop %bb: backedge-taken count is (((-3 + (-1 * (1 umin (-3 + (-1 * %x) + (1000 umax (3 + %x)))))<nuw><nsw> + (-1 * %x) + (1000 umax (3 + %x))) /u 3) + (1 umin (-3 + (-1 * %x) + (1000 umax (3 + %x)))))
; CHECK-NEXT: Loop %bb: constant max backedge-taken count is i32 334
; CHECK-NEXT: Loop %bb: symbolic max backedge-taken count is (((-3 + (-1 * (1 umin (-3 + (-1 * %x) + (1000 umax (3 + %x)))))<nuw><nsw> + (-1 * %x) + (1000 umax (3 + %x))) /u 3) + (1 umin (-3 + (-1 * %x) + (1000 umax (3 + %x)))))
; CHECK-NEXT: Loop %bb: Predicated backedge-taken count is (((-3 + (-1 * (1 umin (-3 + (-1 * %x) + (1000 umax (3 + %x)))))<nuw><nsw> + (-1 * %x) + (1000 umax (3 + %x))) /u 3) + (1 umin (-3 + (-1 * %x) + (1000 umax (3 + %x)))))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Loop %bb: Trip multiple is 1
;
entry:
Expand Down
11 changes: 9 additions & 2 deletions llvm/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" 2>&1 | FileCheck %s
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s

; CHECK: backedge-taken count is 255

define i32 @foo(i32 %x, i32 %y, ptr %lam, ptr %alp) nounwind {
; CHECK-LABEL: 'foo'
; CHECK-NEXT: Determining loop execution counts for: @foo
; CHECK-NEXT: Loop %bb1: backedge-taken count is i32 255
; CHECK-NEXT: Loop %bb1: constant max backedge-taken count is i32 255
; CHECK-NEXT: Loop %bb1: symbolic max backedge-taken count is i32 255
; CHECK-NEXT: Loop %bb1: Trip multiple is 256
;
bb1.thread:
br label %bb1

Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" 2>&1 | FileCheck %s
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s
; PR3171

; CHECK: count is 2

target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

%struct.Foo = type { i32 }
%struct.NonPod = type { [2 x %struct.Foo] }

define void @_Z3foov() nounwind {
; CHECK-LABEL: '_Z3foov'
; CHECK-NEXT: Determining loop execution counts for: @_Z3foov
; CHECK-NEXT: Loop %bb1.i: backedge-taken count is i64 2
; CHECK-NEXT: Loop %bb1.i: constant max backedge-taken count is i64 2
; CHECK-NEXT: Loop %bb1.i: symbolic max backedge-taken count is i64 2
; CHECK-NEXT: Loop %bb1.i: Trip multiple is 3
;
entry:
%x = alloca %struct.NonPod, align 8 ; <ptr> [#uses=2]
%0 = getelementptr %struct.NonPod, ptr %x, i32 0, i32 0 ; <ptr> [#uses=1]
Expand Down
38 changes: 31 additions & 7 deletions llvm/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" -scalar-evolution-max-iterations=0 2>&1 | FileCheck %s
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" -scalar-evolution-max-iterations=0 -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s

; PR1101

@A = weak global [1000 x i32] zeroinitializer, align 32
@A = weak global [1000 x i32] zeroinitializer, align 32

define void @test1(i32 %N) {
; CHECK-LABEL: 'test1'
; CHECK-NEXT: Determining loop execution counts for: @test1
; CHECK-NEXT: Loop %bb3: backedge-taken count is i32 100
; CHECK-NEXT: Loop %bb3: constant max backedge-taken count is i32 100
; CHECK-NEXT: Loop %bb3: symbolic max backedge-taken count is i32 100
; CHECK-NEXT: Loop %bb3: Trip multiple is 101
;
entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
br label %bb3
Expand All @@ -29,14 +37,21 @@ bb5: ; preds = %bb3
return: ; preds = %bb5
ret void
}
; CHECK: Determining loop execution counts for: @test1
; CHECK-NEXT: backedge-taken count is 100


; PR10383
; These next two used to crash.

define void @test2(i1 %cmp, i64 %n) {
; CHECK-LABEL: 'test2'
; CHECK-NEXT: Determining loop execution counts for: @test2
; CHECK-NEXT: Loop %for.body2: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %for.body2: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %for.body2: Unpredictable symbolic max backedge-taken count.
; CHECK-NEXT: Loop %for.body1: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %for.body1: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %for.body1: Unpredictable symbolic max backedge-taken count.
;
entry:
br label %for.body1

Expand All @@ -59,9 +74,14 @@ for.body2:
end:
ret void
}
; CHECK: Determining loop execution counts for: @test2

define i32 @test3() {
; CHECK-LABEL: 'test3'
; CHECK-NEXT: Determining loop execution counts for: @test3
; CHECK-NEXT: Loop %for.inc479: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %for.inc479: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %for.inc479: Unpredictable symbolic max backedge-taken count.
;
if.then466:
br i1 undef, label %for.cond539.preheader, label %for.inc479

Expand All @@ -78,12 +98,17 @@ for.inc479:
for.cond539.preheader:
unreachable
}
; CHECK: Determining loop execution counts for: @test3

; PR13489
; We used to crash on this too.

define void @test4() {
; CHECK-LABEL: 'test4'
; CHECK-NEXT: Determining loop execution counts for: @test4
; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count.
;
entry:
br label %for.body

Expand All @@ -99,4 +124,3 @@ for.end: ; preds = %for.body
ret void
}

; CHECK: Determining loop execution counts for: @test4
1 change: 0 additions & 1 deletion llvm/test/Analysis/ScalarEvolution/ZeroStep.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ define void @foo() {
; CHECK-NEXT: Loop %loop: <multiple exits> Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count.
; CHECK-NEXT: Loop %loop: Unpredictable predicated backedge-taken count.
;
entry:
br label %loop
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ define i32 @d(i32 %base) {
; CHECK-NEXT: Loop %for.cond: <multiple exits> Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %for.cond: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %for.cond: Unpredictable symbolic max backedge-taken count.
; CHECK-NEXT: Loop %for.cond: Unpredictable predicated backedge-taken count.
;
entry:
%e = alloca [1 x [1 x i8]], align 1
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/Analysis/ScalarEvolution/add-like-or.ll
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,6 @@ define void @mask-high(i64 %arg, ptr dereferenceable(4) %arg1) {
; CHECK-NEXT: Loop %bb6: backedge-taken count is (-1 + (-16 * (%arg /u 16)) + ((sext i32 %i to i64) smax (1 + (16 * (%arg /u 16))<nuw>)<nuw><nsw>))
; CHECK-NEXT: Loop %bb6: constant max backedge-taken count is i64 -9223372034707292162
; CHECK-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-16 * (%arg /u 16)) + ((sext i32 %i to i64) smax (1 + (16 * (%arg /u 16))<nuw>)<nuw><nsw>))
; CHECK-NEXT: Loop %bb6: Predicated backedge-taken count is (-1 + (-16 * (%arg /u 16)) + ((sext i32 %i to i64) smax (1 + (16 * (%arg /u 16))<nuw>)<nuw><nsw>))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Loop %bb6: Trip multiple is 1
;
bb:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,14 @@ define void @test(ptr %p) {
; CHECK-NEXT: Determining loop execution counts for: @test
; CHECK-NEXT: Loop %loop2: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %loop2: constant max backedge-taken count is i32 -1
; CHECK-NEXT: Loop %loop2: symbolic max backedge-taken count is -1
; CHECK-NEXT: Loop %loop2: Unpredictable predicated backedge-taken count.
; CHECK-NEXT: Loop %loop3: backedge-taken count is false
; CHECK-NEXT: Loop %loop2: symbolic max backedge-taken count is i32 -1
; CHECK-NEXT: Loop %loop3: backedge-taken count is i1 false
; CHECK-NEXT: Loop %loop3: constant max backedge-taken count is i1 false
; CHECK-NEXT: Loop %loop3: symbolic max backedge-taken count is false
; CHECK-NEXT: Loop %loop3: Predicated backedge-taken count is false
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Loop %loop3: symbolic max backedge-taken count is i1 false
; CHECK-NEXT: Loop %loop3: Trip multiple is 1
; CHECK-NEXT: Loop %loop.header: <multiple exits> Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %loop.header: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %loop.header: Unpredictable symbolic max backedge-taken count.
; CHECK-NEXT: Loop %loop.header: Unpredictable predicated backedge-taken count.
;
entry:
br label %loop.header
Expand Down
4 changes: 0 additions & 4 deletions llvm/test/Analysis/ScalarEvolution/addrec-sub-nsw.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ define i32 @test_1_non_negative(i32 %n) {
; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (1 smax %n))<nsw>
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 2147483646
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + (1 smax %n))<nsw>
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (-1 + (1 smax %n))<nsw>
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
Expand Down Expand Up @@ -53,8 +51,6 @@ define i32 @test_2_non_positive(i32 %n) {
; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (1 smax %n))<nsw>
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 2147483646
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + (1 smax %n))<nsw>
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (-1 + (1 smax %n))<nsw>
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/Analysis/ScalarEvolution/alloca.ll
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,9 @@ define void @alloca_icmp_null_exit_count() {
; CHECK-NEXT: %and = and i1 %cmp1, %cmp2
; CHECK-NEXT: --> (%cmp2 umin %cmp1) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
; CHECK-NEXT: Determining loop execution counts for: @alloca_icmp_null_exit_count
; CHECK-NEXT: Loop %loop: backedge-taken count is 2
; CHECK-NEXT: Loop %loop: backedge-taken count is i64 2
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is 2
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is 2
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is i64 2
; CHECK-NEXT: Loop %loop: Trip multiple is 3
;
entry:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,9 @@ define void @loop_guard_improves_exact_backedge_taken_count_1(i32 %conv) {
; CHECK-NEXT: %iv.next = add i64 %iv, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,2) S: [1,2) Exits: 1 LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @loop_guard_improves_exact_backedge_taken_count_1
; CHECK-NEXT: Loop %loop: backedge-taken count is 0
; CHECK-NEXT: Loop %loop: backedge-taken count is i64 0
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 0
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is 0
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is 0
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is i64 0
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
Expand Down Expand Up @@ -52,8 +50,6 @@ define void @loop_guard_improves_exact_backedge_taken_count_2(i32 %conv) {
; CHECK-NEXT: Loop %loop: backedge-taken count is (zext i1 (trunc i32 %conv to i1) to i64)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 1
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (zext i1 (trunc i32 %conv to i1) to i64)
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (zext i1 (trunc i32 %conv to i1) to i64)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Loop %loop: Trip multiple is 2
;
entry:
Expand Down
8 changes: 3 additions & 5 deletions llvm/test/Analysis/ScalarEvolution/becount-invalidation.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,14 @@ define void @test(ptr %arg) {
; CHECK-NEXT: Determining loop execution counts for: @test
; CHECK-NEXT: Loop %loop2.header: <multiple exits> Unpredictable backedge-taken count.
; CHECK-NEXT: exit count for loop2.header: ***COULDNOTCOMPUTE***
; CHECK-NEXT: exit count for loop2.latch: false
; CHECK-NEXT: exit count for loop2.latch: i1 false
; CHECK-NEXT: Loop %loop2.header: constant max backedge-taken count is i1 false
; CHECK-NEXT: Loop %loop2.header: symbolic max backedge-taken count is false
; CHECK-NEXT: Loop %loop2.header: symbolic max backedge-taken count is i1 false
; CHECK-NEXT: symbolic max exit count for loop2.header: ***COULDNOTCOMPUTE***
; CHECK-NEXT: symbolic max exit count for loop2.latch: false
; CHECK-NEXT: Loop %loop2.header: Unpredictable predicated backedge-taken count.
; CHECK-NEXT: symbolic max exit count for loop2.latch: i1 false
; CHECK-NEXT: Loop %loop.header: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %loop.header: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %loop.header: Unpredictable symbolic max backedge-taken count.
; CHECK-NEXT: Loop %loop.header: Unpredictable predicated backedge-taken count.
;
entry:
br label %loop.header
Expand Down
5 changes: 0 additions & 5 deletions llvm/test/Analysis/ScalarEvolution/cycled_phis.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ define void @test_01() {
; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count.
; CHECK-NEXT: Loop %loop: Unpredictable predicated backedge-taken count.
;
entry:
br label %loop
Expand Down Expand Up @@ -54,11 +53,9 @@ define void @test_02(ptr %p, ptr %q) {
; CHECK-NEXT: Loop %inner_loop: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %inner_loop: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %inner_loop: Unpredictable symbolic max backedge-taken count.
; CHECK-NEXT: Loop %inner_loop: Unpredictable predicated backedge-taken count.
; CHECK-NEXT: Loop %outer_loop: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %outer_loop: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %outer_loop: Unpredictable symbolic max backedge-taken count.
; CHECK-NEXT: Loop %outer_loop: Unpredictable predicated backedge-taken count.
;
entry:
%start = load i32, ptr %p, !range !0
Expand Down Expand Up @@ -107,11 +104,9 @@ define void @test_03(ptr %p, ptr %q) {
; CHECK-NEXT: Loop %inner_loop: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %inner_loop: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %inner_loop: Unpredictable symbolic max backedge-taken count.
; CHECK-NEXT: Loop %inner_loop: Unpredictable predicated backedge-taken count.
; CHECK-NEXT: Loop %outer_loop: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %outer_loop: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %outer_loop: Unpredictable symbolic max backedge-taken count.
; CHECK-NEXT: Loop %outer_loop: Unpredictable predicated backedge-taken count.
;
entry:
%start_1 = load i32, ptr %p, !range !0
Expand Down
4 changes: 0 additions & 4 deletions llvm/test/Analysis/ScalarEvolution/decrementing_addrecs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@ define i32 @test_step_1_flags(i32 %n) {
; DEFAULT-NEXT: Loop %loop: backedge-taken count is (-1 + %n)
; DEFAULT-NEXT: Loop %loop: constant max backedge-taken count is i32 2147483646
; DEFAULT-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + %n)
; DEFAULT-NEXT: Loop %loop: Predicated backedge-taken count is (-1 + %n)
; DEFAULT-NEXT: Predicates:
; DEFAULT-NEXT: Loop %loop: Trip multiple is 1
;
; EXPENSIVE_SHARPENING-LABEL: 'test_step_1_flags'
Expand All @@ -75,8 +73,6 @@ define i32 @test_step_1_flags(i32 %n) {
; EXPENSIVE_SHARPENING-NEXT: Loop %loop: backedge-taken count is (-1 + %n)
; EXPENSIVE_SHARPENING-NEXT: Loop %loop: constant max backedge-taken count is i32 2147483646
; EXPENSIVE_SHARPENING-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + %n)
; EXPENSIVE_SHARPENING-NEXT: Loop %loop: Predicated backedge-taken count is (-1 + %n)
; EXPENSIVE_SHARPENING-NEXT: Predicates:
; EXPENSIVE_SHARPENING-NEXT: Loop %loop: Trip multiple is 1
;
entry:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,10 @@ define void @test_and(i16 %in) {
; CHECK-NEXT: Loop %bb1.i: backedge-taken count is (1 + (-1 * %in))
; CHECK-NEXT: Loop %bb1.i: constant max backedge-taken count is i16 -1
; CHECK-NEXT: Loop %bb1.i: symbolic max backedge-taken count is (1 + (-1 * %in))
; CHECK-NEXT: Loop %bb1.i: Predicated backedge-taken count is (1 + (-1 * %in))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Loop %bb1.i: Trip multiple is 1
; CHECK-NEXT: Loop %bb2: <multiple exits> Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %bb2: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %bb2: Unpredictable symbolic max backedge-taken count.
; CHECK-NEXT: Loop %bb2: Unpredictable predicated backedge-taken count.
;
br label %bb2

Expand Down Expand Up @@ -58,8 +55,6 @@ define void @test_or() {
; CHECK-NEXT: Loop %BB: backedge-taken count is undef
; CHECK-NEXT: Loop %BB: constant max backedge-taken count is i32 -1
; CHECK-NEXT: Loop %BB: symbolic max backedge-taken count is undef
; CHECK-NEXT: Loop %BB: Predicated backedge-taken count is undef
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Loop %BB: Trip multiple is 1
;
%C10 = icmp slt i1 undef, undef
Expand Down
33 changes: 24 additions & 9 deletions llvm/test/Analysis/ScalarEvolution/exact_iter_count.ll
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
; RUN: opt < %s "-passes=print<scalar-evolution>" -disable-output 2>&1 | FileCheck %s
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s "-passes=print<scalar-evolution>" -scalar-evolution-classify-expressions=0 -disable-output 2>&1 | FileCheck %s

; One side exit dominating the latch, exact backedge taken count is known.
define void @test_01() {

; CHECK-LABEL: Determining loop execution counts for: @test_01
; CHECK-NEXT: Loop %loop: <multiple exits> backedge-taken count is 50

; CHECK-LABEL: 'test_01'
; CHECK-NEXT: Determining loop execution counts for: @test_01
; CHECK-NEXT: Loop %loop: <multiple exits> backedge-taken count is i32 50
; CHECK-NEXT: exit count for loop: i32 50
; CHECK-NEXT: exit count for backedge: i32 100
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 50
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is i32 50
; CHECK-NEXT: symbolic max exit count for loop: i32 50
; CHECK-NEXT: symbolic max exit count for backedge: i32 100
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
br label %loop

Expand All @@ -27,10 +35,17 @@ side.exit:
}

define void @test_02(i1 %c) {

; CHECK-LABEL: Determining loop execution counts for: @test_02
; CHECK-NEXT: Loop %loop: <multiple exits> backedge-taken count is 50

; CHECK-LABEL: 'test_02'
; CHECK-NEXT: Determining loop execution counts for: @test_02
; CHECK-NEXT: Loop %loop: <multiple exits> backedge-taken count is i32 50
; CHECK-NEXT: exit count for merge: i32 50
; CHECK-NEXT: exit count for backedge: i32 100
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 50
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is i32 50
; CHECK-NEXT: symbolic max exit count for merge: i32 50
; CHECK-NEXT: symbolic max exit count for backedge: i32 100
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
br label %loop

Expand Down
6 changes: 2 additions & 4 deletions llvm/test/Analysis/ScalarEvolution/exhaustive-trip-counts.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@ define void @f_0() {
;
; CHECK-LABEL: 'f_0'
; CHECK-NEXT: Determining loop execution counts for: @f_0
; CHECK-NEXT: Loop %for.body: backedge-taken count is 5
; CHECK-NEXT: Loop %for.body: backedge-taken count is i32 5
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 5
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is 5
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is 5
; CHECK-NEXT: Predicates:
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is i32 5
; CHECK-NEXT: Loop %for.body: Trip multiple is 6
;
entry:
Expand Down
Loading