48 changes: 26 additions & 22 deletions lldb/source/Host/common/Editline.cpp
Expand Up @@ -10,9 +10,8 @@
#include <iomanip>
#include <optional>

#include "lldb/Host/Editline.h"

#include "lldb/Host/ConnectionFileDescriptor.h"
#include "lldb/Host/Editline.h"
#include "lldb/Host/FileSystem.h"
#include "lldb/Host/Host.h"
#include "lldb/Utility/CompletionRequest.h"
Expand All @@ -23,6 +22,7 @@
#include "lldb/Utility/StreamString.h"
#include "lldb/Utility/StringList.h"
#include "lldb/Utility/Timeout.h"
#include "llvm/Support/ConvertUTF.h"

#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Locale.h"
Expand Down Expand Up @@ -444,7 +444,9 @@ StringList Editline::GetInputAsStringList(int line_count) {
if (line_count == 0)
break;
#if LLDB_EDITLINE_USE_WCHAR
lines.AppendString(m_utf8conv.to_bytes(line));
std::string buffer;
llvm::convertWideToUTF8(line, buffer);
lines.AppendString(buffer);
#else
lines.AppendString(line);
#endif
Expand Down Expand Up @@ -636,7 +638,9 @@ unsigned char Editline::BreakLineCommand(int ch) {
if (m_fix_indentation_callback) {
StringList lines = GetInputAsStringList(m_current_line_index + 1);
#if LLDB_EDITLINE_USE_WCHAR
lines.AppendString(m_utf8conv.to_bytes(new_line_fragment));
std::string buffer;
llvm::convertWideToUTF8(new_line_fragment, buffer);
lines.AppendString(buffer);
#else
lines.AppendString(new_line_fragment);
#endif
Expand Down Expand Up @@ -684,8 +688,9 @@ unsigned char Editline::EndOrAddLineCommand(int ch) {
m_input_lines.clear();
for (unsigned index = 0; index < lines.GetSize(); index++) {
#if LLDB_EDITLINE_USE_WCHAR
m_input_lines.insert(m_input_lines.end(),
m_utf8conv.from_bytes(lines[index]));
std::wstring wbuffer;
llvm::ConvertUTF8toWide(lines[index], wbuffer);
m_input_lines.insert(m_input_lines.end(), wbuffer);
#else
m_input_lines.insert(m_input_lines.end(), lines[index]);
#endif
Expand Down Expand Up @@ -869,7 +874,9 @@ unsigned char Editline::FixIndentationCommand(int ch) {
currentLine = currentLine.erase(0, -indent_correction);
}
#if LLDB_EDITLINE_USE_WCHAR
m_input_lines[m_current_line_index] = m_utf8conv.from_bytes(currentLine);
std::wstring wbuffer;
llvm::ConvertUTF8toWide(currentLine, wbuffer);
m_input_lines[m_current_line_index] = wbuffer;
#else
m_input_lines[m_current_line_index] = currentLine;
#endif
Expand Down Expand Up @@ -1502,7 +1509,7 @@ bool Editline::GetLine(std::string &line, bool &interrupted) {
} else {
m_history_sp->Enter(input);
#if LLDB_EDITLINE_USE_WCHAR
line = m_utf8conv.to_bytes(SplitLines(input)[0]);
llvm::convertWideToUTF8(SplitLines(input)[0], line);
#else
line = SplitLines(input)[0];
#endif
Expand Down Expand Up @@ -1574,25 +1581,22 @@ bool Editline::CompleteCharacter(char ch, EditLineGetCharType &out) {
out = (unsigned char)ch;
return true;
#else
LLDB_DEPRECATED_WARNING_DISABLE
std::codecvt_utf8<wchar_t> cvt;
LLDB_DEPRECATED_WARNING_RESTORE
llvm::SmallString<4> input;
for (;;) {
const char *from_next;
wchar_t *to_next;
std::mbstate_t state = std::mbstate_t();
input.push_back(ch);
switch (cvt.in(state, input.begin(), input.end(), from_next, &out, &out + 1,
to_next)) {
case std::codecvt_base::ok:
auto *cur_ptr = reinterpret_cast<const llvm::UTF8 *>(input.begin());
auto *end_ptr = reinterpret_cast<const llvm::UTF8 *>(input.end());
llvm::UTF32 code_point = 0;
llvm::ConversionResult cr = llvm::convertUTF8Sequence(
&cur_ptr, end_ptr, &code_point, llvm::lenientConversion);
switch (cr) {
case llvm::conversionOK:
out = code_point;
return out != (EditLineGetCharType)WEOF;

case std::codecvt_base::error:
case std::codecvt_base::noconv:
case llvm::targetExhausted:
case llvm::sourceIllegal:
return false;

case std::codecvt_base::partial:
case llvm::sourceExhausted:
lldb::ConnectionStatus status;
size_t read_count = m_input_connection.Read(
&ch, 1, std::chrono::seconds(0), status, nullptr);
Expand Down
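For context, the conversion helpers used above come from llvm/Support/ConvertUTF.h and replace the deprecated std::codecvt facets. Below is a minimal sketch of the round trip they provide, assuming only that LLVMSupport is linked; the helper name and sample string are invented for illustration and are not part of the patch.

```cpp
// Hedged sketch of llvm::ConvertUTF8toWide / llvm::convertWideToUTF8,
// the helpers the patch switches to; not taken from the patch itself.
#include <string>
#include "llvm/Support/ConvertUTF.h"

static bool RoundTripsCleanly(const std::string &utf8) {
  std::wstring wide;
  // UTF-8 -> wchar_t; returns false if the input is not well-formed UTF-8.
  if (!llvm::ConvertUTF8toWide(utf8, wide))
    return false;

  std::string back;
  // wchar_t -> UTF-8.
  if (!llvm::convertWideToUTF8(wide, back))
    return false;

  return back == utf8;
}
```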
2 changes: 1 addition & 1 deletion lldb/source/Symbol/Block.cpp
Expand Up @@ -230,7 +230,7 @@ Block *Block::GetContainingInlinedBlockWithCallSite(
const auto *function_info = inlined_block->GetInlinedFunctionInfo();

if (function_info &&
function_info->GetCallSite().FileAndLineEqual(find_call_site))
function_info->GetCallSite().FileAndLineEqual(find_call_site, true))
return inlined_block;
inlined_block = inlined_block->GetInlinedParent();
}
Expand Down
113 changes: 111 additions & 2 deletions lldb/source/Symbol/CompileUnit.cpp
Expand Up @@ -251,7 +251,10 @@ void CompileUnit::ResolveSymbolContext(
SymbolContextItem resolve_scope, SymbolContextList &sc_list,
RealpathPrefixes *realpath_prefixes) {
const FileSpec file_spec = src_location_spec.GetFileSpec();
const uint32_t line = src_location_spec.GetLine().value_or(0);
const uint32_t line =
src_location_spec.GetLine().value_or(LLDB_INVALID_LINE_NUMBER);
const uint32_t column_num =
src_location_spec.GetColumn().value_or(LLDB_INVALID_COLUMN_NUMBER);
const bool check_inlines = src_location_spec.GetCheckInlines();

// First find all of the file indexes that match our "file_spec". If
Expand All @@ -268,7 +271,7 @@ void CompileUnit::ResolveSymbolContext(
SymbolContext sc(GetModule());
sc.comp_unit = this;

if (line == 0) {
if (line == LLDB_INVALID_LINE_NUMBER) {
if (file_spec_matches_cu_file_spec && !check_inlines) {
// only append the context if we aren't looking for inline call sites by
// file and line and if the file spec matches that of the compile unit
Expand Down Expand Up @@ -312,6 +315,112 @@ void CompileUnit::ResolveSymbolContext(
0, file_indexes, src_location_spec, &line_entry);
}

// If we didn't manage to find a breakpoint that matched the line number
// requested, that might be because it is only an inline call site, and
// doesn't have a line entry in the line table. Scan for that here.
//
// We are making the assumption that if there was an inlined function it will
// contribute at least 1 non-call-site entry to the line table. That's handy
// because we don't move line breakpoints over function boundaries, so if we
// found a hit, and there was also a call site entry, it would have to be in
// the function containing the PC of the line table match. That way we can
// limit the call site search to that function.
// We will miss functions that ONLY exist as a call site entry.

if (line_entry.IsValid() &&
(line_entry.line != line || line_entry.column != column_num) &&
resolve_scope & eSymbolContextLineEntry && check_inlines) {
// We don't move lines over function boundaries, so the address in the
// line entry will be in the function that contained the line that might
// be a CallSite, and we can just iterate over that function to find any
// inline records, and dig up their call sites.
Address start_addr = line_entry.range.GetBaseAddress();
Function *function = start_addr.CalculateSymbolContextFunction();

Declaration sought_decl(file_spec, line, column_num);
// We use this recursive function to descend the block structure looking
// for a block that has this Declaration in its CallSite info.
// This function recursively scans the sibling blocks of the incoming
// block parameter.
std::function<void(Block &)> examine_block =
[&sought_decl, &sc_list, &src_location_spec, resolve_scope,
&examine_block](Block &block) -> void {
// Iterate over the sibling child blocks of the incoming block.
Block *sibling_block = block.GetFirstChild();
while (sibling_block) {
// We only have to descend through the regular blocks, looking for
// immediate inlines, since those are the only ones that will have this
// callsite.
const InlineFunctionInfo *inline_info =
sibling_block->GetInlinedFunctionInfo();
if (inline_info) {
// If this is the call-site we are looking for, record that:
// We need to be careful because the call site from the debug info
// will generally have a column, but the user might not have specified
// it.
Declaration found_decl = inline_info->GetCallSite();
uint32_t sought_column = sought_decl.GetColumn();
if (found_decl.FileAndLineEqual(sought_decl, false) &&
(sought_column == LLDB_INVALID_COLUMN_NUMBER ||
sought_column == found_decl.GetColumn())) {
// If we found a call site, it belongs not in this inlined block,
// but in the parent block that inlined it.
Address parent_start_addr;
if (sibling_block->GetParent()->GetStartAddress(
parent_start_addr)) {
SymbolContext sc;
parent_start_addr.CalculateSymbolContext(&sc, resolve_scope);
// Now swap out the line entry for the one we found.
LineEntry call_site_line = sc.line_entry;
call_site_line.line = found_decl.GetLine();
call_site_line.column = found_decl.GetColumn();
bool matches_spec = true;
// If the user asked for an exact match, we need to make sure the
// call site we found actually matches the location.
if (src_location_spec.GetExactMatch()) {
matches_spec = false;
if ((src_location_spec.GetFileSpec() ==
sc.line_entry.GetFile()) &&
(src_location_spec.GetLine() &&
*src_location_spec.GetLine() == call_site_line.line) &&
(src_location_spec.GetColumn() &&
*src_location_spec.GetColumn() == call_site_line.column))
matches_spec = true;
}
if (matches_spec &&
sibling_block->GetRangeAtIndex(0, call_site_line.range)) {
SymbolContext call_site_sc(sc.target_sp, sc.module_sp,
sc.comp_unit, sc.function, sc.block,
&call_site_line, sc.symbol);
sc_list.Append(call_site_sc);
}
}
}
}

// Descend into the child blocks:
examine_block(*sibling_block);
// Now go to the next sibling:
sibling_block = sibling_block->GetSibling();
}
};

if (function) {
// We don't need to examine the function block, it can't be inlined.
Block &func_block = function->GetBlock(true);
examine_block(func_block);
}
// If we found entries here, we are done. We only get here because we
// didn't find an exact line entry for this line & column, but if we found
// an exact match from the call site info that's strictly better than
// continuing to look for matches further on in the file.
// FIXME: Should I also do this for "call site line exists between the
// given line number and the later line we found in the line table"? That's
// a closer approximation to our general sliding algorithm.
if (sc_list.GetSize())
return;
}

// If "exact == true", then "found_line" will be the same as "line". If
// "exact == false", the "found_line" will be the closest line entry
// with a line number greater than "line" and we will use this for our
Expand Down
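The examine_block recursion above is easier to see on a toy tree. The sketch below uses an invented ToyBlock type rather than the real lldb_private::Block API, but follows the same shape: visit each child block, check any inlined child's recorded call-site line, then recurse into it.

```cpp
// Simplified model of the call-site scan; ToyBlock and its fields are
// invented for illustration and are not LLDB types.
#include <functional>
#include <optional>
#include <vector>

struct ToyBlock {
  // Set only for blocks that represent an inlined function call.
  std::optional<int> inlined_call_site_line;
  std::vector<ToyBlock> children;
};

// Count how many inlined call sites in the tree match `sought_line`.
static int CountMatchingCallSites(const ToyBlock &root, int sought_line) {
  int matches = 0;
  std::function<void(const ToyBlock &)> examine = [&](const ToyBlock &parent) {
    for (const ToyBlock &child : parent.children) {
      if (child.inlined_call_site_line &&
          *child.inlined_call_site_line == sought_line)
        ++matches;
      examine(child); // descend, just as examine_block recurses into children
    }
  };
  examine(root);
  return matches;
}
```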
171 changes: 56 additions & 115 deletions lldb/source/Target/StackFrameList.cpp
Expand Up @@ -85,121 +85,32 @@ void StackFrameList::ResetCurrentInlinedDepth() {
return;

std::lock_guard<std::recursive_mutex> guard(m_mutex);

GetFramesUpTo(0, DoNotAllowInterruption);
if (m_frames.empty())
return;
if (!m_frames[0]->IsInlined()) {
m_current_inlined_depth = UINT32_MAX;
m_current_inlined_pc = LLDB_INVALID_ADDRESS;
Log *log = GetLog(LLDBLog::Step);
if (log && log->GetVerbose())
LLDB_LOGF(
log,
"ResetCurrentInlinedDepth: Invalidating current inlined depth.\n");
return;
}

// We only need to do something special about inlined blocks when we are
// at the beginning of an inlined function:
// FIXME: We probably also have to do something special if the PC is at
// the END of an inlined function, which coincides with the end of either
// its containing function or another inlined function.

Block *block_ptr = m_frames[0]->GetFrameBlock();
if (!block_ptr)
return;
m_current_inlined_pc = LLDB_INVALID_ADDRESS;
m_current_inlined_depth = UINT32_MAX;

Address pc_as_address;
lldb::addr_t curr_pc = m_thread.GetRegisterContext()->GetPC();
pc_as_address.SetLoadAddress(curr_pc, &(m_thread.GetProcess()->GetTarget()));
AddressRange containing_range;
if (!block_ptr->GetRangeContainingAddress(pc_as_address, containing_range) ||
pc_as_address != containing_range.GetBaseAddress())
return;

// If we got here because of a breakpoint hit, then set the inlined depth
// depending on where the breakpoint was set. If we got here because of a
// crash, then set the inlined depth to the deepest most block. Otherwise,
// we stopped here naturally as the result of a step, so set ourselves in the
// containing frame of the whole set of nested inlines, so the user can then
// "virtually" step into the frames one by one, or next over the whole mess.
// Note: We don't have to handle being somewhere in the middle of the stack
// here, since ResetCurrentInlinedDepth doesn't get called if there is a
// valid inlined depth set.
StopInfoSP stop_info_sp = m_thread.GetStopInfo();
if (!stop_info_sp)
return;
switch (stop_info_sp->GetStopReason()) {
case eStopReasonWatchpoint:
case eStopReasonException:
case eStopReasonExec:
case eStopReasonFork:
case eStopReasonVFork:
case eStopReasonVForkDone:
case eStopReasonSignal:
// In all these cases we want to stop in the deepest frame.
m_current_inlined_pc = curr_pc;
m_current_inlined_depth = 0;
break;
case eStopReasonBreakpoint: {
// FIXME: Figure out what this break point is doing, and set the inline
// depth appropriately. Be careful to take into account breakpoints that
// implement step over prologue, since that should do the default
// calculation. For now, if the breakpoints corresponding to this hit are
// all internal, I set the stop location to the top of the inlined stack,
// since that will make things like stepping over prologues work right.
// But if there are any non-internal breakpoints I do to the bottom of the
// stack, since that was the old behavior.
uint32_t bp_site_id = stop_info_sp->GetValue();
BreakpointSiteSP bp_site_sp(
m_thread.GetProcess()->GetBreakpointSiteList().FindByID(bp_site_id));
bool all_internal = true;
if (bp_site_sp) {
uint32_t num_owners = bp_site_sp->GetNumberOfConstituents();
for (uint32_t i = 0; i < num_owners; i++) {
Breakpoint &bp_ref =
bp_site_sp->GetConstituentAtIndex(i)->GetBreakpoint();
if (!bp_ref.IsInternal()) {
all_internal = false;
}
}
}
if (!all_internal) {
m_current_inlined_pc = curr_pc;
m_current_inlined_depth = 0;
break;
}
}
[[fallthrough]];
default: {
// Otherwise, we should set ourselves at the container of the inlining, so
// that the user can descend into them. So first we check whether we have
// more than one inlined block sharing this PC:
int num_inlined_functions = 0;

for (Block *container_ptr = block_ptr->GetInlinedParent();
container_ptr != nullptr;
container_ptr = container_ptr->GetInlinedParent()) {
if (!container_ptr->GetRangeContainingAddress(pc_as_address,
containing_range))
break;
if (pc_as_address != containing_range.GetBaseAddress())
break;

num_inlined_functions++;
}
m_current_inlined_pc = curr_pc;
m_current_inlined_depth = num_inlined_functions + 1;
Log *log = GetLog(LLDBLog::Step);
bool inlined = true;
auto inline_depth = stop_info_sp->GetSuggestedStackFrameIndex(inlined);
// We're only adjusting the inlined stack here.
Log *log = GetLog(LLDBLog::Step);
if (inline_depth) {
m_current_inlined_depth = *inline_depth;
m_current_inlined_pc = m_thread.GetRegisterContext()->GetPC();

if (log && log->GetVerbose())
LLDB_LOGF(log,
"ResetCurrentInlinedDepth: setting inlined "
"depth: %d 0x%" PRIx64 ".\n",
m_current_inlined_depth, curr_pc);

break;
}
m_current_inlined_depth, m_current_inlined_pc);
} else {
if (log && log->GetVerbose())
LLDB_LOGF(
log,
"ResetCurrentInlinedDepth: Invalidating current inlined depth.\n");
}
}

Expand Down Expand Up @@ -816,19 +727,48 @@ void StackFrameList::SelectMostRelevantFrame() {

RecognizedStackFrameSP recognized_frame_sp = frame_sp->GetRecognizedFrame();

if (!recognized_frame_sp) {
LLDB_LOG(log, "Frame #0 not recognized");
return;
if (recognized_frame_sp) {
if (StackFrameSP most_relevant_frame_sp =
recognized_frame_sp->GetMostRelevantFrame()) {
LLDB_LOG(log, "Found most relevant frame at index {0}",
most_relevant_frame_sp->GetFrameIndex());
SetSelectedFrame(most_relevant_frame_sp.get());
return;
}
}
LLDB_LOG(log, "Frame #0 not recognized");

if (StackFrameSP most_relevant_frame_sp =
recognized_frame_sp->GetMostRelevantFrame()) {
LLDB_LOG(log, "Found most relevant frame at index {0}",
most_relevant_frame_sp->GetFrameIndex());
SetSelectedFrame(most_relevant_frame_sp.get());
} else {
LLDB_LOG(log, "No relevant frame!");
// If this thread has a non-trivial StopInfo, then let it suggest
// a most relevant frame:
StopInfoSP stop_info_sp = m_thread.GetStopInfo();
uint32_t stack_idx = 0;
bool found_relevant = false;
if (stop_info_sp) {
// Here we're only asking the stop info if it wants to adjust the real stack
// index. We have to ask about the m_inlined_stack_depth in
// Thread::ShouldStop since the plans need to reason with that info.
bool inlined = false;
std::optional<uint32_t> stack_opt =
stop_info_sp->GetSuggestedStackFrameIndex(inlined);
if (stack_opt) {
stack_idx = *stack_opt;
found_relevant = true;
}
}

frame_sp = GetFrameAtIndex(stack_idx);
if (!frame_sp)
LLDB_LOG(log, "Stop info suggested relevant frame {0} but it didn't exist",
stack_idx);
else if (found_relevant)
LLDB_LOG(log, "Setting selected frame from stop info to {0}", stack_idx);
// Note, we don't have to worry about "inlined" frames here, because we've
// already calculated the inlined frame in Thread::ShouldStop, and
// SetSelectedFrame will take care of that adjustment for us.
SetSelectedFrame(frame_sp.get());

if (!found_relevant)
LLDB_LOG(log, "No relevant frame!");
}

uint32_t StackFrameList::GetSelectedFrameIndex(
Expand All @@ -841,6 +781,7 @@ uint32_t StackFrameList::GetSelectedFrameIndex(
// isn't set, then don't force a selection here, just return 0.
if (!select_most_relevant)
return 0;
// If the inlined stack frame is set, then use that:
m_selected_frame_idx = 0;
}
return *m_selected_frame_idx;
Expand Down
55 changes: 55 additions & 0 deletions lldb/source/Target/StopInfo.cpp
Expand Up @@ -15,6 +15,7 @@
#include "lldb/Breakpoint/WatchpointResource.h"
#include "lldb/Core/Debugger.h"
#include "lldb/Expression/UserExpression.h"
#include "lldb/Symbol/Block.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/StopInfo.h"
#include "lldb/Target/Target.h"
Expand Down Expand Up @@ -246,6 +247,22 @@ class StopInfoBreakpoint : public StopInfo {
return m_description.c_str();
}

std::optional<uint32_t>
GetSuggestedStackFrameIndex(bool inlined_stack) override {
if (!inlined_stack)
return {};

ThreadSP thread_sp(m_thread_wp.lock());
if (!thread_sp)
return {};
BreakpointSiteSP bp_site_sp(
thread_sp->GetProcess()->GetBreakpointSiteList().FindByID(m_value));
if (!bp_site_sp)
return {};

return bp_site_sp->GetSuggestedStackFrameIndex();
}

protected:
bool ShouldStop(Event *event_ptr) override {
// This just reports the work done by PerformAction or the synchronous
Expand Down Expand Up @@ -1164,6 +1181,44 @@ class StopInfoTrace : public StopInfo {
else
return m_description.c_str();
}

std::optional<uint32_t>
GetSuggestedStackFrameIndex(bool inlined_stack) override {
// Trace only knows how to adjust inlined stacks:
if (!inlined_stack)
return {};

ThreadSP thread_sp = GetThread();
StackFrameSP frame_0_sp = thread_sp->GetStackFrameAtIndex(0);
if (!frame_0_sp)
return {};
if (!frame_0_sp->IsInlined())
return {};
Block *block_ptr = frame_0_sp->GetFrameBlock();
if (!block_ptr)
return {};
Address pc_address = frame_0_sp->GetFrameCodeAddress();
AddressRange containing_range;
if (!block_ptr->GetRangeContainingAddress(pc_address, containing_range) ||
pc_address != containing_range.GetBaseAddress())
return {};

int num_inlined_functions = 0;

for (Block *container_ptr = block_ptr->GetInlinedParent();
container_ptr != nullptr;
container_ptr = container_ptr->GetInlinedParent()) {
if (!container_ptr->GetRangeContainingAddress(pc_address,
containing_range))
break;
if (pc_address != containing_range.GetBaseAddress())
break;

num_inlined_functions++;
}
inlined_stack = true;
return num_inlined_functions + 1;
}
};

// StopInfoException
Expand Down
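StopInfoTrace::GetSuggestedStackFrameIndex above walks the inlined parents of the block containing the stop PC and counts how many of them begin exactly at that PC. Here is a stripped-down model of that walk, with an invented ToyBlock type standing in for lldb_private::Block and plain integers standing in for addresses.

```cpp
// Hedged sketch of the inlined-depth calculation; ToyBlock is invented and
// address handling is simplified for illustration.
#include <cstdint>
#include <optional>

struct ToyBlock {
  ToyBlock *inlined_parent = nullptr; // next enclosing inlined function
  uint64_t range_base = 0;            // start of the range containing the PC
};

static std::optional<uint32_t> SuggestedInlinedDepth(const ToyBlock &deepest,
                                                     uint64_t pc) {
  // Only interesting when we are stopped exactly at the start of the block.
  if (deepest.range_base != pc)
    return std::nullopt;

  uint32_t num_inlined_functions = 0;
  for (const ToyBlock *parent = deepest.inlined_parent; parent;
       parent = parent->inlined_parent) {
    if (parent->range_base != pc)
      break;
    ++num_inlined_functions;
  }
  // One past the shared-PC inlined functions: the outermost frame, so the
  // user can then "virtually" step down into the inlined calls one by one.
  return num_inlined_functions + 1;
}
```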
8 changes: 8 additions & 0 deletions lldb/source/Target/Thread.cpp
Expand Up @@ -619,6 +619,14 @@ void Thread::WillStop() {

void Thread::SetupForResume() {
if (GetResumeState() != eStateSuspended) {
// First check whether this thread is going to "actually" resume at all.
// For instance, if we're stepping from one level to the next of a
// virtual inlined call stack, we just change the inlined call stack index
// without actually running this thread. In that case, for this thread we
// shouldn't push a step over breakpoint plan or do that work.
if (GetCurrentPlan()->IsVirtualStep())
return;

// If we're at a breakpoint push the step-over breakpoint plan. Do this
// before telling the current plan it will resume, since we might change
// what the current plan is.
Expand Down
24 changes: 18 additions & 6 deletions lldb/source/Target/ThreadPlanStepInRange.cpp
Expand Up @@ -41,7 +41,7 @@ ThreadPlanStepInRange::ThreadPlanStepInRange(
"Step Range stepping in", thread, range, addr_context,
stop_others),
ThreadPlanShouldStopHere(this), m_step_past_prologue(true),
m_virtual_step(false), m_step_into_target(step_into_target) {
m_virtual_step(eLazyBoolCalculate), m_step_into_target(step_into_target) {
SetCallbacks();
SetFlagsToDefault();
SetupAvoidNoDebug(step_in_avoids_code_without_debug_info,
Expand Down Expand Up @@ -149,7 +149,7 @@ bool ThreadPlanStepInRange::ShouldStop(Event *event_ptr) {
m_sub_plan_sp.reset();
}

if (m_virtual_step) {
if (m_virtual_step == eLazyBoolYes) {
// If we've just completed a virtual step, all we need to do is check for a
// ShouldStopHere plan, and otherwise we're done.
// FIXME - This can be both a step in and a step out. Probably should
Expand Down Expand Up @@ -431,7 +431,7 @@ bool ThreadPlanStepInRange::DoPlanExplainsStop(Event *event_ptr) {

bool return_value = false;

if (m_virtual_step) {
if (m_virtual_step == eLazyBoolYes) {
return_value = true;
} else {
StopInfoSP stop_info_sp = GetPrivateStopInfo();
Expand Down Expand Up @@ -460,10 +460,13 @@ bool ThreadPlanStepInRange::DoPlanExplainsStop(Event *event_ptr) {

bool ThreadPlanStepInRange::DoWillResume(lldb::StateType resume_state,
bool current_plan) {
m_virtual_step = false;
m_virtual_step = eLazyBoolCalculate;
if (resume_state == eStateStepping && current_plan) {
Thread &thread = GetThread();
// See if we are about to step over a virtual inlined call.
// But if we already know we're virtual stepping, don't decrement the
// inlined depth again...

bool step_without_resume = thread.DecrementCurrentInlinedDepth();
if (step_without_resume) {
Log *log = GetLog(LLDBLog::Step);
Expand All @@ -476,11 +479,20 @@ bool ThreadPlanStepInRange::DoWillResume(lldb::StateType resume_state,
// FIXME: Maybe it would be better to create a InlineStep stop reason, but
// then
// the whole rest of the world would have to handle that stop reason.
m_virtual_step = true;
m_virtual_step = eLazyBoolYes;
}
return !step_without_resume;
}
return true;
}

bool ThreadPlanStepInRange::IsVirtualStep() { return m_virtual_step; }
bool ThreadPlanStepInRange::IsVirtualStep() {
if (m_virtual_step == eLazyBoolCalculate) {
Thread &thread = GetThread();
if (thread.GetCurrentInlinedDepth() == UINT32_MAX)
m_virtual_step = eLazyBoolNo;
else
m_virtual_step = eLazyBoolYes;
}
return m_virtual_step == eLazyBoolYes;
}
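The switch from a plain bool to eLazyBoolCalculate/No/Yes lets IsVirtualStep compute its answer on demand and cache it. A small self-contained sketch of that pattern follows; the compute callback is an invented stand-in for Thread::GetCurrentInlinedDepth, and the enum is redefined locally rather than taken from LLDB headers.

```cpp
// Hedged sketch of a lazily-computed tri-state flag; the enum values mirror
// LLDB's LazyBool but everything here is defined locally for illustration.
#include <functional>
#include <utility>

enum LazyBool { eLazyBoolCalculate = -1, eLazyBoolNo = 0, eLazyBoolYes = 1 };

class LazyFlag {
public:
  explicit LazyFlag(std::function<bool()> compute)
      : m_compute(std::move(compute)) {}

  bool Get() {
    if (m_value == eLazyBoolCalculate)
      m_value = m_compute() ? eLazyBoolYes : eLazyBoolNo;
    return m_value == eLazyBoolYes;
  }

  // Mirrors DoWillResume resetting m_virtual_step to eLazyBoolCalculate.
  void Reset() { m_value = eLazyBoolCalculate; }

private:
  std::function<bool()> m_compute;
  LazyBool m_value = eLazyBoolCalculate;
};
```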
2 changes: 1 addition & 1 deletion lldb/source/Target/ThreadPlanStepOverRange.cpp
Expand Up @@ -402,7 +402,7 @@ bool ThreadPlanStepOverRange::DoWillResume(lldb::StateType resume_state,
if (in_inlined_stack) {
Log *log = GetLog(LLDBLog::Step);
LLDB_LOGF(log,
"ThreadPlanStepInRange::DoWillResume: adjusting range to "
"ThreadPlanStepOverRange::DoWillResume: adjusting range to "
"the frame at inlined depth %d.",
thread.GetCurrentInlinedDepth());
StackFrameSP stack_sp = thread.GetStackFrameAtIndex(0);
Expand Down
Expand Up @@ -40,5 +40,3 @@ def test(self):
"frame variable ils",
substrs=['[4] = "surprise it is a long string!! yay!!"'],
)

self.expect("image list", substrs=self.getLibcPlusPlusLibs())
Expand Up @@ -132,12 +132,39 @@ def test_read_registers_using_g_packets(self):
target = self.createTarget("a.yaml")
process = self.connect(target)

self.assertEqual(1, self.server.responder.packetLog.count("g"))
self.server.responder.packetLog = []
# We want to make sure that the process is using the g packet, but the
# "connect" is not required to read all registers. However, it might
# have... So we wait until we explicitly call 'read_registers' to do the
# test.
# Also, even with use-g-packet-for-reading, lldb will sometimes send p0
# early on to see if the packet is supported. So we can't say that there
# will be NO p packets.
# But there certainly should be no p packets after the g packet.

self.read_registers(process)
# Reading registers should not cause any 'p' packets to be exchanged.
print(f"\nPACKET LOG:\n{self.server.responder.packetLog}\n")
g_pos = 0
try:
    g_pos = self.server.responder.packetLog.index("g")
except ValueError:
    self.fail("'g' packet not found after fetching registers")

# Make sure there is exactly one 'g' packet:
if "g" in self.server.responder.packetLog[g_pos + 1 :]:
    self.fail("Found more than one 'g' packet")

# Make sure there aren't any `p` packets after the `g` packet:
self.assertEqual(
0, len([p for p in self.server.responder.packetLog if p.startswith("p")])
0,
len(
[
p
for p in self.server.responder.packetLog[g_pos:]
if p.startswith("p")
]
),
)

def test_read_registers_using_p_packets(self):
Expand Down
Expand Up @@ -32,6 +32,12 @@ def test_step_in_template_with_python_api(self):
self.build()
self.step_in_template()

@add_test_categories(["pyapi"])
def test_virtual_inline_stepping(self):
"""Test stepping through a virtual inlined call stack"""
self.build()
self.virtual_inline_stepping()

def setUp(self):
# Call super's setUp().
TestBase.setUp(self)
Expand Down Expand Up @@ -357,3 +363,60 @@ def step_in_template(self):

step_sequence = [["// In max_value specialized", "into"]]
self.run_step_sequence(step_sequence)

def run_to_call_site_and_step(self, source_regex, func_name, start_pos):
main_spec = lldb.SBFileSpec("calling.cpp")
# Set the breakpoint by file and line, not sourced regex because
# we want to make sure we can set breakpoints on call sites:
call_site_line_num = line_number(self.main_source, source_regex)
target, process, thread, bkpt = lldbutil.run_to_line_breakpoint(
self, main_spec, call_site_line_num
)

# Make sure that the location is at the call site (run_to_line_breakpoint already asserted
# that there's one location.):
bkpt_loc = bkpt.location[0]
strm = lldb.SBStream()
result = bkpt_loc.GetDescription(strm, lldb.eDescriptionLevelFull)

self.assertTrue(result, "Got a location description")
desc = strm.GetData()
self.assertIn(f"calling.cpp:{call_site_line_num}", desc, "Right line listed")
# We don't get the function name right yet - so we omit it in printing.
# Turn on this test when that is working.
# self.assertIn(func_name, desc, "Right function listed")

pc = thread.frame[0].pc
for i in range(start_pos, 3):
thread.StepInto()
frame_0 = thread.frame[0]

trivial_line_num = line_number(
self.main_source, f"In caller_trivial_inline_{i}."
)
self.assertEqual(
frame_0.line_entry.line,
trivial_line_num,
f"Stepped into the caller_trivial_inline_{i}",
)
if pc != frame_0.pc:
# If we get here, we stepped to the expected line number, but
# the compiler on this system has decided to insert an instruction
# between the call site of an inlined function with no arguments,
# returning void, and its immediate call to another void inlined function
# with no arguments. We aren't going to be testing virtual inline
# stepping for this function...
break

process.Kill()
target.Clear()

def virtual_inline_stepping(self):
"""Use the Python API's to step through a virtual inlined stack"""
self.run_to_call_site_and_step("At caller_trivial_inline_1", "main", 1)
self.run_to_call_site_and_step(
"In caller_trivial_inline_1", "caller_trivial_inline_1", 2
)
self.run_to_call_site_and_step(
"In caller_trivial_inline_2", "caller_trivial_inline_2", 3
)
25 changes: 25 additions & 0 deletions lldb/test/API/functionalities/inline-stepping/calling.cpp
Expand Up @@ -13,6 +13,12 @@ int called_by_inline_ref (int &value);
inline void inline_trivial_1 () __attribute__((always_inline));
inline void inline_trivial_2 () __attribute__((always_inline));

// These three should share the same initial pc so we can test
// virtual inline stepping.
inline void caller_trivial_inline_1() __attribute__((always_inline));
inline void caller_trivial_inline_2() __attribute__((always_inline));
inline void caller_trivial_inline_3() __attribute__((always_inline));

void caller_trivial_1 ();
void caller_trivial_2 ();

Expand Down Expand Up @@ -79,6 +85,23 @@ caller_trivial_2 ()
inline_value += 1; // At increment in caller_trivial_2.
}

// When you call caller_trivial_inline_1, the inlined call-site
// should share a PC with all three of the following inlined
// functions, so we can exercise "virtual inline stepping".
void caller_trivial_inline_1() {
caller_trivial_inline_2(); // In caller_trivial_inline_1.
inline_value += 1;
}

void caller_trivial_inline_2() {
caller_trivial_inline_3(); // In caller_trivial_inline_2.
inline_value += 1;
}

void caller_trivial_inline_3() {
inline_value += 1; // In caller_trivial_inline_3.
}

void
called_by_inline_trivial ()
{
Expand Down Expand Up @@ -132,5 +155,7 @@ main (int argc, char **argv)
max_value(123, 456); // Call max_value template
max_value(std::string("abc"), std::string("0022")); // Call max_value specialized

caller_trivial_inline_1(); // At caller_trivial_inline_1.

return 0; // About to return from main.
}
7 changes: 5 additions & 2 deletions lldb/test/API/lang/cpp/namespace/TestNamespaceLookup.py
Expand Up @@ -8,7 +8,7 @@
from lldbsuite.test.decorators import *
from lldbsuite.test.lldbtest import *
from lldbsuite.test import lldbutil

from lldbsuite.test import lldbplatformutil

class NamespaceLookupTestCase(TestBase):
def setUp(self):
Expand Down Expand Up @@ -167,7 +167,10 @@ def test_scope_lookup_with_run_command(self):
self.runToBkpt("continue")
# FIXME: In DWARF 5 with dsyms, the ordering of functions is slightly
# different, which also hits the same issues mentioned previously.
if configuration.dwarf_version <= 4 or self.getDebugInfo() == "dwarf":
if (
int(lldbplatformutil.getDwarfVersion()) <= 4
or self.getDebugInfo() == "dwarf"
):
self.expect_expr("func()", result_type="int", result_value="2")

# Continue to BP_ns_scope at ns scope
Expand Down
39 changes: 23 additions & 16 deletions lldb/test/API/python_api/process/io/TestProcessIO.py
Expand Up @@ -99,31 +99,38 @@ def test_stdout_stderr_redirection(self):
@expectedFlakeyLinux(bugnumber="llvm.org/pr26437")
@skipIfDarwinEmbedded # debugserver can't create/write files on the device
def test_stdout_stderr_redirection_to_existing_files(self):
"""Exercise SBLaunchInfo::AddOpenFileAction() for STDOUT and STDERR without redirecting STDIN to output files already exist."""
"""Exercise SBLaunchInfo::AddOpenFileAction() for STDOUT and STDERR redirect to output files already exist."""
self.setup_test()
self.build()
self.create_target()
self.write_file_with_placeholder(self.output_file)
self.write_file_with_placeholder(self.error_file)
self.redirect_stdout()
self.redirect_stderr()
self.run_process(True)
output = self.read_output_file_and_delete()
error = self.read_error_file_and_delete()
self.check_process_output(output, error)

def write_file_with_placeholder(self, target_file):
# Create the output and error files with placeholder
placeholder = "This content should be overwritten."
# Local file directory and working directory are the same for local debugging
f = open(self.local_output_file, "w")
f.write(placeholder)
f.close()
f = open(self.local_error_file, "w")
f.write(placeholder)
f.close()
if lldb.remote_platform:
self.runCmd(
'platform file write "{target}" -d "{data}"'.format(
target=target_file, data=placeholder
'platform put-file "{local}" "{remote}"'.format(
local=self.local_output_file, remote=self.output_file
)
)
self.runCmd(
'platform put-file "{local}" "{remote}"'.format(
local=self.local_error_file, remote=self.error_file
)
)
else:
f = open(target_file, "w")
f.write(placeholder)
f.close()

self.redirect_stdout()
self.redirect_stderr()
self.run_process(True)
output = self.read_output_file_and_delete()
error = self.read_error_file_and_delete()
self.check_process_output(output, error)

# target_file - path on local file system or remote file system if running remote
# local_file - path on local system
Expand Down
4 changes: 2 additions & 2 deletions lldb/test/API/python_api/type/TestTypeList.py
Expand Up @@ -6,7 +6,7 @@
from lldbsuite.test.decorators import *
from lldbsuite.test.lldbtest import *
from lldbsuite.test import lldbutil

from lldbsuite.test import lldbplatformutil

class TypeAndTypeListTestCase(TestBase):
def setUp(self):
Expand Down Expand Up @@ -248,7 +248,7 @@ def test(self):
self.assertEqual(myint_arr_element_type, myint_type)

# Test enum methods. Requires DW_AT_enum_class which was added in Dwarf 4.
if configuration.dwarf_version >= 4:
if int(lldbplatformutil.getDwarfVersion()) >= 4:
enum_type = target.FindFirstType("EnumType")
self.assertTrue(enum_type)
self.DebugSBType(enum_type)
Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
Expand Up @@ -1267,7 +1267,7 @@ class MachineIRBuilder {
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res,
const SrcOp &Op0, const SrcOp &Op1,
std::optional<unsigned> Flags = std::nullopt);
std::optional<unsigned> Flags = std::nullopt);

/// Build and insert a \p Res = G_FCMP \p Pred\p Op0, \p Op1
///
Expand Down
Expand Up @@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_EXECUTIONENGINE_ORC_JITLINKREDIRECABLEMANAGER_H
#define LLVM_EXECUTIONENGINE_ORC_JITLINKREDIRECABLEMANAGER_H
#ifndef LLVM_EXECUTIONENGINE_ORC_JITLINKREDIRECABLESYMBOLMANAGER_H
#define LLVM_EXECUTIONENGINE_ORC_JITLINKREDIRECABLESYMBOLMANAGER_H

#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/Orc/RedirectionManager.h"
Expand Down Expand Up @@ -103,4 +103,4 @@ class JITLinkRedirectableSymbolManager : public RedirectableSymbolManager,
} // namespace orc
} // namespace llvm

#endif
#endif // LLVM_EXECUTIONENGINE_ORC_JITLINKREDIRECABLESYMBOLMANAGER_H
2 changes: 1 addition & 1 deletion llvm/include/llvm/ExecutionEngine/Orc/ReOptimizeLayer.h
Expand Up @@ -178,4 +178,4 @@ class ReOptimizeLayer : public IRLayer, public ResourceManager {
} // namespace orc
} // namespace llvm

#endif
#endif // LLVM_EXECUTIONENGINE_ORC_REOPTIMIZELAYER_H
8 changes: 8 additions & 0 deletions llvm/include/llvm/Transforms/Utils/Cloning.h
Expand Up @@ -175,6 +175,14 @@ void CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueMapTypeRemapper *TypeMapper = nullptr,
ValueMaterializer *Materializer = nullptr);

/// Clone OldFunc's attributes into NewFunc, transforming values based on the
/// mappings in VMap.
void CloneFunctionAttributesInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
ValueMapTypeRemapper *TypeMapper = nullptr,
ValueMaterializer *Materializer = nullptr);

void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
const Instruction *StartingInst,
ValueToValueMapTy &VMap, bool ModuleLevelChanges,
Expand Down
42 changes: 20 additions & 22 deletions llvm/lib/Target/AArch64/AArch64RegisterInfo.td
Expand Up @@ -19,18 +19,24 @@ class AArch64Reg<bits<16> enc, string n, list<Register> subregs = [],
}

let Namespace = "AArch64" in {
// SubRegIndexes for GPR registers
def sub_32 : SubRegIndex<32>;
def sube64 : SubRegIndex<64>;
def subo64 : SubRegIndex<64>;
def sube32 : SubRegIndex<32>;
def subo32 : SubRegIndex<32>;

// SubRegIndexes for FPR/Vector registers
def bsub : SubRegIndex<8>;
def hsub : SubRegIndex<16>;
def ssub : SubRegIndex<32>;
def dsub : SubRegIndex<64>;
def sube32 : SubRegIndex<32>;
def subo32 : SubRegIndex<32>;
def sube64 : SubRegIndex<64>;
def subo64 : SubRegIndex<64>;
// SVE
def zsub : SubRegIndex<128>;
def zsub : SubRegIndex<128>;
// Note: Code depends on these having consecutive numbers
def zsub0 : SubRegIndex<128, -1>;
def zsub1 : SubRegIndex<128, -1>;
def zsub2 : SubRegIndex<128, -1>;
def zsub3 : SubRegIndex<128, -1>;
// Note: Code depends on these having consecutive numbers
def dsub0 : SubRegIndex<64>;
def dsub1 : SubRegIndex<64>;
Expand All @@ -41,7 +47,8 @@ let Namespace = "AArch64" in {
def qsub1 : SubRegIndex<128>;
def qsub2 : SubRegIndex<128>;
def qsub3 : SubRegIndex<128>;
// Note: Code depends on these having consecutive numbers

// SubRegIndexes for SME Matrix tiles
def zasubb : SubRegIndex<2048>; // (16 x 16)/1 bytes = 2048 bits
def zasubh0 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits
def zasubh1 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits
Expand All @@ -52,7 +59,11 @@ let Namespace = "AArch64" in {
def zasubq0 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits
def zasubq1 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits

def psub : SubRegIndex<16>;
// SubRegIndexes for SVE Predicates
def psub : SubRegIndex<16>;
// Note: Code depends on these having consecutive numbers
def psub0 : SubRegIndex<16, -1>;
def psub1 : SubRegIndex<16, -1>;
}

let Namespace = "AArch64" in {
Expand Down Expand Up @@ -1026,11 +1037,6 @@ def PNR16_p8to15 : PNRP8to15RegOp<"h", PNRAsmOp16_p8to15, 16, PNR_p8to15>;
def PNR32_p8to15 : PNRP8to15RegOp<"s", PNRAsmOp32_p8to15, 32, PNR_p8to15>;
def PNR64_p8to15 : PNRP8to15RegOp<"d", PNRAsmOp64_p8to15, 64, PNR_p8to15>;

let Namespace = "AArch64" in {
def psub0 : SubRegIndex<16, -1>;
def psub1 : SubRegIndex<16, -1>;
}

class PPRorPNRClass : RegisterClass<
"AArch64",
[ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1, aarch64svcount ], 16,
Expand Down Expand Up @@ -1123,8 +1129,7 @@ let EncoderMethod = "EncodeRegMul_MinMax<2, 0, 14>",
} // end let EncoderMethod/DecoderMethod


//******************************************************************************

//===----------------------------------------------------------------------===//
// SVE vector register classes
class ZPRClass<int firstreg, int lastreg, int step = 1> : RegisterClass<"AArch64",
[nxv16i8, nxv8i16, nxv4i32, nxv2i64,
Expand Down Expand Up @@ -1245,13 +1250,6 @@ def FPR32asZPR : FPRasZPROperand<32>;
def FPR64asZPR : FPRasZPROperand<64>;
def FPR128asZPR : FPRasZPROperand<128>;

let Namespace = "AArch64" in {
def zsub0 : SubRegIndex<128, -1>;
def zsub1 : SubRegIndex<128, -1>;
def zsub2 : SubRegIndex<128, -1>;
def zsub3 : SubRegIndex<128, -1>;
}

// Pairs, triples, and quads of SVE vector registers.
def ZSeqPairs : RegisterTuples<[zsub0, zsub1], [(rotl ZPR, 0), (rotl ZPR, 1)]>;
def ZSeqTriples : RegisterTuples<[zsub0, zsub1, zsub2], [(rotl ZPR, 0), (rotl ZPR, 1), (rotl ZPR, 2)]>;
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
Expand Up @@ -1017,6 +1017,12 @@ let Predicates = [HasSME2p2] in {
defm FMUL_2Z2Z : sme2_multi2_fmul_mm< "fmul">;
defm FMUL_4ZZ : sme2_multi4_fmul_sm<"fmul">;
defm FMUL_4Z4Z : sme2_multi4_fmul_mm< "fmul">;

defm FMOP4A : sme2_fmop4as_fp32_non_widening<0, "fmop4a">;
defm FMOP4S : sme2_fmop4as_fp32_non_widening<1, "fmop4s">;

defm FMOP4A : sme2_fmop4as_fp16_fp32_widening<0, "fmop4a">;
defm FMOP4S : sme2_fmop4as_fp16_fp32_widening<1, "fmop4s">;
} // [HasSME2p2]

let Predicates = [HasSME2p2, HasSMEB16B16] in {
Expand Down Expand Up @@ -1052,3 +1058,8 @@ let Predicates = [HasSME2p2, HasSMEB16B16] in {
defm BFMOP4A : sme2_bfmop4as_non_widening<0, "bfmop4a">;
defm BFMOP4S : sme2_bfmop4as_non_widening<1, "bfmop4s">;
}

let Predicates = [HasSME2p2, HasSMEF64F64] in {
defm FMOP4A : sme2_fmop4as_fp64_non_widening<0, "fmop4a">;
defm FMOP4S : sme2_fmop4as_fp64_non_widening<1, "fmop4s">;
}
9 changes: 9 additions & 0 deletions llvm/lib/Target/AArch64/AArch64Subtarget.cpp
Expand Up @@ -86,6 +86,13 @@ static cl::alias AArch64StreamingStackHazardSize(
cl::desc("alias for -aarch64-streaming-hazard-size"),
cl::aliasopt(AArch64StreamingHazardSize));

// Subreg liveness tracking is disabled by default for now until all issues
// are ironed out. This option allows the feature to be used in tests.
static cl::opt<bool>
EnableSubregLivenessTracking("aarch64-enable-subreg-liveness-tracking",
cl::init(false), cl::Hidden,
cl::desc("Enable subreg liveness tracking"));

unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
return OverrideVectorInsertExtractBaseCost;
Expand Down Expand Up @@ -380,6 +387,8 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
ReserveXRegisterForRA.set(29);

AddressCheckPSV.reset(new AddressCheckPseudoSourceValue(TM));

EnableSubregLiveness = EnableSubregLivenessTracking.getValue();
}

const CallLowering *AArch64Subtarget::getCallLowering() const {
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AArch64/AArch64Subtarget.h
Expand Up @@ -90,6 +90,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
unsigned VScaleForTuning = 2;
TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled;

bool EnableSubregLiveness;

/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;

Expand Down Expand Up @@ -153,6 +155,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
const Triple &getTargetTriple() const { return TargetTriple; }
bool enableMachineScheduler() const override { return true; }
bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
bool enableSubRegLiveness() const override { return EnableSubregLiveness; }

bool enableMachinePipeliner() const override;
bool useDFAforSMS() const override { return false; }
Expand Down
111 changes: 111 additions & 0 deletions llvm/lib/Target/AArch64/SMEInstrFormats.td
Expand Up @@ -5454,3 +5454,114 @@ multiclass sme2_bfmop4as_non_widening<bit S, string mnemonic> {
// Multiple vectors
def _M2Z2Z_H : sme2_bf16_fp16_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>;
}

class sme2_fp32_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
: I<(outs TileOp32:$ZAda),
(ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
mnemonic, "\t$ZAda, $Zn, $Zm",
"", []>, Sched<[]> {
bits<2> ZAda;
bits<3> Zn;
bits<3> Zm;

let Inst{31-21} = 0b10000000000;
let Inst{20} = M;
let Inst{19-17} = Zm;
let Inst{16-10} = 0b0000000;
let Inst{9} = N;
let Inst{8-6} = Zn;
let Inst{5} = 0;
let Inst{4} = S;
let Inst{3-2} = 0b00;
let Inst{1-0} = ZAda;

let Constraints = "$ZAda = $_ZAda";
}

multiclass sme2_fmop4as_fp32_non_widening<bit S, string mnemonic> {
// Single vectors
def _MZZ_S : sme2_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR32Mul2_Lo, ZPR32Mul2_Hi>;

// Multiple and single vectors
def _M2ZZ_S : sme2_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_s_mul_r_Lo, ZPR32Mul2_Hi>;

// Single and multiple vectors
def _MZ2Z_S : sme2_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR32Mul2_Lo, ZZ_s_mul_r_Hi>;

// Multiple vectors
def _M2Z2Z_S : sme2_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_s_mul_r_Lo, ZZ_s_mul_r_Hi>;
}

class sme2_fp64_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
: I<(outs TileOp64:$ZAda),
(ins TileOp64:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
mnemonic, "\t$ZAda, $Zn, $Zm",
"", []>, Sched<[]> {
bits<3> ZAda;
bits<3> Zn;
bits<3> Zm;

let Inst{31-21} = 0b10000000110;
let Inst{20} = M;
let Inst{19-17} = Zm;
let Inst{16-10} = 0b0000000;
let Inst{9} = N;
let Inst{8-6} = Zn;
let Inst{5} = 0;
let Inst{4} = S;
let Inst{3} = 0b1;
let Inst{2-0} = ZAda;

let Constraints = "$ZAda = $_ZAda";
}

multiclass sme2_fmop4as_fp64_non_widening<bit S, string mnemonic> {
// Single vectors
def _MZZ_D : sme2_fp64_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR64Mul2_Lo, ZPR64Mul2_Hi>;

// Multiple and single vectors
def _M2ZZ_D : sme2_fp64_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_d_mul_r_Lo, ZPR64Mul2_Hi>;

// Single and multiple vectors
def _MZ2Z_D : sme2_fp64_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR64Mul2_Lo, ZZ_d_mul_r_Hi>;

// Multiple vectors
def _M2Z2Z_D : sme2_fp64_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_d_mul_r_Lo, ZZ_d_mul_r_Hi>;
}

class sme2_fp16_fp32_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
: I<(outs TileOp32:$ZAda),
(ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
mnemonic, "\t$ZAda, $Zn, $Zm",
"", []>, Sched<[]> {
bits<2> ZAda;
bits<3> Zn;
bits<3> Zm;

let Inst{31-21} = 0b10000001001;
let Inst{20} = M;
let Inst{19-17} = Zm;
let Inst{16-10} = 0b0000000;
let Inst{9} = N;
let Inst{8-6} = Zn;
let Inst{5} = 0;
let Inst{4} = S;
let Inst{3-2} = 0b00;
let Inst{1-0} = ZAda;

let Constraints = "$ZAda = $_ZAda";
}

multiclass sme2_fmop4as_fp16_fp32_widening<bit S, string mnemonic> {
// Single vectors
def _MZZ_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>;

// Multiple and single vectors
def _M2ZZ_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>;

// Single and multiple vectors
def _MZ2Z_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>;

// Multiple vectors
def _M2Z2Z_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>;
}
7 changes: 0 additions & 7 deletions llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
Expand Up @@ -230,13 +230,6 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
return AddrReg.getReg(0);
}

void assignValueToReg(Register ValVReg, Register PhysReg,
const CCValAssign &VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
}

void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
const MachinePointerInfo &MPO,
const CCValAssign &VA) override {
Expand Down
15 changes: 8 additions & 7 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Expand Up @@ -3855,10 +3855,14 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,

unsigned ArgIdx = 0;
for (auto [Reg, Val] : RegsToPass) {
if (ArgIdx++ >= NumSpecialInputs && !Val->isDivergent() &&
TRI->isSGPRPhysReg(Reg)) {
// Speculatively insert a readfirstlane in case this is a uniform value in
// a VGPR.
if (ArgIdx++ >= NumSpecialInputs &&
(IsChainCallConv || !Val->isDivergent()) && TRI->isSGPRPhysReg(Reg)) {
// For chain calls, the inreg arguments are required to be
// uniform. Speculatively insert a readfirstlane in case we cannot prove
// they are uniform.
//
// For other calls, if an inreg argument is known to be uniform,
// speculatively insert a readfirstlane in case it is in a VGPR.
//
// FIXME: We need to execute this in a waterfall loop if it is a divergent
// value, so let that continue to produce invalid code.
Expand Down Expand Up @@ -3893,9 +3897,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
Ops.push_back(DAG.getTargetGlobalAddress(GV, DL, MVT::i64));
} else {
if (IsTailCall) {
assert(!Callee->isDivergent() &&
"cannot tail call a divergent call target");

// isEligibleForTailCallOptimization considered whether the call target is
// divergent, but we may still end up with a uniform value in a VGPR.
// Insert a readfirstlane just in case.
Expand Down
13 changes: 0 additions & 13 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Expand Up @@ -3553,19 +3553,6 @@ def : AMDGPUPat <
(V_BFE_U32_e64 $src, (i32 0), $width)
>;

// x << (bitwidth - y) >> (bitwidth - y)
def : AMDGPUPat <
(DivergentBinFrag<srl> (shl_oneuse i32:$src, (sub 32, i32:$width)),
(sub 32, i32:$width)),
(V_BFE_U32_e64 $src, (i32 0), $width)
>;

def : AMDGPUPat <
(DivergentBinFrag<sra> (shl_oneuse i32:$src, (sub 32, i32:$width)),
(sub 32, i32:$width)),
(V_BFE_I32_e64 $src, (i32 0), $width)
>;

// SHA-256 Ma patterns

// ((x & z) | (y & (x | z))) -> BFI (XOR x, y), z, y
Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
Expand Up @@ -1335,8 +1335,6 @@ NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const {
if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
VT.getScalarType() == MVT::i1)
return TypeSplitVector;
if (Isv2x16VT(VT))
return TypeLegal;
return TargetLoweringBase::getPreferredVectorAction(VT);
}

Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
Expand Up @@ -341,7 +341,9 @@ enum OperandType : unsigned {
OPERAND_VEC_POLICY,
// Vector SEW operand.
OPERAND_SEW,
OPERAND_LAST_RISCV_IMM = OPERAND_SEW,
// Vector rounding mode for VXRM or FRM.
OPERAND_VEC_RM,
OPERAND_LAST_RISCV_IMM = OPERAND_VEC_RM,
// Operand is either a register or uimm5, this is used by V extension pseudo
// instructions to represent a value that be passed as AVL to either vsetvli
// or vsetivli.
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
Expand Up @@ -693,7 +693,7 @@ bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {

// The constants that can be encoded in the THeadMemIdx instructions
// are of the form (sign_extend(imm5) << imm2).
int64_t Shift;
unsigned Shift;
for (Shift = 0; Shift < 4; Shift++)
if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
break;
Expand Down Expand Up @@ -3366,7 +3366,7 @@ bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
SDValue &Shl2) {
if (auto *C = dyn_cast<ConstantSDNode>(N)) {
int64_t Offset = C->getSExtValue();
int64_t Shift;
unsigned Shift;
for (Shift = 0; Shift < 4; Shift++)
if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
break;
Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
Expand Up @@ -2551,6 +2551,13 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
case RISCVOp::OPERAND_SEW:
Ok = Imm == 0 || (Imm >= 3 && Imm <= 6);
break;
case RISCVOp::OPERAND_VEC_RM:
assert(RISCVII::hasRoundModeOp(Desc.TSFlags));
if (RISCVII::usesVXRM(Desc.TSFlags))
Ok = isUInt<2>(Imm);
else
Ok = RISCVFPRndMode::isValidRoundingMode(Imm);
break;
}
if (!Ok) {
ErrInfo = "Invalid immediate";
Expand Down Expand Up @@ -2623,6 +2630,13 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}

if (int Idx = RISCVII::getFRMOpNum(Desc);
Idx >= 0 && MI.getOperand(Idx).getImm() == RISCVFPRndMode::DYN &&
!MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr)) {
ErrInfo = "dynamic rounding mode should read FRM";
return false;
}

return true;
}

Expand Down
252 changes: 74 additions & 178 deletions llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

Large diffs are not rendered by default.

20 changes: 10 additions & 10 deletions llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
Expand Up @@ -2639,23 +2639,23 @@ foreach fvti = AllFloatVectors in {
// 13.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
defm : VPatConvertFP2IVL_V_RM<riscv_vfcvt_xu_f_vl, "PseudoVFCVT_XU_F_V">;
defm : VPatConvertFP2IVL_V_RM<riscv_vfcvt_x_f_vl, "PseudoVFCVT_X_F_V">;
defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_rm_xu_f_vl, "PseudoVFCVT_RM_XU_F_V">;
defm : VPatConvertFP2I_RM_VL_V<any_riscv_vfcvt_rm_x_f_vl, "PseudoVFCVT_RM_X_F_V">;
defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_rm_xu_f_vl, "PseudoVFCVT_XU_F_V">;
defm : VPatConvertFP2I_RM_VL_V<any_riscv_vfcvt_rm_x_f_vl, "PseudoVFCVT_X_F_V">;

defm : VPatConvertFP2IVL_V<any_riscv_vfcvt_rtz_xu_f_vl, "PseudoVFCVT_RTZ_XU_F_V">;
defm : VPatConvertFP2IVL_V<any_riscv_vfcvt_rtz_x_f_vl, "PseudoVFCVT_RTZ_X_F_V">;

defm : VPatConvertI2FPVL_V_RM<any_riscv_uint_to_fp_vl, "PseudoVFCVT_F_XU_V">;
defm : VPatConvertI2FPVL_V_RM<any_riscv_sint_to_fp_vl, "PseudoVFCVT_F_X_V">;

defm : VPatConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_xu_vl, "PseudoVFCVT_RM_F_XU_V">;
defm : VPatConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_x_vl, "PseudoVFCVT_RM_F_X_V">;
defm : VPatConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_xu_vl, "PseudoVFCVT_F_XU_V">;
defm : VPatConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_x_vl, "PseudoVFCVT_F_X_V">;

// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
defm : VPatWConvertFP2IVL_V_RM<riscv_vfcvt_xu_f_vl, "PseudoVFWCVT_XU_F_V">;
defm : VPatWConvertFP2IVL_V_RM<riscv_vfcvt_x_f_vl, "PseudoVFWCVT_X_F_V">;
defm : VPatWConvertFP2I_RM_VL_V<riscv_vfcvt_rm_xu_f_vl, "PseudoVFWCVT_RM_XU_F_V">;
defm : VPatWConvertFP2I_RM_VL_V<riscv_vfcvt_rm_x_f_vl, "PseudoVFWCVT_RM_X_F_V">;
defm : VPatWConvertFP2I_RM_VL_V<riscv_vfcvt_rm_xu_f_vl, "PseudoVFWCVT_XU_F_V">;
defm : VPatWConvertFP2I_RM_VL_V<riscv_vfcvt_rm_x_f_vl, "PseudoVFWCVT_X_F_V">;

defm : VPatWConvertFP2IVL_V<any_riscv_vfcvt_rtz_xu_f_vl, "PseudoVFWCVT_RTZ_XU_F_V">;
defm : VPatWConvertFP2IVL_V<any_riscv_vfcvt_rtz_x_f_vl, "PseudoVFWCVT_RTZ_X_F_V">;
Expand Down Expand Up @@ -2696,17 +2696,17 @@ foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
// 13.19 Narrowing Floating-Point/Integer Type-Convert Instructions
defm : VPatNConvertFP2IVL_W_RM<riscv_vfcvt_xu_f_vl, "PseudoVFNCVT_XU_F_W">;
defm : VPatNConvertFP2IVL_W_RM<riscv_vfcvt_x_f_vl, "PseudoVFNCVT_X_F_W">;
defm : VPatNConvertFP2I_RM_VL_W<riscv_vfcvt_rm_xu_f_vl, "PseudoVFNCVT_RM_XU_F_W">;
defm : VPatNConvertFP2I_RM_VL_W<riscv_vfcvt_rm_x_f_vl, "PseudoVFNCVT_RM_X_F_W">;
defm : VPatNConvertFP2I_RM_VL_W<riscv_vfcvt_rm_xu_f_vl, "PseudoVFNCVT_XU_F_W">;
defm : VPatNConvertFP2I_RM_VL_W<riscv_vfcvt_rm_x_f_vl, "PseudoVFNCVT_X_F_W">;

defm : VPatNConvertFP2IVL_W<any_riscv_vfcvt_rtz_xu_f_vl, "PseudoVFNCVT_RTZ_XU_F_W">;
defm : VPatNConvertFP2IVL_W<any_riscv_vfcvt_rtz_x_f_vl, "PseudoVFNCVT_RTZ_X_F_W">;

defm : VPatNConvertI2FPVL_W_RM<any_riscv_uint_to_fp_vl, "PseudoVFNCVT_F_XU_W">;
defm : VPatNConvertI2FPVL_W_RM<any_riscv_sint_to_fp_vl, "PseudoVFNCVT_F_X_W">;

defm : VPatNConvertI2FP_RM_VL_W<riscv_vfcvt_rm_f_xu_vl, "PseudoVFNCVT_RM_F_XU_W">;
defm : VPatNConvertI2FP_RM_VL_W<riscv_vfcvt_rm_f_x_vl, "PseudoVFNCVT_RM_F_X_W">;
defm : VPatNConvertI2FP_RM_VL_W<riscv_vfcvt_rm_f_xu_vl, "PseudoVFNCVT_F_XU_W">;
defm : VPatNConvertI2FP_RM_VL_W<riscv_vfcvt_rm_f_x_vl, "PseudoVFNCVT_F_X_W">;

foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
Expand Up @@ -222,7 +222,8 @@ let Predicates = [HasVendorXSfvfwmaccqqq], DecoderNamespace = "XSfvfwmaccqqq",
def VFWMACC_4x4x4 : CustomSiFiveVMACC<0b111100, OPFVV, "sf.vfwmacc.4x4x4">;
}

let Predicates = [HasVendorXSfvfnrclipxfqf], DecoderNamespace = "XSfvfnrclipxfqf" in {
let Predicates = [HasVendorXSfvfnrclipxfqf], DecoderNamespace = "XSfvfnrclipxfqf",
Uses = [FRM] in {
def VFNRCLIP_XU_F_QF : CustomSiFiveVFNRCLIP<0b100010, OPFVF, "sf.vfnrclip.xu.f.qf">;
def VFNRCLIP_X_F_QF : CustomSiFiveVFNRCLIP<0b100011, OPFVF, "sf.vfnrclip.x.f.qf">;
}
Expand Down Expand Up @@ -405,7 +406,7 @@ multiclass VPseudoSiFiveVFWMACC<string Constraint = ""> {

multiclass VPseudoSiFiveVFNRCLIP<string Constraint = "@earlyclobber $rd"> {
foreach i = 0-4 in
let hasSideEffects = 0 in
let hasSideEffects = 0, hasPostISelHook = 1 in
defm "Pseudo" # NAME : VPseudoBinaryRoundingMode<MxListW[i].vrclass,
MxListVF4[i].vrclass,
FPR32, MxListW[i],
31 changes: 19 additions & 12 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -723,8 +723,7 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(

// The interleaved memory access pass will lower interleaved memory ops (i.e
// a load and store followed by a specific shuffle) to vlseg/vsseg
// intrinsics. In those cases then we can treat it as if it's just one (legal)
// memory op
// intrinsics.
if (!UseMaskForCond && !UseMaskForGaps &&
Factor <= TLI->getMaxSupportedInterleaveFactor()) {
auto *VTy = cast<VectorType>(VecTy);
@@ -734,19 +733,27 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
auto *SubVecTy =
VectorType::get(VTy->getElementType(),
VTy->getElementCount().divideCoefficientBy(Factor));

if (VTy->getElementCount().isKnownMultipleOf(Factor) &&
TLI->isLegalInterleavedAccessType(SubVecTy, Factor, Alignment,
AddressSpace, DL)) {
// FIXME: We use the memory op cost of the *legalized* type here,
// because it's getMemoryOpCost returns a really expensive cost for
// types like <6 x i8>, which show up when doing interleaves of
// Factor=3 etc. Should the memory op cost of these be cheaper?
auto *LegalVTy = VectorType::get(VTy->getElementType(),
LT.second.getVectorElementCount());
InstructionCost LegalMemCost = getMemoryOpCost(
Opcode, LegalVTy, Alignment, AddressSpace, CostKind);
return LT.first + LegalMemCost;

      // Most available hardware today optimizes NF=2 as one wide memory op
// + Factor * LMUL shuffle ops.
if (Factor == 2) {
InstructionCost Cost =
getMemoryOpCost(Opcode, VTy, Alignment, AddressSpace, CostKind);
MVT SubVecVT = getTLI()->getValueType(DL, SubVecTy).getSimpleVT();
Cost += Factor * TLI->getLMULCost(SubVecVT);
return LT.first * Cost;
}

// Otherwise, the cost is proportional to the number of elements (VL *
// Factor ops).
InstructionCost MemOpCost =
getMemoryOpCost(Opcode, VTy->getElementType(), Alignment, 0,
CostKind, {TTI::OK_AnyValue, TTI::OP_None});
unsigned NumLoads = getEstimatedVLFor(VTy);
return NumLoads * MemOpCost;
}
}
}
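As a rough worked example of the new Factor == 2 formula (the individual cost values below are illustrative assumptions, not numbers from any particular RISC-V subtarget): for an interleaved load of <8 x i32> with Factor = 2, SubVecTy is <4 x i32>, and the returned cost is LT.first * (getMemoryOpCost(<8 x i32>) + 2 * getLMULCost(<4 x i32>)); if those calls returned 2 and 1, the total would be LT.first * 4. For Factor > 2 the cost instead falls through to the per-element path, getEstimatedVLFor(VTy) * MemOpCost.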
43 changes: 35 additions & 8 deletions llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -713,21 +713,36 @@ Register SPIRVGlobalRegistry::buildGlobalVariable(
return Reg;
}

static std::string GetSpirvImageTypeName(const SPIRVType *Type,
MachineIRBuilder &MIRBuilder,
const std::string &Prefix);

static std::string buildSpirvTypeName(const SPIRVType *Type,
MachineIRBuilder &MIRBuilder) {
switch (Type->getOpcode()) {
case SPIRV::OpTypeSampledImage: {
return GetSpirvImageTypeName(Type, MIRBuilder, "sampled_image_");
}
case SPIRV::OpTypeImage: {
Register SampledTypeReg = Type->getOperand(1).getReg();
auto *SampledType = MIRBuilder.getMRI()->getUniqueVRegDef(SampledTypeReg);
std::string TypeName =
"image_" + buildSpirvTypeName(SampledType, MIRBuilder);
for (uint32_t I = 2; I < Type->getNumOperands(); ++I) {
TypeName = (TypeName + '_' + Twine(Type->getOperand(I).getImm())).str();
}
return TypeName;
return GetSpirvImageTypeName(Type, MIRBuilder, "image_");
}
case SPIRV::OpTypeArray: {
MachineRegisterInfo *MRI = MIRBuilder.getMRI();
Register ElementTypeReg = Type->getOperand(1).getReg();
auto *ElementType = MRI->getUniqueVRegDef(ElementTypeReg);
const SPIRVType *TypeInst = MRI->getVRegDef(Type->getOperand(2).getReg());
assert(TypeInst->getOpcode() != SPIRV::OpConstantI);
MachineInstr *ImmInst = MRI->getVRegDef(TypeInst->getOperand(1).getReg());
assert(ImmInst->getOpcode() == TargetOpcode::G_CONSTANT);
uint32_t ArraySize = ImmInst->getOperand(1).getCImm()->getZExtValue();
return (buildSpirvTypeName(ElementType, MIRBuilder) + Twine("[") +
Twine(ArraySize) + Twine("]"))
.str();
}
case SPIRV::OpTypeFloat:
return ("f" + Twine(Type->getOperand(1).getImm())).str();
case SPIRV::OpTypeSampler:
return ("sampler");
case SPIRV::OpTypeInt:
if (Type->getOperand(2).getImm())
return ("i" + Twine(Type->getOperand(1).getImm())).str();
@@ -737,6 +752,18 @@ static std::string buildSpirvTypeName(const SPIRVType *Type,
}
}

static std::string GetSpirvImageTypeName(const SPIRVType *Type,
MachineIRBuilder &MIRBuilder,
const std::string &Prefix) {
Register SampledTypeReg = Type->getOperand(1).getReg();
auto *SampledType = MIRBuilder.getMRI()->getUniqueVRegDef(SampledTypeReg);
std::string TypeName = Prefix + buildSpirvTypeName(SampledType, MIRBuilder);
for (uint32_t I = 2; I < Type->getNumOperands(); ++I) {
TypeName = (TypeName + '_' + Twine(Type->getOperand(I).getImm())).str();
}
return TypeName;
}
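For instance, an OpTypeImage whose sampled type is a 32-bit float and whose remaining immediate operands happen to be 1, 2, 0, 0, 1, 0 would be named along the lines of image_f32_1_2_0_0_1_0, while the enclosing OpTypeSampledImage would get the same suffix with the sampled_image_ prefix; the operand values here are purely illustrative.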

Register SPIRVGlobalRegistry::getOrCreateGlobalVariableWithBinding(
const SPIRVType *VarType, uint32_t Set, uint32_t Binding,
MachineIRBuilder &MIRBuilder) {
43 changes: 37 additions & 6 deletions llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -260,6 +260,7 @@ class SPIRVInstructionSelector : public InstructionSelector {
SPIRVType *SrcPtrTy) const;
Register buildPointerToResource(const SPIRVType *ResType, uint32_t Set,
uint32_t Binding, uint32_t ArraySize,
Register IndexReg, bool IsNonUniform,
MachineIRBuilder MIRBuilder) const;
};

@@ -2616,10 +2617,15 @@ void SPIRVInstructionSelector::selectHandleFromBinding(Register &ResVReg,
uint32_t Set = foldImm(I.getOperand(2), MRI);
uint32_t Binding = foldImm(I.getOperand(3), MRI);
uint32_t ArraySize = foldImm(I.getOperand(4), MRI);
Register IndexReg = I.getOperand(5).getReg();
bool IsNonUniform = ArraySize > 1 && foldImm(I.getOperand(6), MRI);

MachineIRBuilder MIRBuilder(I);
Register VarReg =
buildPointerToResource(ResType, Set, Binding, ArraySize, MIRBuilder);
Register VarReg = buildPointerToResource(ResType, Set, Binding, ArraySize,
IndexReg, IsNonUniform, MIRBuilder);

if (IsNonUniform)
buildOpDecorate(ResVReg, I, TII, SPIRV::Decoration::NonUniformEXT, {});

// TODO: For now we assume the resource is an image, which needs to be
// loaded to get the handle. That will not be true for storage buffers.
@@ -2631,10 +2637,35 @@ void SPIRVInstructionSelector::selectHandleFromBinding(Register &ResVReg,

Register SPIRVInstructionSelector::buildPointerToResource(
const SPIRVType *ResType, uint32_t Set, uint32_t Binding,
uint32_t ArraySize, MachineIRBuilder MIRBuilder) const {
assert(ArraySize == 1 && "Resource arrays are not implemented yet.");
return GR.getOrCreateGlobalVariableWithBinding(ResType, Set, Binding,
MIRBuilder);
uint32_t ArraySize, Register IndexReg, bool IsNonUniform,
MachineIRBuilder MIRBuilder) const {
if (ArraySize == 1)
return GR.getOrCreateGlobalVariableWithBinding(ResType, Set, Binding,
MIRBuilder);

const SPIRVType *VarType = GR.getOrCreateSPIRVArrayType(
ResType, ArraySize, *MIRBuilder.getInsertPt(), TII);
Register VarReg = GR.getOrCreateGlobalVariableWithBinding(
VarType, Set, Binding, MIRBuilder);

SPIRVType *ResPointerType = GR.getOrCreateSPIRVPointerType(
ResType, MIRBuilder, SPIRV::StorageClass::UniformConstant);

Register AcReg = MRI->createVirtualRegister(&SPIRV::iIDRegClass);
if (IsNonUniform) {
    // It is unclear which value needs to be marked as non-uniform, so both
    // the index and the access chain are decorated as non-uniform.
buildOpDecorate(IndexReg, MIRBuilder, SPIRV::Decoration::NonUniformEXT, {});
buildOpDecorate(AcReg, MIRBuilder, SPIRV::Decoration::NonUniformEXT, {});
}

MIRBuilder.buildInstr(SPIRV::OpAccessChain)
.addDef(AcReg)
.addUse(GR.getSPIRVTypeID(ResPointerType))
.addUse(VarReg)
.addUse(IndexReg);

return AcReg;
}
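In SPIR-V assembly terms, the array case now emits roughly the following (identifiers are illustrative, not what the backend actually prints): %ac = OpAccessChain %_ptr_UniformConstant_ResType %resource_array %index, with OpDecorate %index NonUniformEXT and OpDecorate %ac NonUniformEXT added when the handle is non-uniform.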

bool SPIRVInstructionSelector::selectAllocaArray(Register ResVReg,
164 changes: 161 additions & 3 deletions llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -689,11 +689,31 @@ void RequirementHandler::initAvailableCapabilitiesForVulkan(
const SPIRVSubtarget &ST) {
addAvailableCaps({Capability::Shader, Capability::Linkage});

// Provided by all supported Vulkan versions.
// Core in Vulkan 1.1 and earlier.
addAvailableCaps({Capability::Int16, Capability::Int64, Capability::Float16,
Capability::Float64, Capability::GroupNonUniform,
Capability::Image1D, Capability::SampledBuffer,
Capability::ImageBuffer});
Capability::ImageBuffer,
Capability::UniformBufferArrayDynamicIndexing,
Capability::SampledImageArrayDynamicIndexing,
Capability::StorageBufferArrayDynamicIndexing,
Capability::StorageImageArrayDynamicIndexing});

// Became core in Vulkan 1.2
if (ST.isAtLeastSPIRVVer(VersionTuple(1, 5))) {
addAvailableCaps(
{Capability::ShaderNonUniformEXT, Capability::RuntimeDescriptorArrayEXT,
Capability::InputAttachmentArrayDynamicIndexingEXT,
Capability::UniformTexelBufferArrayDynamicIndexingEXT,
Capability::StorageTexelBufferArrayDynamicIndexingEXT,
Capability::UniformBufferArrayNonUniformIndexingEXT,
Capability::SampledImageArrayNonUniformIndexingEXT,
Capability::StorageBufferArrayNonUniformIndexingEXT,
Capability::StorageImageArrayNonUniformIndexingEXT,
Capability::InputAttachmentArrayNonUniformIndexingEXT,
Capability::UniformTexelBufferArrayNonUniformIndexingEXT,
Capability::StorageTexelBufferArrayNonUniformIndexingEXT});
}
}

} // namespace SPIRV
@@ -729,6 +749,8 @@ static void addOpDecorateReqs(const MachineInstr &MI, unsigned DecIndex,
Dec == SPIRV::Decoration::ImplementInRegisterMapINTEL) {
Reqs.addExtension(
SPIRV::Extension::SPV_INTEL_global_variable_fpga_decorations);
} else if (Dec == SPIRV::Decoration::NonUniformEXT) {
Reqs.addRequirements(SPIRV::Capability::ShaderNonUniformEXT);
}
}

@@ -848,6 +870,136 @@ static void AddAtomicFloatRequirements(const MachineInstr &MI,
}
}

bool isUniformTexelBuffer(MachineInstr *ImageInst) {
if (ImageInst->getOpcode() != SPIRV::OpTypeImage)
return false;
uint32_t Dim = ImageInst->getOperand(2).getImm();
uint32_t Sampled = ImageInst->getOperand(6).getImm();
return Dim == SPIRV::Dim::DIM_Buffer && Sampled == 1;
}

bool isStorageTexelBuffer(MachineInstr *ImageInst) {
if (ImageInst->getOpcode() != SPIRV::OpTypeImage)
return false;
uint32_t Dim = ImageInst->getOperand(2).getImm();
uint32_t Sampled = ImageInst->getOperand(6).getImm();
return Dim == SPIRV::Dim::DIM_Buffer && Sampled == 2;
}

bool isSampledImage(MachineInstr *ImageInst) {
if (ImageInst->getOpcode() != SPIRV::OpTypeImage)
return false;
uint32_t Dim = ImageInst->getOperand(2).getImm();
uint32_t Sampled = ImageInst->getOperand(6).getImm();
return Dim != SPIRV::Dim::DIM_Buffer && Sampled == 1;
}

bool isInputAttachment(MachineInstr *ImageInst) {
if (ImageInst->getOpcode() != SPIRV::OpTypeImage)
return false;
uint32_t Dim = ImageInst->getOperand(2).getImm();
uint32_t Sampled = ImageInst->getOperand(6).getImm();
return Dim == SPIRV::Dim::DIM_SubpassData && Sampled == 2;
}

bool isStorageImage(MachineInstr *ImageInst) {
if (ImageInst->getOpcode() != SPIRV::OpTypeImage)
return false;
uint32_t Dim = ImageInst->getOperand(2).getImm();
uint32_t Sampled = ImageInst->getOperand(6).getImm();
return Dim != SPIRV::Dim::DIM_Buffer && Sampled == 2;
}

bool isCombinedImageSampler(MachineInstr *SampledImageInst) {
if (SampledImageInst->getOpcode() != SPIRV::OpTypeSampledImage)
return false;

const MachineRegisterInfo &MRI = SampledImageInst->getMF()->getRegInfo();
Register ImageReg = SampledImageInst->getOperand(1).getReg();
auto *ImageInst = MRI.getUniqueVRegDef(ImageReg);
return isSampledImage(ImageInst);
}

bool hasNonUniformDecoration(Register Reg, const MachineRegisterInfo &MRI) {
for (const auto &MI : MRI.reg_instructions(Reg)) {
if (MI.getOpcode() != SPIRV::OpDecorate)
continue;

uint32_t Dec = MI.getOperand(1).getImm();
if (Dec == SPIRV::Decoration::NonUniformEXT)
return true;
}
return false;
}

void addOpAccessChainReqs(const MachineInstr &Instr,
SPIRV::RequirementHandler &Handler,
const SPIRVSubtarget &Subtarget) {
const MachineRegisterInfo &MRI = Instr.getMF()->getRegInfo();
// Get the result type. If it is an image type, then the shader uses
// descriptor indexing. The appropriate capabilities will be added based
// on the specifics of the image.
Register ResTypeReg = Instr.getOperand(1).getReg();
MachineInstr *ResTypeInst = MRI.getUniqueVRegDef(ResTypeReg);

assert(ResTypeInst->getOpcode() == SPIRV::OpTypePointer);
uint32_t StorageClass = ResTypeInst->getOperand(1).getImm();
if (StorageClass != SPIRV::StorageClass::StorageClass::UniformConstant &&
StorageClass != SPIRV::StorageClass::StorageClass::Uniform &&
StorageClass != SPIRV::StorageClass::StorageClass::StorageBuffer) {
return;
}

Register PointeeTypeReg = ResTypeInst->getOperand(2).getReg();
MachineInstr *PointeeType = MRI.getUniqueVRegDef(PointeeTypeReg);
if (PointeeType->getOpcode() != SPIRV::OpTypeImage &&
PointeeType->getOpcode() != SPIRV::OpTypeSampledImage &&
PointeeType->getOpcode() != SPIRV::OpTypeSampler) {
return;
}

bool IsNonUniform =
hasNonUniformDecoration(Instr.getOperand(0).getReg(), MRI);
if (isUniformTexelBuffer(PointeeType)) {
if (IsNonUniform)
Handler.addRequirements(
SPIRV::Capability::UniformTexelBufferArrayNonUniformIndexingEXT);
else
Handler.addRequirements(
SPIRV::Capability::UniformTexelBufferArrayDynamicIndexingEXT);
} else if (isInputAttachment(PointeeType)) {
if (IsNonUniform)
Handler.addRequirements(
SPIRV::Capability::InputAttachmentArrayNonUniformIndexingEXT);
else
Handler.addRequirements(
SPIRV::Capability::InputAttachmentArrayDynamicIndexingEXT);
} else if (isStorageTexelBuffer(PointeeType)) {
if (IsNonUniform)
Handler.addRequirements(
SPIRV::Capability::StorageTexelBufferArrayNonUniformIndexingEXT);
else
Handler.addRequirements(
SPIRV::Capability::StorageTexelBufferArrayDynamicIndexingEXT);
} else if (isSampledImage(PointeeType) ||
isCombinedImageSampler(PointeeType) ||
PointeeType->getOpcode() == SPIRV::OpTypeSampler) {
if (IsNonUniform)
Handler.addRequirements(
SPIRV::Capability::SampledImageArrayNonUniformIndexingEXT);
else
Handler.addRequirements(
SPIRV::Capability::SampledImageArrayDynamicIndexing);
} else if (isStorageImage(PointeeType)) {
if (IsNonUniform)
Handler.addRequirements(
SPIRV::Capability::StorageImageArrayNonUniformIndexingEXT);
else
Handler.addRequirements(
SPIRV::Capability::StorageImageArrayDynamicIndexing);
}
}
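One concrete instance of the mapping above: an access chain whose result pointer (in the UniformConstant, Uniform, or StorageBuffer storage class) points at a buffer-dimension OpTypeImage with Sampled = 1 requires UniformTexelBufferArrayNonUniformIndexingEXT when the result is decorated NonUniformEXT, and only UniformTexelBufferArrayDynamicIndexingEXT otherwise; the remaining image and sampler kinds follow the same non-uniform versus dynamic split with their respective capabilities.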

void addInstrRequirements(const MachineInstr &MI,
SPIRV::RequirementHandler &Reqs,
const SPIRVSubtarget &ST) {
@@ -967,11 +1119,17 @@ void addInstrRequirements(const MachineInstr &MI,
case SPIRV::OpConstantSampler:
Reqs.addCapability(SPIRV::Capability::LiteralSampler);
break;
case SPIRV::OpInBoundsAccessChain:
case SPIRV::OpAccessChain:
addOpAccessChainReqs(MI, Reqs, ST);
break;
case SPIRV::OpTypeImage:
addOpTypeImageReqs(MI, Reqs, ST);
break;
case SPIRV::OpTypeSampler:
Reqs.addCapability(SPIRV::Capability::ImageBasic);
if (!ST.isVulkanEnv()) {
Reqs.addCapability(SPIRV::Capability::ImageBasic);
}
break;
case SPIRV::OpTypeForwardPointer:
// TODO: check if it's OpenCL's kernel.
61 changes: 52 additions & 9 deletions llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
@@ -165,6 +165,57 @@ static MachineInstr *findAssignTypeInstr(Register Reg,
return nullptr;
}

static void buildOpBitcast(SPIRVGlobalRegistry *GR, MachineIRBuilder &MIB,
Register ResVReg, Register OpReg) {
SPIRVType *ResType = GR->getSPIRVTypeForVReg(ResVReg);
SPIRVType *OpType = GR->getSPIRVTypeForVReg(OpReg);
assert(ResType && OpType && "Operand types are expected");
if (!GR->isBitcastCompatible(ResType, OpType))
report_fatal_error("incompatible result and operand types in a bitcast");
MachineRegisterInfo *MRI = MIB.getMRI();
if (!MRI->getRegClassOrNull(ResVReg))
MRI->setRegClass(ResVReg, GR->getRegClass(ResType));
MIB.buildInstr(SPIRV::OpBitcast)
.addDef(ResVReg)
.addUse(GR->getSPIRVTypeID(ResType))
.addUse(OpReg);
}

// We do instruction selection early here instead of calling MIB.buildBitcast()
// and generating the generic opcode G_BITCAST. When MachineVerifier validates
// G_BITCAST, it rejects the case where the source type equals the destination
// type with the error "bitcast must change the type". That check does not
// account for typed pointers, which matter for SPIR-V: a user may, and should,
// bitcast between pointers with different pointee types
// (https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpBitcast).
// This is important for correct lowering in SPIR-V, because the interpretation
// of the data type is not left to the instructions that use the pointer, but is
// encoded by the pointer declaration, and the SPIR-V target can and must handle
// the declaration and use of pointers that specify the type of data they point
// to. It is not feasible to improve the validation of G_BITCAST using only the
// low-level types of the source and destination. Therefore we do not emit
// G_BITCAST as a generic opcode with semantics different from OpBitcast, but
// lower to OpBitcast immediately. For now, the only difference is that
// CombinerHelper cannot transform known patterns around G_BUILD_VECTOR. See the
// discussion in https://github.com/llvm/llvm-project/pull/110270 for more
// context.
static void selectOpBitcasts(MachineFunction &MF, SPIRVGlobalRegistry *GR,
MachineIRBuilder MIB) {
SmallVector<MachineInstr *, 16> ToErase;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
if (MI.getOpcode() != TargetOpcode::G_BITCAST)
continue;
MIB.setInsertPt(*MI.getParent(), MI);
buildOpBitcast(GR, MIB, MI.getOperand(0).getReg(),
MI.getOperand(1).getReg());
ToErase.push_back(&MI);
}
}
for (MachineInstr *MI : ToErase)
MI->eraseFromParent();
}

static void insertBitcasts(MachineFunction &MF, SPIRVGlobalRegistry *GR,
MachineIRBuilder MIB) {
// Get access to information about available extensions
@@ -202,15 +253,6 @@ static void insertBitcasts(MachineFunction &MF, SPIRVGlobalRegistry *GR,
} else {
GR->assignSPIRVTypeToVReg(AssignedPtrType, Def, MF);
MIB.buildBitcast(Def, Source);
// MachineVerifier requires that bitcast must change the type.
// Change AddressSpace if needed to hint that Def and Source points to
// different types: this doesn't change actual code generation.
LLT DefType = MRI->getType(Def);
if (DefType == MRI->getType(Source))
MRI->setType(Def,
LLT::pointer((DefType.getAddressSpace() + 1) %
SPIRVSubtarget::MaxLegalAddressSpace,
GR->getPointerSize()));
}
}
}
@@ -1007,6 +1049,7 @@ bool SPIRVPreLegalizer::runOnMachineFunction(MachineFunction &MF) {
removeImplicitFallthroughs(MF, MIB);
insertSpirvDecorations(MF, MIB);
insertInlineAsm(MF, GR, ST, MIB);
selectOpBitcasts(MF, GR, MIB);

return true;
}
4 changes: 3 additions & 1 deletion llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -355,7 +355,9 @@ defm GeometryPointSize : CapabilityOperand<24, 0, 0, [], [Geometry]>;
defm ImageGatherExtended : CapabilityOperand<25, 0, 0, [], [Shader]>;
defm StorageImageMultisample : CapabilityOperand<27, 0, 0, [], [Shader]>;
defm UniformBufferArrayDynamicIndexing : CapabilityOperand<28, 0, 0, [], [Shader]>;
defm SampledImageArrayDymnamicIndexing : CapabilityOperand<29, 0, 0, [], [Shader]>;
defm SampledImageArrayDynamicIndexing : CapabilityOperand<29, 0, 0, [], [Shader]>;
defm StorageBufferArrayDynamicIndexing : CapabilityOperand<30, 0, 0, [], [Shader]>;
defm StorageImageArrayDynamicIndexing : CapabilityOperand<31, 0, 0, [], [Shader]>;
defm ClipDistance : CapabilityOperand<32, 0, 0, [], [Shader]>;
defm CullDistance : CapabilityOperand<33, 0, 0, [], [Shader]>;
defm SampleRateShading : CapabilityOperand<35, 0, 0, [], [Shader]>;
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49321,7 +49321,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
if (!LogicalShift && ISD::isBuildVectorAllOnes(N0.getNode()))
// N0 is all ones or undef. We guarantee that the bits shifted into the
// result are all ones, not undef.
return DAG.getConstant(-1, SDLoc(N), VT);
return DAG.getAllOnesConstant(SDLoc(N), VT);

auto MergeShifts = [&](SDValue X, uint64_t Amt0, uint64_t Amt1) {
unsigned NewShiftVal = Amt0 + Amt1;
58 changes: 38 additions & 20 deletions llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -60,6 +60,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Coroutines/ABI.h"
#include "llvm/Transforms/Coroutines/CoroInstr.h"
@@ -118,7 +119,6 @@ class CoroCloner {

TargetTransformInfo &TTI;

public:
/// Create a cloner for a switch lowering.
CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
Kind FKind, TargetTransformInfo &TTI)
@@ -140,6 +140,30 @@
assert(ActiveSuspend && "need active suspend point for continuation");
}

public:
/// Create a clone for a switch lowering.
static Function *createClone(Function &OrigF, const Twine &Suffix,
coro::Shape &Shape, Kind FKind,
TargetTransformInfo &TTI) {
TimeTraceScope FunctionScope("CoroCloner");

CoroCloner Cloner(OrigF, Suffix, Shape, FKind, TTI);
Cloner.create();
return Cloner.getFunction();
}

/// Create a clone for a continuation lowering.
static Function *createClone(Function &OrigF, const Twine &Suffix,
coro::Shape &Shape, Function *NewF,
AnyCoroSuspendInst *ActiveSuspend,
TargetTransformInfo &TTI) {
TimeTraceScope FunctionScope("CoroCloner");

CoroCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI);
Cloner.create();
return Cloner.getFunction();
}

Function *getFunction() const {
assert(NewF != nullptr && "declaration not yet set");
return NewF;
@@ -1466,13 +1490,16 @@ struct SwitchCoroutineSplitter {
TargetTransformInfo &TTI) {
assert(Shape.ABI == coro::ABI::Switch);

    // Create a resume clone by cloning the body of the original function,
    // setting a new entry block, and replacing coro.suspend with an appropriate
    // value to force the resume or cleanup path at every suspend point.
createResumeEntryBlock(F, Shape);
auto *ResumeClone =
createClone(F, ".resume", Shape, CoroCloner::Kind::SwitchResume, TTI);
auto *DestroyClone =
createClone(F, ".destroy", Shape, CoroCloner::Kind::SwitchUnwind, TTI);
auto *CleanupClone =
createClone(F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup, TTI);
auto *ResumeClone = CoroCloner::createClone(
F, ".resume", Shape, CoroCloner::Kind::SwitchResume, TTI);
auto *DestroyClone = CoroCloner::createClone(
F, ".destroy", Shape, CoroCloner::Kind::SwitchUnwind, TTI);
auto *CleanupClone = CoroCloner::createClone(
F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup, TTI);

postSplitCleanup(*ResumeClone);
postSplitCleanup(*DestroyClone);
@@ -1562,17 +1589,6 @@ struct SwitchCoroutineSplitter {
}

private:
// Create a resume clone by cloning the body of the original function, setting
// new entry block and replacing coro.suspend an appropriate value to force
// resume or cleanup pass for every suspend point.
static Function *createClone(Function &F, const Twine &Suffix,
coro::Shape &Shape, CoroCloner::Kind FKind,
TargetTransformInfo &TTI) {
CoroCloner Cloner(F, Suffix, Shape, FKind, TTI);
Cloner.create();
return Cloner.getFunction();
}

// Create an entry block for a resume function with a switch that will jump to
// suspend points.
static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
@@ -1872,7 +1888,8 @@ void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
auto *Suspend = Shape.CoroSuspends[Idx];
auto *Clone = Clones[Idx];

CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend, TTI).create();
CoroCloner::createClone(F, "resume." + Twine(Idx), Shape, Clone, Suspend,
TTI);
}
}

@@ -2001,7 +2018,8 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
auto Suspend = Shape.CoroSuspends[i];
auto Clone = Clones[i];

CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend, TTI).create();
CoroCloner::createClone(F, "resume." + Twine(i), Shape, Clone, Suspend,
TTI);
}
}

54 changes: 32 additions & 22 deletions llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -87,28 +87,14 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
return NewBB;
}

// Clone OldFunc into NewFunc, transforming the old arguments into references to
// VMap values.
//
void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
CloneFunctionChangeType Changes,
SmallVectorImpl<ReturnInst *> &Returns,
const char *NameSuffix, ClonedCodeInfo *CodeInfo,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
NewFunc->setIsNewDbgInfoFormat(OldFunc->IsNewDbgInfoFormat);
assert(NameSuffix && "NameSuffix cannot be null!");

#ifndef NDEBUG
for (const Argument &I : OldFunc->args())
assert(VMap.count(&I) && "No mapping from source argument specified!");
#endif

bool ModuleLevelChanges = Changes > CloneFunctionChangeType::LocalChangesOnly;

// Copy all attributes other than those stored in the AttributeList. We need
// to remap the parameter indices of the AttributeList.
void llvm::CloneFunctionAttributesInto(Function *NewFunc,
const Function *OldFunc,
ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
// Copy all attributes other than those stored in Function's AttributeList
// which holds e.g. parameters and return value attributes.
AttributeList NewAttrs = NewFunc->getAttributes();
NewFunc->copyAttributesFrom(OldFunc);
NewFunc->setAttributes(NewAttrs);
@@ -140,6 +126,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Clone any argument attributes that are present in the VMap.
for (const Argument &OldArg : OldFunc->args()) {
if (Argument *NewArg = dyn_cast<Argument>(VMap[&OldArg])) {
// Remap the parameter indices.
NewArgAttrs[NewArg->getArgNo()] =
OldAttrs.getParamAttrs(OldArg.getArgNo());
}
@@ -148,6 +135,29 @@
NewFunc->setAttributes(
AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttrs(),
OldAttrs.getRetAttrs(), NewArgAttrs));
}

// Clone OldFunc into NewFunc, transforming the old arguments into references to
// VMap values.
void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
CloneFunctionChangeType Changes,
SmallVectorImpl<ReturnInst *> &Returns,
const char *NameSuffix, ClonedCodeInfo *CodeInfo,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
NewFunc->setIsNewDbgInfoFormat(OldFunc->IsNewDbgInfoFormat);
assert(NameSuffix && "NameSuffix cannot be null!");

#ifndef NDEBUG
for (const Argument &I : OldFunc->args())
assert(VMap.count(&I) && "No mapping from source argument specified!");
#endif

bool ModuleLevelChanges = Changes > CloneFunctionChangeType::LocalChangesOnly;

CloneFunctionAttributesInto(NewFunc, OldFunc, VMap, ModuleLevelChanges,
TypeMapper, Materializer);

// Everything else beyond this point deals with function instructions,
// so if we are dealing with a function declaration, we're done.
59 changes: 30 additions & 29 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -467,11 +467,12 @@ class InnerLoopVectorizer {
ElementCount MinProfitableTripCount,
unsigned UnrollFactor, LoopVectorizationLegality *LVL,
LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks)
ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks,
VPlan &Plan)
: OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor),
Builder(PSE.getSE()->getContext()), Legal(LVL), Cost(CM), BFI(BFI),
PSI(PSI), RTChecks(RTChecks) {
PSI(PSI), RTChecks(RTChecks), Plan(Plan) {
// Query this against the original loop and save it here because the profile
// of the original loop header may change as the transformation happens.
OptForSizeBasedOnProfile = llvm::shouldOptimizeForSize(
@@ -498,7 +499,7 @@ class InnerLoopVectorizer {
createVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs);

/// Fix the vectorized code, taking care of header phi's, live-outs, and more.
void fixVectorizedLoop(VPTransformState &State, VPlan &Plan);
void fixVectorizedLoop(VPTransformState &State);

// Return true if any runtime check is added.
bool areSafetyChecksAdded() { return AddedSafetyChecks; }
Expand All @@ -513,7 +514,7 @@ class InnerLoopVectorizer {
VPTransformState &State);

/// Fix the non-induction PHIs in \p Plan.
void fixNonInductionPHIs(VPlan &Plan, VPTransformState &State);
void fixNonInductionPHIs(VPTransformState &State);

/// Create a new phi node for the induction variable \p OrigPhi to resume
/// iteration count in the scalar epilogue, from where the vectorized loop
@@ -541,8 +542,7 @@ class InnerLoopVectorizer {
/// Set up the values of the IVs correctly when exiting the vector loop.
virtual void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
Value *VectorTripCount, Value *EndValue,
BasicBlock *MiddleBlock, VPlan &Plan,
VPTransformState &State);
BasicBlock *MiddleBlock, VPTransformState &State);

/// Iteratively sink the scalarized operands of a predicated instruction into
/// the block that was created for it.
@@ -674,6 +674,8 @@ class InnerLoopVectorizer {
/// Structure to hold information about generated runtime checks, responsible
/// for cleaning the checks, if vectorization turns out unprofitable.
GeneratedRTChecks &RTChecks;

VPlan &Plan;
};

/// Encapsulate information regarding vectorization of a loop and its epilogue.
@@ -715,10 +717,10 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
GeneratedRTChecks &Checks)
GeneratedRTChecks &Checks, VPlan &Plan)
: InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
EPI.MainLoopVF, EPI.MainLoopVF, EPI.MainLoopUF, LVL,
CM, BFI, PSI, Checks),
CM, BFI, PSI, Checks, Plan),
EPI(EPI) {}

// Override this function to handle the more complex control flow around the
@@ -755,9 +757,9 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
GeneratedRTChecks &Check)
GeneratedRTChecks &Check, VPlan &Plan)
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
EPI, LVL, CM, BFI, PSI, Check) {}
EPI, LVL, CM, BFI, PSI, Check, Plan) {}
/// Implements the interface for creating a vectorized skeleton using the
/// *main loop* strategy (ie the first pass of vplan execution).
std::pair<BasicBlock *, Value *>
@@ -773,7 +775,7 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {

void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
Value *VectorTripCount, Value *EndValue,
BasicBlock *MiddleBlock, VPlan &Plan,
BasicBlock *MiddleBlock,
VPTransformState &State) override {};
};

@@ -789,9 +791,9 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
GeneratedRTChecks &Checks)
GeneratedRTChecks &Checks, VPlan &Plan)
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
EPI, LVL, CM, BFI, PSI, Checks) {
EPI, LVL, CM, BFI, PSI, Checks, Plan) {
TripCount = EPI.TripCount;
}
/// Implements the interface for creating a vectorized skeleton using the
@@ -2751,7 +2753,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton(
void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
const InductionDescriptor &II,
Value *VectorTripCount, Value *EndValue,
BasicBlock *MiddleBlock, VPlan &Plan,
BasicBlock *MiddleBlock,
VPTransformState &State) {
// There are two kinds of external IV usages - those that use the value
// computed in the last iteration (the PHI) and those that use the penultimate
@@ -2931,11 +2933,10 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
TargetTransformInfo::TCK_RecipThroughput);
}

void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
VPlan &Plan) {
void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
// Fix widened non-induction PHIs by setting up the PHI operands.
if (EnableVPlanNativePath)
fixNonInductionPHIs(Plan, State);
fixNonInductionPHIs(State);

// Forget the original basic block.
PSE.getSE()->forgetLoop(OrigLoop);
@@ -2966,7 +2967,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
for (const auto &Entry : Legal->getInductionVars())
fixupIVUsers(Entry.first, Entry.second,
getOrCreateVectorTripCount(nullptr),
IVEndValues[Entry.first], LoopMiddleBlock, Plan, State);
IVEndValues[Entry.first], LoopMiddleBlock, State);
}

// Fix live-out phis not already fixed earlier.
@@ -3077,8 +3078,7 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
} while (Changed);
}

void InnerLoopVectorizer::fixNonInductionPHIs(VPlan &Plan,
VPTransformState &State) {
void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
auto Iter = vp_depth_first_deep(Plan.getEntry());
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
for (VPRecipeBase &P : VPBB->phis()) {
@@ -7744,7 +7744,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(

// 3. Fix the vectorized code: take care of header phi's, live-outs,
// predication, updating analyses.
ILV.fixVectorizedLoop(State, BestVPlan);
ILV.fixVectorizedLoop(State);

ILV.printDebugTracesAtEnd();

@@ -9727,7 +9727,7 @@ static bool processLoopInVPlanNativePath(
GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(),
AddBranchWeights);
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
VF.Width, 1, LVL, &CM, BFI, PSI, Checks);
VF.Width, 1, LVL, &CM, BFI, PSI, Checks, BestPlan);
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
<< L->getHeader()->getParent()->getName() << "\"\n");
LVP.executePlan(VF.Width, 1, BestPlan, LB, DT, false);
@@ -10215,11 +10215,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
assert(IC > 1 && "interleave count should not be 1 or 0");
// If we decided that it is not legal to vectorize the loop, then
// interleave it.
VPlan &BestPlan = LVP.getPlanFor(VF.Width);
InnerLoopVectorizer Unroller(
L, PSE, LI, DT, TLI, TTI, AC, ORE, ElementCount::getFixed(1),
ElementCount::getFixed(1), IC, &LVL, &CM, BFI, PSI, Checks);
ElementCount::getFixed(1), IC, &LVL, &CM, BFI, PSI, Checks, BestPlan);

VPlan &BestPlan = LVP.getPlanFor(VF.Width);
LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);

ORE->emit([&]() {
@@ -10236,15 +10236,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VectorizationFactor EpilogueVF =
LVP.selectEpilogueVectorizationFactor(VF.Width, IC);
if (EpilogueVF.Width.isVector()) {
std::unique_ptr<VPlan> BestMainPlan(BestPlan.duplicate());

// The first pass vectorizes the main loop and creates a scalar epilogue
// to be vectorized by executing the plan (potentially with a different
// factor) again shortly afterwards.
EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1);
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
EPI, &LVL, &CM, BFI, PSI, Checks);
EPI, &LVL, &CM, BFI, PSI, Checks,
*BestMainPlan);

std::unique_ptr<VPlan> BestMainPlan(BestPlan.duplicate());
auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
*BestMainPlan, MainILV, DT, false);
++LoopsVectorized;
@@ -10253,11 +10254,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// edges from the first pass.
EPI.MainLoopVF = EPI.EpilogueVF;
EPI.MainLoopUF = EPI.EpilogueUF;
VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF);
EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC,
ORE, EPI, &LVL, &CM, BFI, PSI,
Checks);
Checks, BestEpiPlan);

VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF);
VPRegionBlock *VectorLoop = BestEpiPlan.getVectorLoopRegion();
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
Header->setName("vec.epilog.vector.body");
@@ -10340,7 +10341,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
} else {
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
PSI, Checks);
PSI, Checks, BestPlan);
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
++LoopsVectorized;

@@ -1,13 +1,11 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64 -run-pass=none -verify-machineinstrs %s -o - | FileCheck %s


---
name: icmp_samesign
body: |
bb.0:
liveins: $w0, $w1

; CHECK-LABEL: name: icmp_samesign
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
@@ -23,15 +21,12 @@ body: |
%zext:_(s32) = G_ZEXT %cmp:_(s1)
$w0 = COPY %zext
RET_ReallyLR implicit $w0


...
---
name: icmp_differentsign
body: |
bb.0:
liveins: $w0, $w1

; CHECK-LABEL: name: icmp_differentsign
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
File renamed without changes.