324 changes: 276 additions & 48 deletions llvm/lib/Transforms/IPO/SampleProfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,11 @@ static cl::opt<std::string> SampleProfileRemappingFile(
"sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);

// Opt-in switch (off by default) for stale-profile recovery. Within this file
// it is consulted to decide whether the SampleProfileMatcher is created and
// run, whether stale profile matching is attempted for a function, and whether
// emitAnnotations keeps going after a failed profile validity check instead of
// returning early.
static cl::opt<bool> SalvageStaleProfile(
"salvage-stale-profile", cl::Hidden, cl::init(false),
cl::desc("Salvage stale profile by fuzzy matching and use the remapped "
"location for sample profile query."));

// Off by default. When set, the SampleProfileMatcher is created and run, the
// per-function callsite mismatch counters are collected, and the reporting
// branch in SampleProfileMatcher::runOnModule is taken.
static cl::opt<bool> ReportProfileStaleness(
"report-profile-staleness", cl::Hidden, cl::init(false),
cl::desc("Compute and report stale profile statistical metrics."));
Expand Down Expand Up @@ -438,6 +443,10 @@ class SampleProfileMatcher {
SampleProfileReader &Reader;
const PseudoProbeManager *ProbeManager;
SampleProfileMap FlattenedProfiles;
// For each function, the matcher generates a map, of which each entry is a
// mapping from the source location of current build to the source location in
// the profile.
StringMap<LocToLocMap> FuncMappings;

// Profile mismatching statistics.
uint64_t TotalProfiledCallsites = 0;
Expand All @@ -458,17 +467,37 @@ class SampleProfileMatcher {
FunctionSamples::ProfileIsCS);
}
}
void runOnModule();

private:
// Look up the flattened profile of F by its canonical function name. Yields
// nullptr when FlattenedProfiles holds no entry under that name.
FunctionSamples *getFlattenedSamplesFor(const Function &F) {
  auto Entry = FlattenedProfiles.find(FunctionSamples::getCanonicalFnName(F));
  return Entry == FlattenedProfiles.end() ? nullptr : &Entry->second;
}

void detectProfileMismatch();
void detectProfileMismatch(const Function &F, const FunctionSamples &FS);
void runOnFunction(const Function &F, const FunctionSamples &FS);
void countProfileMismatches(
const FunctionSamples &FS,
const std::unordered_set<LineLocation, LineLocationHash>
&MatchedCallsiteLocs,
uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites);

// Fetch the IR-to-profile location map stored for F in FuncMappings, keyed by
// the canonical form of F's name. An empty map is created on first request, so
// the returned reference is always valid.
LocToLocMap &getIRToProfileLocationMap(const Function &F) {
  StringRef CanonName = FunctionSamples::getCanonicalFnName(F.getName());
  return FuncMappings.try_emplace(CanonName, LocToLocMap()).first->second;
}
void distributeIRToProfileLocationMap();
void distributeIRToProfileLocationMap(FunctionSamples &FS);
void populateProfileCallsites(
const FunctionSamples &FS,
StringMap<std::set<LineLocation>> &CalleeToCallsitesMap);
void runStaleProfileMatching(
const std::map<LineLocation, StringRef> &IRLocations,
StringMap<std::set<LineLocation>> &CalleeToCallsitesMap,
LocToLocMap &IRToProfileLocationMap);
};

/// Sample profile pass.
Expand Down Expand Up @@ -1810,9 +1839,10 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
if (!ProbeManager->profileIsValid(F, *Samples)) {
LLVM_DEBUG(
dbgs() << "Profile is invalid due to CFG mismatch for Function "
<< F.getName());
<< F.getName() << "\n");
++NumMismatchedProfile;
return false;
if (!SalvageStaleProfile)
return false;
}
++NumMatchedProfile;
} else {
Expand Down Expand Up @@ -2071,33 +2101,201 @@ bool SampleProfileLoader::doInitialization(Module &M,
}
}

if (ReportProfileStaleness || PersistProfileStaleness) {
if (ReportProfileStaleness || PersistProfileStaleness ||
SalvageStaleProfile) {
MatchingManager =
std::make_unique<SampleProfileMatcher>(M, *Reader, ProbeManager.get());
}

return true;
}

void SampleProfileMatcher::detectProfileMismatch(const Function &F,
const FunctionSamples &FS) {
void SampleProfileMatcher::countProfileMismatches(
const FunctionSamples &FS,
const std::unordered_set<LineLocation, LineLocationHash>
&MatchedCallsiteLocs,
uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites) {

auto isInvalidLineOffset = [](uint32_t LineOffset) {
return LineOffset & 0x8000;
};

// Check if there are any callsites in the profile that does not match to any
// IR callsites, those callsite samples will be discarded.
for (auto &I : FS.getBodySamples()) {
const LineLocation &Loc = I.first;
if (isInvalidLineOffset(Loc.LineOffset))
continue;

uint64_t Count = I.second.getSamples();
if (!I.second.getCallTargets().empty()) {
TotalCallsiteSamples += Count;
FuncProfiledCallsites++;
if (!MatchedCallsiteLocs.count(Loc)) {
MismatchedCallsiteSamples += Count;
FuncMismatchedCallsites++;
}
}
}

for (auto &I : FS.getCallsiteSamples()) {
const LineLocation &Loc = I.first;
if (isInvalidLineOffset(Loc.LineOffset))
continue;

uint64_t Count = 0;
for (auto &FM : I.second) {
Count += FM.second.getHeadSamplesEstimate();
}
TotalCallsiteSamples += Count;
FuncProfiledCallsites++;
if (!MatchedCallsiteLocs.count(Loc)) {
MismatchedCallsiteSamples += Count;
FuncMismatchedCallsites++;
}
}
}

// Populate the anchors(direct callee name) from profile.
void SampleProfileMatcher::populateProfileCallsites(
const FunctionSamples &FS,
StringMap<std::set<LineLocation>> &CalleeToCallsitesMap) {
for (const auto &I : FS.getBodySamples()) {
const auto &Loc = I.first;
const auto &CTM = I.second.getCallTargets();
// Filter out possible indirect calls, use direct callee name as anchor.
if (CTM.size() == 1) {
StringRef CalleeName = CTM.begin()->first();
const auto &Candidates = CalleeToCallsitesMap.try_emplace(
CalleeName, std::set<LineLocation>());
Candidates.first->second.insert(Loc);
}
}

for (const auto &I : FS.getCallsiteSamples()) {
const LineLocation &Loc = I.first;
const auto &CalleeMap = I.second;
// Filter out possible indirect calls, use direct callee name as anchor.
if (CalleeMap.size() == 1) {
StringRef CalleeName = CalleeMap.begin()->first;
const auto &Candidates = CalleeToCallsitesMap.try_emplace(
CalleeName, std::set<LineLocation>());
Candidates.first->second.insert(Loc);
}
}
}

// Call target name anchor based profile fuzzy matching.
// Input:
// For IR locations, the anchor is the callee name of direct callsite; For
// profile locations, it's the call target name for BodySamples or inlinee's
// profile name for CallsiteSamples.
// Matching heuristic:
// First match all the anchors in lexical order, then split the non-anchor
// locations between the two anchors evenly, first half are matched based on the
// start anchor, second half are matched based on the end anchor.
// For example, given:
// IR locations:      [1, 2(foo), 3, 5, 6(bar), 7]
// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
// The matching gives:
//   [1,    2(foo), 3,  5,  6(bar), 7]
//    |     |       |   |     |     |
//   [1, 2, 3(foo), 4,  7,  8(bar), 9]
// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
// (Location 1 maps to itself and is therefore not stored.)
//
// Parameters:
//   IRLocations            - IR locations in order; the mapped value is the
//                            direct callee name, or empty for a non-anchor.
//   CalleeToCallsitesMap   - profile anchors per callee name. Consumed: each
//                            anchor that gets matched is erased from its set.
//   IRToProfileLocationMap - output; must be empty on entry. Receives only the
//                            mappings whose source and target differ.
void SampleProfileMatcher::runStaleProfileMatching(
    const std::map<LineLocation, StringRef> &IRLocations,
    StringMap<std::set<LineLocation>> &CalleeToCallsitesMap,
    LocToLocMap &IRToProfileLocationMap) {
  assert(IRToProfileLocationMap.empty() &&
         "Run stale profile matching only once per function");

  // Record (or replace) the mapping for From. A location can be matched twice:
  // first forwards from the previous anchor, then backwards from the next one.
  // The later match must win, so a plain insert() is not enough because it
  // keeps the stale forward entry.
  auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
    if (From == To) {
      // Unchanged mappings are not stored to save memory; make sure an
      // earlier, different mapping for this location does not linger.
      IRToProfileLocationMap.erase(From);
      return;
    }
    auto Inserted = IRToProfileLocationMap.insert({From, To});
    if (!Inserted.second)
      Inserted.first->second = To;
  };

  // Use function's beginning location as the initial anchor.
  int32_t LocationDelta = 0;
  SmallVector<LineLocation> LastMatchedNonAnchors;

  for (const auto &IR : IRLocations) {
    const auto &Loc = IR.first;
    StringRef CalleeName = IR.second;
    bool IsMatchedAnchor = false;
    // Match the anchor location in lexical order.
    if (!CalleeName.empty()) {
      auto ProfileAnchors = CalleeToCallsitesMap.find(CalleeName);
      if (ProfileAnchors != CalleeToCallsitesMap.end() &&
          !ProfileAnchors->second.empty()) {
        auto CI = ProfileAnchors->second.begin();
        // Copy the anchor before erasing it: a reference into the set would
        // dangle once the element is removed.
        const LineLocation Candidate = *CI;
        ProfileAnchors->second.erase(CI);
        InsertMatching(Loc, Candidate);
        LLVM_DEBUG(dbgs() << "Callsite with callee:" << CalleeName
                          << " is matched from " << Loc << " to " << Candidate
                          << "\n");
        LocationDelta = Candidate.LineOffset - Loc.LineOffset;

        // Match backwards for non-anchor locations.
        // The locations in LastMatchedNonAnchors have been matched forwards
        // based on the previous anchor, split it evenly and overwrite the
        // second half based on the current anchor.
        for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
             I < LastMatchedNonAnchors.size(); I++) {
          const auto &L = LastMatchedNonAnchors[I];
          uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
          LineLocation Candidate(CandidateLineOffset, L.Discriminator);
          InsertMatching(L, Candidate);
          LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
                            << " to " << Candidate << "\n");
        }

        IsMatchedAnchor = true;
        LastMatchedNonAnchors.clear();
      }
    }

    // Match forwards for non-anchor locations.
    if (!IsMatchedAnchor) {
      uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
      LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
      InsertMatching(Loc, Candidate);
      LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
                        << Candidate << "\n");
      LastMatchedNonAnchors.emplace_back(Loc);
    }
  }
}

void SampleProfileMatcher::runOnFunction(const Function &F,
const FunctionSamples &FS) {
bool IsFuncHashMismatch = false;
if (FunctionSamples::ProfileIsProbeBased) {
uint64_t Count = FS.getTotalSamples();
TotalFuncHashSamples += Count;
TotalProfiledFunc++;
if (!ProbeManager->profileIsValid(F, FS)) {
MismatchedFuncHashSamples += Count;
NumMismatchedFuncHash++;
return;
IsFuncHashMismatch = true;
}
}

std::unordered_set<LineLocation, LineLocationHash> MatchedCallsiteLocs;
// The value of the map is the name of direct callsite and use empty StringRef
// for non-direct-call site.
std::map<LineLocation, StringRef> IRLocations;

// Go through all the callsites on the IR and flag the callsite if the target
// name is the same as the one in the profile.
// Extract profile matching anchors and profile mismatch metrics in the IR.
for (auto &BB : F) {
for (auto &I : BB) {
// TODO: Support line-number based location(AutoFDO).
if (FunctionSamples::ProfileIsProbeBased && isa<PseudoProbeInst>(&I)) {
if (std::optional<PseudoProbe> Probe = extractProbe(I))
IRLocations.emplace(LineLocation(Probe->Id, 0), StringRef());
}

if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
continue;

Expand All @@ -2109,6 +2307,17 @@ void SampleProfileMatcher::detectProfileMismatch(const Function &F,
if (Function *Callee = CB->getCalledFunction())
CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());

// Force to overwrite the callee name in case any non-call location was
// written before.
auto R = IRLocations.emplace(IRCallsite, CalleeName);
R.first->second = CalleeName;
assert((!FunctionSamples::ProfileIsProbeBased || R.second ||
R.first->second == CalleeName) &&
"Overwrite non-call or different callee name location for "
"pseudo probe callsite");

// Go through all the callsites on the IR and flag the callsite if the
// target name is the same as the one in the profile.
const auto CTM = FS.findCallTargetMapAt(IRCallsite);
const auto CallsiteFS = FS.findFunctionSamplesMapAt(IRCallsite);

Expand All @@ -2130,47 +2339,40 @@ void SampleProfileMatcher::detectProfileMismatch(const Function &F,
}
}

auto isInvalidLineOffset = [](uint32_t LineOffset) {
return LineOffset & 0x8000;
};
// Detect profile mismatch for profile staleness metrics report.
if (ReportProfileStaleness || PersistProfileStaleness) {
uint64_t FuncMismatchedCallsites = 0;
uint64_t FuncProfiledCallsites = 0;
countProfileMismatches(FS, MatchedCallsiteLocs, FuncMismatchedCallsites,
FuncProfiledCallsites);
TotalProfiledCallsites += FuncProfiledCallsites;
NumMismatchedCallsites += FuncMismatchedCallsites;
LLVM_DEBUG({
if (FunctionSamples::ProfileIsProbeBased && !IsFuncHashMismatch &&
FuncMismatchedCallsites)
dbgs() << "Function checksum is matched but there are "
<< FuncMismatchedCallsites << "/" << FuncProfiledCallsites
<< " mismatched callsites.\n";
});
}

// Check if there are any callsites in the profile that does not match to any
// IR callsites, those callsite samples will be discarded.
for (auto &I : FS.getBodySamples()) {
const LineLocation &Loc = I.first;
if (isInvalidLineOffset(Loc.LineOffset))
continue;
if (IsFuncHashMismatch && SalvageStaleProfile) {
LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
<< "\n");

uint64_t Count = I.second.getSamples();
if (!I.second.getCallTargets().empty()) {
TotalCallsiteSamples += Count;
TotalProfiledCallsites++;
if (!MatchedCallsiteLocs.count(Loc)) {
MismatchedCallsiteSamples += Count;
NumMismatchedCallsites++;
}
}
}
StringMap<std::set<LineLocation>> CalleeToCallsitesMap;
populateProfileCallsites(FS, CalleeToCallsitesMap);

for (auto &I : FS.getCallsiteSamples()) {
const LineLocation &Loc = I.first;
if (isInvalidLineOffset(Loc.LineOffset))
continue;
// The matching result will be saved to IRToProfileLocationMap, create a new
// map for each function.
auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);

uint64_t Count = 0;
for (auto &FM : I.second) {
Count += FM.second.getHeadSamplesEstimate();
}
TotalCallsiteSamples += Count;
TotalProfiledCallsites++;
if (!MatchedCallsiteLocs.count(Loc)) {
MismatchedCallsiteSamples += Count;
NumMismatchedCallsites++;
}
runStaleProfileMatching(IRLocations, CalleeToCallsitesMap,
IRToProfileLocationMap);
}
}

void SampleProfileMatcher::detectProfileMismatch() {
void SampleProfileMatcher::runOnModule() {
for (auto &F : M) {
if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
continue;
Expand All @@ -2181,8 +2383,10 @@ void SampleProfileMatcher::detectProfileMismatch() {
FS = Reader.getSamplesFor(F);
if (!FS)
continue;
detectProfileMismatch(F, *FS);
runOnFunction(F, *FS);
}
if (SalvageStaleProfile)
distributeIRToProfileLocationMap();

if (ReportProfileStaleness) {
if (FunctionSamples::ProfileIsProbeBased) {
Expand Down Expand Up @@ -2225,6 +2429,28 @@ void SampleProfileMatcher::detectProfileMismatch() {
}
}

// Attach the matching result to FS and, recursively, to every inlinee profile
// nested under it. A profile receives a pointer to the FuncMappings entry
// stored under its own name; profiles without such an entry are left as is.
void SampleProfileMatcher::distributeIRToProfileLocationMap(
    FunctionSamples &FS) {
  const auto ProfileMappings = FuncMappings.find(FS.getName());
  if (ProfileMappings != FuncMappings.end()) {
    FS.setIRToProfileLocationMap(&(ProfileMappings->second));
  }

  for (const auto &Inlinees : FS.getCallsiteSamples()) {
    // Iterate by reference: iterating by value would hand a temporary copy of
    // each inlinee profile to the recursive call, so the pointer would be set
    // on the copy and the real nested profile would never see its map.
    for (const auto &Inlinee : Inlinees.second) {
      // The callsite samples are reached through a const view, but they belong
      // to the mutable FS passed in, so dropping the const here is sound.
      distributeIRToProfileLocationMap(
          const_cast<FunctionSamples &>(Inlinee.second));
    }
  }
}

// Use a central place to distribute the matching results. Outlined and inlined
// profile with the function name will be set to the same pointer.
void SampleProfileMatcher::distributeIRToProfileLocationMap() {
for (auto &I : Reader.getProfiles()) {
distributeIRToProfileLocationMap(I.second);
}
}

bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
ProfileSummaryInfo *_PSI,
LazyCallGraph &CG) {
Expand Down Expand Up @@ -2270,8 +2496,10 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
assert(SymbolMap.count(StringRef()) == 0 &&
"No empty StringRef should be added in SymbolMap");

if (ReportProfileStaleness || PersistProfileStaleness)
MatchingManager->detectProfileMismatch();
if (ReportProfileStaleness || PersistProfileStaleness ||
SalvageStaleProfile) {
MatchingManager->runOnModule();
}

bool retval = false;
for (auto *F : buildFunctionOrder(M, CG)) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
main:1497:0
1: 0
2: 112
3: 112 bar:60 dummy_calltarget:50
4: 116
5: 0
7: 124 bar:124
9: 126 bar:126
6: foo:452
1: 112
2: 101
3: 13
4: 112
5: 101 bar:109
6: 13 bar:14
!CFGChecksum: 563022570642068
8: foo:472
1: 117
2: 104
3: 13
4: 121
5: 104 bar:104
6: 14 bar:14
!CFGChecksum: 563022570642068
!CFGChecksum: 1125988587804525
bar:491:491
1: 491
!CFGChecksum: 4294967295
Original file line number Diff line number Diff line change
@@ -0,0 +1,342 @@
; REQUIRES: x86_64-linux
; REQUIRES: asserts
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-matching.prof --salvage-stale-profile -S --debug-only=sample-profile 2>&1 | FileCheck %s

; The profiled source code:

; volatile int x = 1;
; __attribute__((noinline)) int bar(int p) {
; return p;
; }

; __attribute__((always_inline)) int foo(int i, int p) {
; if (i % 10) return bar(p);
; else return bar(p + 1);
; }

; int main() {
; for (int i = 0; i < 1000 * 1000; i++) {
; x += foo(i, x);
; x += bar(x);
; x += foo(i, x);
; x += bar(x);
; }
; }

; The source code for the current build:

; volatile int x = 1;
; __attribute__((noinline)) int bar(int p) {
; return p;
; }

; __attribute__((always_inline)) int foo(int i, int p) {
; if (i % 10) return bar(p);
; else return bar(p + 1);
; }

; int main() {
; if (x == 0) // code change
; return 0; // code change
; for (int i = 0; i < 1000 * 1000; i++) {
; x += foo(i, x);
; x += bar(x);
; if (i < 0) // code change
; return 0; // code change
; x += foo(i, x);
; x += bar(x);
; }
; }


; CHECK: Run stale profile matching for main

; CHECK: Location is matched from 1 to 1
; CHECK: Location is matched from 2 to 2
; CHECK: Location is matched from 3 to 3
; CHECK: Location is matched from 4 to 4
; CHECK: Location is matched from 5 to 5
; CHECK: Location is matched from 6 to 6
; CHECK: Location is matched from 7 to 7
; CHECK: Location is matched from 8 to 8
; CHECK: Location is matched from 9 to 9
; CHECK: Location is matched from 10 to 10
; CHECK: Location is matched from 11 to 11

; CHECK: Callsite with callee:foo is matched from 13 to 6
; CHECK: Location is rematched backwards from 7 to 0
; CHECK: Location is rematched backwards from 8 to 1
; CHECK: Location is rematched backwards from 9 to 2
; CHECK: Location is rematched backwards from 10 to 3
; CHECK: Location is rematched backwards from 11 to 4
; CHECK: Callsite with callee:bar is matched from 14 to 7
; CHECK: Callsite with callee:foo is matched from 15 to 8
; CHECK: Callsite with callee:bar is matched from 16 to 9


; CHECK: 2: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00)
; CHECK: 3: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00)
; CHECK: 4: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg ![[#]] - weight: 116 - factor: 1.00)
; CHECK: 5: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 5, i32 0, i64 -1), !dbg ![[#]] - weight: 0 - factor: 1.00)
; CHECK: 1: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00)
; CHECK: 2: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg ![[#]] - weight: 101 - factor: 1.00)
; CHECK: 5: %call.i3 = call i32 @bar(i32 noundef %1), !dbg ![[#]] - weight: 101 - factor: 1.00)
; CHECK: 3: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1), !dbg ![[#]] - weight: 13 - factor: 1.00)
; CHECK: 6: %call1.i6 = call i32 @bar(i32 noundef %add.i5), !dbg ![[#]] - weight: 13 - factor: 1.00)
; CHECK: 4: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00)
; CHECK: 14: %call2 = call i32 @bar(i32 noundef %3), !dbg ![[#]] - weight: 124 - factor: 1.00)
; CHECK: 8: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 8, i32 0, i64 -1), !dbg ![[#]] - weight: 0 - factor: 1.00)
; CHECK: 1: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1), !dbg ![[#]] - weight: 117 - factor: 1.00)
; CHECK: 2: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg ![[#]] - weight: 104 - factor: 1.00)
; CHECK: 5: %call.i = call i32 @bar(i32 noundef %5), !dbg ![[#]] - weight: 104 - factor: 1.00)
; CHECK: 3: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1), !dbg ![[#]] - weight: 13 - factor: 1.00)
; CHECK: 6: %call1.i = call i32 @bar(i32 noundef %add.i), !dbg ![[#]] - weight: 14 - factor: 1.00)
; CHECK: 4: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg ![[#]] - weight: 121 - factor: 1.00)
; CHECK: 16: %call9 = call i32 @bar(i32 noundef %7), !dbg ![[#]] - weight: 126 - factor: 1.00)
; CHECK: 9: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 9, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00)
; CHECK: 10: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 10, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00)
; CHECK: 11: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 11, i32 0, i64 -1), !dbg ![[#]] - weight: 116 - factor: 1.00)
; CHECK: 1: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg ![[#]] - weight: 0 - factor: 1.00)


target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@x = dso_local global i32 1, align 4, !dbg !0

; Function Attrs: noinline nounwind uwtable
; bar: returns its argument %p unchanged. Contains a single pseudo probe
; (index 1) whose GUID is the one listed for "bar" in !llvm.pseudo_probe_desc.
define dso_local i32 @bar(i32 noundef %p) #0 !dbg !16 {
entry:
call void @llvm.dbg.value(metadata i32 %p, metadata !20, metadata !DIExpression()), !dbg !21
call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !22
ret i32 %p, !dbg !23
}

; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1

; Function Attrs: alwaysinline nounwind uwtable
; foo: branches on (%i srem 10) != 0 and returns bar(%p) on the true edge or
; bar(%p + 1) on the false edge. Each of the four basic blocks carries one
; pseudo probe (indices 1-4) with the GUID listed for "foo" in
; !llvm.pseudo_probe_desc.
define dso_local i32 @foo(i32 noundef %i, i32 noundef %p) #2 !dbg !24 {
entry:
call void @llvm.dbg.value(metadata i32 %i, metadata !28, metadata !DIExpression()), !dbg !30
call void @llvm.dbg.value(metadata i32 %p, metadata !29, metadata !DIExpression()), !dbg !30
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1), !dbg !31
%rem = srem i32 %i, 10, !dbg !33
%tobool = icmp ne i32 %rem, 0, !dbg !33
br i1 %tobool, label %if.then, label %if.else, !dbg !34

if.then: ; preds = %entry
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg !35
%call = call i32 @bar(i32 noundef %p), !dbg !36
br label %return, !dbg !38

if.else: ; preds = %entry
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1), !dbg !39
%add = add nsw i32 %p, 1, !dbg !40
%call1 = call i32 @bar(i32 noundef %add), !dbg !41
br label %return, !dbg !43

return: ; preds = %if.else, %if.then
%retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ], !dbg !44
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg !45
ret i32 %retval.0, !dbg !45
}

; Function Attrs: nounwind uwtable
; main: the function whose profile is expected to be stale in this test. It
; carries eleven block pseudo probes (indices 1-11) with the GUID listed for
; "main" in !llvm.pseudo_probe_desc, and inside the loop body it calls @foo and
; @bar twice each, in the order foo, bar, foo, bar. These direct calls are the
; anchors the matcher can use.
define dso_local i32 @main() #3 !dbg !46 {
entry:
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !52
%0 = load volatile i32, ptr @x, align 4, !dbg !52, !tbaa !54
%cmp = icmp eq i32 %0, 0, !dbg !58
br i1 %cmp, label %if.then, label %if.end, !dbg !59

if.then: ; preds = %entry
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !60
br label %for.end, !dbg !60

if.end: ; preds = %entry
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !61
call void @llvm.dbg.value(metadata i32 0, metadata !50, metadata !DIExpression()), !dbg !62
br label %for.cond, !dbg !63

for.cond: ; preds = %if.end6, %if.end
%i.0 = phi i32 [ 0, %if.end ], [ %inc, %if.end6 ], !dbg !64
call void @llvm.dbg.value(metadata i32 %i.0, metadata !50, metadata !DIExpression()), !dbg !62
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !65
%cmp1 = icmp slt i32 %i.0, 1000000, !dbg !67
br i1 %cmp1, label %for.body, label %for.cond.cleanup, !dbg !68

for.cond.cleanup: ; preds = %for.cond
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 5, i32 0, i64 -1), !dbg !68
br label %cleanup, !dbg !68

for.body: ; preds = %for.cond
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !69
%1 = load volatile i32, ptr @x, align 4, !dbg !71, !tbaa !54
%call = call i32 @foo(i32 noundef %i.0, i32 noundef %1), !dbg !72
%2 = load volatile i32, ptr @x, align 4, !dbg !74, !tbaa !54
%add = add nsw i32 %2, %call, !dbg !74
store volatile i32 %add, ptr @x, align 4, !dbg !74, !tbaa !54
%3 = load volatile i32, ptr @x, align 4, !dbg !75, !tbaa !54
%call2 = call i32 @bar(i32 noundef %3), !dbg !76
%4 = load volatile i32, ptr @x, align 4, !dbg !78, !tbaa !54
%add3 = add nsw i32 %4, %call2, !dbg !78
store volatile i32 %add3, ptr @x, align 4, !dbg !78, !tbaa !54
; The condition is the constant false, so if.then5 is never taken at run time;
; presumably this is the already-folded `if (i < 0)` from the source listing at
; the top of this file.
br i1 false, label %if.then5, label %if.end6, !dbg !79

if.then5: ; preds = %for.body
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !80
br label %cleanup, !dbg !80

if.end6: ; preds = %for.body
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 8, i32 0, i64 -1), !dbg !82
%5 = load volatile i32, ptr @x, align 4, !dbg !83, !tbaa !54
%call7 = call i32 @foo(i32 noundef %i.0, i32 noundef %5), !dbg !84
%6 = load volatile i32, ptr @x, align 4, !dbg !86, !tbaa !54
%add8 = add nsw i32 %6, %call7, !dbg !86
store volatile i32 %add8, ptr @x, align 4, !dbg !86, !tbaa !54
%7 = load volatile i32, ptr @x, align 4, !dbg !87, !tbaa !54
%call9 = call i32 @bar(i32 noundef %7), !dbg !88
%8 = load volatile i32, ptr @x, align 4, !dbg !90, !tbaa !54
%add10 = add nsw i32 %8, %call9, !dbg !90
store volatile i32 %add10, ptr @x, align 4, !dbg !90, !tbaa !54
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 9, i32 0, i64 -1), !dbg !91
%inc = add nsw i32 %i.0, 1, !dbg !91
call void @llvm.dbg.value(metadata i32 %inc, metadata !50, metadata !DIExpression()), !dbg !62
br label %for.cond, !dbg !92, !llvm.loop !93

cleanup: ; preds = %if.then5, %for.cond.cleanup
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 10, i32 0, i64 -1), !dbg !96
br label %for.end

for.end: ; preds = %cleanup, %if.then
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 11, i32 0, i64 -1), !dbg !97
ret i32 0, !dbg !97
}

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #4

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #4

; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) #1

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #5

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare void @llvm.dbg.value(metadata, metadata, metadata) #6

attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
attributes #1 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #2 = { alwaysinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
attributes #3 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
attributes #4 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #5 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
attributes #6 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!7, !8, !9, !10, !11}
!llvm.ident = !{!12}
!llvm.pseudo_probe_desc = !{!13, !14, !15}

!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 17.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
!3 = !DIFile(filename: "test.c", directory: "path")
!4 = !{!0}
!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!7 = !{i32 7, !"Dwarf Version", i32 5}
!8 = !{i32 2, !"Debug Info Version", i32 3}
!9 = !{i32 1, !"wchar_size", i32 4}
!10 = !{i32 7, !"uwtable", i32 2}
!11 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
!12 = !{!"clang version 17.0.0"}
!13 = !{i64 -2012135647395072713, i64 4294967295, !"bar"}
!14 = !{i64 6699318081062747564, i64 563022570642068, !"foo"}
!15 = !{i64 -2624081020897602054, i64 1126158552146340, !"main"}
!16 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 2, type: !17, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !19)
!17 = !DISubroutineType(types: !18)
!18 = !{!6, !6}
!19 = !{!20}
!20 = !DILocalVariable(name: "p", arg: 1, scope: !16, file: !3, line: 2, type: !6)
!21 = !DILocation(line: 0, scope: !16)
!22 = !DILocation(line: 3, column: 10, scope: !16)
!23 = !DILocation(line: 3, column: 3, scope: !16)
!24 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 6, type: !25, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !27)
!25 = !DISubroutineType(types: !26)
!26 = !{!6, !6, !6}
!27 = !{!28, !29}
!28 = !DILocalVariable(name: "i", arg: 1, scope: !24, file: !3, line: 6, type: !6)
!29 = !DILocalVariable(name: "p", arg: 2, scope: !24, file: !3, line: 6, type: !6)
!30 = !DILocation(line: 0, scope: !24)
!31 = !DILocation(line: 7, column: 6, scope: !32)
!32 = distinct !DILexicalBlock(scope: !24, file: !3, line: 7, column: 6)
!33 = !DILocation(line: 7, column: 8, scope: !32)
!34 = !DILocation(line: 7, column: 6, scope: !24)
!35 = !DILocation(line: 7, column: 26, scope: !32)
!36 = !DILocation(line: 7, column: 22, scope: !37)
!37 = !DILexicalBlockFile(scope: !32, file: !3, discriminator: 186646575)
!38 = !DILocation(line: 7, column: 14, scope: !32)
!39 = !DILocation(line: 8, column: 19, scope: !32)
!40 = !DILocation(line: 8, column: 21, scope: !32)
!41 = !DILocation(line: 8, column: 15, scope: !42)
!42 = !DILexicalBlockFile(scope: !32, file: !3, discriminator: 186646583)
!43 = !DILocation(line: 8, column: 8, scope: !32)
!44 = !DILocation(line: 0, scope: !32)
!45 = !DILocation(line: 9, column: 1, scope: !24)
!46 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !47, scopeLine: 11, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !49)
!47 = !DISubroutineType(types: !48)
!48 = !{!6}
!49 = !{!50}
!50 = !DILocalVariable(name: "i", scope: !51, file: !3, line: 14, type: !6)
!51 = distinct !DILexicalBlock(scope: !46, file: !3, line: 14, column: 3)
!52 = !DILocation(line: 12, column: 6, scope: !53)
!53 = distinct !DILexicalBlock(scope: !46, file: !3, line: 12, column: 6)
!54 = !{!55, !55, i64 0}
!55 = !{!"int", !56, i64 0}
!56 = !{!"omnipotent char", !57, i64 0}
!57 = !{!"Simple C/C++ TBAA"}
!58 = !DILocation(line: 12, column: 8, scope: !53)
!59 = !DILocation(line: 12, column: 6, scope: !46)
!60 = !DILocation(line: 13, column: 5, scope: !53)
!61 = !DILocation(line: 14, column: 11, scope: !51)
!62 = !DILocation(line: 0, scope: !51)
!63 = !DILocation(line: 14, column: 7, scope: !51)
!64 = !DILocation(line: 14, scope: !51)
!65 = !DILocation(line: 14, column: 18, scope: !66)
!66 = distinct !DILexicalBlock(scope: !51, file: !3, line: 14, column: 3)
!67 = !DILocation(line: 14, column: 20, scope: !66)
!68 = !DILocation(line: 14, column: 3, scope: !51)
!69 = !DILocation(line: 15, column: 15, scope: !70)
!70 = distinct !DILexicalBlock(scope: !66, file: !3, line: 14, column: 40)
!71 = !DILocation(line: 15, column: 18, scope: !70)
!72 = !DILocation(line: 15, column: 11, scope: !73)
!73 = !DILexicalBlockFile(scope: !70, file: !3, discriminator: 186646639)
!74 = !DILocation(line: 15, column: 8, scope: !70)
!75 = !DILocation(line: 16, column: 15, scope: !70)
!76 = !DILocation(line: 16, column: 11, scope: !77)
!77 = !DILexicalBlockFile(scope: !70, file: !3, discriminator: 186646647)
!78 = !DILocation(line: 16, column: 8, scope: !70)
!79 = !DILocation(line: 17, column: 9, scope: !70)
!80 = !DILocation(line: 18, column: 8, scope: !81)
!81 = distinct !DILexicalBlock(scope: !70, file: !3, line: 17, column: 9)
!82 = !DILocation(line: 19, column: 15, scope: !70)
!83 = !DILocation(line: 19, column: 18, scope: !70)
!84 = !DILocation(line: 19, column: 11, scope: !85)
!85 = !DILexicalBlockFile(scope: !70, file: !3, discriminator: 186646655)
!86 = !DILocation(line: 19, column: 8, scope: !70)
!87 = !DILocation(line: 20, column: 15, scope: !70)
!88 = !DILocation(line: 20, column: 11, scope: !89)
!89 = !DILexicalBlockFile(scope: !70, file: !3, discriminator: 186646663)
!90 = !DILocation(line: 20, column: 8, scope: !70)
!91 = !DILocation(line: 14, column: 36, scope: !66)
!92 = !DILocation(line: 14, column: 3, scope: !66)
!93 = distinct !{!93, !68, !94, !95}
!94 = !DILocation(line: 21, column: 3, scope: !51)
!95 = !{!"llvm.loop.mustprogress"}
!96 = !DILocation(line: 0, scope: !46)
!97 = !DILocation(line: 22, column: 1, scope: !46)