Skip to content

Commit

Permalink
Revert "[llvm-profdata] Refactoring Sample Profile Reader to increase…
Browse files Browse the repository at this point in the history
  • Loading branch information
dyung committed Jun 24, 2023
1 parent a7fb865 commit c9a8a0e
Show file tree
Hide file tree
Showing 14 changed files with 153 additions and 513 deletions.
211 changes: 17 additions & 194 deletions llvm/include/llvm/ProfileData/SampleProf.h
Original file line number Diff line number Diff line change
Expand Up @@ -318,14 +318,6 @@ struct LineLocationHash {

raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc);

static inline hash_code hashFuncName(StringRef F) {
// If function name is already MD5 string, do not hash again.
uint64_t Hash;
if (F.getAsInteger(10, Hash))
Hash = MD5Hash(F);
return Hash;
}

/// Representation of a single sample record.
///
/// A sample record is represented by a positive integer value, which
Expand Down Expand Up @@ -638,13 +630,9 @@ class SampleContext {
return getContextString(FullContext, false);
}

hash_code getHashCode() const {
if (hasContext())
return hash_value(getContextFrames());

// For non-context function name, use its MD5 as hash value, so that it is
// consistent with the profile map's key.
return hashFuncName(getName());
uint64_t getHashCode() const {
return hasContext() ? hash_value(getContextFrames())
: hash_value(getName());
}

/// Set the name of the function and clear the current context.
Expand Down Expand Up @@ -722,12 +710,9 @@ class SampleContext {
uint32_t Attributes;
};

static inline hash_code hash_value(const SampleContext &Context) {
return Context.getHashCode();
}

inline raw_ostream &operator<<(raw_ostream &OS, const SampleContext &Context) {
return OS << Context.toString();
static inline hash_code hash_value(const SampleContext &arg) {
return arg.hasContext() ? hash_value(arg.getContextFrames())
: hash_value(arg.getName());
}

class FunctionSamples;
Expand Down Expand Up @@ -1217,9 +1202,6 @@ class FunctionSamples {
return !(*this == Other);
}

template <typename T>
const T &getKey() const;

private:
/// CFG hash value for the function.
uint64_t FunctionHash = 0;
Expand Down Expand Up @@ -1283,173 +1265,12 @@ class FunctionSamples {
const LocToLocMap *IRToProfileLocationMap = nullptr;
};

template <>
inline const SampleContext &FunctionSamples::getKey<SampleContext>() const {
return getContext();
}

raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS);

/// This class is a wrapper to associative container MapT<KeyT, ValueT> using
/// the hash value of the original key as the new key. This greatly improves the
/// performance of insert and query operations especially when hash values of
/// keys are available a priori, and reduces memory usage if KeyT has a large
/// size.
/// When performing any action, if an existing entry with a given key is found,
/// and the interface "KeyT ValueT::getKey<KeyT>() const" to retrieve a value's
/// original key exists, this class checks if the given key actually matches
/// the existing entry's original key. If they do not match, this class behaves
/// as if the entry did not exist (for insertion, this means the new value will
/// replace the existing entry's value, as if it is newly inserted). If
/// ValueT::getKey<KeyT>() is not available, all keys with the same hash value
/// are considered equivalent (i.e. hash collision is silently ignored). Given
/// such feature this class should only be used where it does not affect
/// compilation correctness, for example, when loading a sample profile.
/// Assuming the hashing algorithm is uniform, the probability of hash collision
/// with 1,000,000 entries is
/// (2^64)!/((2^64-1000000)!*(2^64)^1000000) ~= 3*10^-8.
template <template <typename, typename, typename...> typename MapT,
typename KeyT, typename ValueT, typename... MapTArgs>
class HashKeyMap : public MapT<hash_code, ValueT, MapTArgs...> {
public:
using base_type = MapT<hash_code, ValueT, MapTArgs...>;
using key_type = hash_code;
using original_key_type = KeyT;
using mapped_type = ValueT;
using value_type = typename base_type::value_type;
using SampleProfileMap =
std::unordered_map<SampleContext, FunctionSamples, SampleContext::Hash>;

using iterator = typename base_type::iterator;
using const_iterator = typename base_type::const_iterator;

private:
// If the value type has getKey(), retrieve its original key for comparison.
template <typename U = mapped_type,
typename = decltype(U().template getKey<original_key_type>())>
static bool
CheckKeyMatch(const original_key_type &Key, const mapped_type &ExistingValue,
original_key_type *ExistingKeyIfDifferent = nullptr) {
const original_key_type &ExistingKey =
ExistingValue.template getKey<original_key_type>();
bool Result = (Key == ExistingKey);
if (!Result && ExistingKeyIfDifferent)
*ExistingKeyIfDifferent = ExistingKey;
return Result;
}

// If getKey() does not exist, this overload is selected, which assumes all
// keys with the same hash are equivalent.
static bool CheckKeyMatch(...) { return true; }

public:
template <typename... Ts>
std::pair<iterator, bool> try_emplace(const key_type &Hash,
const original_key_type &Key,
Ts &&...Args) {
assert(Hash == hash_value(Key));
auto Ret = base_type::try_emplace(Hash, std::forward<Ts>(Args)...);
if (!Ret.second) {
original_key_type ExistingKey;
if (LLVM_UNLIKELY(!CheckKeyMatch(Key, Ret.first->second, &ExistingKey))) {
dbgs() << "MD5 collision detected: " << Key << " and " << ExistingKey
<< " has same hash value " << Hash << "\n";
Ret.second = true;
Ret.first->second = mapped_type(std::forward<Ts>(Args)...);
}
}
return Ret;
}

template <typename... Ts>
std::pair<iterator, bool> try_emplace(const original_key_type &Key,
Ts &&...Args) {
key_type Hash = hash_value(Key);
return try_emplace(Hash, Key, std::forward<Ts>(Args)...);
}

template <typename... Ts> std::pair<iterator, bool> emplace(Ts &&...Args) {
return try_emplace(std::forward<Ts>(Args)...);
}

mapped_type &operator[](const original_key_type &Key) {
return try_emplace(Key, mapped_type()).first->second;
}

iterator find(const original_key_type &Key) {
key_type Hash = hash_value(Key);
auto It = base_type::find(Hash);
if (It != base_type::end())
if (LLVM_LIKELY(CheckKeyMatch(Key, It->second)))
return It;
return base_type::end();
}

const_iterator find(const original_key_type &Key) const {
key_type Hash = hash_value(Key);
auto It = base_type::find(Hash);
if (It != base_type::end())
if (LLVM_LIKELY(CheckKeyMatch(Key, It->second)))
return It;
return base_type::end();
}

size_t erase(const original_key_type &Ctx) {
auto It = find(Ctx);
if (It != base_type::end()) {
base_type::erase(It);
return 1;
}
return 0;
}
};

/// This class provides operator overloads to the map container using MD5 as the
/// key type, so that existing code can still work in most cases using
/// SampleContext as key.
/// Note: when populating container, make sure to assign the SampleContext to
/// the mapped value immediately because the key no longer holds it.
class SampleProfileMap
: public HashKeyMap<DenseMap, SampleContext, FunctionSamples> {
public:
// Convenience method because this is being used in many places. Set the
// FunctionSamples' context if its newly inserted.
mapped_type &Create(const SampleContext &Ctx) {
auto Ret = try_emplace(Ctx, FunctionSamples());
if (Ret.second)
Ret.first->second.setContext(Ctx);
return Ret.first->second;
}

iterator find(const SampleContext &Ctx) {
return HashKeyMap<DenseMap, SampleContext, FunctionSamples>::find(Ctx);
}

const_iterator find(const SampleContext &Ctx) const {
return HashKeyMap<DenseMap, SampleContext, FunctionSamples>::find(Ctx);
}

// Overloaded find() to lookup a function by name. This is called by IPO
// passes with an actual function name, and it is possible that the profile
// reader converted function names in the profile to MD5 strings, so we need
// to check if either representation matches.
iterator find(StringRef Fname) {
hash_code Hash = hashFuncName(Fname);
auto It = base_type::find(Hash);
if (It != end()) {
StringRef CtxName = It->second.getContext().getName();
if (LLVM_LIKELY(CtxName == Fname || CtxName == std::to_string(Hash)))
return It;
}
return end();
}

size_t erase(const SampleContext &Ctx) {
return HashKeyMap<DenseMap, SampleContext, FunctionSamples>::erase(Ctx);
}

size_t erase(const key_type &Key) { return base_type::erase(Key); }
};

using NameFunctionSamples = std::pair<hash_code, const FunctionSamples *>;
using NameFunctionSamples = std::pair<SampleContext, const FunctionSamples *>;

void sortFuncProfiles(const SampleProfileMap &ProfileMap,
std::vector<NameFunctionSamples> &SortedProfiles);
Expand Down Expand Up @@ -1495,6 +1316,8 @@ class SampleContextTrimmer {
bool MergeColdContext,
uint32_t ColdContextFrameLength,
bool TrimBaseProfileOnly);
// Canonicalize context profile name and attributes.
void canonicalizeContextProfiles();

private:
SampleProfileMap &ProfileMap;
Expand Down Expand Up @@ -1540,12 +1363,12 @@ class ProfileConverter {
SampleProfileMap &OutputProfiles,
bool ProfileIsCS = false) {
if (ProfileIsCS) {
for (const auto &I : InputProfiles) {
// Retain the profile name and clear the full context for each function
// profile.
FunctionSamples &FS = OutputProfiles.Create(I.second.getName());
FS.merge(I.second);
}
for (const auto &I : InputProfiles)
OutputProfiles[I.second.getName()].merge(I.second);
// Retain the profile name and clear the full context for each function
// profile.
for (auto &I : OutputProfiles)
I.second.setContext(SampleContext(I.first));
} else {
for (const auto &I : InputProfiles)
flattenNestedProfile(OutputProfiles, I.second);
Expand Down
52 changes: 24 additions & 28 deletions llvm/include/llvm/ProfileData/SampleProfReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ class SampleProfileReader {
public:
SampleProfileReader(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
SampleProfileFormat Format = SPF_None)
: Profiles(), Ctx(C), Buffer(std::move(B)), Format(Format) {}
: Profiles(0), Ctx(C), Buffer(std::move(B)), Format(Format) {}

virtual ~SampleProfileReader() = default;

Expand Down Expand Up @@ -383,8 +383,8 @@ class SampleProfileReader {
/// The implementaion to read sample profiles from the associated file.
virtual std::error_code readImpl() = 0;

/// Print the profile for \p FunctionSamples on stream \p OS.
void dumpFunctionProfile(const FunctionSamples &FS, raw_ostream &OS = dbgs());
/// Print the profile for \p FContext on stream \p OS.
void dumpFunctionProfile(SampleContext FContext, raw_ostream &OS = dbgs());

/// Collect functions with definitions in Module M. For reader which
/// support loading function profiles on demand, return true when the
Expand All @@ -410,15 +410,23 @@ class SampleProfileReader {
/// Return the samples collected for function \p F, create empty
/// FunctionSamples if it doesn't exist.
FunctionSamples *getOrCreateSamplesFor(const Function &F) {
std::string FGUID;
StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
auto it = Profiles.find(CanonName);
if (it != Profiles.end())
return &it->second;
CanonName = getRepInFormat(CanonName, useMD5(), FGUID);
auto It = Profiles.find(CanonName);
if (It != Profiles.end())
return &It->second;
if (!FGUID.empty()) {
assert(useMD5() && "New name should only be generated for md5 profile");
CanonName = *MD5NameBuffer.insert(FGUID).first;
}
return &Profiles[CanonName];
}

/// Return the samples collected for function \p F.
FunctionSamples *getSamplesFor(StringRef Fname) {
virtual FunctionSamples *getSamplesFor(StringRef Fname) {
std::string FGUID;
Fname = getRepInFormat(Fname, useMD5(), FGUID);
auto It = Profiles.find(Fname);
if (It != Profiles.end())
return &It->second;
Expand Down Expand Up @@ -646,16 +654,15 @@ class SampleProfileReaderBinary : public SampleProfileReader {
/// Read the whole name table.
std::error_code readNameTable();

/// Read a string indirectly via the name table. Optionally return the index.
ErrorOr<StringRef> readStringFromTable(size_t *RetIdx = nullptr);
/// Read a string indirectly via the name table.
ErrorOr<StringRef> readStringFromTable();

/// Read a context indirectly via the CSNameTable. Optionally return the
/// index.
ErrorOr<SampleContextFrames> readContextFromTable(size_t *RetIdx = nullptr);
/// Read a context indirectly via the CSNameTable.
ErrorOr<SampleContextFrames> readContextFromTable();

/// Read a context indirectly via the CSNameTable if the profile has context,
/// otherwise same as readStringFromTable, also return its hash value.
ErrorOr<std::pair<SampleContext, hash_code>> readSampleContextFromTable();
/// otherwise same as readStringFromTable.
ErrorOr<SampleContext> readSampleContextFromTable();

/// Points to the current location in the buffer.
const uint8_t *Data = nullptr;
Expand All @@ -675,24 +682,13 @@ class SampleProfileReaderBinary : public SampleProfileReader {
/// the lifetime of MD5StringBuf is not shorter than that of NameTable.
std::vector<std::string> MD5StringBuf;

/// The starting address of fixed length MD5 name table section.
/// The starting address of NameTable containing fixed length MD5.
const uint8_t *MD5NameMemStart = nullptr;

/// CSNameTable is used to save full context vectors. It is the backing buffer
/// for SampleContextFrames.
std::vector<SampleContextFrameVector> CSNameTable;

/// Table to cache MD5 values of sample contexts corresponding to
/// readSampleContextFromTable(), used to index into Profiles or
/// FuncOffsetTable.
std::vector<hash_code> MD5SampleContextTable;

/// The starting address of the table of MD5 values of sample contexts. For
/// fixed length MD5 non-CS profile it is same as MD5NameMemStart because
/// hashes of non-CS contexts are already in the profile. Otherwise it points
/// to the start of MD5SampleContextTable.
const hash_code *MD5SampleContextStart = nullptr;

private:
std::error_code readSummaryEntry(std::vector<ProfileSummaryEntry> &Entries);
virtual std::error_code verifySPMagic(uint64_t Magic) = 0;
Expand Down Expand Up @@ -766,10 +762,10 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {

std::unique_ptr<ProfileSymbolList> ProfSymList;

/// The table mapping from a function context's MD5 to the offset of its
/// The table mapping from function context to the offset of its
/// FunctionSample towards file start.
/// At most one of FuncOffsetTable and FuncOffsetList is populated.
DenseMap<hash_code, uint64_t> FuncOffsetTable;
DenseMap<SampleContext, uint64_t> FuncOffsetTable;

/// The list version of FuncOffsetTable. This is used if every entry is
/// being accessed.
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,9 @@ SampleProfileSummaryBuilder::computeSummaryForProfiles(
// profiles before computing profile summary.
if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
!UseContextLessSummary.getNumOccurrences())) {
ProfileConverter::flattenProfile(Profiles, ContextLessProfiles,
/*ProfileIsCS=*/true);
for (const auto &I : Profiles) {
ContextLessProfiles[I.second.getName()].merge(I.second);
}
ProfilesToUse = &ContextLessProfiles;
}

Expand Down
Loading

0 comments on commit c9a8a0e

Please sign in to comment.