Skip to content

Commit

Permalink
Show file tree
Hide file tree
Showing 8 changed files with 40 additions and 705 deletions.
2 changes: 0 additions & 2 deletions llvm/include/llvm/DebugInfo/GSYM/FileWriter.h
Expand Up @@ -113,8 +113,6 @@ class FileWriter {
return OS;
}

/// Return the value of the ByteOrder member.
llvm::support::endianness getByteOrder() const { return ByteOrder; }

private:
FileWriter(const FileWriter &rhs) = delete;
void operator=(const FileWriter &rhs) = delete;
Expand Down
15 changes: 0 additions & 15 deletions llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
Expand Up @@ -90,10 +90,6 @@ struct FunctionInfo {
uint32_t Name; ///< String table offset in the string table.
std::optional<LineTable> OptLineTable;
std::optional<InlineInfo> Inline;
/// If we encode a FunctionInfo during segmenting so we know its size, we can
/// cache that encoding here so we don't need to re-encode it when saving the
/// GSYM file.
SmallString<32> EncodingCache;

/// Construct a FunctionInfo whose Range is initialized from \a Addr and
/// \a Addr + \a Size, and whose Name is initialized from \a N. All three
/// parameters default to zero.
FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0)
: Range(Addr, Addr + Size), Name(N) {}
Expand Down Expand Up @@ -144,17 +140,6 @@ struct FunctionInfo {
/// function info that was successfully written into the stream.
llvm::Expected<uint64_t> encode(FileWriter &O) const;

/// Encode this function info into the internal byte cache and return the size
/// in bytes.
///
/// When segmenting GSYM files we need to know how big each FunctionInfo will
/// encode into so we can generate segments of the right size. We don't want
/// to have to encode a FunctionInfo twice, so we can cache the encoded bytes
/// and re-use them when calling FunctionInfo::encode(...).
///
/// \returns The size in bytes of the FunctionInfo if it were to be encoded
/// into a byte stream.
uint64_t cacheEncoding();

/// Lookup an address within a FunctionInfo object's data stream.
///
Expand Down
179 changes: 1 addition & 178 deletions llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
Expand Up @@ -137,8 +137,6 @@ class GsymCreator {
StringTableBuilder StrTab;
StringSet<> StringStorage;
DenseMap<llvm::gsym::FileEntry, uint32_t> FileEntryToIndex;
// Needed for mapping string offsets back to the string stored in \a StrTab.
DenseMap<uint64_t, CachedHashStringRef> StringOffsetMap;
std::vector<llvm::gsym::FileEntry> Files;
std::vector<uint8_t> UUID;
std::optional<AddressRanges> ValidTextRanges;
Expand All @@ -147,168 +145,15 @@ class GsymCreator {
bool Finalized = false;
bool Quiet;


/// Get the first function start address.
///
/// \returns The start address of the first FunctionInfo or std::nullopt if
/// there are no function infos.
std::optional<uint64_t> getFirstFunctionAddress() const;

/// Get the last function address.
///
/// \returns The start address of the last FunctionInfo or std::nullopt if
/// there are no function infos.
std::optional<uint64_t> getLastFunctionAddress() const;

/// Get the base address to use for this GSYM file.
///
/// \returns The base address to put into the header and to use when creating
/// the address offset table or std::nullopt if there are no valid
/// function infos or if the base address wasn't specified.
std::optional<uint64_t> getBaseAddress() const;

/// Get the size of an address offset in the address offset table.
///
/// GSYM files store offsets from the base address in the address offset table
/// and we store the size of the address offsets in the GSYM header. This
/// function will calculate the size in bytes of these address offsets based
/// on the current contents of the GSYM file.
///
/// \returns The size in bytes of the address offsets.
uint8_t getAddressOffsetSize() const;

/// Get the maximum address offset for the current address offset size.
///
/// This is used when creating the address offset table to ensure we have
/// values that are in range so we don't end up truncating address offsets
/// when creating GSYM files as the code evolves.
///
/// \returns The maximum address offset value that will be encoded into a GSYM
/// file.
uint64_t getMaxAddressOffset() const;

/// Calculate the byte size of the GSYM header and table sizes.
///
/// This function will calculate the exact size in bytes of the encoded GSYM
/// for the following items:
/// - The GSYM header
/// - The Address offset table
/// - The Address info offset table
/// - The file table
/// - The string table
///
/// This is used to help split GSYM files into segments.
///
/// \returns Size in bytes of the GSYM header and tables.
uint64_t calculateHeaderAndTableSize() const;

/// Copy a FunctionInfo from the \a SrcGC GSYM creator into this creator.
///
/// Copy the function info and only the needed files and strings and add a
/// converted FunctionInfo into this object. This is used to segment GSYM
/// files into separate files while only transferring the files and strings
/// that are needed from \a SrcGC.
///
/// \param SrcGC The source gsym creator to copy from.
/// \param FuncInfoIdx The function info index within \a SrcGC to copy.
/// \returns The number of bytes it will take to encode the function info in
/// this GsymCreator. This helps calculate the size of the current GSYM
/// segment file.
uint64_t copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncInfoIdx);

/// Copy a string from \a SrcGC into this object.
///
/// Copy a string from \a SrcGC by string table offset into this GSYM creator.
/// If a string has already been copied, the uniqued string table offset will
/// be returned, otherwise the string will be copied and a unique offset will
/// be returned.
///
/// \param SrcGC The source gsym creator to copy from.
/// \param StrOff The string table offset from \a SrcGC to copy.
/// \returns The new string table offset of the string within this object.
uint32_t copyString(const GsymCreator &SrcGC, uint32_t StrOff);

/// Copy a file from \a SrcGC into this object.
///
/// Copy a file from \a SrcGC by file index into this GSYM creator. Files
/// consist of two string table entries, one for the directory and one for the
/// filename. This function will copy any needed strings and ensure the file
/// is uniqued within this object. If a file already exists in this GSYM
/// creator the uniqued index will be returned, else the strings will be
/// copied and the new file index will be returned.
///
/// \param SrcGC The source gsym creator to copy from.
/// \param FileIdx The 1 based file table index within \a SrcGC to copy. A
/// file index of zero will always return zero as zero is a reserved file
/// index that means no file.
/// \returns The new file index of the file within this object.
uint32_t copyFile(const GsymCreator &SrcGC, uint32_t FileIdx);

/// Inserts a FileEntry into the file table.
///
/// This is used to insert a file entry in a thread safe way into this object.
///
/// \param FE A file entry object that contains valid string table offsets
/// from this object already.
/// \returns NOTE(review): presumably the file table index assigned to \a FE;
/// confirm against the definition, which is not visible here.
uint32_t insertFileEntry(FileEntry FE);

/// Fixup any string and file references by updating any file indexes and
/// string offsets in the InlineInfo parameter.
///
/// When copying InlineInfo entries, we can simply make a copy of the object
/// and then fixup the files and strings for efficiency.
///
/// \param SrcGC The source gsym creator to copy from.
/// \param II The inline info that contains file indexes and string offsets
/// that come from \a SrcGC. The entries will be updated by copying any files
/// and strings over into this object.
void fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II);

/// Get the first function info address from this GSYM file.
///
/// This is used to add a suffix to segmented GSYM files to indicate the first
/// address for the first function info within the file.
///
/// \returns The first function info address.
uint64_t getFirstFunctionInfoAddress() const;

/// Save this GSYM file into segments that are roughly \a SegmentSize in size.
///
/// When segmented GSYM files are saved to disk, they will use \a Path as a
/// prefix and then have the first function info address appended to the path
/// when each segment is saved. Each segmented GSYM file has only the
/// strings and files that are needed to save the function infos that are in
/// each segment. These smaller files are easy to compress and download
/// separately and allow for efficient lookups with very large GSYM files and
/// segmenting them allows servers to download only the segments that are
/// needed.
///
/// \param Path The path prefix to use when saving the GSYM files.
/// \param ByteOrder The endianness to use when saving the file.
/// \param SegmentSize The size in bytes to segment the GSYM file into.
llvm::Error saveSegments(StringRef Path,
llvm::support::endianness ByteOrder,
uint64_t SegmentSize) const;

public:
GsymCreator(bool Quiet = false);

/// Save a GSYM file to a stand alone file.
///
/// \param Path The file path to save the GSYM file to.
/// \param ByteOrder The endianness to use when saving the file.
/// \param SegmentSize The size in bytes to segment the GSYM file into. If
/// this option is set this function will create N segments
/// that are all around \a SegmentSize bytes in size. This
/// allows a very large GSYM file to be broken up into
/// shards. Each GSYM file will have its own file table,
/// and string table that only have the files and strings
/// needed for the shard. If this argument has no value,
/// a single GSYM file that contains all function
/// information will be created.
/// \returns An error object that indicates success or failure of the save.
llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder,
std::optional<uint64_t> SegmentSize = std::nullopt) const;
llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder) const;

/// Encode a GSYM into the file writer stream at the current position.
///
Expand Down Expand Up @@ -446,28 +291,6 @@ class GsymCreator {

/// Whether the transformation should be quiet, i.e. not output warnings.
bool isQuiet() const { return Quiet; }


/// Create a segmented GSYM creator starting with function info index
/// \a FuncIdx.
///
/// This function will create a GsymCreator object that will encode into
/// roughly \a SegmentSize bytes and return it. It is used by the private
/// saveSegments(...) function and also is used by the GSYM unit tests to test
/// segmenting of GSYM files. The returned GsymCreator can be finalized and
/// encoded.
///
/// \param [in] SegmentSize The size in bytes to roughly segment the GSYM file
/// into.
/// \param [in,out] FuncIdx The index of the first function info to encode
/// into the returned GsymCreator. This index will be updated so it can be
/// used in subsequent calls to this function to allow more segments to be
/// created.
/// \returns An expected unique pointer to a GsymCreator or an error. The
/// returned unique pointer can be NULL if there are no more functions to
/// encode.
llvm::Expected<std::unique_ptr<GsymCreator>>
createSegment(uint64_t SegmentSize, size_t &FuncIdx) const;
};

} // namespace gsym
Expand Down
10 changes: 0 additions & 10 deletions llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
Expand Up @@ -52,16 +52,6 @@ struct LookupResult {
std::string getSourceFile(uint32_t Index) const;
};

/// Equality for LookupResult: two results match only when the lookup address,
/// function range, function name and location list all compare equal.
inline bool operator==(const LookupResult &LHS, const LookupResult &RHS) {
  // Address, range and name are checked first, in that order; Locations is
  // compared last and only when none of the earlier fields differ.
  const bool HeaderDiffers = LHS.LookupAddr != RHS.LookupAddr ||
                             LHS.FuncRange != RHS.FuncRange ||
                             LHS.FuncName != RHS.FuncName;
  if (HeaderDiffers)
    return false;
  return LHS.Locations == RHS.Locations;
}

raw_ostream &operator<<(raw_ostream &OS, const LookupResult &R);

} // namespace gsym
Expand Down
64 changes: 19 additions & 45 deletions llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
Expand Up @@ -96,83 +96,57 @@ llvm::Expected<FunctionInfo> FunctionInfo::decode(DataExtractor &Data,
return std::move(FI);
}

uint64_t FunctionInfo::cacheEncoding() {
EncodingCache.clear();
if (!isValid())
return 0;
raw_svector_ostream OutStrm(EncodingCache);
FileWriter FW(OutStrm, support::endian::system_endianness());
llvm::Expected<uint64_t> Result = encode(FW);
if (!Result) {
EncodingCache.clear();
consumeError(Result.takeError());
return 0;
}
return EncodingCache.size();
}

/// Encode this FunctionInfo into a FileWriter.
///
/// The writer is aligned to 4 bytes, then the function size and Name are
/// written as uint32_t values. If present, the line table and the inline info
/// are each written as an InfoType tag followed by a uint32_t length that is
/// written as zero and patched with fixup32() once the payload has been
/// encoded. The list is terminated with an InfoType::EndOfList tag and a zero
/// length.
///
/// \returns The writer offset (taken after alignment) at which this
/// FunctionInfo starts, or an error if this object is not valid, if a nested
/// encode fails, or if a payload length exceeds UINT32_MAX.
///
/// NOTE(review): this span appears to show two variants of the function
/// interleaved (one using the parameter name `Out`, one using `O`), including
/// an EncodingCache fast path that exists only in the `Out` variant; confirm
/// which variant is intended before building.
llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out) const {
llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &O) const {
if (!isValid())
return createStringError(std::errc::invalid_argument,
"attempted to encode invalid FunctionInfo object");
// Align FunctionInfo data to a 4 byte alignment.
Out.alignTo(4);
const uint64_t FuncInfoOffset = Out.tell();
// Check if we have already encoded this function info into EncodingCache.
// This will be non empty when creating segmented GSYM files as we need to
// precompute exactly how big FunctionInfo objects encode into so we can
// accurately make segments of a specific size.
if (!EncodingCache.empty() &&
support::endian::system_endianness() == Out.getByteOrder()) {
// We already encoded this object, just write out the bytes.
Out.writeData(llvm::ArrayRef<uint8_t>((const uint8_t *)EncodingCache.data(),
EncodingCache.size()));
return FuncInfoOffset;
}
O.alignTo(4);
const uint64_t FuncInfoOffset = O.tell();
// Write the size in bytes of this function as a uint32_t. This can be zero
// if we just have a symbol from a symbol table and that symbol has no size.
Out.writeU32(size());
O.writeU32(size());
// Write the name of this function as a uint32_t string table offset.
Out.writeU32(Name);
O.writeU32(Name);

if (OptLineTable) {
Out.writeU32(InfoType::LineTableInfo);
O.writeU32(InfoType::LineTableInfo);
// Write a uint32_t length as zero for now, we will fix this up after
// writing the LineTable out with the number of bytes that were written.
Out.writeU32(0);
const auto StartOffset = Out.tell();
llvm::Error err = OptLineTable->encode(Out, Range.start());
O.writeU32(0);
const auto StartOffset = O.tell();
llvm::Error err = OptLineTable->encode(O, Range.start());
if (err)
return std::move(err);
const auto Length = Out.tell() - StartOffset;
const auto Length = O.tell() - StartOffset;
if (Length > UINT32_MAX)
return createStringError(std::errc::invalid_argument,
"LineTable length is greater than UINT32_MAX");
// Fixup the size of the LineTable data with the correct size. The length
// field sits in the 4 bytes immediately before StartOffset.
Out.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
O.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
}

// Write out the inline function info if we have any and if it is valid.
if (Inline) {
Out.writeU32(InfoType::InlineInfo);
O.writeU32(InfoType::InlineInfo);
// Write a uint32_t length as zero for now, we will fix this up after
// writing the InlineInfo out with the number of bytes that were written.
Out.writeU32(0);
const auto StartOffset = Out.tell();
llvm::Error err = Inline->encode(Out, Range.start());
O.writeU32(0);
const auto StartOffset = O.tell();
llvm::Error err = Inline->encode(O, Range.start());
if (err)
return std::move(err);
const auto Length = Out.tell() - StartOffset;
const auto Length = O.tell() - StartOffset;
if (Length > UINT32_MAX)
return createStringError(std::errc::invalid_argument,
"InlineInfo length is greater than UINT32_MAX");
// Fixup the size of the InlineInfo data with the correct size. The length
// field sits in the 4 bytes immediately before StartOffset.
Out.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
O.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
}

// Terminate the data chunks with an end of list entry that has zero size.
Out.writeU32(InfoType::EndOfList);
Out.writeU32(0);
O.writeU32(InfoType::EndOfList);
O.writeU32(0);
return FuncInfoOffset;
}

Expand Down

0 comments on commit a14e3c2

Please sign in to comment.