Revert "Add the ability to segment GSYM files."

This reverts commit fe75825.

This change was causing several buildbot failures:
- https://lab.llvm.org/buildbot/#/builders/38/builds/10105
- https://lab.llvm.org/buildbot/#/builders/192/builds/562
- https://lab.llvm.org/buildbot/#/builders/109/builds/58893
- https://lab.llvm.org/buildbot/#/builders/16/builds/44360
- https://lab.llvm.org/buildbot/#/builders/247/builds/2095
- https://lab.llvm.org/buildbot/#/builders/196/builds/27236
- https://lab.llvm.org/buildbot/#/builders/54/builds/3714
llvm · Mar 3, 2023 · a14e3c2 · a14e3c2
1 parent 27ab138
commit a14e3c2
Show file tree

Hide file tree

Showing 8 changed files with 40 additions and 705 deletions.
diff --git a/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h b/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h
@@ -113,8 +113,6 @@ class FileWriter {
     return OS;
   }
 
-  llvm::support::endianness getByteOrder() const { return ByteOrder; }
-
 private:
   FileWriter(const FileWriter &rhs) = delete;
   void operator=(const FileWriter &rhs) = delete;

diff --git a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
@@ -90,10 +90,6 @@ struct FunctionInfo {
   uint32_t Name; ///< String table offset in the string table.
   std::optional<LineTable> OptLineTable;
   std::optional<InlineInfo> Inline;
-  /// If we encode a FunctionInfo during segmenting so we know its size, we can
-  /// cache that encoding here so we don't need to re-encode it when saving the
-  /// GSYM file.
-  SmallString<32> EncodingCache;
 
   FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0)
       : Range(Addr, Addr + Size), Name(N) {}
@@ -144,17 +140,6 @@ struct FunctionInfo {
   /// function info that was successfully written into the stream.
   llvm::Expected<uint64_t> encode(FileWriter &O) const;
 
-  /// Encode this function info into the internal byte cache and return the size
-  /// in bytes.
-  ///
-  /// When segmenting GSYM files we need to know how big each FunctionInfo will
-  /// encode into so we can generate segments of the right size. We don't want
-  /// to have to encode a FunctionInfo twice, so we can cache the encoded bytes
-  /// and re-use then when calling FunctionInfo::encode(...).
-  ///
-  /// \returns The size in bytes of the FunctionInfo if it were to be encoded
-  /// into a byte stream.
-  uint64_t cacheEncoding();
 
   /// Lookup an address within a FunctionInfo object's data stream.
   ///

diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
@@ -137,8 +137,6 @@ class GsymCreator {
   StringTableBuilder StrTab;
   StringSet<> StringStorage;
   DenseMap<llvm::gsym::FileEntry, uint32_t> FileEntryToIndex;
-  // Needed for mapping string offsets back to the string stored in \a StrTab.
-  DenseMap<uint64_t, CachedHashStringRef> StringOffsetMap;
   std::vector<llvm::gsym::FileEntry> Files;
   std::vector<uint8_t> UUID;
   std::optional<AddressRanges> ValidTextRanges;
@@ -147,168 +145,15 @@ class GsymCreator {
   bool Finalized = false;
   bool Quiet;
 
-
-  /// Get the first function start address.
-  ///
-  /// \returns The start address of the first FunctionInfo or std::nullopt if
-  /// there are no function infos.
-  std::optional<uint64_t> getFirstFunctionAddress() const;
-
-  /// Get the last function address.
-  ///
-  /// \returns The start address of the last FunctionInfo or std::nullopt if
-  /// there are no function infos.
-  std::optional<uint64_t> getLastFunctionAddress() const;
-
-  /// Get the base address to use for this GSYM file.
-  ///
-  /// \returns The base address to put into the header and to use when creating
-  ///          the address offset table or std::nullpt if there are no valid
-  ///          function infos or if the base address wasn't specified.
-  std::optional<uint64_t> getBaseAddress() const;
-
-  /// Get the size of an address offset in the address offset table.
-  ///
-  /// GSYM files store offsets from the base address in the address offset table
-  /// and we store the size of the address offsets in the GSYM header. This
-  /// function will calculate the size in bytes of these address offsets based
-  /// on the current contents of the GSYM file.
-  ///
-  /// \returns The size in byets of the address offsets.
-  uint8_t getAddressOffsetSize() const;
-
-  /// Get the maximum address offset for the current address offset size.
-  ///
-  /// This is used when creating the address offset table to ensure we have
-  /// values that are in range so we don't end up truncating address offsets
-  /// when creating GSYM files as the code evolves.
-  ///
-  /// \returns The maximum address offset value that will be encoded into a GSYM
-  /// file.
-  uint64_t getMaxAddressOffset() const;
-
-  /// Calculate the byte size of the GSYM header and tables sizes.
-  ///
-  /// This function will calculate the exact size in bytes of the encocded GSYM
-  /// for the following items:
-  /// - The GSYM header
-  /// - The Address offset table
-  /// - The Address info offset table
-  /// - The file table
-  /// - The string table
-  ///
-  /// This is used to help split GSYM files into segments.
-  ///
-  /// \returns Size in bytes the GSYM header and tables.
-  uint64_t calculateHeaderAndTableSize() const;
-
-  /// Copy a FunctionInfo from the \a SrcGC GSYM creator into this creator.
-  ///
-  /// Copy the function info and only the needed files and strings and add a
-  /// converted FunctionInfo into this object. This is used to segment GSYM
-  /// files into separate files while only transferring the files and strings
-  /// that are needed from \a SrcGC.
-  ///
-  /// \param SrcGC The source gsym creator to copy from.
-  /// \param FuncInfoIdx The function info index within \a SrcGC to copy.
-  /// \returns The number of bytes it will take to encode the function info in
-  /// this GsymCreator. This helps calculate the size of the current GSYM
-  /// segment file.
-  uint64_t copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncInfoIdx);
-
-  /// Copy a string from \a SrcGC into this object.
-  ///
-  /// Copy a string from \a SrcGC by string table offset into this GSYM creator.
-  /// If a string has already been copied, the uniqued string table offset will
-  /// be returned, otherwise the string will be copied and a unique offset will
-  /// be returned.
-  ///
-  /// \param SrcGC The source gsym creator to copy from.
-  /// \param StrOff The string table offset from \a SrcGC to copy.
-  /// \returns The new string table offset of the string within this object.
-  uint32_t copyString(const GsymCreator &SrcGC, uint32_t StrOff);
-
-  /// Copy a file from \a SrcGC into this object.
-  ///
-  /// Copy a file from \a SrcGC by file index into this GSYM creator. Files
-  /// consist of two string table entries, one for the directory and one for the
-  /// filename, this function will copy any needed strings ensure the file is
-  /// uniqued within this object. If a file already exists in this GSYM creator
-  /// the uniqued index will be returned, else the stirngs will be copied and
-  /// the new file index will be returned.
-  ///
-  /// \param SrcGC The source gsym creator to copy from.
-  /// \param FileIdx The 1 based file table index within \a SrcGC to copy. A
-  /// file index of zero will always return zero as the zero is a reserved file
-  /// index that means no file.
-  /// \returns The new file index of the file within this object.
-  uint32_t copyFile(const GsymCreator &SrcGC, uint32_t FileIdx);
-
-  /// Inserts a FileEntry into the file table.
-  ///
-  /// This is used to insert a file entry in a thread safe way into this object.
-  ///
-  /// \param FE A file entry object that contains valid string table offsets
-  /// from this object already.
-  uint32_t insertFileEntry(FileEntry FE);
-
-  /// Fixup any string and file references by updating any file indexes and
-  /// strings offsets in the InlineInfo parameter.
-  ///
-  /// When copying InlineInfo entries, we can simply make a copy of the object
-  /// and then fixup the files and strings for efficiency.
-  ///
-  /// \param SrcGC The source gsym creator to copy from.
-  /// \param II The inline info that contains file indexes and string offsets
-  /// that come from \a SrcGC. The entries will be updated by coping any files
-  /// and strings over into this object.
-  void fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II);
-
-  /// Get the first function info address from this GSYM file.
-  ///
-  /// This is used to add a suffix to segmented GSYM files to indicate the first
-  /// address for the first function info within the file.
-  ///
-  /// \returns The first function info address.
-  uint64_t getFirstFunctionInfoAddress() const;
-
-  /// Save this GSYM file into segments that are roughly \a SegmentSize in size.
-  ///
-  /// When segemented GSYM files are saved to disk, they will use \a Path as a
-  /// prefix and then have the first function info address appended to the path
-  /// when each segment is saved. Each segmented GSYM file has a only the
-  /// strings and files that are needed to save the function infos that are in
-  /// each segment. These smaller files are easy to compress and download
-  /// separately and allow for efficient lookups with very large GSYM files and
-  /// segmenting them allows servers to download only the segments that are
-  /// needed.
-  ///
-  /// \param Path The path prefix to use when saving the GSYM files.
-  /// \param ByteOrder The endianness to use when saving the file.
-  /// \param SegmentSize The size in bytes to segment the GSYM file into.
-  llvm::Error saveSegments(StringRef Path,
-                           llvm::support::endianness ByteOrder,
-                           uint64_t SegmentSize) const;
-
 public:
   GsymCreator(bool Quiet = false);
 
   /// Save a GSYM file to a stand alone file.
   ///
   /// \param Path The file path to save the GSYM file to.
   /// \param ByteOrder The endianness to use when saving the file.
-  /// \param SegmentSize The size in bytes to segment the GSYM file into. If
-  ///                    this option is set this function will create N segments
-  ///                    that are all around \a SegmentSize bytes in size. This
-  ///                    allows a very large GSYM file to be broken up into
-  ///                    shards. Each GSYM file will have its own file table,
-  ///                    and string table that only have the files and strings
-  ///                    needed for the shared. If this argument has no value,
-  ///                    a single GSYM file that contains all function
-  ///                    information will be created.
   /// \returns An error object that indicates success or failure of the save.
-  llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder,
-                   std::optional<uint64_t> SegmentSize = std::nullopt) const;
+  llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder) const;
 
   /// Encode a GSYM into the file writer stream at the current position.
   ///
@@ -446,28 +291,6 @@ class GsymCreator {
 
   /// Whether the transformation should be quiet, i.e. not output warnings.
   bool isQuiet() const { return Quiet; }
-
-
-  /// Create a segmented GSYM creator starting with function info index
-  /// \a FuncIdx.
-  ///
-  /// This function will create a GsymCreator object that will encode into
-  /// roughly \a SegmentSize bytes and return it. It is used by the private
-  /// saveSegments(...) function and also is used by the GSYM unit tests to test
-  /// segmenting of GSYM files. The returned GsymCreator can be finalized and
-  /// encoded.
-  ///
-  /// \param [in] SegmentSize The size in bytes to roughly segment the GSYM file
-  /// into.
-  /// \param [in,out] FuncIdx The index of the first function info to encode
-  /// into the returned GsymCreator. This index will be updated so it can be
-  /// used in subsequent calls to this function to allow more segments to be
-  /// created.
-  /// \returns An expected unique pointer to a GsymCreator or an error. The
-  /// returned unique pointer can be NULL if there are no more functions to
-  /// encode.
-  llvm::Expected<std::unique_ptr<GsymCreator>>
-  createSegment(uint64_t SegmentSize, size_t &FuncIdx) const;
 };
 
 } // namespace gsym

diff --git a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
@@ -52,16 +52,6 @@ struct LookupResult {
   std::string getSourceFile(uint32_t Index) const;
 };
 
-inline bool operator==(const LookupResult &LHS, const LookupResult &RHS) {
-  if (LHS.LookupAddr != RHS.LookupAddr)
-    return false;
-  if (LHS.FuncRange != RHS.FuncRange)
-    return false;
-  if (LHS.FuncName != RHS.FuncName)
-    return false;
-  return LHS.Locations == RHS.Locations;
-}
-
 raw_ostream &operator<<(raw_ostream &OS, const LookupResult &R);
 
 } // namespace gsym

diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -96,83 +96,57 @@ llvm::Expected<FunctionInfo> FunctionInfo::decode(DataExtractor &Data,
   return std::move(FI);
 }
 
-uint64_t FunctionInfo::cacheEncoding() {
-  EncodingCache.clear();
-  if (!isValid())
-    return 0;
-  raw_svector_ostream OutStrm(EncodingCache);
-  FileWriter FW(OutStrm, support::endian::system_endianness());
-  llvm::Expected<uint64_t> Result = encode(FW);
-  if (!Result) {
-    EncodingCache.clear();
-    consumeError(Result.takeError());
-    return 0;
-  }
-  return EncodingCache.size();
-}
-
-llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out) const {
+llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &O) const {
   if (!isValid())
     return createStringError(std::errc::invalid_argument,
         "attempted to encode invalid FunctionInfo object");
   // Align FunctionInfo data to a 4 byte alignment.
-  Out.alignTo(4);
-  const uint64_t FuncInfoOffset = Out.tell();
-  // Check if we have already encoded this function info into EncodingCache.
-  // This will be non empty when creating segmented GSYM files as we need to
-  // precompute exactly how big FunctionInfo objects encode into so we can
-  // accurately make segments of a specific size.
-  if (!EncodingCache.empty() &&
-      support::endian::system_endianness() == Out.getByteOrder()) {
-    // We already encoded this object, just write out the bytes.
-    Out.writeData(llvm::ArrayRef<uint8_t>((const uint8_t *)EncodingCache.data(),
-                                          EncodingCache.size()));
-    return FuncInfoOffset;
-  }
+  O.alignTo(4);
+  const uint64_t FuncInfoOffset = O.tell();
   // Write the size in bytes of this function as a uint32_t. This can be zero
   // if we just have a symbol from a symbol table and that symbol has no size.
-  Out.writeU32(size());
+  O.writeU32(size());
   // Write the name of this function as a uint32_t string table offset.
-  Out.writeU32(Name);
+  O.writeU32(Name);
 
   if (OptLineTable) {
-    Out.writeU32(InfoType::LineTableInfo);
+    O.writeU32(InfoType::LineTableInfo);
     // Write a uint32_t length as zero for now, we will fix this up after
     // writing the LineTable out with the number of bytes that were written.
-    Out.writeU32(0);
-    const auto StartOffset = Out.tell();
-    llvm::Error err = OptLineTable->encode(Out, Range.start());
+    O.writeU32(0);
+    const auto StartOffset = O.tell();
+    llvm::Error err = OptLineTable->encode(O, Range.start());
     if (err)
       return std::move(err);
-    const auto Length = Out.tell() - StartOffset;
+    const auto Length = O.tell() - StartOffset;
     if (Length > UINT32_MAX)
         return createStringError(std::errc::invalid_argument,
             "LineTable length is greater than UINT32_MAX");
     // Fixup the size of the LineTable data with the correct size.
-    Out.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
+    O.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
   }
 
   // Write out the inline function info if we have any and if it is valid.
   if (Inline) {
-    Out.writeU32(InfoType::InlineInfo);
+    O.writeU32(InfoType::InlineInfo);
     // Write a uint32_t length as zero for now, we will fix this up after
     // writing the LineTable out with the number of bytes that were written.
-    Out.writeU32(0);
-    const auto StartOffset = Out.tell();
-    llvm::Error err = Inline->encode(Out, Range.start());
+    O.writeU32(0);
+    const auto StartOffset = O.tell();
+    llvm::Error err = Inline->encode(O, Range.start());
     if (err)
       return std::move(err);
-    const auto Length = Out.tell() - StartOffset;
+    const auto Length = O.tell() - StartOffset;
     if (Length > UINT32_MAX)
         return createStringError(std::errc::invalid_argument,
             "InlineInfo length is greater than UINT32_MAX");
     // Fixup the size of the InlineInfo data with the correct size.
-    Out.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
+    O.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
   }
 
   // Terminate the data chunks with and end of list with zero size
-  Out.writeU32(InfoType::EndOfList);
-  Out.writeU32(0);
+  O.writeU32(InfoType::EndOfList);
+  O.writeU32(0);
   return FuncInfoOffset;
 }