[DebugInfo][llvm-dwarfutil] Combine overlapped address ranges.

DWARF files may contain overlapping address ranges. f.e. it can happen if the two copies of the function have identical instruction sequences and they end up sharing. That looks incorrect from the point of view of DWARF spec. Current implementation of DWARFLinker does not combine overlapped address ranges. It would be good if such ranges would be handled in some useful way. Thus, this patch allows DWARFLinker to combine overlapped ranges in a single one. Depends on D86539 Reviewed By: aprantl Differential Revision: https://reviews.llvm.org/D123469
llvm · Jul 21, 2022 · d2a4d6b · d2a4d6b
1 parent b988d8d
commit d2a4d6b
Show file tree

Hide file tree

Showing 14 changed files with 768 additions and 158 deletions.
diff --git a/llvm/include/llvm/ADT/AddressRanges.h b/llvm/include/llvm/ADT/AddressRanges.h
@@ -10,9 +10,10 @@
 #define LLVM_ADT_ADDRESSRANGES_H
 
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include <cassert>
 #include <stdint.h>
-#include <vector>
 
 namespace llvm {
 
@@ -47,20 +48,29 @@ class AddressRange {
 /// The AddressRanges class helps normalize address range collections.
 /// This class keeps a sorted vector of AddressRange objects and can perform
 /// insertions and searches efficiently. The address ranges are always sorted
-/// and never contain any invalid or empty address ranges. Intersecting
+/// and never contain any invalid or empty address ranges.
+/// Intersecting([100,200), [150,300)) and adjacent([100,200), [200,300))
 /// address ranges are combined during insertion.
 class AddressRanges {
 protected:
-  using Collection = std::vector<AddressRange>;
+  using Collection = SmallVector<AddressRange>;
   Collection Ranges;
 
 public:
   void clear() { Ranges.clear(); }
   bool empty() const { return Ranges.empty(); }
-  bool contains(uint64_t Addr) const;
-  bool contains(AddressRange Range) const;
-  Optional<AddressRange> getRangeThatContains(uint64_t Addr) const;
-  void insert(AddressRange Range);
+  bool contains(uint64_t Addr) const { return find(Addr) != Ranges.end(); }
+  bool contains(AddressRange Range) const {
+    return find(Range) != Ranges.end();
+  }
+  Optional<AddressRange> getRangeThatContains(uint64_t Addr) const {
+    Collection::const_iterator It = find(Addr);
+    if (It == Ranges.end())
+      return None;
+
+    return *It;
+  }
+  Collection::const_iterator insert(AddressRange Range);
   void reserve(size_t Capacity) { Ranges.reserve(Capacity); }
   size_t size() const { return Ranges.size(); }
   bool operator==(const AddressRanges &RHS) const {
@@ -72,6 +82,64 @@ class AddressRanges {
   }
   Collection::const_iterator begin() const { return Ranges.begin(); }
   Collection::const_iterator end() const { return Ranges.end(); }
+
+protected:
+  Collection::const_iterator find(uint64_t Addr) const;
+  Collection::const_iterator find(AddressRange Range) const;
+};
+
+/// AddressRangesMap class maps values to the address ranges.
+/// It keeps address ranges and corresponding values. If ranges
+/// are combined during insertion, then combined range keeps
+/// newly inserted value.
+template <typename T> class AddressRangesMap : protected AddressRanges {
+public:
+  void clear() {
+    Ranges.clear();
+    Values.clear();
+  }
+  bool empty() const { return AddressRanges::empty(); }
+  bool contains(uint64_t Addr) const { return AddressRanges::contains(Addr); }
+  bool contains(AddressRange Range) const {
+    return AddressRanges::contains(Range);
+  }
+  void insert(AddressRange Range, T Value) {
+    size_t InputSize = Ranges.size();
+    Collection::const_iterator RangesIt = AddressRanges::insert(Range);
+    if (RangesIt == Ranges.end())
+      return;
+
+    // make Values match to Ranges.
+    size_t Idx = RangesIt - Ranges.begin();
+    typename ValuesCollection::iterator ValuesIt = Values.begin() + Idx;
+    if (InputSize < Ranges.size())
+      Values.insert(ValuesIt, T());
+    else if (InputSize > Ranges.size())
+      Values.erase(ValuesIt, ValuesIt + InputSize - Ranges.size());
+    assert(Ranges.size() == Values.size());
+
+    // set value to the inserted or combined range.
+    Values[Idx] = Value;
+  }
+  size_t size() const {
+    assert(Ranges.size() == Values.size());
+    return AddressRanges::size();
+  }
+  Optional<std::pair<AddressRange, T>>
+  getRangeValueThatContains(uint64_t Addr) const {
+    Collection::const_iterator It = find(Addr);
+    if (It == Ranges.end())
+      return None;
+
+    return std::make_pair(*It, Values[It - Ranges.begin()]);
+  }
+  std::pair<AddressRange, T> operator[](size_t Idx) const {
+    return std::make_pair(Ranges[Idx], Values[Idx]);
+  }
+
+protected:
+  using ValuesCollection = SmallVector<T>;
+  ValuesCollection Values;
 };
 
 } // namespace llvm

diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/DWARFLinker.h
@@ -9,6 +9,7 @@
 #ifndef LLVM_DWARFLINKER_DWARFLINKER_H
 #define LLVM_DWARFLINKER_DWARFLINKER_H
 
+#include "llvm/ADT/AddressRanges.h"
 #include "llvm/CodeGen/AccelTable.h"
 #include "llvm/CodeGen/NonRelocatableStringpool.h"
 #include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h"
@@ -37,25 +38,6 @@ enum class DwarfLinkerAccelTableKind : uint8_t {
   Pub,     ///< .debug_pubnames, .debug_pubtypes
 };
 
-/// Partial address range. Besides an offset, only the
-/// HighPC is stored. The structure is stored in a map where the LowPC is the
-/// key.
-struct ObjFileAddressRange {
-  /// Function HighPC.
-  uint64_t HighPC;
-  /// Offset to apply to the linked address.
-  /// should be 0 for not-linked object file.
-  int64_t Offset;
-
-  ObjFileAddressRange(uint64_t EndPC, int64_t Offset)
-      : HighPC(EndPC), Offset(Offset) {}
-
-  ObjFileAddressRange() : HighPC(0), Offset(0) {}
-};
-
-/// Map LowPC to ObjFileAddressRange.
-using RangesTy = std::map<uint64_t, ObjFileAddressRange>;
-
 /// AddressesMap represents information about valid addresses used
 /// by debug information. Valid addresses are those which points to
 /// live code sections. i.e. relocations for these addresses point
@@ -142,7 +124,7 @@ class DwarfEmitter {
   /// original \p Entries.
   virtual void emitRangesEntries(
       int64_t UnitPcOffset, uint64_t OrigLowPc,
-      const FunctionIntervals::const_iterator &FuncRange,
+      Optional<std::pair<AddressRange, int64_t>> FuncRange,
       const std::vector<DWARFDebugRangeList::RangeListEntry> &Entries,
       unsigned AddressSize) = 0;
 

diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h b/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h
@@ -9,21 +9,18 @@
 #ifndef LLVM_DWARFLINKER_DWARFLINKERCOMPILEUNIT_H
 #define LLVM_DWARFLINKER_DWARFLINKERCOMPILEUNIT_H
 
+#include "llvm/ADT/AddressRanges.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/IntervalMap.h"
 #include "llvm/CodeGen/DIE.h"
 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
 
 namespace llvm {
 
 class DeclContext;
 
-template <typename KeyT, typename ValT>
-using HalfOpenIntervalMap =
-    IntervalMap<KeyT, ValT, IntervalMapImpl::NodeSizer<KeyT, ValT>::LeafSize,
-                IntervalMapHalfOpenInfo<KeyT>>;
-
-using FunctionIntervals = HalfOpenIntervalMap<uint64_t, int64_t>;
+/// Mapped value in the address map is the offset to apply to the
+/// linked address.
+using RangesTy = AddressRangesMap<int64_t>;
 
 // FIXME: Delete this structure.
 struct PatchLocation {
@@ -84,8 +81,7 @@ class CompileUnit {
 
   CompileUnit(DWARFUnit &OrigUnit, unsigned ID, bool CanUseODR,
               StringRef ClangModuleName)
-      : OrigUnit(OrigUnit), ID(ID), Ranges(RangeAlloc),
-        ClangModuleName(ClangModuleName) {
+      : OrigUnit(OrigUnit), ID(ID), ClangModuleName(ClangModuleName) {
     Info.resize(OrigUnit.getNumDIEs());
 
     auto CUDie = OrigUnit.getUnitDIE(false);
@@ -143,7 +139,7 @@ class CompileUnit {
     return UnitRangeAttribute;
   }
 
-  const FunctionIntervals &getFunctionRanges() const { return Ranges; }
+  const RangesTy &getFunctionRanges() const { return Ranges; }
 
   const std::vector<PatchLocation> &getRangesAttributes() const {
     return RangeAttributes;
@@ -182,10 +178,6 @@ class CompileUnit {
   /// offset \p PCOffset.
   void addFunctionRange(uint64_t LowPC, uint64_t HighPC, int64_t PCOffset);
 
-  /// Check whether specified address range \p LowPC \p HighPC
-  /// overlaps with existing function ranges.
-  bool overlapsWithFunctionRanges(uint64_t LowPC, uint64_t HighPC);
-
   /// Keep track of a DW_AT_range attribute that we will need to patch up later.
   void noteRangeAttribute(const DIE &Die, PatchLocation Attr);
 
@@ -270,12 +262,10 @@ class CompileUnit {
       std::tuple<DIE *, const CompileUnit *, DeclContext *, PatchLocation>>
       ForwardDIEReferences;
 
-  FunctionIntervals::Allocator RangeAlloc;
-
-  /// The ranges in that interval map are the PC ranges for
-  /// functions in this unit, associated with the PC offset to apply
-  /// to the addresses to get the linked address.
-  FunctionIntervals Ranges;
+  /// The ranges in that map are the PC ranges for functions in this unit,
+  /// associated with the PC offset to apply to the addresses to get
+  /// the linked address.
+  RangesTy Ranges;
 
   /// The DW_AT_low_pc of each DW_TAG_label.
   SmallDenseMap<uint64_t, uint64_t, 1> Labels;

diff --git a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
@@ -96,7 +96,7 @@ class DwarfStreamer : public DwarfEmitter {
   /// original \p Entries.
   void emitRangesEntries(
       int64_t UnitPcOffset, uint64_t OrigLowPc,
-      const FunctionIntervals::const_iterator &FuncRange,
+      Optional<std::pair<AddressRange, int64_t>> FuncRange,
       const std::vector<DWARFDebugRangeList::RangeListEntry> &Entries,
       unsigned AddressSize) override;
 

diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp
@@ -504,22 +504,14 @@ unsigned DWARFLinker::shouldKeepSubprogramDIE(
                   &DIE);
     return Flags;
   }
-
-  // TODO: Following check is a workaround for overlapping address ranges.
-  //       ELF binaries built with LTO might contain overlapping address
-  //       ranges. The better fix would be to combine such ranges. Following
-  //       is a workaround that should be removed when a good fix is done.
-  if (Unit.overlapsWithFunctionRanges(*LowPc, *HighPc)) {
-    reportWarning(
-        formatv("Overlapping address range [{0:X}, {1:X}]. Range will "
-                "be discarded.\n",
-                *LowPc, *HighPc),
-        File, &DIE);
+  if (*LowPc > *HighPc) {
+    reportWarning("low_pc greater than high_pc. Range will be discarded.\n",
+                  File, &DIE);
     return Flags;
   }
 
   // Replace the debug map range with a more accurate one.
-  Ranges[*LowPc] = ObjFileAddressRange(*HighPc, MyInfo.AddrAdjust);
+  Ranges.insert({*LowPc, *HighPc}, MyInfo.AddrAdjust);
   Unit.addFunctionRange(*LowPc, *HighPc, MyInfo.AddrAdjust);
   return Flags;
 }
@@ -1588,7 +1580,7 @@ void DWARFLinker::patchRangesForUnit(const CompileUnit &Unit,
   DWARFDataExtractor RangeExtractor(OrigDwarf.getDWARFObj(),
                                     OrigDwarf.getDWARFObj().getRangesSection(),
                                     OrigDwarf.isLittleEndian(), AddressSize);
-  auto InvalidRange = FunctionRanges.end(), CurrRange = InvalidRange;
+  Optional<std::pair<AddressRange, int64_t>> CurrRange;
   DWARFUnit &OrigUnit = Unit.getOrigUnit();
   auto OrigUnitDie = OrigUnit.getUnitDIE(false);
   uint64_t OrigLowPc =
@@ -1611,12 +1603,11 @@ void DWARFLinker::patchRangesForUnit(const CompileUnit &Unit,
     if (!Entries.empty()) {
       const DWARFDebugRangeList::RangeListEntry &First = Entries.front();
 
-      if (CurrRange == InvalidRange ||
-          First.StartAddress + OrigLowPc < CurrRange.start() ||
-          First.StartAddress + OrigLowPc >= CurrRange.stop()) {
-        CurrRange = FunctionRanges.find(First.StartAddress + OrigLowPc);
-        if (CurrRange == InvalidRange ||
-            CurrRange.start() > First.StartAddress + OrigLowPc) {
+      if (!CurrRange ||
+          !CurrRange->first.contains(First.StartAddress + OrigLowPc)) {
+        CurrRange = FunctionRanges.getRangeValueThatContains(
+            First.StartAddress + OrigLowPc);
+        if (!CurrRange) {
           reportWarning("no mapping for range.", File);
           continue;
         }
@@ -1723,7 +1714,7 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit,
   // in NewRows.
   std::vector<DWARFDebugLine::Row> Seq;
   const auto &FunctionRanges = Unit.getFunctionRanges();
-  auto InvalidRange = FunctionRanges.end(), CurrRange = InvalidRange;
+  Optional<std::pair<AddressRange, int64_t>> CurrRange;
 
   // FIXME: This logic is meant to generate exactly the same output as
   // Darwin's classic dsymutil. There is a nicer way to implement this
@@ -1742,34 +1733,24 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit,
     // it is marked as end_sequence in the input (because in that
     // case, the relocation offset is accurate and that entry won't
     // serve as the start of another function).
-    if (CurrRange == InvalidRange || Row.Address.Address < CurrRange.start() ||
-        Row.Address.Address > CurrRange.stop() ||
-        (Row.Address.Address == CurrRange.stop() && !Row.EndSequence)) {
+    if (!CurrRange || !CurrRange->first.contains(Row.Address.Address) ||
+        (Row.Address.Address == CurrRange->first.end() && !Row.EndSequence)) {
       // We just stepped out of a known range. Insert a end_sequence
       // corresponding to the end of the range.
-      uint64_t StopAddress = CurrRange != InvalidRange
-                                 ? CurrRange.stop() + CurrRange.value()
-                                 : -1ULL;
-      CurrRange = FunctionRanges.find(Row.Address.Address);
-      bool CurrRangeValid =
-          CurrRange != InvalidRange && CurrRange.start() <= Row.Address.Address;
-      if (!CurrRangeValid) {
-        CurrRange = InvalidRange;
+      uint64_t StopAddress =
+          CurrRange ? CurrRange->first.end() + CurrRange->second : -1ULL;
+      CurrRange = FunctionRanges.getRangeValueThatContains(Row.Address.Address);
+      if (!CurrRange) {
         if (StopAddress != -1ULL) {
           // Try harder by looking in the Address ranges map.
           // There are corner cases where this finds a
           // valid entry. It's unclear if this is right or wrong, but
           // for now do as dsymutil.
           // FIXME: Understand exactly what cases this addresses and
           // potentially remove it along with the Ranges map.
-          auto Range = Ranges.lower_bound(Row.Address.Address);
-          if (Range != Ranges.begin() && Range != Ranges.end())
-            --Range;
-
-          if (Range != Ranges.end() && Range->first <= Row.Address.Address &&
-              Range->second.HighPC >= Row.Address.Address) {
-            StopAddress = Row.Address.Address + Range->second.Offset;
-          }
+          if (Optional<std::pair<AddressRange, int64_t>> Range =
+                  Ranges.getRangeValueThatContains(Row.Address.Address))
+            StopAddress = Row.Address.Address + (*Range).second;
         }
       }
       if (StopAddress != -1ULL && !Seq.empty()) {
@@ -1785,7 +1766,7 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit,
         insertLineSequence(Seq, NewRows);
       }
 
-      if (!CurrRangeValid)
+      if (!CurrRange)
         continue;
     }
 
@@ -1794,7 +1775,7 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit,
       continue;
 
     // Relocate row address and add it to the current sequence.
-    Row.Address.Address += CurrRange.value();
+    Row.Address.Address += CurrRange->second;
     Seq.emplace_back(Row);
 
     if (Row.EndSequence)
@@ -1934,11 +1915,9 @@ void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File,
     // the function entry point, thus we can't just lookup the address
     // in the debug map. Use the AddressInfo's range map to see if the FDE
     // describes something that we can relocate.
-    auto Range = Ranges.upper_bound(Loc);
-    if (Range != Ranges.begin())
-      --Range;
-    if (Range == Ranges.end() || Range->first > Loc ||
-        Range->second.HighPC <= Loc) {
+    Optional<std::pair<AddressRange, int64_t>> Range =
+        Ranges.getRangeValueThatContains(Loc);
+    if (!Range) {
       // The +4 is to account for the size of the InitialLength field itself.
       InputOffset = EntryOffset + InitialLength + 4;
       continue;
@@ -1966,7 +1945,7 @@ void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File,
     // fields that will get reconstructed by emitFDE().
     unsigned FDERemainingBytes = InitialLength - (4 + AddrSize);
     TheDwarfEmitter->emitFDE(IteratorInserted.first->getValue(), AddrSize,
-                             Loc + Range->second.Offset,
+                             Loc + Range->second,
                              FrameData.substr(InputOffset, FDERemainingBytes));
     InputOffset += FDERemainingBytes;
   }