[clang] Add Bytes/Columns types to TextDiagnostic #165541

tbaederr · 2025-10-29T11:02:14Z

In TextDiagnostic.cpp, we're using column- and byte indices everywhere, but we were using integers for them which made it hard to know what to pass where, and what was produced. To make matters worse, that SourceManager considers a "column" is a byte in TextDiagnostic.

Add Bytes and Columns structs, which are not related so API using them can differentiate between values interpreted columns or bytes.

llvmbot · 2025-10-29T11:02:50Z

@llvm/pr-subscribers-clang

Author: Timm Baeder (tbaederr)

Changes

In TextDiagnostic.c, we're using column- and byte indices everywhere, but we were using integers for them which made it hard to know what to pass where, and what was produced. To make matters worse, that SourceManager considers a "column" is a byte in TextDiagnostic.

Add Bytes and Columns structs, which are not related so API using them can differentiate between values interpreted columns or bytes.

Patch is 25.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165541.diff

1 Files Affected:

(modified) clang/lib/Frontend/TextDiagnostic.cpp (+201-141)

diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index f5add2a941f72..c7599db79d8ed 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -47,6 +47,62 @@ static constexpr raw_ostream::Colors CommentColor = raw_ostream::YELLOW;
 static constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN;
 static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;
 
+struct Bytes {
+  int V = 0;
+  Bytes(int V) : V(V) {}
+  Bytes(unsigned V) : V(V) {}
+  Bytes(unsigned long V) : V(V) {}
+  Bytes next() const { return Bytes(V + 1); }
+  Bytes prev() const { return Bytes(V - 1); }
+  bool isValid() const { return V != -1; }
+  bool operator>(int V) { return this->V > V; }
+  bool operator>(Bytes O) { return V > O.V; }
+  bool operator<(int V) { return this->V < V; }
+  bool operator<(Bytes O) { return V < O.V; }
+  Bytes &operator++() {
+    ++V;
+    return *this;
+  }
+  Bytes &operator--() {
+    --V;
+    return *this;
+  }
+  bool operator<=(Bytes B) { return V <= B.V; }
+  Bytes operator-(int V) { return Bytes{this->V - V}; }
+  bool operator!=(int V) { return this->V != V; }
+};
+struct Columns {
+  int V = 0;
+  Columns(unsigned V) : V(V) {}
+  Columns(int V) : V(V) {}
+  Columns(size_t V) : V(V) {}
+  bool isValid() const { return V != -1; }
+  Columns operator+(Columns B) { return Columns{V + B.V}; }
+  Columns &operator+=(Columns B) {
+    V += B.V;
+    return *this;
+  }
+  Columns &operator-=(Columns B) {
+    V -= B.V;
+    return *this;
+  }
+  Columns operator-(Columns B) { return Columns{V - B.V}; }
+  bool operator==(int V) { return this->V == V; }
+  bool operator<=(Columns B) { return V <= B.V; }
+  bool operator<(Columns B) { return V < B.V; }
+  bool operator>(Columns B) { return V > B.V; }
+  Columns &operator++() {
+    ++V;
+    return *this;
+  }
+  Columns &operator--() {
+    --V;
+    return *this;
+  }
+  bool operator!=(int V) { return this->V != V; }
+  bool operator!=(Columns C) { return C.V != V; }
+};
+
 /// Add highlights to differences in template strings.
 static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str,
                                       bool &Normal, bool Bold) {
@@ -109,8 +165,8 @@ printableTextForNextCharacter(StringRef SourceLine, size_t *I,
   if (SourceLine[*I] == '\t') {
     assert(0 < TabStop && TabStop <= DiagnosticOptions::MaxTabStop &&
            "Invalid -ftabstop value");
-    unsigned Col = bytesSincePreviousTabOrLineBegin(SourceLine, *I);
-    unsigned NumSpaces = TabStop - (Col % TabStop);
+    unsigned LineBytes = bytesSincePreviousTabOrLineBegin(SourceLine, *I);
+    unsigned NumSpaces = TabStop - (LineBytes % TabStop);
     assert(0 < NumSpaces && NumSpaces <= TabStop
            && "Invalid computation of space amt");
     ++(*I);
@@ -220,33 +276,33 @@ static void expandTabs(std::string &SourceLine, unsigned TabStop) {
 ///  (\\u3042 is represented in UTF-8 by three bytes and takes two columns to
 ///   display)
 static void genColumnByteMapping(StringRef SourceLine, unsigned TabStop,
-                                 SmallVectorImpl<int> &BytesOut,
-                                 SmallVectorImpl<int> &ColumnsOut) {
+                                 SmallVectorImpl<Bytes> &BytesOut,
+                                 SmallVectorImpl<Columns> &ColumnsOut) {
   assert(BytesOut.empty());
   assert(ColumnsOut.empty());
 
   if (SourceLine.empty()) {
-    BytesOut.resize(1u, 0);
-    ColumnsOut.resize(1u, 0);
+    BytesOut.resize(1u, Bytes(0));
+    ColumnsOut.resize(1u, Columns(0));
     return;
   }
 
   ColumnsOut.resize(SourceLine.size() + 1, -1);
 
-  int Columns = 0;
+  int NumColumns = 0;
   size_t I = 0;
   while (I < SourceLine.size()) {
-    ColumnsOut[I] = Columns;
-    BytesOut.resize(Columns + 1, -1);
-    BytesOut.back() = I;
+    ColumnsOut[I] = Columns(NumColumns);
+    BytesOut.resize(NumColumns + 1, -1);
+    BytesOut.back() = Bytes(I);
     auto [Str, Printable] =
         printableTextForNextCharacter(SourceLine, &I, TabStop);
-    Columns += llvm::sys::locale::columnWidth(Str);
+    NumColumns += llvm::sys::locale::columnWidth(Str);
   }
 
-  ColumnsOut.back() = Columns;
-  BytesOut.resize(Columns + 1, -1);
-  BytesOut.back() = I;
+  ColumnsOut.back() = Columns(NumColumns);
+  BytesOut.resize(NumColumns + 1, -1);
+  BytesOut.back() = Bytes(I);
 }
 
 namespace {
@@ -258,47 +314,47 @@ struct SourceColumnMap {
 
     assert(m_byteToColumn.size()==SourceLine.size()+1);
     assert(0 < m_byteToColumn.size() && 0 < m_columnToByte.size());
-    assert(m_byteToColumn.size()
-           == static_cast<unsigned>(m_columnToByte.back()+1));
-    assert(static_cast<unsigned>(m_byteToColumn.back()+1)
-           == m_columnToByte.size());
+    assert(m_byteToColumn.size() ==
+           static_cast<unsigned>(m_columnToByte.back().V + 1));
+    assert(static_cast<unsigned>(m_byteToColumn.back().V + 1) ==
+           m_columnToByte.size());
   }
-  int columns() const { return m_byteToColumn.back(); }
-  int bytes() const { return m_columnToByte.back(); }
+  Columns columns() const { return m_byteToColumn.back(); }
+  Bytes bytes() const { return m_columnToByte.back(); }
 
   /// Map a byte to the column which it is at the start of, or return -1
   /// if it is not at the start of a column (for a UTF-8 trailing byte).
-  int byteToColumn(int n) const {
-    assert(0<=n && n<static_cast<int>(m_byteToColumn.size()));
-    return m_byteToColumn[n];
+  Columns byteToColumn(Bytes N) const {
+    assert(0 <= N.V && N.V < static_cast<int>(m_byteToColumn.size()));
+    return m_byteToColumn[N.V];
   }
 
   /// Map a byte to the first column which contains it.
-  int byteToContainingColumn(int N) const {
-    assert(0 <= N && N < static_cast<int>(m_byteToColumn.size()));
-    while (m_byteToColumn[N] == -1)
-      --N;
-    return m_byteToColumn[N];
+  Columns byteToContainingColumn(Bytes N) const {
+    assert(0 <= N.V && N.V < static_cast<int>(m_byteToColumn.size()));
+    while (!m_byteToColumn[N.V].isValid())
+      --N.V;
+    return m_byteToColumn[N.V];
   }
 
   /// Map a column to the byte which starts the column, or return -1 if
   /// the column the second or subsequent column of an expanded tab or similar
   /// multi-column entity.
-  int columnToByte(int n) const {
-    assert(0<=n && n<static_cast<int>(m_columnToByte.size()));
-    return m_columnToByte[n];
+  Bytes columnToByte(Columns N) const {
+    assert(0 <= N.V && N.V < static_cast<int>(m_columnToByte.size()));
+    return m_columnToByte[N.V];
   }
 
   /// Map from a byte index to the next byte which starts a column.
-  int startOfNextColumn(int N) const {
-    assert(0 <= N && N < static_cast<int>(m_byteToColumn.size() - 1));
+  Bytes startOfNextColumn(Bytes N) const {
+    assert(0 <= N.V && N.V < static_cast<int>(m_byteToColumn.size() - 1));
     while (byteToColumn(++N) == -1) {}
     return N;
   }
 
   /// Map from a byte index to the previous byte which starts a column.
-  int startOfPreviousColumn(int N) const {
-    assert(0 < N && N < static_cast<int>(m_byteToColumn.size()));
+  Bytes startOfPreviousColumn(Bytes N) const {
+    assert(0 < N.V && N.V < static_cast<int>(m_byteToColumn.size()));
     while (byteToColumn(--N) == -1) {}
     return N;
   }
@@ -308,9 +364,9 @@ struct SourceColumnMap {
   }
 
 private:
-  const std::string m_SourceLine;
-  SmallVector<int,200> m_byteToColumn;
-  SmallVector<int,200> m_columnToByte;
+  StringRef m_SourceLine;
+  SmallVector<Columns, 200> m_byteToColumn;
+  SmallVector<Bytes, 200> m_columnToByte;
 };
 } // end anonymous namespace
 
@@ -319,14 +375,15 @@ struct SourceColumnMap {
 static void selectInterestingSourceRegion(std::string &SourceLine,
                                           std::string &CaretLine,
                                           std::string &FixItInsertionLine,
-                                          unsigned Columns,
+                                          unsigned NonGutterColumns,
                                           const SourceColumnMap &map) {
-  unsigned CaretColumns = CaretLine.size();
-  unsigned FixItColumns = llvm::sys::locale::columnWidth(FixItInsertionLine);
-  unsigned MaxColumns = std::max(static_cast<unsigned>(map.columns()),
-                                 std::max(CaretColumns, FixItColumns));
+  Columns CaretColumns = Columns{CaretLine.size()};
+  Columns FixItColumns =
+      Columns(llvm::sys::locale::columnWidth(FixItInsertionLine));
+  unsigned MaxColumns =
+      std::max(map.columns().V, std::max(CaretColumns.V, FixItColumns.V));
   // if the number of columns is less than the desired number we're done
-  if (MaxColumns <= Columns)
+  if (MaxColumns <= NonGutterColumns)
     return;
 
   // No special characters are allowed in CaretLine.
@@ -334,13 +391,13 @@ static void selectInterestingSourceRegion(std::string &SourceLine,
 
   // Find the slice that we need to display the full caret line
   // correctly.
-  unsigned CaretStart = 0, CaretEnd = CaretLine.size();
+  Columns CaretStart = 0, CaretEnd = CaretLine.size();
   for (; CaretStart != CaretEnd; ++CaretStart)
-    if (!isWhitespace(CaretLine[CaretStart]))
+    if (!isWhitespace(CaretLine[CaretStart.V]))
       break;
 
   for (; CaretEnd != CaretStart; --CaretEnd)
-    if (!isWhitespace(CaretLine[CaretEnd - 1]))
+    if (!isWhitespace(CaretLine[CaretEnd.V - 1]))
       break;
 
   // caret has already been inserted into CaretLine so the above whitespace
@@ -361,27 +418,25 @@ static void selectInterestingSourceRegion(std::string &SourceLine,
     // We can safely use the byte offset FixItStart as the column offset
     // because the characters up until FixItStart are all ASCII whitespace
     // characters.
-    unsigned FixItStartCol = FixItStart;
-    unsigned FixItEndCol
-      = llvm::sys::locale::columnWidth(FixItInsertionLine.substr(0, FixItEnd));
+    Columns FixItStartCol = FixItStart;
+    Columns FixItEndCol = Columns(
+        llvm::sys::locale::columnWidth(FixItInsertionLine.substr(0, FixItEnd)));
 
-    CaretStart = std::min(FixItStartCol, CaretStart);
-    CaretEnd = std::max(FixItEndCol, CaretEnd);
+    CaretStart = Columns(std::min(FixItStartCol.V, CaretStart.V));
+    CaretEnd = Columns(std::max(FixItEndCol.V, CaretEnd.V));
   }
 
   // CaretEnd may have been set at the middle of a character
   // If it's not at a character's first column then advance it past the current
   //   character.
-  while (static_cast<int>(CaretEnd) < map.columns() &&
-         -1 == map.columnToByte(CaretEnd))
+  while (CaretEnd < map.columns() && !map.columnToByte(CaretEnd).isValid())
     ++CaretEnd;
 
-  assert((static_cast<int>(CaretStart) > map.columns() ||
-          -1!=map.columnToByte(CaretStart)) &&
-         "CaretStart must not point to a column in the middle of a source"
-         " line character");
-  assert((static_cast<int>(CaretEnd) > map.columns() ||
-          -1!=map.columnToByte(CaretEnd)) &&
+  assert(
+      (CaretStart > map.columns() || map.columnToByte(CaretStart).isValid()) &&
+      "CaretStart must not point to a column in the middle of a source"
+      " line character");
+  assert((CaretEnd > map.columns() || map.columnToByte(CaretEnd).isValid()) &&
          "CaretEnd must not point to a column in the middle of a source line"
          " character");
 
@@ -390,70 +445,72 @@ static void selectInterestingSourceRegion(std::string &SourceLine,
   // number of columns we have, try to grow the slice to encompass
   // more context.
 
-  unsigned SourceStart = map.columnToByte(std::min<unsigned>(CaretStart,
-                                                             map.columns()));
-  unsigned SourceEnd = map.columnToByte(std::min<unsigned>(CaretEnd,
-                                                           map.columns()));
+  Bytes SourceStart =
+      map.columnToByte(Columns(std::min<int>(CaretStart.V, map.columns().V)));
+  Bytes SourceEnd =
+      map.columnToByte(Columns(std::min<int>(CaretEnd.V, map.columns().V)));
 
-  unsigned CaretColumnsOutsideSource = CaretEnd-CaretStart
-    - (map.byteToColumn(SourceEnd)-map.byteToColumn(SourceStart));
+  Columns CaretColumnsOutsideSource =
+      CaretEnd - CaretStart -
+      (map.byteToColumn(SourceEnd) - map.byteToColumn(SourceStart));
 
   char const *front_ellipse = "  ...";
   char const *front_space   = "     ";
   char const *back_ellipse = "...";
-  unsigned ellipses_space = strlen(front_ellipse) + strlen(back_ellipse);
+  Columns EllipsesColumns =
+      Columns(strlen(front_ellipse) + strlen(back_ellipse));
 
-  unsigned TargetColumns = Columns;
+  Columns TargetColumns = Columns(NonGutterColumns);
   // Give us extra room for the ellipses
   //  and any of the caret line that extends past the source
-  if (TargetColumns > ellipses_space+CaretColumnsOutsideSource)
-    TargetColumns -= ellipses_space+CaretColumnsOutsideSource;
+  if (TargetColumns > EllipsesColumns + CaretColumnsOutsideSource)
+    TargetColumns -= EllipsesColumns + CaretColumnsOutsideSource;
 
-  while (SourceStart>0 || SourceEnd<SourceLine.size()) {
+  while (SourceStart > 0 || SourceEnd < SourceLine.size()) {
     bool ExpandedRegion = false;
 
-    if (SourceStart>0) {
-      unsigned NewStart = map.startOfPreviousColumn(SourceStart);
+    if (SourceStart > 0) {
+      Bytes NewStart = map.startOfPreviousColumn(SourceStart);
 
       // Skip over any whitespace we see here; we're looking for
       // another bit of interesting text.
       // FIXME: Detect non-ASCII whitespace characters too.
-      while (NewStart && isWhitespace(SourceLine[NewStart]))
+      while (NewStart > 0 && isWhitespace(SourceLine[NewStart.V]))
         NewStart = map.startOfPreviousColumn(NewStart);
 
       // Skip over this bit of "interesting" text.
-      while (NewStart) {
-        unsigned Prev = map.startOfPreviousColumn(NewStart);
-        if (isWhitespace(SourceLine[Prev]))
+      while (NewStart > 0) {
+        Bytes Prev = map.startOfPreviousColumn(NewStart);
+        if (isWhitespace(SourceLine[Prev.V]))
           break;
         NewStart = Prev;
       }
 
-      assert(map.byteToColumn(NewStart) != -1);
-      unsigned NewColumns = map.byteToColumn(SourceEnd) -
-                              map.byteToColumn(NewStart);
+      assert(map.byteToColumn(NewStart).isValid());
+      Columns NewColumns =
+          map.byteToColumn(SourceEnd) - map.byteToColumn(NewStart);
       if (NewColumns <= TargetColumns) {
         SourceStart = NewStart;
         ExpandedRegion = true;
       }
     }
 
-    if (SourceEnd<SourceLine.size()) {
-      unsigned NewEnd = map.startOfNextColumn(SourceEnd);
+    if (SourceEnd < SourceLine.size()) {
+      Bytes NewEnd = map.startOfNextColumn(SourceEnd);
 
       // Skip over any whitespace we see here; we're looking for
       // another bit of interesting text.
       // FIXME: Detect non-ASCII whitespace characters too.
-      while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd]))
+      while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd.V]))
         NewEnd = map.startOfNextColumn(NewEnd);
 
       // Skip over this bit of "interesting" text.
-      while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd]))
+      while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd.V]))
         NewEnd = map.startOfNextColumn(NewEnd);
 
-      assert(map.byteToColumn(NewEnd) != -1);
-      unsigned NewColumns = map.byteToColumn(NewEnd) -
-                              map.byteToColumn(SourceStart);
+      assert(map.byteToColumn(NewEnd).isValid());
+      Columns NewColumns =
+          map.byteToColumn(NewEnd) - map.byteToColumn(SourceStart);
       if (NewColumns <= TargetColumns) {
         SourceEnd = NewEnd;
         ExpandedRegion = true;
@@ -469,34 +526,36 @@ static void selectInterestingSourceRegion(std::string &SourceLine,
 
   // [CaretStart, CaretEnd) is the slice we want. Update the various
   // output lines to show only this slice.
-  assert(CaretStart!=(unsigned)-1 && CaretEnd!=(unsigned)-1 &&
-         SourceStart!=(unsigned)-1 && SourceEnd!=(unsigned)-1);
+  assert(CaretStart.isValid() && CaretEnd.isValid() && SourceStart.isValid() &&
+         SourceEnd.isValid());
   assert(SourceStart <= SourceEnd);
   assert(CaretStart <= CaretEnd);
 
-  unsigned BackColumnsRemoved
-    = map.byteToColumn(SourceLine.size())-map.byteToColumn(SourceEnd);
-  unsigned FrontColumnsRemoved = CaretStart;
-  unsigned ColumnsKept = CaretEnd-CaretStart;
+  Columns BackColumnsRemoved =
+      map.byteToColumn(Bytes{static_cast<int>(SourceLine.size())}) -
+      map.byteToColumn(SourceEnd);
+  Columns FrontColumnsRemoved = CaretStart;
+  Columns ColumnsKept = CaretEnd - CaretStart;
 
   // We checked up front that the line needed truncation
-  assert(FrontColumnsRemoved+ColumnsKept+BackColumnsRemoved > Columns);
+  assert(FrontColumnsRemoved + ColumnsKept + BackColumnsRemoved >
+         NonGutterColumns);
 
   // The line needs some truncation, and we'd prefer to keep the front
   //  if possible, so remove the back
-  if (BackColumnsRemoved > strlen(back_ellipse))
-    SourceLine.replace(SourceEnd, std::string::npos, back_ellipse);
+  if (BackColumnsRemoved > Columns(strlen(back_ellipse)))
+    SourceLine.replace(SourceEnd.V, std::string::npos, back_ellipse);
 
   // If that's enough then we're done
-  if (FrontColumnsRemoved+ColumnsKept <= Columns)
+  if (FrontColumnsRemoved + ColumnsKept <= Columns(NonGutterColumns))
     return;
 
   // Otherwise remove the front as well
-  if (FrontColumnsRemoved > strlen(front_ellipse)) {
-    SourceLine.replace(0, SourceStart, front_ellipse);
-    CaretLine.replace(0, CaretStart, front_space);
+  if (FrontColumnsRemoved > Columns(strlen(front_ellipse))) {
+    SourceLine.replace(0, SourceStart.V, front_ellipse);
+    CaretLine.replace(0, CaretStart.V, front_space);
     if (!FixItInsertionLine.empty())
-      FixItInsertionLine.replace(0, CaretStart, front_space);
+      FixItInsertionLine.replace(0, CaretStart.V, front_space);
   }
 }
 
@@ -961,41 +1020,41 @@ maybeAddRange(std::pair<unsigned, unsigned> A, std::pair<unsigned, unsigned> B,
 
 struct LineRange {
   unsigned LineNo;
-  unsigned StartCol;
-  unsigned EndCol;
+  Bytes StartCol; // FIXME: RENAME
+  Bytes EndCol;   // FIXME: RENAME
 };
 
 /// Highlight \p R (with ~'s) on the current source line.
 static void highlightRange(const LineRange &R, const SourceColumnMap &Map,
                            std::string &CaretLine) {
   // Pick the first non-whitespace column.
-  unsigned StartColNo = R.StartCol;
-  while (StartColNo < Map.getSourceLine().size() &&
-         (Map.getSourceLine()[StartColNo] == ' ' ||
-          Map.getSourceLine()[StartColNo] == '\t'))
-    StartColNo = Map.startOfNextColumn(StartColNo);
+  Bytes StartByte = R.StartCol;
+  while (StartByte < Map.bytes() && (Map.getSourceLine()[StartByte.V] == ' ' ||
+                                     Map.getSourceLine()[StartByte.V] == '\t'))
+    StartByte = Map.startOfNextColumn(StartByte);
 
   // Pick the last non-whitespace column.
-  unsigned EndColNo =
-      std::min(static_cast<size_t>(R.EndCol), Map.getSourceLine().size());
-  while (EndColNo && (Map.getSourceLine()[EndColNo - 1] == ' ' ||
-                      Map.getSourceLine()[EndColNo - 1] == '\t'))
-    EndColNo = Map.startOfPreviousColumn(EndColNo);
+  Bytes EndByte = Bytes(
+      std::min(static_cast<size_t>(R.EndCol.V), Map.getSourceLine().size()));
+  while (EndByte != 0 && (Map.getSourceLine()[EndByte.V - 1] == ' ' ||
+                          Map.getSourceLine()[EndByte.V - 1] == '\t'))
+    EndByte = Map.startOfPreviousColumn(EndByte);
 
   // If the start/end passed each other, then we are trying to highlight a
   // range that just exists in whitespace. That most likely means we have
   // a multi-line highlighting range that covers a blank line.
-  if (StartColNo > EndColNo)
+  if (StartByte > EndByte)
     return;
 
+  assert(StartByte <= EndByte && "Invalid range!");
   // Fill the range with ~'s.
-  StartColNo = Map.byteToContainingColumn(StartColNo);
-  EndColNo = Map.byteToContainingColumn(EndColNo);
+  Columns StartCol = Map.byteToContainingColumn(StartByte);
+  Columns EndCol = Map.byteToContainingColumn(EndByte);
+
+  if (CaretLine.size() < static_cast<size_t>(EndCol.V))
+    CaretLine.resize(EndCol.V, ' ');
 
-  assert...
[truncated]

Sirraide

In my experience, it’s usually easier to (ab)use enums for this, i.e.

enum class Bytes : unsigned;
enum class Columns : unsigned;

and then define additional operators as needed.

tbaederr · 2025-10-29T11:34:18Z

In my experience, it’s usually easier to (ab)use enums for this, i.e.
enum class Bytes : unsigned;
enum class Columns : unsigned;
and then define additional operators as needed.

Well it's not an enumeration and I can't define member functions like that. This is also similar to the Bytes struct I have already added elsewhere

Sirraide · 2025-10-29T11:46:21Z

Well it's not an enumeration and I can't define member functions like that.

Yeah, I missed that you also have isValid() as a member; I thought it was only operators

Sirraide · 2025-10-29T11:47:10Z

That’s still quite a bit of code duplication; it’d be nice to e.g. inherit from some common base class that provides most of the common operators.

Sirraide

Overall this does improve readability quite a bit

clang/lib/Frontend/TextDiagnostic.cpp

Sirraide · 2025-10-29T11:56:41Z

clang/lib/Frontend/TextDiagnostic.cpp

  //  if possible, so remove the back
-  if (BackColumnsRemoved > strlen(back_ellipse))
-    SourceLine.replace(SourceEnd, std::string::npos, back_ellipse);
+  if (BackColumnsRemoved > Columns(strlen(back_ellipse)))


(This is unrelated to this patch, but we should change back_ellipse to be constexpr StringRefs)

clang/lib/Frontend/TextDiagnostic.cpp

tbaederr · 2025-10-29T12:21:57Z

That’s still quite a bit of code duplication; it’d be nice to e.g. inherit from some common base class that provides most of the common operators.

It doesn't buy much since most of the member functions shouldn't return "a byte or a column" but a specific type.

Sirraide · 2025-10-29T12:24:31Z

Wait I think I acidentally edited your comment; goddamnit github

Sirraide · 2025-10-29T12:25:16Z

It doesn't buy much since most of the member functions shouldn't return "a byte or a column" but a specific type.

Ok, I think I managed to revert it. What I meant to do is say that maybe using CRTP would help?

clang/lib/Frontend/TextDiagnostic.cpp

Sirraide

Some more minor things, but otherwise this LGTM. Thanks!

clang/lib/Frontend/TextDiagnostic.cpp

llvm-ci · 2025-10-30T08:47:08Z

LLVM Buildbot has detected a new failure on builder openmp-s390x-linux running on systemz-1 while building clang at step 6 "test-openmp".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/88/builds/17652

Here is the relevant piece of the build log for the reference

Step 6 (test-openmp) failure: test (failure)
******************** TEST 'libomp :: tasking/issue-94260-2.c' FAILED ********************
Exit Code: -11

Command Output (stdout):
--
# RUN: at line 1
/home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/./bin/clang -fopenmp   -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test -L /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src  -fno-omit-frame-pointer -mbackchain -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/ompt /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic && /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# executed command: /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/./bin/clang -fopenmp -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test -L /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -fno-omit-frame-pointer -mbackchain -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/ompt /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic
# executed command: /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# note: command had no output on stdout or stderr
# error: command failed with exit status: -11

--

********************

llvm-ci · 2025-10-30T09:10:22Z

LLVM Buildbot has detected a new failure on builder llvm-clang-x86_64-darwin running on doug-worker-3 while building clang at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/23/builds/15110

Here is the relevant piece of the build log for the reference

Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'Clang :: Lexer/has_feature_undefined_behavior_sanitizer.cpp' FAILED ********************
Exit Code: 127

Command Output (stdout):
--
# RUN: at line 1
/Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/clang -E -target x86_64-unknown-linux-gnu -fsanitize=undefined /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp -o - | /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/FileCheck --check-prefix=CHECK-UBSAN /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp
# executed command: /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/clang -E -target x86_64-unknown-linux-gnu -fsanitize=undefined /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp -o -
# note: command had no output on stdout or stderr
# executed command: /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/FileCheck --check-prefix=CHECK-UBSAN /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp
# note: command had no output on stdout or stderr
# RUN: at line 2
/Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/clang -E -target x86_64-unknown-linux-gnu -fsanitize=alignment /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp -o - | /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/FileCheck --check-prefixes=CHECK-UBSAN,CHECK-ALIGNMENT /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp
# executed command: /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/clang -E -target x86_64-unknown-linux-gnu -fsanitize=alignment /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp -o -
# note: command had no output on stdout or stderr
# executed command: /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/FileCheck --check-prefixes=CHECK-UBSAN,CHECK-ALIGNMENT /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp
# note: command had no output on stdout or stderr
# RUN: at line 3
/Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/clang -E -target x86_64-unknown-linux-gnu -fsanitize=bool /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp -o - | /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/FileCheck --check-prefixes=CHECK-UBSAN,CHECK-BOOL /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp
# executed command: /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/clang -E -target x86_64-unknown-linux-gnu -fsanitize=bool /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp -o -
# note: command had no output on stdout or stderr
# executed command: /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/FileCheck --check-prefixes=CHECK-UBSAN,CHECK-BOOL /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp
# note: command had no output on stdout or stderr
# RUN: at line 4
/Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/clang -E -target x86_64-unknown-linux-gnu -fsanitize=builtin /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp -o - | /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/FileCheck --check-prefixes=CHECK-UBSAN,CHECK-BUILTIN /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp
# executed command: /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/clang -E -target x86_64-unknown-linux-gnu -fsanitize=builtin /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp -o -
# note: command had no output on stdout or stderr
# executed command: /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/FileCheck --check-prefixes=CHECK-UBSAN,CHECK-BUILTIN /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp
# note: command had no output on stdout or stderr
# RUN: at line 5
/Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/clang -E -target x86_64-unknown-linux-gnu -fsanitize=array-bounds /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp -o - | /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/FileCheck --check-prefixes=CHECK-UBSAN,CHECK-ARRAY-BOUNDS /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp
# executed command: /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/clang -E -target x86_64-unknown-linux-gnu -fsanitize=array-bounds /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp -o -
# note: command had no output on stdout or stderr
# executed command: /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/FileCheck --check-prefixes=CHECK-UBSAN,CHECK-ARRAY-BOUNDS /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp
# note: command had no output on stdout or stderr
# RUN: at line 6
/Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/clang -E -target x86_64-unknown-linux-gnu -fsanitize=enum /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp -o - | /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/FileCheck --check-prefixes=CHECK-UBSAN,CHECK-ENUM /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp
# executed command: /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/clang -E -target x86_64-unknown-linux-gnu -fsanitize=enum /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp -o -
# note: command had no output on stdout or stderr
# executed command: /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/FileCheck --check-prefixes=CHECK-UBSAN,CHECK-ENUM /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp
# note: command had no output on stdout or stderr
# RUN: at line 7
/Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/clang -E -target x86_64-unknown-linux-gnu -fsanitize=float-cast-overflow /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp -o - | /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/FileCheck --check-prefixes=CHECK-UBSAN,CHECK-FLOAT-CAST-OVERFLOW /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp
# executed command: /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/clang -E -target x86_64-unknown-linux-gnu -fsanitize=float-cast-overflow /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp -o -
# note: command had no output on stdout or stderr
# executed command: /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/FileCheck --check-prefixes=CHECK-UBSAN,CHECK-FLOAT-CAST-OVERFLOW /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp
# note: command had no output on stdout or stderr
# RUN: at line 8
/Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/clang -E -target x86_64-unknown-linux-gnu -fsanitize=integer-divide-by-zero /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp -o - | /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/FileCheck --check-prefixes=CHECK-UBSAN,CHECK-INTEGER-DIVIDE-BY-ZERO /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp
# executed command: /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/clang -E -target x86_64-unknown-linux-gnu -fsanitize=integer-divide-by-zero /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/clang/test/Lexer/has_feature_undefined_behavior_sanitizer.cpp -o -
...

In `TextDiagnostic.cpp`, we're using column- and byte indices everywhere, but we were using integers for them which made it hard to know what to pass where, and what was produced. To make matters worse, that `SourceManager` considers a "column" is a byte in `TextDiagnostic`. Add `Bytes` and `Columns` structs, which are not related so API using them can differentiate between values interpreted columns or bytes.

tbaederr requested review from AaronBallman and Sirraide October 29, 2025 11:02

llvmbot added the clang Clang issues not falling into any other category label Oct 29, 2025

tbaederr added the clang:frontend Language frontend issues, e.g. anything involving "Sema" label Oct 29, 2025

tbaederr force-pushed the perf/text-diag branch 2 times, most recently from a01c42a to 72f10ae Compare October 29, 2025 11:10

Sirraide reviewed Oct 29, 2025

View reviewed changes

tbaederr force-pushed the perf/text-diag branch from 72f10ae to d3227fd Compare October 29, 2025 11:25

tbaederr force-pushed the perf/text-diag branch from d3227fd to 795bad3 Compare October 29, 2025 11:39

Sirraide reviewed Oct 29, 2025

View reviewed changes

zwuis reviewed Oct 29, 2025

View reviewed changes

clang/lib/Frontend/TextDiagnostic.cpp Show resolved Hide resolved

clang/lib/Frontend/TextDiagnostic.cpp Outdated Show resolved Hide resolved

tbaederr force-pushed the perf/text-diag branch from 69068ae to 21e9fa7 Compare October 29, 2025 12:44

Sirraide approved these changes Oct 29, 2025

View reviewed changes

clang/lib/Frontend/TextDiagnostic.cpp Outdated Show resolved Hide resolved

clang/lib/Frontend/TextDiagnostic.cpp Outdated Show resolved Hide resolved

clang/lib/Frontend/TextDiagnostic.cpp Outdated Show resolved Hide resolved

tbaederr force-pushed the perf/text-diag branch 5 times, most recently from e49a8cf to d010dc3 Compare October 30, 2025 04:38

[clang] Add Bytes/Column types to TextDiagnostic

ed9f170

tbaederr force-pushed the perf/text-diag branch from d010dc3 to ed9f170 Compare October 30, 2025 07:46

tbaederr changed the title ~~[clang] Add Bytes/Column types to TextDiagnostic~~ [clang] Add Bytes/Columns types to TextDiagnostic Oct 30, 2025

tbaederr merged commit 67db5fd into llvm:main Oct 30, 2025
10 checks passed

[clang] Add Bytes/Columns types to TextDiagnostic #165541

[clang] Add Bytes/Columns types to TextDiagnostic #165541

Conversation

tbaederr commented Oct 29, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Oct 29, 2025

Uh oh!

Sirraide left a comment • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

tbaederr commented Oct 29, 2025

Uh oh!

Sirraide commented Oct 29, 2025

Uh oh!

Sirraide commented Oct 29, 2025

Uh oh!

Sirraide left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Sirraide Oct 29, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

tbaederr commented Oct 29, 2025 • edited by Sirraide Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Sirraide commented Oct 29, 2025

Uh oh!

Sirraide commented Oct 29, 2025

Uh oh!

Uh oh!

Uh oh!

Sirraide left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

llvm-ci commented Oct 30, 2025

Uh oh!

llvm-ci commented Oct 30, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

tbaederr commented Oct 29, 2025 •

edited

Loading

Sirraide left a comment •

edited

Loading

tbaederr commented Oct 29, 2025 •

edited by Sirraide

Loading