Fix various problems found by fuzzing.

r4nt · r4nt · commit 84bf5e328664 · 2021-11-22T11:08:38.000+01:00
1. IndexTokenSource::getNextToken cannot return nullptr; some code was
still written assuming it can; make getNextToken more resilient against
incorrect input and fix its call-sites.

2. Change various asserts that can happen due to user provided input to
conditionals in the code.
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
@@ -1986,7 +1986,9 @@ ContinuationIndenter::createBreakableToken(const FormatToken &Current,
               Current.Previous->isNot(TT_ImplicitStringLiteral))) {
     if (!Style.ReflowComments ||
         CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
-        switchesFormatting(Current))
+        switchesFormatting(Current) ||
+        !(Current.TokenText.startswith("//") ||
+          Current.TokenText.startswith("#")))
       return nullptr;
     return std::make_unique<BreakableLineCommentSection>(
         Current, StartColumn, /*InPPDirective=*/false, Encoding, Style);
@@ -2195,11 +2197,10 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
       // When breaking before a tab character, it may be moved by a few columns,
       // but will still be expanded to the next tab stop, so we don't save any
       // columns.
-      if (NewRemainingTokenColumns == RemainingTokenColumns) {
+      if (NewRemainingTokenColumns >= RemainingTokenColumns) {
         // FIXME: Do we need to adjust the penalty?
         break;
       }
-      assert(NewRemainingTokenColumns < RemainingTokenColumns);
 
       LLVM_DEBUG(llvm::dbgs() << "    Breaking at: " << TailOffset + Split.first
                               << ", " << Split.second << "\n");
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
@@ -946,11 +946,15 @@ class AnnotatingParser {
                  !Line.First->isOneOf(tok::kw_enum, tok::kw_case,
                                       tok::kw_default)) {
         FormatToken *Prev = Tok->getPreviousNonComment();
+        if (!Prev)
+          break;
         if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept))
           Tok->setType(TT_CtorInitializerColon);
         else if (Prev->is(tok::kw_try)) {
           // Member initializer list within function try block.
           FormatToken *PrevPrev = Prev->getPreviousNonComment();
+          if (!PrevPrev)
+            break;
           if (PrevPrev && PrevPrev->isOneOf(tok::r_paren, tok::kw_noexcept))
             Tok->setType(TT_CtorInitializerColon);
         } else
@@ -1578,6 +1582,8 @@ class AnnotatingParser {
         if (TemplateCloser->is(tok::l_paren)) {
           // No Matching Paren yet so skip to matching paren
           TemplateCloser = untilMatchingParen(TemplateCloser);
+          if (!TemplateCloser)
+            break;
         }
         if (TemplateCloser->is(tok::less))
           NestingLevel++;
@@ -2639,8 +2645,8 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
     if (Current->Role)
       Current->Role->precomputeFormattingInfos(Current);
     if (Current->MatchingParen &&
-        Current->MatchingParen->opensBlockOrBlockTypeList(Style)) {
-      assert(IndentLevel > 0);
+        Current->MatchingParen->opensBlockOrBlockTypeList(Style) &&
+        IndentLevel > 0) {
       --IndentLevel;
     }
     Current->IndentLevel = IndentLevel;
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -199,6 +199,8 @@ class IndexedTokenSource : public FormatTokenSource {
       : Tokens(Tokens), Position(-1) {}
 
   FormatToken *getNextToken() override {
+    if (Position >= 0 && Tokens[Position]->is(tok::eof))
+      return Tokens[Position];
     ++Position;
     return Tokens[Position];
   }
@@ -399,7 +401,7 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
       FormatToken *Next;
       do {
         Next = Tokens->getNextToken();
-      } while (Next && Next->is(tok::comment));
+      } while (Next->is(tok::comment));
       FormatTok = Tokens->setPosition(StoredPosition);
       if (Next && Next->isNot(tok::colon)) {
         // default not followed by ':' is not a case label; treat it like
@@ -1097,7 +1099,6 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() {
 }
 
 void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
-  assert(!FormatTok->is(tok::l_brace));
   if (Style.Language == FormatStyle::LK_TableGen &&
       FormatTok->is(tok::pp_include)) {
     nextToken();
@@ -1488,7 +1489,7 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
           unsigned StoredPosition = Tokens->getPosition();
           FormatToken *Next = Tokens->getNextToken();
           FormatTok = Tokens->setPosition(StoredPosition);
-          if (Next && !mustBeJSIdent(Keywords, Next)) {
+          if (!mustBeJSIdent(Keywords, Next)) {
             nextToken();
             break;
           }
@@ -2653,23 +2654,25 @@ bool UnwrappedLineParser::tryToParseSimpleAttribute() {
   ScopedTokenPosition AutoPosition(Tokens);
   FormatToken *Tok = Tokens->getNextToken();
   // We already read the first [ check for the second.
-  if (Tok && !Tok->is(tok::l_square)) {
+  if (!Tok->is(tok::l_square)) {
     return false;
   }
   // Double check that the attribute is just something
   // fairly simple.
-  while (Tok) {
+  while (Tok->isNot(tok::eof)) {
     if (Tok->is(tok::r_square)) {
       break;
     }
     Tok = Tokens->getNextToken();
   }
+  if (Tok->is(tok::eof))
+    return false;
   Tok = Tokens->getNextToken();
-  if (Tok && !Tok->is(tok::r_square)) {
+  if (!Tok->is(tok::r_square)) {
     return false;
   }
   Tok = Tokens->getNextToken();
-  if (Tok && Tok->is(tok::semi)) {
+  if (Tok->is(tok::semi)) {
     return false;
   }
   return true;
@@ -2682,7 +2685,7 @@ void UnwrappedLineParser::parseJavaEnumBody() {
   unsigned StoredPosition = Tokens->getPosition();
   bool IsSimple = true;
   FormatToken *Tok = Tokens->getNextToken();
-  while (Tok) {
+  while (!Tok->is(tok::eof)) {
     if (Tok->is(tok::r_brace))
       break;
     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp
@@ -372,8 +372,6 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End,
     if (ContinuedStringLiteral)
       Changes[i].Spaces += Shift;
 
-    assert(Shift >= 0);
-
     Changes[i].StartOfTokenColumn += Shift;
     if (i + 1 != Changes.size())
       Changes[i + 1].PreviousEndOfTokenColumn += Shift;
@@ -915,7 +913,7 @@ void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End,
               Changes[i].StartOfBlockComment->StartOfTokenColumn -
               Changes[i].StartOfTokenColumn;
     }
-    assert(Shift >= 0);
+    if (Shift < 0) continue;
     Changes[i].Spaces += Shift;
     if (i + 1 != Changes.size())
       Changes[i + 1].PreviousEndOfTokenColumn += Shift;
@@ -1270,10 +1268,10 @@ WhitespaceManager::linkCells(CellDescriptions &&CellDesc) {
 void WhitespaceManager::generateChanges() {
   for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
     const Change &C = Changes[i];
-    if (i > 0) {
-      assert(Changes[i - 1].OriginalWhitespaceRange.getBegin() !=
-                 C.OriginalWhitespaceRange.getBegin() &&
-             "Generating two replacements for the same location");
+    if (i > 0 && Changes[i - 1].OriginalWhitespaceRange.getBegin() ==
+                 C.OriginalWhitespaceRange.getBegin()) {
+      // Do not generate two replacements for the same location.
+      continue;
     }
     if (C.CreateReplacement) {
       std::string ReplacementText = C.PreviousLinePostfix;