[clangd] Duplicate lines of semantic highlightings sent removed.

Summary: Added a class for diffing highlightings and removing duplicate lines. Integrated into the highlighting generation flow. Only works correctly if all tokens are on a single line. Also returns empty lines if the IDE should remove previous highlightings on a line. Reviewers: hokein, sammccall, ilya-biryukov Subscribers: MaskRay, jkorous, mgrang, arphaman, kadircet, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D64475 llvm-svn: 367521
llvm · Aug 1, 2019 · c2653ef · c2653ef
1 parent c5877e9
commit c2653ef
Show file tree

Hide file tree

Showing 8 changed files with 334 additions and 42 deletions.
diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp
@@ -615,6 +615,10 @@ void ClangdLSPServer::onDocumentDidClose(
     std::lock_guard<std::mutex> Lock(FixItsMutex);
     FixItsMap.erase(File);
   }
+  {
+    std::lock_guard<std::mutex> HLock(HighlightingsMutex);
+    FileToHighlightings.erase(File);
+  }
   // clangd will not send updates for this file anymore, so we empty out the
   // list of diagnostics shown on the client (e.g. in the "Problems" pane of
   // VSCode). Note that this cannot race with actual diagnostics responses
@@ -1113,10 +1117,21 @@ bool ClangdLSPServer::shouldRunCompletion(
 }
 
 void ClangdLSPServer::onHighlightingsReady(
-    PathRef File, std::vector<HighlightingToken> Highlightings) {
+    PathRef File, std::vector<HighlightingToken> Highlightings, int NumLines) {
+  std::vector<HighlightingToken> Old;
+  std::vector<HighlightingToken> HighlightingsCopy = Highlightings;
+  {
+    std::lock_guard<std::mutex> Lock(HighlightingsMutex);
+    Old = std::move(FileToHighlightings[File]);
+    FileToHighlightings[File] = std::move(HighlightingsCopy);
+  }
+  // LSP allows us to send incremental edits of highlightings. Also need to diff
+  // to remove highlightings from tokens that should no longer have them.
+  std::vector<LineHighlightings> Diffed =
+      diffHighlightings(Highlightings, Old, NumLines);
   publishSemanticHighlighting(
       {{URIForFile::canonicalize(File, /*TUPath=*/File)},
-       toSemanticHighlightingInformation(Highlightings)});
+       toSemanticHighlightingInformation(Diffed)});
 }
 
 void ClangdLSPServer::onDiagnosticsReady(PathRef File,

diff --git a/clang-tools-extra/clangd/ClangdLSPServer.h b/clang-tools-extra/clangd/ClangdLSPServer.h
@@ -55,9 +55,9 @@ class ClangdLSPServer : private DiagnosticsConsumer {
   // Implement DiagnosticsConsumer.
   void onDiagnosticsReady(PathRef File, std::vector<Diag> Diagnostics) override;
   void onFileUpdated(PathRef File, const TUStatus &Status) override;
-  void
-  onHighlightingsReady(PathRef File,
-                       std::vector<HighlightingToken> Highlightings) override;
+  void onHighlightingsReady(PathRef File,
+                            std::vector<HighlightingToken> Highlightings,
+                            int NumLines) override;
 
   // LSP methods. Notifications have signature void(const Params&).
   // Calls have signature void(const Params&, Callback<Response>).
@@ -138,6 +138,8 @@ class ClangdLSPServer : private DiagnosticsConsumer {
       DiagnosticToReplacementMap;
   /// Caches FixIts per file and diagnostics
   llvm::StringMap<DiagnosticToReplacementMap> FixItsMap;
+  std::mutex HighlightingsMutex;
+  llvm::StringMap<std::vector<HighlightingToken>> FileToHighlightings;
 
   // Most code should not deal with Transport directly.
   // MessageHandler deals with incoming messages, use call() etc for outgoing.

diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp
@@ -71,10 +71,19 @@ struct UpdateIndexCallbacks : public ParsingCallbacks {
     if (SemanticHighlighting)
       Highlightings = getSemanticHighlightings(AST);
 
+    // FIXME: We need a better way to send the maximum line number to the
+    // differ.
+    // The differ needs the information about the max number of lines
+    // to not send diffs that are outside the file.
+    const SourceManager &SM = AST.getSourceManager();
+    FileID MainFileID = SM.getMainFileID();
+    int NumLines = SM.getBufferData(MainFileID).count('\n') + 1;
+
     Publish([&]() {
       DiagConsumer.onDiagnosticsReady(Path, std::move(Diagnostics));
       if (SemanticHighlighting)
-        DiagConsumer.onHighlightingsReady(Path, std::move(Highlightings));
+        DiagConsumer.onHighlightingsReady(Path, std::move(Highlightings),
+                                          NumLines);
     });
   }
 

diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h
@@ -52,9 +52,12 @@ class DiagnosticsConsumer {
   virtual void onFileUpdated(PathRef File, const TUStatus &Status){};
 
   /// Called by ClangdServer when some \p Highlightings for \p File are ready.
+  /// \p NumLines are the number of lines in the file where the highlightings
+  /// where generated from.
   virtual void
   onHighlightingsReady(PathRef File,
-                       std::vector<HighlightingToken> Highlightings) {}
+                       std::vector<HighlightingToken> Highlightings,
+                       int NumLines) {}
 };
 
 /// When set, used by ClangdServer to get clang-tidy options for each particular

diff --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp
@@ -12,6 +12,7 @@
 #include "SourceCode.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/RecursiveASTVisitor.h"
+#include <algorithm>
 
 namespace clang {
 namespace clangd {
@@ -33,10 +34,7 @@ class HighlightingTokenCollector
     TraverseAST(Ctx);
     // Initializer lists can give duplicates of tokens, therefore all tokens
     // must be deduplicated.
-    llvm::sort(Tokens,
-               [](const HighlightingToken &L, const HighlightingToken &R) {
-                 return std::tie(L.R, L.Kind) < std::tie(R.R, R.Kind);
-               });
+    llvm::sort(Tokens);
     auto Last = std::unique(Tokens.begin(), Tokens.end());
     Tokens.erase(Last, Tokens.end());
     return Tokens;
@@ -260,33 +258,93 @@ void write16be(uint16_t I, llvm::raw_ostream &OS) {
   llvm::support::endian::write16be(Buf.data(), I);
   OS.write(Buf.data(), Buf.size());
 }
+
+// Get the highlightings on \c Line where the first entry of line is at \c
+// StartLineIt. If it is not at \c StartLineIt an empty vector is returned.
+ArrayRef<HighlightingToken>
+takeLine(ArrayRef<HighlightingToken> AllTokens,
+         ArrayRef<HighlightingToken>::iterator StartLineIt, int Line) {
+  return ArrayRef<HighlightingToken>(StartLineIt, AllTokens.end())
+      .take_while([Line](const HighlightingToken &Token) {
+        return Token.R.start.line == Line;
+      });
+}
 } // namespace
 
-bool operator==(const HighlightingToken &Lhs, const HighlightingToken &Rhs) {
-  return Lhs.Kind == Rhs.Kind && Lhs.R == Rhs.R;
+std::vector<LineHighlightings>
+diffHighlightings(ArrayRef<HighlightingToken> New,
+                  ArrayRef<HighlightingToken> Old, int NewMaxLine) {
+  assert(std::is_sorted(New.begin(), New.end()) && "New must be a sorted vector");
+  assert(std::is_sorted(Old.begin(), Old.end()) && "Old must be a sorted vector");
+
+  // FIXME: There's an edge case when tokens span multiple lines. If the first
+  // token on the line started on a line above the current one and the rest of
+  // the line is the equal to the previous one than we will remove all
+  // highlights but the ones for the token spanning multiple lines. This means
+  // that when we get into the LSP layer the only highlights that will be
+  // visible are the ones for the token spanning multiple lines.
+  // Example:
+  // EndOfMultilineToken  Token Token Token
+  // If "Token Token Token" don't differ from previously the line is
+  // incorrectly removed. Suggestion to fix is to separate any multiline tokens
+  // into one token for every line it covers. This requires reading from the
+  // file buffer to figure out the length of each line though.
+  std::vector<LineHighlightings> DiffedLines;
+  // ArrayRefs to the current line in the highlightings.
+  ArrayRef<HighlightingToken> NewLine(New.begin(),
+                                      /*length*/0UL);
+  ArrayRef<HighlightingToken> OldLine(Old.begin(),
+                                      /*length*/ 0UL);
+  auto NewEnd = New.end();
+  auto OldEnd = Old.end();
+  auto NextLineNumber = [&]() {
+    int NextNew = NewLine.end() != NewEnd ? NewLine.end()->R.start.line
+                                             : std::numeric_limits<int>::max();
+    int NextOld = OldLine.end() != OldEnd ? OldLine.end()->R.start.line
+                                             : std::numeric_limits<int>::max();
+    return std::min(NextNew, NextOld);
+  };
+
+  // If the New file has fewer lines than the Old file we don't want to send
+  // highlightings beyond the end of the file.
+  for (int LineNumber = 0; LineNumber < NewMaxLine;
+       LineNumber = NextLineNumber()) {
+    NewLine = takeLine(New, NewLine.end(), LineNumber);
+    OldLine = takeLine(Old, OldLine.end(), LineNumber);
+    if (NewLine != OldLine)
+      DiffedLines.push_back({LineNumber, NewLine});
+  }
+
+  return DiffedLines;
+}
+
+bool operator==(const HighlightingToken &L, const HighlightingToken &R) {
+  return std::tie(L.R, L.Kind) == std::tie(R.R, R.Kind);
+}
+bool operator<(const HighlightingToken &L, const HighlightingToken &R) {
+  return std::tie(L.R, L.Kind) < std::tie(R.R, R.Kind);
+}
+bool operator==(const LineHighlightings &L, const LineHighlightings &R) {
+  return std::tie(L.Line, L.Tokens) == std::tie(R.Line, R.Tokens);
 }
 
 std::vector<HighlightingToken> getSemanticHighlightings(ParsedAST &AST) {
   return HighlightingTokenCollector(AST).collectTokens();
 }
 
 std::vector<SemanticHighlightingInformation>
-toSemanticHighlightingInformation(llvm::ArrayRef<HighlightingToken> Tokens) {
+toSemanticHighlightingInformation(llvm::ArrayRef<LineHighlightings> Tokens) {
   if (Tokens.size() == 0)
     return {};
 
   // FIXME: Tokens might be multiple lines long (block comments) in this case
   // this needs to add multiple lines for those tokens.
-  std::map<int, std::vector<HighlightingToken>> TokenLines;
-  for (const HighlightingToken &Token : Tokens)
-    TokenLines[Token.R.start.line].push_back(Token);
-
   std::vector<SemanticHighlightingInformation> Lines;
-  Lines.reserve(TokenLines.size());
-  for (const auto &Line : TokenLines) {
+  Lines.reserve(Tokens.size());
+  for (const auto &Line : Tokens) {
     llvm::SmallVector<char, 128> LineByteTokens;
     llvm::raw_svector_ostream OS(LineByteTokens);
-    for (const auto &Token : Line.second) {
+    for (const auto &Token : Line.Tokens) {
       // Writes the token to LineByteTokens in the byte format specified by the
       // LSP proposal. Described below.
       // |<---- 4 bytes ---->|<-- 2 bytes -->|<--- 2 bytes -->|
@@ -297,7 +355,7 @@ toSemanticHighlightingInformation(llvm::ArrayRef<HighlightingToken> Tokens) {
       write16be(static_cast<int>(Token.Kind), OS);
     }
 
-    Lines.push_back({Line.first, encodeBase64(LineByteTokens)});
+    Lines.push_back({Line.Line, encodeBase64(LineByteTokens)});
   }
 
   return Lines;

diff --git a/clang-tools-extra/clangd/SemanticHighlighting.h b/clang-tools-extra/clangd/SemanticHighlighting.h
@@ -43,7 +43,16 @@ struct HighlightingToken {
   Range R;
 };
 
-bool operator==(const HighlightingToken &Lhs, const HighlightingToken &Rhs);
+bool operator==(const HighlightingToken &L, const HighlightingToken &R);
+bool operator<(const HighlightingToken &L, const HighlightingToken &R);
+
+/// Contains all information about highlightings on a single line.
+struct LineHighlightings {
+  int Line;
+  std::vector<HighlightingToken> Tokens;
+};
+
+bool operator==(const LineHighlightings &L, const LineHighlightings &R);
 
 // Returns all HighlightingTokens from an AST. Only generates highlights for the
 // main AST.
@@ -53,9 +62,22 @@ std::vector<HighlightingToken> getSemanticHighlightings(ParsedAST &AST);
 /// (https://manual.macromates.com/en/language_grammars).
 llvm::StringRef toTextMateScope(HighlightingKind Kind);
 
-// Convert to LSP's semantic highlighting information.
+/// Convert to LSP's semantic highlighting information.
 std::vector<SemanticHighlightingInformation>
-toSemanticHighlightingInformation(llvm::ArrayRef<HighlightingToken> Tokens);
+toSemanticHighlightingInformation(llvm::ArrayRef<LineHighlightings> Tokens);
+
+/// Return a line-by-line diff between two highlightings.
+///  - if the tokens on a line are the same in both hightlightings, this line is
+///  omitted.
+///  - if a line exists in New but not in Old the tokens on this line are
+///  emitted.
+///  - if a line does not exists in New but exists in Old an empty line is
+///  emitted (to tell client to clear the previous highlightings on this line).
+/// \p NewMaxLine is the maximum line number from the new file.
+/// REQUIRED: Old and New are sorted.
+std::vector<LineHighlightings>
+diffHighlightings(ArrayRef<HighlightingToken> New,
+                  ArrayRef<HighlightingToken> Old, int NewMaxLine);
 
 } // namespace clangd
 } // namespace clang

diff --git a/clang-tools-extra/clangd/test/semantic-highlighting.test b/clang-tools-extra/clangd/test/semantic-highlighting.test
@@ -49,6 +49,50 @@
 # CHECK-NEXT:  }
 # CHECK-NEXT:}
 ---
+{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{"uri":"test:///foo2.cpp","languageId":"cpp","version":1,"text":"int x = 2;\nint y = 2;"}}}
+#      CHECK:  "method": "textDocument/semanticHighlighting",
+# CHECK-NEXT:  "params": {
+# CHECK-NEXT:    "lines": [
+# CHECK-NEXT:      {
+# CHECK-NEXT:        "line": 0,
+# CHECK-NEXT:        "tokens": "AAAABAABAAA="
+# CHECK-NEXT:      }
+# CHECK-NEXT:      {
+# CHECK-NEXT:        "line": 1,
+# CHECK-NEXT:        "tokens": "AAAABAABAAA="
+# CHECK-NEXT:      }
+# CHECK-NEXT:    ],
+# CHECK-NEXT:    "textDocument": {
+# CHECK-NEXT:      "uri": "file:///clangd-test/foo2.cpp"
+# CHECK-NEXT:    }
+# CHECK-NEXT:  }
+# CHECK-NEXT:}
+---
+{"jsonrpc":"2.0","method":"textDocument/didChange","params":{"textDocument":{"uri":"test:///foo.cpp","version":2},"contentChanges": [{"range":{"start": {"line": 0,"character": 10},"end": {"line": 0,"character": 10}},"rangeLength": 0,"text": "\nint y = 2;"}]}}
+#      CHECK:  "method": "textDocument/semanticHighlighting",
+# CHECK-NEXT:  "params": {
+# CHECK-NEXT:    "lines": [
+# CHECK-NEXT:      {
+# CHECK-NEXT:        "line": 1,
+# CHECK-NEXT:        "tokens": "AAAABAABAAA="
+# CHECK-NEXT:      }
+# CHECK-NEXT:   ],
+# CHECK-NEXT:    "textDocument": {
+# CHECK-NEXT:      "uri": "file:///clangd-test/foo.cpp"
+# CHECK-NEXT:    }
+# CHECK-NEXT:  }
+# CHECK-NEXT:}
+---
+{"jsonrpc":"2.0","method":"textDocument/didChange","params":{"textDocument":{"uri":"test:///foo.cpp","version":2},"contentChanges": [{"range":{"start": {"line": 0,"character": 10},"end": {"line": 1,"character": 10}},"rangeLength": 11,"text": ""}]}}
+#      CHECK:  "method": "textDocument/semanticHighlighting",
+# CHECK-NEXT:  "params": {
+# CHECK-NEXT:    "lines": [],
+# CHECK-NEXT:    "textDocument": {
+# CHECK-NEXT:      "uri": "file:///clangd-test/foo.cpp"
+# CHECK-NEXT:    }
+# CHECK-NEXT:  }
+# CHECK-NEXT:}
+---
 {"jsonrpc":"2.0","id":3,"method":"shutdown"}
 ---
 {"jsonrpc":"2.0","method":"exit"}