Skip to content

Commit

Permalink
[clangd] Avoid libFormat's objective-c guessing heuristic where possi…
Browse files Browse the repository at this point in the history
…ble (#84133)

This avoids a known libFormat bug where the heuristic can OOM on certain
large files (particularly single-header libraries such as miniaudio.h).

The OOM will still happen on affected files if you actually try to
format them (this is harder to avoid since the underlyting issue affects
the actual formatting logic, not just the language-guessing heuristic),
but at least it's avoided during non-modifying operations like hover,
and modifying operations that do local formatting like code completion.

Fixes clangd/clangd#719
Fixes clangd/clangd#1384
Fixes #70945
  • Loading branch information
HighCommander4 committed Mar 11, 2024
1 parent 561ddb1 commit 3093d73
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 12 deletions.
10 changes: 5 additions & 5 deletions clang-tools-extra/clangd/ClangdServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ void ClangdServer::formatFile(PathRef File, std::optional<Range> Rng,
auto Action = [File = File.str(), Code = std::move(*Code),
Ranges = std::vector<tooling::Range>{RequestedRange},
CB = std::move(CB), this]() mutable {
format::FormatStyle Style = getFormatStyleForFile(File, Code, TFS);
format::FormatStyle Style = getFormatStyleForFile(File, Code, TFS, true);
tooling::Replacements IncludeReplaces =
format::sortIncludes(Style, Code, Ranges, File);
auto Changed = tooling::applyAllReplacements(Code, IncludeReplaces);
Expand Down Expand Up @@ -551,7 +551,7 @@ void ClangdServer::formatOnType(PathRef File, Position Pos,
auto Action = [File = File.str(), Code = std::move(*Code),
TriggerText = TriggerText.str(), CursorPos = *CursorPos,
CB = std::move(CB), this]() mutable {
auto Style = getFormatStyleForFile(File, Code, TFS);
auto Style = getFormatStyleForFile(File, Code, TFS, false);
std::vector<TextEdit> Result;
for (const tooling::Replacement &R :
formatIncremental(Code, CursorPos, TriggerText, Style))
Expand Down Expand Up @@ -605,7 +605,7 @@ void ClangdServer::rename(PathRef File, Position Pos, llvm::StringRef NewName,

if (Opts.WantFormat) {
auto Style = getFormatStyleForFile(File, InpAST->Inputs.Contents,
*InpAST->Inputs.TFS);
*InpAST->Inputs.TFS, false);
llvm::Error Err = llvm::Error::success();
for (auto &E : R->GlobalChanges)
Err =
Expand Down Expand Up @@ -762,7 +762,7 @@ void ClangdServer::applyTweak(PathRef File, Range Sel, StringRef TweakID,
for (auto &It : (*Effect)->ApplyEdits) {
Edit &E = It.second;
format::FormatStyle Style =
getFormatStyleForFile(File, E.InitialCode, TFS);
getFormatStyleForFile(File, E.InitialCode, TFS, false);
if (llvm::Error Err = reformatEdit(E, Style))
elog("Failed to format {0}: {1}", It.first(), std::move(Err));
}
Expand Down Expand Up @@ -825,7 +825,7 @@ void ClangdServer::findHover(PathRef File, Position Pos,
if (!InpAST)
return CB(InpAST.takeError());
format::FormatStyle Style = getFormatStyleForFile(
File, InpAST->Inputs.Contents, *InpAST->Inputs.TFS);
File, InpAST->Inputs.Contents, *InpAST->Inputs.TFS, false);
CB(clangd::getHover(InpAST->AST, Pos, std::move(Style), Index));
};

Expand Down
4 changes: 2 additions & 2 deletions clang-tools-extra/clangd/CodeComplete.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1628,7 +1628,7 @@ class CodeCompleteFlow {
IsUsingDeclaration = Recorder->CCContext.isUsingDeclaration();
auto Style = getFormatStyleForFile(SemaCCInput.FileName,
SemaCCInput.ParseInput.Contents,
*SemaCCInput.ParseInput.TFS);
*SemaCCInput.ParseInput.TFS, false);
const auto NextToken = findTokenAfterCompletionPoint(
Recorder->CCSema->getPreprocessor().getCodeCompletionLoc(),
Recorder->CCSema->getSourceManager(), Recorder->CCSema->LangOpts);
Expand Down Expand Up @@ -1719,7 +1719,7 @@ class CodeCompleteFlow {
ProxSources[FileName].Cost = 0;
FileProximity.emplace(ProxSources);

auto Style = getFormatStyleForFile(FileName, Content, TFS);
auto Style = getFormatStyleForFile(FileName, Content, TFS, false);
// This will only insert verbatim headers.
Inserter.emplace(FileName, Content, Style,
/*BuildDir=*/"", /*HeaderSearchInfo=*/nullptr);
Expand Down
2 changes: 1 addition & 1 deletion clang-tools-extra/clangd/IncludeCleaner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ std::vector<Diag> generateMissingIncludeDiagnostics(
const SourceManager &SM = AST.getSourceManager();
const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID());

auto FileStyle = getFormatStyleForFile(AST.tuPath(), Code, TFS);
auto FileStyle = getFormatStyleForFile(AST.tuPath(), Code, TFS, false);

tooling::HeaderIncludes HeaderIncludes(AST.tuPath(), Code,
FileStyle.IncludeStyle);
Expand Down
2 changes: 1 addition & 1 deletion clang-tools-extra/clangd/ParsedAST.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -626,7 +626,7 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs,
// (e.g. incomplete type) and attach include insertion fixes to diagnostics.
if (Inputs.Index && !BuildDir.getError()) {
auto Style =
getFormatStyleForFile(Filename, Inputs.Contents, *Inputs.TFS);
getFormatStyleForFile(Filename, Inputs.Contents, *Inputs.TFS, false);
auto Inserter = std::make_shared<IncludeInserter>(
Filename, Inputs.Contents, Style, BuildDir.get(),
&Clang->getPreprocessor().getHeaderSearchInfo());
Expand Down
16 changes: 15 additions & 1 deletion clang-tools-extra/clangd/SourceCode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,21 @@ std::optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {

format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
llvm::StringRef Content,
const ThreadsafeFS &TFS) {
const ThreadsafeFS &TFS,
bool FormatFile) {
// Unless we're formatting a substantial amount of code (the entire file
// or an arbitrarily large range), skip libFormat's heuristic check for
// .h files that tries to determine whether the file contains objective-c
// code. (This is accomplished by passing empty code contents to getStyle().
// The heuristic is the only thing that looks at the contents.)
// This is a workaround for PR60151, a known issue in libFormat where this
// heuristic can OOM on large files. If we *are* formatting the entire file,
// there's no point in doing this because the actual format::reformat() call
// will run into the same OOM; we'd just be risking inconsistencies between
// clangd and clang-format on smaller .h files where they disagree on what
// language is detected.
if (!FormatFile)
Content = {};
auto Style = format::getStyle(format::DefaultFormatStyle, File,
format::DefaultFallbackStyle, Content,
TFS.view(/*CWD=*/std::nullopt).get());
Expand Down
6 changes: 5 additions & 1 deletion clang-tools-extra/clangd/SourceCode.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,13 @@ std::optional<std::string> getCanonicalPath(const FileEntryRef F,
/// FIXME: should we be caching the .clang-format file search?
/// This uses format::DefaultFormatStyle and format::DefaultFallbackStyle,
/// though the latter may have been overridden in main()!
/// \p FormatFile indicates whether the returned FormatStyle is used
/// to format the entire main file (or a range selected by the user
/// which can be arbitrarily long).
format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
llvm::StringRef Content,
const ThreadsafeFS &TFS);
const ThreadsafeFS &TFS,
bool FormatFile);

/// Cleanup and format the given replacements.
llvm::Expected<tooling::Replacements>
Expand Down
2 changes: 1 addition & 1 deletion clang-tools-extra/clangd/tool/Check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ class Checker {

// FIXME: Check that resource-dir/built-in-headers exist?

Style = getFormatStyleForFile(File, Inputs.Contents, TFS);
Style = getFormatStyleForFile(File, Inputs.Contents, TFS, false);

return true;
}
Expand Down
38 changes: 38 additions & 0 deletions clang-tools-extra/clangd/unittests/SourceCodeTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1090,6 +1090,44 @@ TEST(ApplyEditsTest, EndLineOutOfRange) {
FailedWithMessage("Line value is out of range (100)"));
}

TEST(FormatStyleForFile, LanguageGuessingHeuristic) {
StringRef ObjCContent = "@interface Foo\n@end\n";
StringRef CppContent = "class Foo {};\n";
using LK = format::FormatStyle::LanguageKind;
struct TestCase {
llvm::StringRef Filename;
llvm::StringRef Contents;
bool FormatFile;
LK ExpectedLanguage;
} TestCases[] = {
// If the file extension identifies the file as ObjC, the guessed
// language should be ObjC regardless of content or FormatFile flag.
{"foo.mm", ObjCContent, true, LK::LK_ObjC},
{"foo.mm", ObjCContent, false, LK::LK_ObjC},
{"foo.mm", CppContent, true, LK::LK_ObjC},
{"foo.mm", CppContent, false, LK::LK_ObjC},

// If the file extension is ambiguous like .h, FormatFile=true should
// result in using libFormat's heuristic to guess the language based
// on the file contents.
{"foo.h", ObjCContent, true, LK::LK_ObjC},
{"foo.h", CppContent, true, LK::LK_Cpp},

// With FomatFile=false, the language guessing heuristic should be
// bypassed
{"foo.h", ObjCContent, false, LK::LK_Cpp},
{"foo.h", CppContent, false, LK::LK_Cpp},
};

MockFS FS;
for (const auto &[Filename, Contents, FormatFile, ExpectedLanguage] :
TestCases) {
EXPECT_EQ(
getFormatStyleForFile(Filename, Contents, FS, FormatFile).Language,
ExpectedLanguage);
}
}

} // namespace
} // namespace clangd
} // namespace clang

0 comments on commit 3093d73

Please sign in to comment.