Skip to content

Commit b2780cd

Browse files
committed
clang-format: [JS] handle "off" in imports
Previously, the JavaScript import sorter would ignore `// clang-format off` and `on` comments. This change fixes that. It tracks whether formatting is enabled for a stretch of imports, and then only sorts and merges the imports where formatting is enabled, in individual chunks. This means that there's no meaningful total order when module references are mixed with blocks that have formatting disabled. The alternative approach would have been to sort all imports that have formatting enabled in one group. However that raises the question where to insert the formatting-off block, which can also impact symbol visibility (in particular for exports). In practice, sorting in chunks probably isn't a big problem. This change also simplifies the general algorithm: instead of tracking indices separately and sorting them, it just sorts the vector of module references. And instead of attempting to do fine grained tracking of whether the code changed order, it just prints out the module references text, and compares that to the previous text. Given that source files typically have dozens, but not even hundreds of imports, the performance impact seems negligible. Differential Revision: https://reviews.llvm.org/D101515
1 parent 7861cb6 commit b2780cd

File tree

2 files changed

+150
-45
lines changed

2 files changed

+150
-45
lines changed

clang/lib/Format/SortJavaScriptImports.cpp

Lines changed: 91 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "clang/Basic/LLVM.h"
2020
#include "clang/Basic/SourceLocation.h"
2121
#include "clang/Basic/SourceManager.h"
22+
#include "clang/Basic/TokenKinds.h"
2223
#include "clang/Format/Format.h"
2324
#include "llvm/ADT/STLExtras.h"
2425
#include "llvm/ADT/SmallVector.h"
@@ -69,6 +70,7 @@ struct JsImportedSymbol {
6970
// This struct represents both exports and imports to build up the information
7071
// required for sorting module references.
7172
struct JsModuleReference {
73+
bool FormattingOff = false;
7274
bool IsExport = false;
7375
// Module references are sorted into these categories, in order.
7476
enum ReferenceCategory {
@@ -146,57 +148,51 @@ class JavaScriptImportSorter : public TokenAnalyzer {
146148
if (References.empty())
147149
return {Result, 0};
148150

149-
SmallVector<unsigned, 16> Indices;
150-
for (unsigned i = 0, e = References.size(); i != e; ++i)
151-
Indices.push_back(i);
152-
llvm::stable_sort(Indices, [&](unsigned LHSI, unsigned RHSI) {
153-
return References[LHSI] < References[RHSI];
154-
});
155-
bool ReferencesInOrder = llvm::is_sorted(Indices);
151+
// The text range of all parsed imports, to be replaced later.
152+
SourceRange InsertionPoint = References[0].Range;
153+
InsertionPoint.setEnd(References[References.size() - 1].Range.getEnd());
156154

157-
mergeModuleReferences(References, Indices);
155+
References = sortModuleReferences(References);
158156

159157
std::string ReferencesText;
160-
bool SymbolsInOrder = true;
161-
for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
162-
JsModuleReference Reference = References[Indices[i]];
163-
if (appendReference(ReferencesText, Reference))
164-
SymbolsInOrder = false;
165-
if (i + 1 < e) {
158+
for (unsigned I = 0, E = References.size(); I != E; ++I) {
159+
JsModuleReference Reference = References[I];
160+
appendReference(ReferencesText, Reference);
161+
if (I + 1 < E) {
166162
// Insert breaks between imports and exports.
167163
ReferencesText += "\n";
168164
// Separate imports groups with two line breaks, but keep all exports
169165
// in a single group.
170166
if (!Reference.IsExport &&
171-
(Reference.IsExport != References[Indices[i + 1]].IsExport ||
172-
Reference.Category != References[Indices[i + 1]].Category))
167+
(Reference.IsExport != References[I + 1].IsExport ||
168+
Reference.Category != References[I + 1].Category))
173169
ReferencesText += "\n";
174170
}
175171
}
176-
if (ReferencesInOrder && SymbolsInOrder)
172+
llvm::StringRef PreviousText = getSourceText(InsertionPoint);
173+
if (ReferencesText == PreviousText)
177174
return {Result, 0};
178175

179-
SourceRange InsertionPoint = References[0].Range;
180-
InsertionPoint.setEnd(References[References.size() - 1].Range.getEnd());
181-
182176
// The loop above might collapse previously existing line breaks between
183177
// import blocks, and thus shrink the file. SortIncludes must not shrink
184178
// overall source length as there is currently no re-calculation of ranges
185179
// after applying source sorting.
186180
// This loop just backfills trailing spaces after the imports, which are
187181
// harmless and will be stripped by the subsequent formatting pass.
188182
// FIXME: A better long term fix is to re-calculate Ranges after sorting.
189-
unsigned PreviousSize = getSourceText(InsertionPoint).size();
183+
unsigned PreviousSize = PreviousText.size();
190184
while (ReferencesText.size() < PreviousSize) {
191185
ReferencesText += " ";
192186
}
193187

194188
// Separate references from the main code body of the file.
195-
if (FirstNonImportLine && FirstNonImportLine->First->NewlinesBefore < 2)
189+
if (FirstNonImportLine && FirstNonImportLine->First->NewlinesBefore < 2 &&
190+
!(FirstNonImportLine->First->is(tok::comment) &&
191+
FirstNonImportLine->First->TokenText.trim() == "// clang-format on"))
196192
ReferencesText += "\n";
197193

198194
LLVM_DEBUG(llvm::dbgs() << "Replacing imports:\n"
199-
<< getSourceText(InsertionPoint) << "\nwith:\n"
195+
<< PreviousText << "\nwith:\n"
200196
<< ReferencesText << "\n");
201197
auto Err = Result.add(tooling::Replacement(
202198
Env.getSourceManager(), CharSourceRange::getCharRange(InsertionPoint),
@@ -248,23 +244,53 @@ class JavaScriptImportSorter : public TokenAnalyzer {
248244
SM.getFileOffset(End) - SM.getFileOffset(Begin));
249245
}
250246

247+
// Sorts the given module references.
248+
// Imports can have formatting disabled (FormattingOff), so the code below
249+
// skips runs of "no-formatting" module references, and sorts/merges the
250+
// references that have formatting enabled in individual chunks.
251+
SmallVector<JsModuleReference, 16>
252+
sortModuleReferences(const SmallVector<JsModuleReference, 16> &References) {
253+
// Sort module references.
254+
// Imports can have formatting disabled (FormattingOff), so the code below
255+
// skips runs of "no-formatting" module references, and sorts other
256+
// references per group.
257+
const auto *Start = References.begin();
258+
SmallVector<JsModuleReference, 16> ReferencesSorted;
259+
while (Start != References.end()) {
260+
while (Start != References.end() && Start->FormattingOff) {
261+
// Skip over all imports w/ disabled formatting.
262+
ReferencesSorted.push_back(*Start);
263+
Start++;
264+
}
265+
SmallVector<JsModuleReference, 16> SortChunk;
266+
while (Start != References.end() && !Start->FormattingOff) {
267+
// Skip over all imports w/ disabled formatting.
268+
SortChunk.push_back(*Start);
269+
Start++;
270+
}
271+
llvm::stable_sort(SortChunk);
272+
mergeModuleReferences(SortChunk);
273+
ReferencesSorted.insert(ReferencesSorted.end(), SortChunk.begin(),
274+
SortChunk.end());
275+
}
276+
return ReferencesSorted;
277+
}
278+
251279
// Merge module references.
252280
// After sorting, find all references that import named symbols from the
253281
// same URL and merge their names. E.g.
254282
// import {X} from 'a';
255283
// import {Y} from 'a';
256284
// should be rewritten to:
257285
// import {X, Y} from 'a';
258-
// Note: this modifies the passed in ``Indices`` vector (by removing no longer
259-
// needed references), but not ``References``.
260-
// ``JsModuleReference``s that get merged have the ``SymbolsMerged`` flag
261-
// flipped to true.
262-
void mergeModuleReferences(SmallVector<JsModuleReference, 16> &References,
263-
SmallVector<unsigned, 16> &Indices) {
264-
JsModuleReference *PreviousReference = &References[Indices[0]];
265-
auto *It = std::next(Indices.begin());
266-
while (It != std::end(Indices)) {
267-
JsModuleReference *Reference = &References[*It];
286+
// Note: this modifies the passed in ``References`` vector (by removing no
287+
// longer needed references).
288+
void mergeModuleReferences(SmallVector<JsModuleReference, 16> &References) {
289+
if (References.empty())
290+
return;
291+
JsModuleReference *PreviousReference = References.begin();
292+
auto *Reference = std::next(References.begin());
293+
while (Reference != References.end()) {
268294
// Skip:
269295
// import 'foo';
270296
// import * as foo from 'foo'; on either previous or this.
@@ -278,20 +304,19 @@ class JavaScriptImportSorter : public TokenAnalyzer {
278304
!Reference->DefaultImport.empty() || Reference->Symbols.empty() ||
279305
PreviousReference->URL != Reference->URL) {
280306
PreviousReference = Reference;
281-
++It;
307+
++Reference;
282308
continue;
283309
}
284310
// Merge symbols from identical imports.
285311
PreviousReference->Symbols.append(Reference->Symbols);
286312
PreviousReference->SymbolsMerged = true;
287313
// Remove the merged import.
288-
It = Indices.erase(It);
314+
Reference = References.erase(Reference);
289315
}
290316
}
291317

292-
// Appends ``Reference`` to ``Buffer``, returning true if text within the
293-
// ``Reference`` changed (e.g. symbol order).
294-
bool appendReference(std::string &Buffer, JsModuleReference &Reference) {
318+
// Appends ``Reference`` to ``Buffer``.
319+
void appendReference(std::string &Buffer, JsModuleReference &Reference) {
295320
// Sort the individual symbols within the import.
296321
// E.g. `import {b, a} from 'x';` -> `import {a, b} from 'x';`
297322
SmallVector<JsImportedSymbol, 1> Symbols = Reference.Symbols;
@@ -303,7 +328,7 @@ class JavaScriptImportSorter : public TokenAnalyzer {
303328
// Symbols didn't change, just emit the entire module reference.
304329
StringRef ReferenceStmt = getSourceText(Reference.Range);
305330
Buffer += ReferenceStmt;
306-
return false;
331+
return;
307332
}
308333
// Stitch together the module reference start...
309334
Buffer += getSourceText(Reference.Range.getBegin(), Reference.SymbolsStart);
@@ -315,7 +340,6 @@ class JavaScriptImportSorter : public TokenAnalyzer {
315340
}
316341
// ... followed by the module reference end.
317342
Buffer += getSourceText(Reference.SymbolsEnd, Reference.Range.getEnd());
318-
return true;
319343
}
320344

321345
// Parses module references in the given lines. Returns the module references,
@@ -328,9 +352,30 @@ class JavaScriptImportSorter : public TokenAnalyzer {
328352
SourceLocation Start;
329353
AnnotatedLine *FirstNonImportLine = nullptr;
330354
bool AnyImportAffected = false;
355+
bool FormattingOff = false;
331356
for (auto *Line : AnnotatedLines) {
332357
Current = Line->First;
333358
LineEnd = Line->Last;
359+
// clang-format comments toggle formatting on/off.
360+
// This is tracked in FormattingOff here and on JsModuleReference.
361+
while (Current && Current->is(tok::comment)) {
362+
StringRef CommentText = Current->TokenText.trim();
363+
if (CommentText == "// clang-format off") {
364+
FormattingOff = true;
365+
} else if (CommentText == "// clang-format on") {
366+
FormattingOff = false;
367+
// Special case: consider a trailing "clang-format on" line to be part
368+
// of the module reference, so that it gets moved around together with
369+
// it (as opposed to the next module reference, which might get sorted
370+
// around).
371+
if (!References.empty()) {
372+
References.back().Range.setEnd(Current->Tok.getEndLoc());
373+
Start = Current->Tok.getEndLoc().getLocWithOffset(1);
374+
}
375+
}
376+
// Handle all clang-format comments on a line, e.g. for an empty block.
377+
Current = Current->Next;
378+
}
334379
skipComments();
335380
if (Start.isInvalid() || References.empty())
336381
// After the first file level comment, consider line comments to be part
@@ -343,6 +388,7 @@ class JavaScriptImportSorter : public TokenAnalyzer {
343388
continue;
344389
}
345390
JsModuleReference Reference;
391+
Reference.FormattingOff = FormattingOff;
346392
Reference.Range.setBegin(Start);
347393
if (!parseModuleReference(Keywords, Reference)) {
348394
if (!FirstNonImportLine)
@@ -354,13 +400,14 @@ class JavaScriptImportSorter : public TokenAnalyzer {
354400
Reference.Range.setEnd(LineEnd->Tok.getEndLoc());
355401
LLVM_DEBUG({
356402
llvm::dbgs() << "JsModuleReference: {"
357-
<< "is_export: " << Reference.IsExport
403+
<< "formatting_off: " << Reference.FormattingOff
404+
<< ", is_export: " << Reference.IsExport
358405
<< ", cat: " << Reference.Category
359406
<< ", url: " << Reference.URL
360407
<< ", prefix: " << Reference.Prefix;
361-
for (size_t i = 0; i < Reference.Symbols.size(); ++i)
362-
llvm::dbgs() << ", " << Reference.Symbols[i].Symbol << " as "
363-
<< Reference.Symbols[i].Alias;
408+
for (size_t I = 0; I < Reference.Symbols.size(); ++I)
409+
llvm::dbgs() << ", " << Reference.Symbols[I].Symbol << " as "
410+
<< Reference.Symbols[I].Alias;
364411
llvm::dbgs() << ", text: " << getSourceText(Reference.Range);
365412
llvm::dbgs() << "}\n";
366413
});

clang/unittests/Format/SortImportsTestJS.cpp

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,8 @@ TEST_F(SortImportsTestJS, MergeImports) {
358358

359359
// do not merge imports and exports
360360
verifySort("import {A} from 'foo';\n"
361-
"export {B} from 'foo';",
361+
"\n"
362+
"export {B} from 'foo';\n",
362363
"import {A} from 'foo';\n"
363364
"export {B} from 'foo';");
364365
// do merge exports
@@ -373,6 +374,63 @@ TEST_F(SortImportsTestJS, MergeImports) {
373374
"import './a';\n");
374375
}
375376

377+
TEST_F(SortImportsTestJS, RespectsClangFormatOff) {
378+
verifySort("// clang-format off\n"
379+
"import {B} from './b';\n"
380+
"import {A} from './a';\n"
381+
"// clang-format on\n",
382+
"// clang-format off\n"
383+
"import {B} from './b';\n"
384+
"import {A} from './a';\n"
385+
"// clang-format on\n");
386+
387+
verifySort("import {A} from './sorted1_a';\n"
388+
"import {B} from './sorted1_b';\n"
389+
"// clang-format off\n"
390+
"import {B} from './unsorted_b';\n"
391+
"import {A} from './unsorted_a';\n"
392+
"// clang-format on\n"
393+
"import {A} from './sorted2_a';\n"
394+
"import {B} from './sorted2_b';\n",
395+
"import {B} from './sorted1_b';\n"
396+
"import {A} from './sorted1_a';\n"
397+
"// clang-format off\n"
398+
"import {B} from './unsorted_b';\n"
399+
"import {A} from './unsorted_a';\n"
400+
"// clang-format on\n"
401+
"import {B} from './sorted2_b';\n"
402+
"import {A} from './sorted2_a';\n");
403+
404+
// Boundary cases
405+
verifySort("// clang-format on\n", "// clang-format on\n");
406+
verifySort("// clang-format off\n", "// clang-format off\n");
407+
verifySort("// clang-format on\n"
408+
"// clang-format off\n",
409+
"// clang-format on\n"
410+
"// clang-format off\n");
411+
verifySort("// clang-format off\n"
412+
"// clang-format on\n"
413+
"import {A} from './a';\n"
414+
"import {B} from './b';\n",
415+
"// clang-format off\n"
416+
"// clang-format on\n"
417+
"import {B} from './b';\n"
418+
"import {A} from './a';\n");
419+
// section ends with comment
420+
verifySort("// clang-format on\n"
421+
"import {A} from './a';\n"
422+
"import {B} from './b';\n"
423+
"import {C} from './c';\n"
424+
"\n" // inserted empty line is working as intended: splits imports
425+
// section from main code body
426+
"// clang-format off\n",
427+
"// clang-format on\n"
428+
"import {C} from './c';\n"
429+
"import {B} from './b';\n"
430+
"import {A} from './a';\n"
431+
"// clang-format off\n");
432+
}
433+
376434
} // end namespace
377435
} // end namespace format
378436
} // end namespace clang

0 commit comments

Comments
 (0)