Merge pull request #3562 from zelliott/condense-tokens

[api-extractor] Merge tokens in a separate post-processing method after original construction instead of during construction
microsoft · Aug 10, 2022 · cc9843c · cc9843c
2 parents eaee086 + 2045cc1
commit cc9843c
Show file tree

Hide file tree

Showing 11 changed files with 355 additions and 223 deletions.
diff --git a/apps/api-extractor/src/generators/ExcerptBuilder.ts b/apps/api-extractor/src/generators/ExcerptBuilder.ts
@@ -48,13 +48,6 @@ interface IBuildSpanState {
 
   tokenRangesByNode: Map<ts.Node, IExcerptTokenRange>;
 
-  /**
-   * Normally adjacent tokens of the same kind get merged, to avoid creating lots of unnecessary extra tokens.
-   * However when an captured excerpt needs to start/end at a specific character, we temporarily disable merging by
-   * setting this flag.  After the new token is added, this flag is cleared.
-   */
-  disableMergingForNextToken: boolean;
-
   /**
    * Tracks whether the last appended token was a separator. If so, and we're in the middle of
    * capturing a token range, then omit the separator from the range.
@@ -119,9 +112,9 @@ export class ExcerptBuilder {
       startingNode: span.node,
       stopBeforeChildKind,
       tokenRangesByNode,
-      disableMergingForNextToken: false,
       lastAppendedTokenIsSeparator: false
     });
+    ExcerptBuilder._condenseTokens(excerptTokens, [...tokenRangesByNode.values()]);
   }
 
   public static createEmptyTokenRange(): IExcerptTokenRange {
@@ -141,7 +134,6 @@ export class ExcerptBuilder {
     if (capturedTokenRange) {
       // We will assign capturedTokenRange.startIndex to be the index of the next token to be appended
       excerptStartIndex = excerptTokens.length;
-      state.disableMergingForNextToken = true;
     }
 
     if (span.prefix) {
@@ -159,11 +151,10 @@ export class ExcerptBuilder {
           excerptTokens,
           ExcerptTokenKind.Reference,
           span.prefix,
-          state,
           canonicalReference
         );
       } else {
-        ExcerptBuilder._appendToken(excerptTokens, ExcerptTokenKind.Content, span.prefix, state);
+        ExcerptBuilder._appendToken(excerptTokens, ExcerptTokenKind.Content, span.prefix);
       }
       state.lastAppendedTokenIsSeparator = false;
     }
@@ -182,11 +173,11 @@ export class ExcerptBuilder {
     }
 
     if (span.suffix) {
-      ExcerptBuilder._appendToken(excerptTokens, ExcerptTokenKind.Content, span.suffix, state);
+      ExcerptBuilder._appendToken(excerptTokens, ExcerptTokenKind.Content, span.suffix);
       state.lastAppendedTokenIsSeparator = false;
     }
     if (span.separator) {
-      ExcerptBuilder._appendToken(excerptTokens, ExcerptTokenKind.Content, span.separator, state);
+      ExcerptBuilder._appendToken(excerptTokens, ExcerptTokenKind.Content, span.separator);
       state.lastAppendedTokenIsSeparator = true;
     }
 
@@ -195,16 +186,14 @@ export class ExcerptBuilder {
       capturedTokenRange.startIndex = excerptStartIndex;
 
       // We will assign capturedTokenRange.endIndex to be the index after the last token
-      // that was appended so far. However, if the last appended token was a separator and
-      // there is no additional spaces, omit it from the range.
+      // that was appended so far. However, if the last appended token was a separator, omit
+      // it from the range.
       let excerptEndIndex: number = excerptTokens.length;
-      if (state.lastAppendedTokenIsSeparator && excerptEndIndex > excerptStartIndex + 1) {
+      if (state.lastAppendedTokenIsSeparator) {
         excerptEndIndex--;
       }
 
       capturedTokenRange.endIndex = excerptEndIndex;
-
-      state.disableMergingForNextToken = true;
     }
 
     return true;
@@ -214,54 +203,105 @@ export class ExcerptBuilder {
     excerptTokens: IExcerptToken[],
     excerptTokenKind: ExcerptTokenKind,
     text: string,
-    state: IBuildSpanState,
     canonicalReference?: DeclarationReference
   ): void {
     if (text.length === 0) {
       return;
     }
 
-    if (excerptTokenKind !== ExcerptTokenKind.Content) {
-      if (
-        excerptTokenKind === ExcerptTokenKind.Reference &&
-        excerptTokens.length > 1 &&
-        !state.disableMergingForNextToken
-      ) {
-        // If the previous two tokens were a Reference and a '.', then concatenate
-        // all three tokens as a qualified name Reference.
-        const previousTokenM1: IExcerptToken = excerptTokens[excerptTokens.length - 1];
-        const previousTokenM2: IExcerptToken = excerptTokens[excerptTokens.length - 2];
+    const excerptToken: IExcerptToken = { kind: excerptTokenKind, text: text };
+    if (canonicalReference !== undefined) {
+      excerptToken.canonicalReference = canonicalReference.toString();
+    }
+    excerptTokens.push(excerptToken);
+  }
+
+  /**
+   * Condenses the provided excerpt tokens by merging tokens where possible. Updates the provided token ranges to
+   * remain accurate after token merging.
+   *
+   * @remarks
+   * For example, suppose we have excerpt tokens ["A", "B", "C"] and a token range [0, 2]. If the excerpt tokens
+   * are condensed to ["AB", "C"], then the token range would be updated to [0, 1]. Note that merges are only
+   * performed if they are compatible with the provided token ranges. In the example above, if our token range was
+   * originally [0, 1], we would not be able to merge tokens "A" and "B".
+   */
+  private static _condenseTokens(excerptTokens: IExcerptToken[], tokenRanges: IExcerptTokenRange[]): void {
+    // This set is used to quickly look up a start or end index.
+    const startOrEndIndices: Set<number> = new Set();
+    for (const tokenRange of tokenRanges) {
+      startOrEndIndices.add(tokenRange.startIndex);
+      startOrEndIndices.add(tokenRange.endIndex);
+    }
+
+    for (let currentIndex: number = 1; currentIndex < excerptTokens.length; ++currentIndex) {
+      while (currentIndex < excerptTokens.length) {
+        const prevPrevToken: IExcerptToken = excerptTokens[currentIndex - 2]; // May be undefined
+        const prevToken: IExcerptToken = excerptTokens[currentIndex - 1];
+        const currentToken: IExcerptToken = excerptTokens[currentIndex];
+
+        // The number of excerpt tokens that are merged in this iteration. We need this to determine
+        // how to update the start and end indices of our token ranges.
+        let mergeCount: number;
+
+        // There are two types of merges that can occur. We only perform these merges if they are
+        // compatible with all of our token ranges.
         if (
-          previousTokenM1.kind === ExcerptTokenKind.Content &&
-          previousTokenM1.text.trim() === '.' &&
-          previousTokenM2.kind === ExcerptTokenKind.Reference
+          prevPrevToken &&
+          prevPrevToken.kind === ExcerptTokenKind.Reference &&
+          prevToken.kind === ExcerptTokenKind.Content &&
+          prevToken.text.trim() === '.' &&
+          currentToken.kind === ExcerptTokenKind.Reference &&
+          !startOrEndIndices.has(currentIndex) &&
+          !startOrEndIndices.has(currentIndex - 1)
         ) {
-          previousTokenM2.text += '.' + text;
-          if (canonicalReference !== undefined) {
-            previousTokenM2.canonicalReference = canonicalReference.toString();
+          // If the current token is a reference token, the previous token is a ".", and the previous-
+          // previous token is a reference token, then merge all three tokens into a reference token.
+          //
+          // For example: Given ["MyNamespace" (R), ".", "MyClass" (R)], tokens "." and "MyClass" might
+          // be merged into "MyNamespace". The condensed token would be ["MyNamespace.MyClass" (R)].
+          prevPrevToken.text += prevToken.text + currentToken.text;
+          prevPrevToken.canonicalReference = currentToken.canonicalReference;
+          mergeCount = 2;
+          currentIndex--;
+        } else if (
+          // If the current and previous tokens are both content tokens, then merge the tokens into a
+          // single content token. For example: Given ["export ", "declare class"], these tokens
+          // might be merged into "export declare class".
+          prevToken.kind === ExcerptTokenKind.Content &&
+          prevToken.kind === currentToken.kind &&
+          !startOrEndIndices.has(currentIndex)
+        ) {
+          prevToken.text += currentToken.text;
+          mergeCount = 1;
+        } else {
+          // Otherwise, no merging can occur here. Continue to the next index.
+          break;
+        }
+
+        // Remove the now redundant excerpt token(s), as they were merged into a previous token.
+        excerptTokens.splice(currentIndex, mergeCount);
+
+        // Update the start and end indices for all token ranges based upon how many excerpt
+        // tokens were merged and in what positions.
+        for (const tokenRange of tokenRanges) {
+          if (tokenRange.startIndex > currentIndex) {
+            tokenRange.startIndex -= mergeCount;
+          }
+
+          if (tokenRange.endIndex > currentIndex) {
+            tokenRange.endIndex -= mergeCount;
           }
-          excerptTokens.pop(); // remove previousTokenM1;
-          return;
         }
-      }
-    } else {
-      // If someone referenced this index, then we need to start a new token
-      if (excerptTokens.length > 0 && !state.disableMergingForNextToken) {
-        // Otherwise, can we merge with the previous token?
-        const previousToken: IExcerptToken = excerptTokens[excerptTokens.length - 1];
-        if (previousToken.kind === excerptTokenKind) {
-          previousToken.text += text;
-          return;
+
+        // Clear and repopulate our set with the updated indices.
+        startOrEndIndices.clear();
+        for (const tokenRange of tokenRanges) {
+          startOrEndIndices.add(tokenRange.startIndex);
+          startOrEndIndices.add(tokenRange.endIndex);
         }
       }
     }
-
-    const excerptToken: IExcerptToken = { kind: excerptTokenKind, text: text };
-    if (canonicalReference !== undefined) {
-      excerptToken.canonicalReference = canonicalReference.toString();
-    }
-    excerptTokens.push(excerptToken);
-    state.disableMergingForNextToken = false;
   }
 
   private static _isDeclarationName(name: ts.Identifier): boolean {

diff --git a/build-tests/api-documenter-scenarios/etc/inheritedMembers/api-documenter-scenarios.api.json b/build-tests/api-documenter-scenarios/etc/inheritedMembers/api-documenter-scenarios.api.json
@@ -188,7 +188,11 @@
             },
             {
               "kind": "Content",
-              "text": "<number> "
+              "text": "<number>"
+            },
+            {
+              "kind": "Content",
+              "text": " "
             }
           ],
           "releaseTag": "Public",
@@ -303,7 +307,7 @@
           ],
           "extendsTokenRange": {
             "startIndex": 1,
-            "endIndex": 2
+            "endIndex": 3
           },
           "implementsTokenRanges": []
         },

diff --git a/...nter-scenarios/etc/inheritedMembers/markdown/api-documenter-scenarios.class1.md b/...nter-scenarios/etc/inheritedMembers/markdown/api-documenter-scenarios.class1.md
@@ -10,7 +10,7 @@
 ```typescript
 export declare class Class1 extends Class2<number> 
 ```
-<b>Extends:</b> [Class2](./api-documenter-scenarios.class2.md)
+<b>Extends:</b> [Class2](./api-documenter-scenarios.class2.md)<!-- -->&lt;number&gt;
 
 ## Properties
 

diff --git a/build-tests/api-documenter-test/etc/api-documenter-test.api.json b/build-tests/api-documenter-test/etc/api-documenter-test.api.json
@@ -363,11 +363,7 @@
             },
             {
               "kind": "Content",
-              "text": " "
-            },
-            {
-              "kind": "Content",
-              "text": "implements "
+              "text": " implements "
             },
             {
               "kind": "Reference",
@@ -548,11 +544,7 @@
                 },
                 {
                   "kind": "Content",
-                  "text": " "
-                },
-                {
-                  "kind": "Content",
-                  "text": "= "
+                  "text": " = "
                 },
                 {
                   "kind": "Reference",
@@ -588,15 +580,15 @@
                     "endIndex": 2
                   },
                   "defaultTypeTokenRange": {
-                    "startIndex": 4,
-                    "endIndex": 5
+                    "startIndex": 3,
+                    "endIndex": 4
                   }
                 }
               ],
               "isStatic": false,
               "returnTypeTokenRange": {
-                "startIndex": 8,
-                "endIndex": 9
+                "startIndex": 7,
+                "endIndex": 8
               },
               "releaseTag": "Public",
               "isProtected": false,
@@ -605,8 +597,8 @@
                 {
                   "parameterName": "x",
                   "parameterTypeTokenRange": {
-                    "startIndex": 6,
-                    "endIndex": 7
+                    "startIndex": 5,
+                    "endIndex": 6
                   },
                   "isOptional": false
                 }
@@ -976,11 +968,7 @@
                 },
                 {
                   "kind": "Content",
-                  "text": ";"
-                },
-                {
-                  "kind": "Content",
-                  "text": "\n\nset writeableProperty(value: string);"
+                  "text": ";\n\nset writeableProperty(value: string);"
                 }
               ],
               "isReadonly": false,
@@ -1030,12 +1018,12 @@
           },
           "implementsTokenRanges": [
             {
-              "startIndex": 4,
-              "endIndex": 5
+              "startIndex": 3,
+              "endIndex": 4
             },
             {
-              "startIndex": 6,
-              "endIndex": 7
+              "startIndex": 5,
+              "endIndex": 6
             }
           ]
         },