Merged

Changes from all commits (24 commits)
f3bf8c9
add raw string index location to tokens
inferrinizzard Aug 4, 2022
5159b38
update editorconfig
inferrinizzard Aug 4, 2022
8960511
fix token insertion cases
inferrinizzard Aug 4, 2022
52c4d24
add line + col token position
inferrinizzard Aug 4, 2022
b426b5f
import LINEBREAK_REGEX from regexUtils
inferrinizzard Aug 4, 2022
46656c3
add basic token position test
inferrinizzard Aug 4, 2022
01166b9
fix line counter bug
inferrinizzard Aug 4, 2022
5b5cf10
Merge branch 'master' into lexer/tokenizer-location
inferrinizzard Aug 4, 2022
31b71b6
reorder LINEBREAK_REGEX priority
inferrinizzard Aug 4, 2022
9eb302b
update token attribute comments
inferrinizzard Aug 4, 2022
edada2d
rename index to start
inferrinizzard Aug 7, 2022
f7e1944
add end attribute to token position
inferrinizzard Aug 7, 2022
8b5e021
add helper function to update line and col numbers
inferrinizzard Aug 7, 2022
39cd207
move position updates to after matchedToken creation
inferrinizzard Aug 7, 2022
5ff4499
add basic multi-line position test
inferrinizzard Aug 7, 2022
c07c10c
make line and col 1-based
inferrinizzard Aug 7, 2022
ef8a95e
rm use_tabs
inferrinizzard Aug 7, 2022
b0c95e5
Merge branch 'master' into lexer/tokenizer-location
inferrinizzard Aug 9, 2022
2eaf905
remove line and col attributes
inferrinizzard Aug 9, 2022
ed20517
update EOF token
inferrinizzard Aug 10, 2022
1aa7f20
rm tokenPosition.test.ts
inferrinizzard Aug 10, 2022
1ba93b0
rm LINEBREAK_REGEX
inferrinizzard Aug 10, 2022
41fbe74
fix position for bigquery nested angle tokens
inferrinizzard Aug 10, 2022
1d737e5
use Infinity
inferrinizzard Aug 10, 2022
2 changes: 2 additions & 0 deletions src/languages/bigquery/bigquery.formatter.ts
@@ -203,6 +203,8 @@ function combineParameterizedTypes(tokens: Token[]) {
         type: TokenType.IDENTIFIER,
         raw: typeDefTokens.map(formatTypeDefToken('raw')).join(''),
         text: typeDefTokens.map(formatTypeDefToken('text')).join(''),
+        start: token.start,
+        end: token.end + typeDefTokens.map(t => t.text.length).reduce((a, b) => a + b),
       });
       i = endIndex;
     } else {
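For readers following the new `end` arithmetic above, a worked example may help. Everything below is hypothetical: it assumes the tokenizer splits `ARRAY<INT64>` into an `ARRAY` token followed by `<`, `INT64`, and `>`.

```ts
// Hypothetical tokens for the input "ARRAY<INT64>":
const token = { raw: 'ARRAY', start: 0, end: 5 }; // the base identifier
const typeDefTokens = [{ text: '<' }, { text: 'INT64' }, { text: '>' }];

// Same computation as the diff: sum the text lengths of the merged tokens.
const end = token.end + typeDefTokens.map(t => t.text.length).reduce((a, b) => a + b);
// end === 5 + (1 + 5 + 1) === 12, i.e. one past the closing '>' in the source
```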
2 changes: 1 addition & 1 deletion src/languages/spark/spark.formatter.ts
@@ -151,7 +151,7 @@ function postProcess(tokens: Token[]) {
     if (token.text === 'ITEMS' && token.type === TokenType.RESERVED_KEYWORD) {
       if (!(prevToken.text === 'COLLECTION' && nextToken.text === 'TERMINATED')) {
         // this is a word and not COLLECTION ITEMS
-        return { type: TokenType.IDENTIFIER, raw: token.raw, text: token.raw };
+        return { ...token, type: TokenType.IDENTIFIER, text: token.raw };
       }
     }
 
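The switch to a spread here is what carries the new position fields through post-processing. A minimal sketch with hypothetical values; string literals stand in for the `TokenType` enum:

```ts
// Hypothetical token for the word "items" at offset 10 in some query:
const token = { type: 'RESERVED_KEYWORD', raw: 'items', text: 'ITEMS', start: 10, end: 15 };

// Old form: built a fresh object, silently dropping start/end.
const before = { type: 'IDENTIFIER', raw: token.raw, text: token.raw };

// New form: copies every existing field, then overrides type and text.
const after = { ...token, type: 'IDENTIFIER', text: token.raw };
// after.start === 10 && after.end === 15, while before.start is undefined
```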
18 changes: 11 additions & 7 deletions src/lexer/TokenizerEngine.ts
@@ -10,10 +10,9 @@ export interface TokenRule {
 export default class TokenizerEngine {
   private rules: Partial<Record<TokenType, TokenRule>>;
 
-  // The input SQL string to process
-  private input = '';
-  // Current position in string
-  private index = 0;
+  private input = ''; // The input SQL string to process
+
+  private index = 0; // Current position in string
 
   constructor(rules: Partial<Record<TokenType, TokenRule>>) {
     this.rules = rules;
@@ -52,6 +51,7 @@ export default class TokenizerEngine {
 
   private skipWhitespace(): void {
     WHITESPACE_REGEX.lastIndex = this.index;
+
     const matches = WHITESPACE_REGEX.exec(this.input);
     if (matches) {
       // Advance current position by matched whitespace length
@@ -145,13 +145,17 @@ export default class TokenizerEngine {
     if (matches) {
       const matchedToken = matches[0];
 
-      // Advance current position by matched token length
-      this.index += matchedToken.length;
-      return {
+      const outToken = {
         type,
         raw: matchedToken,
         text: transform ? transform(matchedToken) : matchedToken,
+        start: this.index,
+        end: this.index + matchedToken.length,
       };
+
+      // Advance current position by matched token length
+      this.index += matchedToken.length;
+      return outToken;
     }
     return undefined;
   }
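The key detail in this hunk is ordering: `start` and `end` are captured before `this.index` advances. A self-contained sketch of the same pattern, not the actual class:

```ts
// Minimal sketch of the match step: record the span first, then advance.
function matchAt(input: string, index: number, regex: RegExp) {
  regex.lastIndex = index; // regex must use the sticky ('y') flag
  const matches = regex.exec(input);
  if (!matches) {
    return undefined;
  }
  const raw = matches[0];
  // start/end are derived from the pre-advance cursor, as in the diff above.
  return { raw, start: index, end: index + raw.length };
}

const token = matchAt('SELECT foo', 7, /[a-z_]+/y);
// token => { raw: 'foo', start: 7, end: 10 }
// Invariant: input.slice(token.start, token.end) === token.raw
```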
10 changes: 9 additions & 1 deletion src/lexer/token.ts
@@ -35,13 +35,21 @@ export interface Token {
   raw: string; // The raw original text that was matched
   text: string; // Cleaned up text e.g. keyword converted to uppercase and extra spaces removed
   key?: string;
+  start: number; // 0-based index of the token in the whole query string
+  end: number; // 0-based index of where the token ends in the query string
 }
 
 /**
  * For use as a "missing token"
  * e.g. in lookAhead and lookBehind to avoid dealing with null values
  */
-export const EOF_TOKEN = { type: TokenType.EOF, raw: '«EOF»', text: '«EOF»' };
+export const EOF_TOKEN: Token = {
+  type: TokenType.EOF,
+  raw: '«EOF»',
+  text: '«EOF»',
+  start: Infinity,
+  end: Infinity,
+};
 
 /** Checks if two tokens are equivalent */
 export const testToken =
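One consequence of the new fields, sketched with a hypothetical helper that is not part of the PR: a token's original source text can always be recovered by slicing, and the `Infinity` sentinel keeps that safe for `EOF_TOKEN`.

```ts
// Hypothetical helper: recover the original source text for a token,
// independent of any cleanup applied to its `text` field.
function sourceOf(query: string, token: { start: number; end: number }): string {
  return query.slice(token.start, token.end);
}

// For a real token, slice(start, end) returns exactly token.raw.
// For EOF_TOKEN, start and end are Infinity, and slicing past the end of a
// string yields '', so the sentinel never throws or returns stale text.
sourceOf('SELECT 1', { start: Infinity, end: Infinity }); // => ''
```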
56 changes: 56 additions & 0 deletions test/unit/Parser.test.ts
@@ -29,7 +29,9 @@ describe('Parser', () => {
 "children": Array [
 Object {
 "token": Object {
+"end": 3,
 "raw": "foo",
+"start": 0,
 "text": "foo",
 "type": "IDENTIFIER",
 },
@@ -43,7 +45,9 @@
 "children": Array [
 Object {
 "token": Object {
+"end": 8,
 "raw": "bar",
+"start": 5,
 "text": "bar",
 "type": "IDENTIFIER",
 },
@@ -66,15 +70,19 @@
 "children": Array [
 Object {
 "nameToken": Object {
+"end": 11,
 "raw": "SQRT",
+"start": 7,
 "text": "SQRT",
 "type": "RESERVED_FUNCTION_NAME",
 },
 "parenthesis": Object {
 "children": Array [
 Object {
 "token": Object {
+"end": 13,
 "raw": "2",
+"start": 12,
 "text": "2",
 "type": "NUMBER",
 },
@@ -89,7 +97,9 @@
 },
 ],
 "nameToken": Object {
+"end": 6,
 "raw": "SELECT",
+"start": 0,
 "text": "SELECT",
 "type": "RESERVED_COMMAND",
 },
@@ -112,23 +122,29 @@
 "children": Array [
 Object {
 "arrayToken": Object {
+"end": 15,
 "raw": "my_array",
+"start": 7,
 "text": "my_array",
 "type": "IDENTIFIER",
 },
 "parenthesis": Object {
 "children": Array [
 Object {
 "nameToken": Object {
+"end": 22,
 "raw": "OFFSET",
+"start": 16,
 "text": "OFFSET",
 "type": "RESERVED_FUNCTION_NAME",
 },
 "parenthesis": Object {
 "children": Array [
 Object {
 "token": Object {
+"end": 24,
 "raw": "5",
+"start": 23,
 "text": "5",
 "type": "NUMBER",
 },
@@ -150,7 +166,9 @@
 },
 ],
 "nameToken": Object {
+"end": 6,
 "raw": "SELECT",
+"start": 0,
 "text": "SELECT",
 "type": "RESERVED_COMMAND",
 },
@@ -175,15 +193,19 @@
 "children": Array [
 Object {
 "token": Object {
+"end": 18,
 "raw": "birth_year",
+"start": 8,
 "text": "birth_year",
 "type": "IDENTIFIER",
 },
 "type": "token",
 },
 Object {
 "token": Object {
+"end": 20,
 "raw": "-",
+"start": 19,
 "text": "-",
 "type": "OPERATOR",
 },
@@ -193,23 +215,29 @@
 "children": Array [
 Object {
 "token": Object {
+"end": 34,
 "raw": "CURRENT_DATE",
+"start": 22,
 "text": "CURRENT_DATE",
 "type": "IDENTIFIER",
 },
 "type": "token",
 },
 Object {
 "token": Object {
+"end": 36,
 "raw": "+",
+"start": 35,
 "text": "+",
 "type": "OPERATOR",
 },
 "type": "token",
 },
 Object {
 "token": Object {
+"end": 38,
 "raw": "1",
+"start": 37,
 "text": "1",
 "type": "NUMBER",
 },
@@ -227,7 +255,9 @@
 },
 ],
 "nameToken": Object {
+"end": 6,
 "raw": "SELECT",
+"start": 0,
 "text": "SELECT",
 "type": "RESERVED_COMMAND",
 },
@@ -250,38 +280,50 @@
 "children": Array [
 Object {
 "token": Object {
+"end": 9,
 "raw": "age",
+"start": 6,
 "text": "age",
 "type": "IDENTIFIER",
 },
 "type": "token",
 },
 Object {
 "andToken": Object {
+"end": 24,
 "raw": "and",
+"start": 21,
 "text": "AND",
 "type": "RESERVED_LOGICAL_OPERATOR",
 },
 "betweenToken": Object {
+"end": 17,
 "raw": "BETWEEN",
+"start": 10,
 "text": "BETWEEN",
 "type": "RESERVED_KEYWORD",
 },
 "expr1": Object {
+"end": 20,
 "raw": "10",
+"start": 18,
 "text": "10",
 "type": "NUMBER",
 },
 "expr2": Object {
+"end": 27,
 "raw": "15",
+"start": 25,
 "text": "15",
 "type": "NUMBER",
 },
 "type": "between_predicate",
 },
 ],
 "nameToken": Object {
+"end": 5,
 "raw": "WHERE",
+"start": 0,
 "text": "WHERE",
 "type": "RESERVED_COMMAND",
 },
@@ -304,15 +346,19 @@
 "count": Array [
 Object {
 "token": Object {
+"end": 8,
 "raw": "10",
+"start": 6,
 "text": "10",
 "type": "NUMBER",
 },
 "type": "token",
 },
 ],
 "limitToken": Object {
+"end": 5,
 "raw": "LIMIT",
+"start": 0,
 "text": "LIMIT",
 "type": "RESERVED_COMMAND",
 },
@@ -335,22 +381,28 @@
 "count": Array [
 Object {
 "token": Object {
+"end": 13,
 "raw": "10",
+"start": 11,
 "text": "10",
 "type": "NUMBER",
 },
 "type": "token",
 },
 ],
 "limitToken": Object {
+"end": 5,
 "raw": "LIMIT",
+"start": 0,
 "text": "LIMIT",
 "type": "RESERVED_COMMAND",
 },
 "offset": Array [
 Object {
 "token": Object {
+"end": 9,
 "raw": "200",
+"start": 6,
 "text": "200",
 "type": "NUMBER",
 },
@@ -379,7 +431,9 @@
 },
 ],
 "nameToken": Object {
+"end": 6,
 "raw": "SELECT",
+"start": 0,
 "text": "SELECT",
 "type": "RESERVED_COMMAND",
 },
@@ -405,7 +459,9 @@
 },
 ],
 "nameToken": Object {
+"end": 15,
 "raw": "SELECT DISTINCT",
+"start": 0,
 "text": "SELECT DISTINCT",
 "type": "RESERVED_COMMAND",
 },
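All 56 snapshot additions above encode a single property. Since the standalone tokenPosition.test.ts was removed in 1aa7f20, a hypothetical restatement of that property could look like the following, where `tokenize` stands in for whatever entry point exposes lexer output:

```ts
// Hypothetical property test; `tokenize` is an assumed entry point.
declare function tokenize(query: string): { raw: string; start: number; end: number }[];

test('every token span slices back to its raw source text', () => {
  const query = 'SELECT age FROM tbl WHERE age BETWEEN 10 AND 15;';
  for (const token of tokenize(query)) {
    expect(query.slice(token.start, token.end)).toBe(token.raw);
  }
});
```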