Skip to content

Commit

Permalink
Add TokenType.RESERVED_DATA_TYPE and dataTypeCase
Browse files Browse the repository at this point in the history
  • Loading branch information
karlhorky committed Nov 30, 2023
1 parent 10b4451 commit a555575
Show file tree
Hide file tree
Showing 10 changed files with 80 additions and 14 deletions.
3 changes: 3 additions & 0 deletions src/FormatOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,16 @@ export type KeywordCase = 'preserve' | 'upper' | 'lower';

export type IdentifierCase = 'preserve' | 'upper' | 'lower';

/** Allowed values for the `dataTypeCase` formatting option. */
export type DataTypeCase = 'preserve' | 'upper' | 'lower';

export type LogicalOperatorNewline = 'before' | 'after';

export interface FormatOptions {
tabWidth: number;
useTabs: boolean;
keywordCase: KeywordCase;
identifierCase: IdentifierCase;
dataTypeCase: DataTypeCase;
indentStyle: IndentStyle;
logicalOperatorNewline: LogicalOperatorNewline;
expressionWidth: number;
Expand Down
22 changes: 21 additions & 1 deletion src/formatter/ExpressionFormatter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import {
CaseExpressionNode,
CaseWhenNode,
CaseElseNode,
DataTypeNode,
} from '../parser/ast.js';

import Layout, { WS } from './Layout.js';
Expand Down Expand Up @@ -130,6 +131,8 @@ export default class ExpressionFormatter {
return this.formatLineComment(node);
case NodeType.block_comment:
return this.formatBlockComment(node);
case NodeType.data_type:
return this.formatDataType(node);
case NodeType.keyword:
return this.formatKeywordNode(node);
}
Expand All @@ -145,7 +148,9 @@ export default class ExpressionFormatter {
private formatArraySubscript(node: ArraySubscriptNode) {
this.withComments(node.array, () => {
this.layout.add(
node.array.type === NodeType.keyword
node.array.type === NodeType.data_type
? this.showDataType(node.array)
: node.array.type === NodeType.keyword
? this.showKw(node.array)
: this.showIdentifier(node.array)
);
Expand Down Expand Up @@ -489,6 +494,10 @@ export default class ExpressionFormatter {
}
}

/** Adds the rendered text of a data type node to the layout, followed by a space. */
private formatDataType(node: DataTypeNode) {
  const rendered = this.showDataType(node);
  this.layout.add(rendered, WS.SPACE);
}

private showKw(node: KeywordNode): string {
if (isTabularToken(node.tokenType)) {
return toTabularFormat(this.showNonTabularKw(node), this.cfg.indentStyle);
Expand Down Expand Up @@ -523,4 +532,15 @@ export default class ExpressionFormatter {
}
}
}

/**
 * Produces the output text for a data type node according to `cfg.dataTypeCase`:
 *
 * - 'lower': `node.text` converted to lower case
 * - 'upper': `node.text` returned as-is (presumably already canonicalised
 *   upstream by the tokenizer; confirm)
 * - 'preserve': `node.raw` passed through `equalizeWhitespace`
 */
private showDataType(node: DataTypeNode): string {
  const { text, raw } = node;
  switch (this.cfg.dataTypeCase) {
    case 'lower':
      return text.toLowerCase();
    case 'upper':
      return text;
    case 'preserve':
      return equalizeWhitespace(raw);
  }
}
}
15 changes: 10 additions & 5 deletions src/lexer/Tokenizer.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import { Token, TokenType } from './token.js';
import * as regex from './regexFactory.js';
import { ParamTypes, TokenizerOptions } from './TokenizerOptions.js';
import TokenizerEngine, { TokenRule } from './TokenizerEngine.js';
import { escapeRegExp, patternToRegex } from './regexUtil.js';
import { equalizeWhitespace, Optional } from '../utils.js';
import { NestedComment } from './NestedComment.js';
import * as regex from './regexFactory.js';
import { escapeRegExp, patternToRegex } from './regexUtil.js';
import { Token, TokenType } from './token.js';
import TokenizerEngine, { TokenRule } from './TokenizerEngine.js';
import { ParamTypes, TokenizerOptions } from './TokenizerOptions.js';

type OptionalTokenRule = Optional<TokenRule, 'regex'>;

Expand Down Expand Up @@ -130,6 +130,11 @@ export default class Tokenizer {
regex: regex.reservedWord(cfg.reservedFunctionNames, cfg.identChars),
text: toCanonical,
},
{
type: TokenType.RESERVED_DATA_TYPE,
regex: regex.reservedWord(cfg.reservedDataTypes ?? [], cfg.identChars),
text: toCanonical,
},
{
type: TokenType.RESERVED_KEYWORD,
regex: regex.reservedWord(cfg.reservedKeywords, cfg.identChars),
Expand Down
2 changes: 2 additions & 0 deletions src/lexer/TokenizerOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ export interface TokenizerOptions {
reservedPhrases?: string[];
// built in function names
reservedFunctionNames: string[];
// data types
reservedDataTypes?: string[];
// all other reserved words (not included to any of the above lists)
reservedKeywords: string[];
// Types of quotes to use for strings
Expand Down
5 changes: 5 additions & 0 deletions src/lexer/disambiguateTokens.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ const keywordToArrayKeyword = (token: Token, i: number, tokens: Token[]): Token
if (nextToken && isOpenBracket(nextToken)) {
return { ...token, type: TokenType.ARRAY_KEYWORD };
}
} else if (token.type === TokenType.RESERVED_DATA_TYPE) {
const nextToken = nextNonCommentToken(tokens, i);
if (nextToken && isOpenBracket(nextToken)) {
return { ...token, type: TokenType.ARRAY_DATA_TYPE };
}
}
return token;
};
Expand Down
8 changes: 6 additions & 2 deletions src/lexer/token.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export enum TokenType {
IDENTIFIER = 'IDENTIFIER',
STRING = 'STRING',
VARIABLE = 'VARIABLE',
RESERVED_DATA_TYPE = 'RESERVED_DATA_TYPE',
RESERVED_KEYWORD = 'RESERVED_KEYWORD',
RESERVED_FUNCTION_NAME = 'RESERVED_FUNCTION_NAME',
RESERVED_PHRASE = 'RESERVED_PHRASE',
Expand All @@ -12,6 +13,7 @@ export enum TokenType {
RESERVED_SELECT = 'RESERVED_SELECT',
RESERVED_JOIN = 'RESERVED_JOIN',
ARRAY_IDENTIFIER = 'ARRAY_IDENTIFIER', // IDENTIFIER token in front of [
ARRAY_DATA_TYPE = 'ARRAY_DATA_TYPE', // RESERVED_DATA_TYPE token in front of [
ARRAY_KEYWORD = 'ARRAY_KEYWORD', // RESERVED_KEYWORD token in front of [
CASE = 'CASE',
END = 'END',
Expand Down Expand Up @@ -73,23 +75,25 @@ export const testToken =

/** Util object that allows for easy checking of Reserved Keywords */
export const isToken = {
ARRAY: testToken({ text: 'ARRAY', type: TokenType.RESERVED_KEYWORD }),
ARRAY: testToken({ text: 'ARRAY', type: TokenType.RESERVED_DATA_TYPE }),
BY: testToken({ text: 'BY', type: TokenType.RESERVED_KEYWORD }),
SET: testToken({ text: 'SET', type: TokenType.RESERVED_CLAUSE }),
STRUCT: testToken({ text: 'STRUCT', type: TokenType.RESERVED_KEYWORD }),
STRUCT: testToken({ text: 'STRUCT', type: TokenType.RESERVED_DATA_TYPE }),
WINDOW: testToken({ text: 'WINDOW', type: TokenType.RESERVED_CLAUSE }),
VALUES: testToken({ text: 'VALUES', type: TokenType.RESERVED_CLAUSE }),
};

/** Checks if token is any Reserved Keyword or Clause */
export const isReserved = (type: TokenType): boolean =>
type === TokenType.RESERVED_DATA_TYPE ||
type === TokenType.RESERVED_KEYWORD ||
type === TokenType.RESERVED_FUNCTION_NAME ||
type === TokenType.RESERVED_PHRASE ||
type === TokenType.RESERVED_CLAUSE ||
type === TokenType.RESERVED_SELECT ||
type === TokenType.RESERVED_SET_OPERATION ||
type === TokenType.RESERVED_JOIN ||
type === TokenType.ARRAY_DATA_TYPE ||
type === TokenType.ARRAY_KEYWORD ||
type === TokenType.CASE ||
type === TokenType.END ||
Expand Down
10 changes: 9 additions & 1 deletion src/parser/ast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export enum NodeType {
literal = 'literal',
identifier = 'identifier',
keyword = 'keyword',
data_type = 'data_type',
parameter = 'parameter',
operator = 'operator',
comma = 'comma',
Expand Down Expand Up @@ -56,7 +57,7 @@ export interface FunctionCallNode extends BaseNode {
// <ident>[<expr>]
export interface ArraySubscriptNode extends BaseNode {
type: NodeType.array_subscript;
array: IdentifierNode | KeywordNode;
array: IdentifierNode | KeywordNode | DataTypeNode;
parenthesis: ParenthesisNode;
}

Expand Down Expand Up @@ -129,6 +130,12 @@ export interface IdentifierNode extends BaseNode {
text: string;
}

/** AST node for a data type, tagged with `NodeType.data_type`. */
export interface DataTypeNode extends BaseNode {
type: NodeType.data_type;
// presumably the token's canonical text (copied from token.text); confirm in the lexer
text: string;
// presumably the token's original spelling (copied from token.raw); confirm in the lexer
raw: string;
}

export interface KeywordNode extends BaseNode {
type: NodeType.keyword;
tokenType: TokenType;
Expand Down Expand Up @@ -180,6 +187,7 @@ export type AstNode =
| AllColumnsAsteriskNode
| LiteralNode
| IdentifierNode
| DataTypeNode
| KeywordNode
| ParameterNode
| OperatorNode
Expand Down
21 changes: 20 additions & 1 deletion src/parser/grammar.ne
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
@preprocessor typescript
@{%
import LexerAdapter from './LexerAdapter.js';
import { NodeType, AstNode, CommentNode, KeywordNode, IdentifierNode } from './ast.js';
import { NodeType, AstNode, CommentNode, KeywordNode, IdentifierNode, DataTypeNode } from './ast.js';
import { Token, TokenType } from '../lexer/token.js';

// The lexer here is only to provide the has() method,
Expand All @@ -23,6 +23,12 @@ const toKeywordNode = (token: Token): KeywordNode => ({
raw: token.raw,
});

// Builds a DataTypeNode from a token, carrying over the token's text and raw values.
const toDataTypeNode = (token: Token): DataTypeNode => {
  const { text, raw } = token;
  return { type: NodeType.data_type, text, raw };
};

interface CommentAttachments {
leading?: CommentNode[];
trailing?: CommentNode[];
Expand Down Expand Up @@ -197,6 +203,7 @@ atomic_expression ->
| identifier
| parameter
| literal
| data_type
| keyword ) {% unwrap %}

array_subscript -> %ARRAY_IDENTIFIER _ square_brackets {%
Expand All @@ -206,6 +213,13 @@ array_subscript -> %ARRAY_IDENTIFIER _ square_brackets {%
parenthesis: brackets,
})
%}
# Array subscript whose array part is an ARRAY_DATA_TYPE token followed by square brackets.
# The token becomes a DataTypeNode; the `_` match is passed to addComments as its trailing comments.
array_subscript -> %ARRAY_DATA_TYPE _ square_brackets {%
([arrayToken, _, brackets]) => ({
type: NodeType.array_subscript,
array: addComments(toDataTypeNode(arrayToken), { trailing: _ }),
parenthesis: brackets,
})
%}
array_subscript -> %ARRAY_KEYWORD _ square_brackets {%
([arrayToken, _, brackets]) => ({
type: NodeType.array_subscript,
Expand Down Expand Up @@ -329,6 +343,11 @@ keyword ->
([[token]]) => toKeywordNode(token)
%}

# A single RESERVED_DATA_TYPE token, converted to a DataTypeNode via toDataTypeNode.
data_type ->
( %RESERVED_DATA_TYPE ) {%
([[token]]) => toDataTypeNode(token)
%}

logic_operator ->
( %AND
| %OR
Expand Down
4 changes: 2 additions & 2 deletions src/sqlFormatter.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import * as allDialects from './allDialects.js';

import { FormatOptions } from './FormatOptions.js';
import { createDialect, DialectOptions } from './dialect.js';
import { FormatOptions } from './FormatOptions.js';
import Formatter from './formatter/Formatter.js';
import { ConfigError, validateConfig } from './validateConfig.js';

Expand Down Expand Up @@ -42,6 +41,7 @@ const defaultOptions: FormatOptions = {
useTabs: false,
keywordCase: 'preserve',
identifierCase: 'preserve',
dataTypeCase: 'preserve',
indentStyle: 'standard',
logicalOperatorNewline: 'before',
expressionWidth: 50,
Expand Down
4 changes: 2 additions & 2 deletions test/snowflake.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -167,12 +167,12 @@ describe('SnowflakeFormatter', () => {
`);
});

it('detects data types as keywords', () => {
it('detects data types as data types', () => {
expect(
format(
`CREATE TABLE tbl (first_column double Precision, second_column numBer (38, 0), third String);`,
{
keywordCase: 'upper',
dataTypeCase: 'upper',
}
)
).toBe(dedent`
Expand Down

0 comments on commit a555575

Please sign in to comment.