-
Notifications
You must be signed in to change notification settings - Fork 2
/
get-statements.js
109 lines (97 loc) · 2.88 KB
/
get-statements.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
const moo = require("moo");
const keywords = require("./keywords");
const Statement = require("./statement");
/**
 * Wraps moo.keywords() so that keyword lookup ignores letter case.
 *
 * The keyword map is built once from `defs`; the returned function lower-cases
 * each incoming value before handing it to that map.
 * NOTE(review): the keyword lists in `defs` are presumably lower case already,
 * since only the incoming value is normalized here - confirm in ./keywords.
 *
 * @param {object} defs - keyword definitions, passed to moo.keywords() unchanged
 * @returns {function(string): *} function returning whatever the moo keyword
 *   map yields for the lower-cased value
 */
const caseInsensitiveKeywords = (defs) => {
  const lookupType = moo.keywords(defs);
  return function resolveType(text) {
    const lowered = text.toLowerCase();
    return lookupType(lowered);
  };
};
/**
 * Module-level moo lexer that splits SQL-ish text into coarse tokens
 * (whitespace, comments, punctuation, numbers, terminators, quoted
 * identifiers, strings, identifiers/keywords and operators).
 *
 * The instance is shared: getStatements() calls lexer.reset() with new text
 * on every run.
 *
 * Several rules overlap (for example "[" and "`" can open a quotedIdentifier
 * but are also listed as operator characters), so the order of the rules
 * below is significant - keep it intact when editing.
 */
const lexer = moo.compile({
  // Runs of spaces/tabs, plus Windows and Unix line breaks.
  // Line breaks are flagged so moo keeps its line/column counters correct.
  whitespace: [
    /[ \t]+/u,
    { match: /\r\n/u, lineBreaks: true },
    { match: /\n/u, lineBreaks: true },
  ],
  // First expression is a "--" line comment, second is a /* multi line */ comment.
  // The multi-line form can contain newlines, so it must declare lineBreaks;
  // otherwise moo does not advance its line counter across the comment and the
  // line/col of every later token is off.
  comment: [
    /--.*?$/u,
    { match: /\/\*[^]*?\*\//u, lineBreaks: true },
  ],
  lparen: "(",
  rparen: ")",
  comma: ",",
  period: ".",
  // Unsigned integer without leading zeros
  number: /0|[1-9][0-9]*/u,
  // ; is standard, \g is a shortcut used in psql and Actian tooling
  // Are there others?
  terminator: [";", "\\g"],
  // Identifiers wrapped in "double quotes", [brackets] or `backticks`
  // text == original text
  // value == value inside quotes
  quotedIdentifier: [
    {
      match: /".*?"/u,
      value: (x) => x.slice(1, -1),
    },
    {
      match: /\[.*?\]/u,
      value: (x) => x.slice(1, -1),
    },
    {
      match: /`.*?`/u,
      value: (x) => x.slice(1, -1),
    },
  ],
  // Single-quoted strings. Multi-line strings are allowed,
  // which some database drivers accept (sqlite, actian).
  // A doubled single quote ('') is NOT treated as an escape: 'it''s' comes back
  // as two string tokens back-to-back ('it' and 's') instead of one.
  // The end result is still ok for sql-limiter.
  string: [
    {
      match: /'[^']*'/u,
      lineBreaks: true,
    },
  ],
  // Remaining text is assumed to be an identifier of some kind (column or table)
  // UNLESS it matches a keyword case insensitively.
  // The value of these tokens is converted to lower case.
  identifier: [
    {
      // The non-ASCII range is added to handle non-english identifiers.
      // This range may be too broad
      // eslint-disable-next-line no-control-regex
      match: /(?:\w|[^\u0000-\u007F])+/u,
      type: caseInsensitiveKeywords({
        keyword: keywords,
      }),
      value: (s) => s.toLowerCase(),
    },
  ],
  // Any combination of special characters is to be treated as an operator (as a guess anyways)
  // Initially these were being noted here but the list is large
  // and there is no way to know all operators since this supports anything that is SQL-ish
  operator: {
    match: /[<>~!@#$%^?&|`*\-{}+=:/\\[\]]+/u,
    lineBreaks: false,
  },
});
/**
 * Splits SQL text into statements.
 *
 * Every token produced by the shared lexer is appended to the current
 * Statement. As soon as that Statement reports `endReached`, it is collected
 * and a fresh Statement is started. Tokens remaining after the last completed
 * statement are returned as one final statement.
 *
 * @param {string} sqlText - SQL text to tokenize
 * @returns {Statement[]} statements in the order they were read from sqlText
 */
function getStatements(sqlText) {
  const collected = [];
  let current = new Statement();

  lexer.reset(sqlText);
  for (let token = lexer.next(); token; token = lexer.next()) {
    current.appendToken(token);
    if (!current.endReached) {
      continue;
    }
    collected.push(current);
    current = new Statement();
  }

  // Keep a trailing statement whose end was never reached
  if (current.tokens.length) {
    collected.push(current);
  }
  return collected;
}
module.exports = getStatements;