Skip to content

Commit

Permalink
feat: SQL comments support and new tokenizer
Browse files Browse the repository at this point in the history
Add SQL comments support, including MySQL # comments.

Faster and simpler tokenization.

Refs: #133
  • Loading branch information
Qtax committed Sep 12, 2023
1 parent 569b2b1 commit bd43335
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 85 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ The following options may be passed to the `highlight` function.
string: '\x1b[32m', // Strings
special: '\x1b[33m', // Special characters
bracket: '\x1b[33m', // Brackets (parentheses)
comment: '\x1b[2m\x1b[90m', // Comments
clear: '\x1b[0m' // Clear (inserted after each match)
}
```
Expand Down
136 changes: 51 additions & 85 deletions lib/index.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/* eslint-disable no-regex-spaces */
'use strict'

const keywords = require('./keywords')
Expand All @@ -14,110 +15,74 @@ const DEFAULT_OPTIONS = {
string: '\x1b[32m',
special: '\x1b[33m',
bracket: '\x1b[33m',
comment: '\x1b[2m\x1b[90m',
clear: '\x1b[0m'
}
}

const SPLIT_CHARS = '[^a-zA-Z_]'

const DEFAULT_KEYWORD = 'default'

const highlighters = [
{
name: 'keyword',
group: 1,
regex: new RegExp(`(^|${SPLIT_CHARS})(${keywords.join('|')})(?=${SPLIT_CHARS}|$)`, 'gi')
},
{
name: 'special',
regex: /(=|!=|%|\/|\*|-|,|;|:|\+|<|>)/g
},
{
name: 'function',
regex: /(\w+?)\(/g,
trimEnd: 1
},
{
name: 'number',
regex: /(\b\d+(?:\.\d+)?)/g
},
{
name: 'string',
regex: /(['](?:\\'|.)*?[']|["](?:\\"|.)*?["]|[`](?:\\`|.)*?[`])/g
},
{
name: 'bracket',
regex: /([()])/g
}
]
/\b(?<number> \d+ (?:\.\d+)? )\b/,

function getSegments (sqlString) {
const matches = []

for (const hl of highlighters) {
let match

// This is probably the one time when an assignment inside a condition makes sense
// eslint-disable-next-line no-cond-assign
while (match = hl.regex.exec(sqlString)) {
let text = match[0]
let boringLength = 0

// If a specific group is requested, use that group instead, and make sure
// we offset the index by the length of the preceding groups
if (hl.group) {
text = match[hl.group + 1]
for (let i = 1; i <= hl.group; i++) {
boringLength += match[i].length
}
}
// Note: doubled-quote escapes such as 'sql''server' also work, because they simply tokenize as two adjacent strings
/(?<string> '(?: [^'\\] | \\. )*' | "(?: [^"\\] | \\. )*" | `(?: [^`\\] | \\. )*` )/,

const trimmedText = hl.trimEnd
? text.substring(0, text.length - hl.trimEnd)
: text
matches.push({
name: hl.name,
start: match.index + boringLength,
length: trimmedText.length
})
}
}
/(?<comment> --[^\n\r]* | #[^\n\r]* | \/\* (?: [^*] | \* (?!\/) )* \*\/ )/,

// Future improvement: Comments should be allowed between the function name and the opening parenthesis
/\b(?<function> \w+ ) (?= \s*\( )/,

/(?<bracket> [()] )/,

/(?<special> != | [=%*/\-+,;:<>] )/
]

const sortedMatches = matches.slice().sort((a, b) => a.start - b.start)
/**
 * Convert a highlighter regex literal, written with spaces and tabs for
 * readability, into a compact pattern string that can be embedded in a
 * larger regular expression.
 *
 * @param {RegExp} regex - Pattern whose textual form should be compacted.
 * @returns {string} The pattern text without the surrounding slashes, without
 *   trailing flags and without any spaces or tabs.
 */
function getRegexString (regex) {
  // Matches, in one pass: the opening "/", the closing "/" plus flags,
  // and every run of spaces/tabs used purely for layout.
  const noise = /^\/|\/\w*$|[\t ]+/g
  return regex.toString().replace(noise, '')
}

// Single tokenizer regex of the shape
//   /(.*?)((?<token1>...)|(?<token2>...)|...|$)/isy
// Group 1 lazily collects the plain text in front of the next token and
// group 2 is the token itself (keyword or one of the highlighter patterns).
// The trailing `$` alternative lets group 1 extend to the end of the string
// when no further token follows, so the remaining "default" text is captured.
// Flags: i = case-insensitive, s = `.` also matches newlines,
// y = sticky, so every exec() continues exactly at `lastIndex`.
const tokenizer = new RegExp(
  `(.*?)(\\b(?<keyword>${keywords.join('|')})\\b|${highlighters.map(getRegexString).join('|')}|$)`,
  'isy'
)

// filter/exclude nested matches (matches within the last match)
function getSegments (sqlString) {
const segments = []
let upperBound = 0
for (let i = 0; i < sortedMatches.length; i++) {
if (sortedMatches[i].start < upperBound) { continue }
let match

// If no match, add a default segment
if (sortedMatches[i].start > upperBound) {
// Reset the starting position
tokenizer.lastIndex = 0

// This is probably the one time when an assignment inside a condition makes sense
// eslint-disable-next-line no-cond-assign
while (match = tokenizer.exec(sqlString)) {
if (match[1]) {
segments.push({
name: DEFAULT_KEYWORD,
content: sqlString.substring(upperBound, sortedMatches[i].start)
content: match[1]
})
}

segments.push({
name: sortedMatches[i].name,
content: sqlString.substring(
sortedMatches[i].start,
sortedMatches[i].start + sortedMatches[i].length
)
})
upperBound = sortedMatches[i].start + sortedMatches[i].length
}
if (match[2]) {
const name = Object.keys(match.groups).find(key => match.groups[key])
segments.push({
name,
content: match.groups[name]
})
}

if (upperBound < sqlString.length - 1) {
segments.push({
name: DEFAULT_KEYWORD,
content: sqlString.substring(
upperBound,
upperBound + sqlString.length + 1
)
})
// Stop at the end of string
if (match.index + match[0].length >= sqlString.length) {
break
}
}

return segments
}

Expand All @@ -140,5 +105,6 @@ function highlight (sqlString, options) {

module.exports = {
getSegments,
highlight
highlight,
DEFAULT_OPTIONS
}
2 changes: 2 additions & 0 deletions test/debug.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,6 @@ console.log(highlight('DROP PROCEDURE IF EXISTS `some-database`.`some-table`;'))

console.log(highlight('SELECT * FROM a;SELECT * FROM b;'))

console.log(highlight('SELECT foo /* comment, not "keyword" WHERE GROUP */ FROM bar; -- comment\nSELECT * FROM baz;'))

console.log(highlight("select * from a where b = 'array<map<string,string>>';", { html: true }))
41 changes: 41 additions & 0 deletions test/index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ const OPTIONS = {
string: '[string]',
special: '[special]',
bracket: '[bracket]',
comment: '[comment]',
clear: '[clear]'
}
}
Expand Down Expand Up @@ -114,6 +115,26 @@ describe('unicode', () => {
expect(hlUni('SELECT * FROM a;SELECT * FROM b;'))
.toBe('[keyword]SELECT[clear] [special]*[clear] [keyword]FROM[clear] a[special];[clear][keyword]SELECT[clear] [special]*[clear] [keyword]FROM[clear] b[special];[clear]')
})

it('comment single line', () => {
expect(hlUni('-- comment 1 "comment" /* still */ comment 2\nSELECT `not comment`; -- comment 3'))
.toBe('[comment]-- comment 1 "comment" /* still */ comment 2[clear]\n[keyword]SELECT[clear] [string]`not comment`[clear][special];[clear] [comment]-- comment 3[clear]')
})

it('comment mysql', () => {
expect(hlUni('# comment 1 "comment" /* still */ comment 2\nSELECT `not comment`; # comment 3'))
.toBe('[comment]# comment 1 "comment" /* still */ comment 2[clear]\n[keyword]SELECT[clear] [string]`not comment`[clear][special];[clear] [comment]# comment 3[clear]')
})

it('comment multiline', () => {
expect(hlUni('SELECT /* this is, a "comment" */ "not /*comment*/" /***also*comment***/'))
.toBe('[keyword]SELECT[clear] [comment]/* this is, a "comment" */[clear] [string]"not /*comment*/"[clear] [comment]/***also*comment***/[clear]')
})

it('not a comment', () => {
expect(hlUni('"id -- not comment /* still */ not"'))
.toBe('[string]"id -- not comment /* still */ not"[clear]')
})
})

describe('html', () => {
Expand Down Expand Up @@ -211,6 +232,26 @@ describe('html', () => {
expect(hlHtml("select * from a where b = 'array<map<string,string>>';"))
.toBe('<span class="sql-hl-keyword">select</span> <span class="sql-hl-special">*</span> <span class="sql-hl-keyword">from</span> a <span class="sql-hl-keyword">where</span> b <span class="sql-hl-special">=</span> <span class="sql-hl-string">&#39;array&lt;map&lt;string,string&gt;&gt;&#39;</span><span class="sql-hl-special">;</span>')
})

it('comment single line', () => {
expect(hlHtml('-- comment 1 "comment" /* still */ comment 2\nSELECT `not comment`; -- comment 3'))
.toBe('<span class="sql-hl-comment">-- comment 1 &quot;comment&quot; /* still */ comment 2</span>\n<span class="sql-hl-keyword">SELECT</span> <span class="sql-hl-string">`not comment`</span><span class="sql-hl-special">;</span> <span class="sql-hl-comment">-- comment 3</span>')
})

it('comment mysql', () => {
expect(hlHtml('# comment 1 "comment" /* still */ comment 2\nSELECT `not comment`; # comment 3'))
.toBe('<span class="sql-hl-comment"># comment 1 &quot;comment&quot; /* still */ comment 2</span>\n<span class="sql-hl-keyword">SELECT</span> <span class="sql-hl-string">`not comment`</span><span class="sql-hl-special">;</span> <span class="sql-hl-comment"># comment 3</span>')
})

it('comment multiline', () => {
expect(hlHtml('SELECT /* this is, a "comment" */ "not /*comment*/" /***also*comment***/'))
.toBe('<span class="sql-hl-keyword">SELECT</span> <span class="sql-hl-comment">/* this is, a &quot;comment&quot; */</span> <span class="sql-hl-string">&quot;not /*comment*/&quot;</span> <span class="sql-hl-comment">/***also*comment***/</span>')
})

it('not a comment', () => {
expect(hlHtml('"id -- not comment /* still */ not"'))
.toBe('<span class="sql-hl-string">&quot;id -- not comment /* still */ not&quot;</span>')
})
})

describe('getSegments', () => {
Expand Down

0 comments on commit bd43335

Please sign in to comment.