Skip to content

Commit

Permalink
feat: add length and time limit to tokenizer (#588)
Browse files Browse the repository at this point in the history
Co-authored-by: Anthony Fu <anthonyfu117@hotmail.com>
  • Loading branch information
ije and antfu committed Feb 18, 2024
1 parent ecb36e2 commit 2803f89
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 3 deletions.
19 changes: 18 additions & 1 deletion packages/core/src/code-to-tokens-base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ export function tokenizeWithTheme(
...options?.colorReplacements,
}

const {
tokenizeMaxLineLength = 0,
tokenizeTimeLimit = 500,
} = options

const lines = splitLines(code)

let ruleStack = INITIAL
Expand All @@ -59,6 +64,18 @@ export function tokenizeWithTheme(
continue
}

// Do not attempt to tokenize if the line length is greater than or equal to `tokenizeMaxLineLength`
if (tokenizeMaxLineLength > 0 && line.length >= tokenizeMaxLineLength) {
actual = []
final.push([{
content: line,
offset: lineOffset,
color: '',
fontStyle: 0,
}])
continue
}

let resultWithScopes
let tokensWithScopes
let tokensWithScopesIndex
Expand All @@ -69,7 +86,7 @@ export function tokenizeWithTheme(
tokensWithScopesIndex = 0
}

const result = grammar.tokenizeLine2(line, ruleStack)
const result = grammar.tokenizeLine2(line, ruleStack, tokenizeTimeLimit)

const tokensLength = result.tokens.length / 2
for (let j = 0; j < tokensLength; j++) {
Expand Down
2 changes: 1 addition & 1 deletion packages/core/src/types/options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ export interface CodeToHastOptionsCommon<Languages extends string = string>
extends
TransformerOptions,
DecorationOptions,
Pick<TokenizeWithThemeOptions, 'colorReplacements'> {
Pick<TokenizeWithThemeOptions, 'colorReplacements' | 'tokenizeMaxLineLength' | 'tokenizeTimeLimit'> {

lang: StringLiteralUnion<Languages | SpecialLanguage>

Expand Down
14 changes: 14 additions & 0 deletions packages/core/src/types/tokens.ts
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,20 @@ export interface TokenizeWithThemeOptions {
* This will be merged with theme's `colorReplacements` if any.
*/
colorReplacements?: Record<string, string>

/**
* Lines whose length is greater than or equal to this value will not be tokenized, for performance reasons.
*
* @default 0 (no limit)
*/
tokenizeMaxLineLength?: number

/**
* Time limit in milliseconds for tokenizing a single line.
*
* @default 500 (0.5s)
*/
tokenizeTimeLimit?: number
}

/**
Expand Down
17 changes: 16 additions & 1 deletion packages/monaco/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,25 @@ export function shikiToMonaco(
return new TokenizerState(INITIAL, highlighter)
},
tokenize(line, state: TokenizerState) {
// Do not attempt to tokenize if a line is too long
// default to 20000 (as in monaco-editor-core defaults)
const tokenizeMaxLineLength = 20000
const tokenizeTimeLimit = 500

if (line.length >= tokenizeMaxLineLength) {
return {
endState: state,
tokens: [{ startIndex: 0, scopes: '' }],
}
}

const grammar = state.highlighter.getLanguage(lang)
const { colorMap } = state.highlighter.setTheme(currentTheme)
const theme = themeMap.get(currentTheme)
const result = grammar.tokenizeLine2(line, state.ruleStack)
const result = grammar.tokenizeLine2(line, state.ruleStack, tokenizeTimeLimit)

if (result.stoppedEarly)
console.warn(`Time limit reached when tokenizing line: ${line.substring(0, 100)}`)

const colorToScopeMap = new Map<string, string>()

Expand Down
15 changes: 15 additions & 0 deletions packages/shiki/test/general.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,21 @@ describe('should', () => {
}
}
})

// Checks that `tokenizeMaxLineLength` makes over-long lines skip tokenization
// while shorter lines in the same input are still highlighted.
it('skip line tokenizing', async () => {
// 'foo' repeated 50 times: a 150-character identifier.
const longText = 'foo'.repeat(50)

// No `tokenizeMaxLineLength` given: the long line is tokenized as usual,
// so the snapshot contains one coloured span per token.
expect(await codeToHtml(`const long = ${longText}`, {
theme: 'vitesse-light',
lang: 'javascript',
})).toMatchInlineSnapshot(`"<pre class="shiki vitesse-light" style="background-color:#ffffff;color:#393a34" tabindex="0"><code><span class="line"><span style="color:#AB5959">const</span><span style="color:#B07D48"> long</span><span style="color:#999999"> =</span><span style="color:#B07D48"> ${longText}</span></span></code></pre>"`)

// With a limit of 100: the first line is short and is still highlighted,
// while the second line exceeds the limit and is expected as a single
// span with no style attribute.
expect(await codeToHtml(`const short = ""\nconst long = ${longText}`, {
theme: 'vitesse-light',
lang: 'javascript',
tokenizeMaxLineLength: 100,
})).toMatchInlineSnapshot(`"<pre class="shiki vitesse-light" style="background-color:#ffffff;color:#393a34" tabindex="0"><code><span class="line"><span style="color:#AB5959">const</span><span style="color:#B07D48"> short</span><span style="color:#999999"> =</span><span style="color:#B5695999"> ""</span></span>\n<span class="line"><span>const long = ${longText}</span></span></code></pre>"`)
})
})

describe('errors', () => {
Expand Down

0 comments on commit 2803f89

Please sign in to comment.