Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check maxTokenizationLineLength in monarchLexer #145979

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/vs/editor/standalone/browser/standaloneLanguages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import { IStandaloneThemeService } from 'vs/editor/standalone/common/standaloneT
import { IMarkerData, IMarkerService } from 'vs/platform/markers/common/markers';
import { ILanguageFeaturesService } from 'vs/editor/common/services/languageFeatures';
import { LanguageSelector } from 'vs/editor/common/languageSelector';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';

/**
* Register information about a new language.
Expand Down Expand Up @@ -374,7 +375,7 @@ export function registerTokensProviderFactory(languageId: string, factory: Token
if (isATokensProvider(result)) {
return createTokenizationSupportAdapter(languageId, result);
}
return new MonarchTokenizer(StandaloneServices.get(ILanguageService), StandaloneServices.get(IStandaloneThemeService), languageId, compile(languageId, result));
return new MonarchTokenizer(StandaloneServices.get(ILanguageService), StandaloneServices.get(IStandaloneThemeService), languageId, compile(languageId, result), StandaloneServices.get(IConfigurationService));
}
};
return languages.TokenizationRegistry.registerFactory(languageId, adaptedFactory);
Expand Down Expand Up @@ -405,7 +406,7 @@ export function setTokensProvider(languageId: string, provider: TokensProvider |
*/
export function setMonarchTokensProvider(languageId: string, languageDef: IMonarchLanguage | Thenable<IMonarchLanguage>): IDisposable {
const create = (languageDef: IMonarchLanguage) => {
return new MonarchTokenizer(StandaloneServices.get(ILanguageService), StandaloneServices.get(IStandaloneThemeService), languageId, compile(languageId, languageDef));
return new MonarchTokenizer(StandaloneServices.get(ILanguageService), StandaloneServices.get(IStandaloneThemeService), languageId, compile(languageId, languageDef), StandaloneServices.get(IConfigurationService));
};
if (isThenable<IMonarchLanguage>(languageDef)) {
return registerTokensProviderFactory(languageId, { create: () => languageDef });
Expand Down
22 changes: 20 additions & 2 deletions src/vs/editor/standalone/common/monarch/monarchLexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@

import { IDisposable } from 'vs/base/common/lifecycle';
import * as languages from 'vs/editor/common/languages';
import { NullState } from 'vs/editor/common/languages/nullTokenize';
import { NullState, nullTokenizeEncoded, nullTokenize } from 'vs/editor/common/languages/nullTokenize';
import { TokenTheme } from 'vs/editor/common/languages/supports/tokenization';
import { ILanguageService } from 'vs/editor/common/languages/language';
import * as monarchCommon from 'vs/editor/standalone/common/monarch/monarchCommon';
import { IStandaloneThemeService } from 'vs/editor/standalone/common/standaloneTheme';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
import { LanguageId } from 'vs/editor/common/encodedTokenAttributes';

const CACHE_STACK_DEPTH = 5;
Expand Down Expand Up @@ -395,8 +396,9 @@ export class MonarchTokenizer implements languages.ITokenizationSupport {
private readonly _embeddedLanguages: { [languageId: string]: boolean };
public embeddedLoaded: Promise<void>;
private readonly _tokenizationRegistryListener: IDisposable;
private _maxTokenizationLineLength: number;

constructor(languageService: ILanguageService, standaloneThemeService: IStandaloneThemeService, languageId: string, lexer: monarchCommon.ILexer) {
constructor(languageService: ILanguageService, standaloneThemeService: IStandaloneThemeService, languageId: string, lexer: monarchCommon.ILexer, @IConfigurationService private readonly _configurationService: IConfigurationService) {
this._languageService = languageService;
this._standaloneThemeService = standaloneThemeService;
this._languageId = languageId;
Expand Down Expand Up @@ -424,6 +426,16 @@ export class MonarchTokenizer implements languages.ITokenizationSupport {
emitting = false;
}
});
this._maxTokenizationLineLength = this._configurationService.getValue<number>('editor.maxTokenizationLineLength', {
overrideIdentifier: this._languageId
});
this._configurationService.onDidChangeConfiguration(e => {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this listener be disposed at some point?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point! I'll do a follow-up PR.

if (e.affectsConfiguration('editor.maxTokenizationLineLength')) {
this._maxTokenizationLineLength = this._configurationService.getValue<number>('editor.maxTokenizationLineLength', {
overrideIdentifier: this._languageId
});
}
});
}

public dispose(): void {
Expand Down Expand Up @@ -468,12 +480,18 @@ export class MonarchTokenizer implements languages.ITokenizationSupport {
}

/**
 * Produces the classic (non-encoded) tokenization result for one line.
 *
 * When the line's length is at or above `_maxTokenizationLineLength`, the
 * Monarch lexer is not run and the result of `nullTokenize` for this
 * language id and the incoming state is returned instead.
 */
public tokenize(line: string, hasEOL: boolean, lineState: languages.IState): languages.TokenizationResult {
	// Keep the `>=` orientation: if the limit is not a number the comparison
	// is false and the line is tokenized normally.
	const reachesLimit = line.length >= this._maxTokenizationLineLength;
	if (reachesLimit) {
		return nullTokenize(this._languageId, lineState);
	}

	const collector = new MonarchClassicTokensCollector();
	const stateAfterLine = this._tokenize(line, hasEOL, <MonarchLineState>lineState, collector);
	return collector.finalize(stateAfterLine);
}

public tokenizeEncoded(line: string, hasEOL: boolean, lineState: languages.IState): languages.EncodedTokenizationResult {
if (line.length >= this._maxTokenizationLineLength) {
return nullTokenizeEncoded(this._languageService.languageIdCodec.encodeLanguageId(this._languageId), lineState);
}
const tokensCollector = new MonarchModernTokensCollector(this._languageService, this._standaloneThemeService.getColorTheme().tokenTheme);
const endLineState = this._tokenize(line, hasEOL, <MonarchLineState>lineState, tokensCollector);
return tokensCollector.finalize(endLineState);
Expand Down
59 changes: 50 additions & 9 deletions src/vs/editor/standalone/test/browser/monarch.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@ import { compile } from 'vs/editor/standalone/common/monarch/monarchCompile';
import { Token, TokenizationRegistry } from 'vs/editor/common/languages';
import { IMonarchLanguage } from 'vs/editor/standalone/common/monarch/monarchTypes';
import { DisposableStore } from 'vs/base/common/lifecycle';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
import { StandaloneConfigurationService } from 'vs/editor/standalone/browser/standaloneServices';

suite('Monarch', () => {

function createMonarchTokenizer(languageService: ILanguageService, languageId: string, language: IMonarchLanguage): MonarchTokenizer {
return new MonarchTokenizer(languageService, null!, languageId, compile(languageId, language));
function createMonarchTokenizer(languageService: ILanguageService, languageId: string, language: IMonarchLanguage, configurationService: IConfigurationService): MonarchTokenizer {
return new MonarchTokenizer(languageService, null!, languageId, compile(languageId, language), configurationService);
}

function getTokens(tokenizer: MonarchTokenizer, lines: string[]): Token[][] {
Expand All @@ -32,14 +34,15 @@ suite('Monarch', () => {
test('Ensure @rematch and nextEmbedded can be used together in Monarch grammar', () => {
const disposables = new DisposableStore();
const languageService = disposables.add(new LanguageService());
const configurationService = new StandaloneConfigurationService();
disposables.add(languageService.registerLanguage({ id: 'sql' }));
disposables.add(TokenizationRegistry.register('sql', createMonarchTokenizer(languageService, 'sql', {
tokenizer: {
root: [
[/./, 'token']
]
}
})));
}, configurationService)));
const SQL_QUERY_START = '(SELECT|INSERT|UPDATE|DELETE|CREATE|REPLACE|ALTER|WITH)';
const tokenizer = createMonarchTokenizer(languageService, 'test1', {
tokenizer: {
Expand All @@ -63,7 +66,7 @@ suite('Monarch', () => {
],
endStringWithSQL: [[/"""/, { token: 'string.quote', next: '@popall', nextEmbedded: '@pop', },]],
}
});
}, configurationService);

const lines = [
`mysql_query("""SELECT * FROM table_name WHERE ds = '<DATEID>'""")`,
Expand Down Expand Up @@ -106,6 +109,7 @@ suite('Monarch', () => {
});

test('microsoft/monaco-editor#1235: Empty Line Handling', () => {
const configurationService = new StandaloneConfigurationService();
const languageService = new LanguageService();
const tokenizer = createMonarchTokenizer(languageService, 'test', {
tokenizer: {
Expand All @@ -125,7 +129,7 @@ suite('Monarch', () => {
// No possible rule to detect an empty line and @pop?
],
},
});
}, configurationService);

const lines = [
`// This comment \\`,
Expand Down Expand Up @@ -163,6 +167,7 @@ suite('Monarch', () => {
});

test('microsoft/monaco-editor#2265: Exit a state at end of line', () => {
const configurationService = new StandaloneConfigurationService();
const languageService = new LanguageService();
const tokenizer = createMonarchTokenizer(languageService, 'test', {
includeLF: true,
Expand All @@ -179,7 +184,7 @@ suite('Monarch', () => {
[/[^\d]+/, '']
]
}
});
}, configurationService);

const lines = [
`PRINT 10 * 20`,
Expand Down Expand Up @@ -211,6 +216,7 @@ suite('Monarch', () => {
});

test('issue #115662: monarchCompile function need an extra option which can control replacement', () => {
const configurationService = new StandaloneConfigurationService();
const languageService = new LanguageService();

const tokenizer1 = createMonarchTokenizer(languageService, 'test', {
Expand All @@ -230,7 +236,7 @@ suite('Monarch', () => {
},
],
},
});
}, configurationService);

const tokenizer2 = createMonarchTokenizer(languageService, 'test', {
ignoreCase: false,
Expand All @@ -242,7 +248,7 @@ suite('Monarch', () => {
},
],
},
});
}, configurationService);

const lines = [
`@ham`
Expand All @@ -265,6 +271,7 @@ suite('Monarch', () => {
});

test('microsoft/monaco-editor#2424: Allow to target @@', () => {
const configurationService = new StandaloneConfigurationService();
const languageService = new LanguageService();

const tokenizer = createMonarchTokenizer(languageService, 'test', {
Expand All @@ -277,7 +284,7 @@ suite('Monarch', () => {
},
],
},
});
}, configurationService);

const lines = [
`@@`
Expand All @@ -292,4 +299,38 @@ suite('Monarch', () => {
languageService.dispose();
});

// Regression test: a line whose length reaches `editor.maxTokenizationLineLength`
// must not be run through the Monarch rules.
test('microsoft/monaco-editor#3025: Check maxTokenizationLineLength before tokenizing', async () => {
	const configurationService = new StandaloneConfigurationService();
	const languageService = new LanguageService();

	// With a limit of 4, "ham" (3 chars) is below the limit while "hamham" (6 chars) is not.
	await configurationService.updateValue('editor.maxTokenizationLineLength', 4);

	// Single-rule grammar: every occurrence of "ham" becomes a `ham` token.
	const hamGrammar: IMonarchLanguage = {
		tokenizer: {
			root: [
				{
					regex: /ham/,
					action: { token: 'ham' }
				},
			],
		},
	};
	const hamTokenizer = createMonarchTokenizer(languageService, 'test', hamGrammar, configurationService);

	const shortLine = 'ham'; // length 3, should be tokenized
	const longLine = 'hamham'; // length 6, should NOT be tokenized

	const tokensPerLine = getTokens(hamTokenizer, [shortLine, longLine]);
	const expectedPerLine = [
		[new Token(0, 'ham.test', 'test')],
		[new Token(0, '', 'test')],
	];
	assert.deepStrictEqual(tokensPerLine, expectedPerLine);

	languageService.dispose();
});

});