import {compileRuleIterator, createRuleTree, Rule, TokenHandler, TokenizerState} from './rules';

/**
 * The pure tokenization function compiled from a set of rules.
 *
 * @template Type The type of the token emitted by the tokenizer.
 * @template Stage The tokenizer stage type.
 * @template Context The context passed to the tokenizer.
 */
export interface Tokenizer<Type = unknown, Stage = void, Context = void> {
  /**
   * Reads tokens from the input in a non-streaming fashion.
   *
   * @param input The input string to tokenize.
   * @param handler The callbacks that are invoked when tokens are read from the string.
   * @param context The context that should be passed to readers and stage providers.
   * @param state The mutable state used by the tokenizer.
   * @returns The resulting state of the tokenizer.
   */
  (input: string, handler: TokenHandler<Type, Context>, context: Context, state?: TokenizerState<Stage>): TokenizerState<Stage>;

  /**
   * Reads tokens from the chunk in a streaming fashion. During streaming, {@link TokenHandler} is triggered only with
   * confirmed tokens. A token is confirmed when the subsequent token has been successfully read.
   *
   * ```ts
   * let state = tokenizer.write('foo', handler);
   * tokenizer.write('bar', handler, state);
   * tokenizer.end(handler, state);
   * ```
   *
   * @param chunk The input chunk to tokenize.
   * @param handler The callbacks that are invoked when tokens are read from the chunk.
   * @param state The mutable state returned by the previous {@link Tokenizer.write} call.
   * @param context The context that should be passed to readers and stage providers.
   * @returns The resulting state of the tokenizer.
   */
  write(chunk: string, handler: TokenHandler<Type, Context>, state: TokenizerState<Stage> | void, context: Context): TokenizerState<Stage>;

  /**
   * Reads the remaining tokens from the {@link TokenizerState.chunk}.
   *
   * @param handler The callbacks that are invoked when tokens are read from the chunk.
   * @param state The mutable state returned by the previous {@link Tokenizer.write} call.
   * @param context The context that should be passed to readers and stage providers.
   * @returns The resulting state of the tokenizer.
   */
  end(handler: TokenHandler<Type, Context>, state: TokenizerState<Stage>, context: Context): TokenizerState<Stage>;
}
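
// A worked example of the confirmation semantics above (illustrative, and an
// assumption about the rule set - say, words separated by spaces; not part of
// the original file):
//
//   write('foo ba') emits 'foo' only, since 'ba' may still grow in a later chunk;
//   write('r baz')  emits 'bar', and 'baz' becomes the unconfirmed tail;
//   end()           flushes 'baz', since no further input can extend it.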
/**
 * Creates a new pure tokenizer function.
 *
 * @param rules The list of rules that the tokenizer uses to read tokens from the input chunks.
 *
 * @template Type The type of tokens emitted by the tokenizer.
 * @template Context The context that rules may consume.
 */
export function createTokenizer<Type, Context = void>(rules: Rule<Type, void, Context>[]): Tokenizer<Type, void, Context>;

/**
 * Creates a new pure tokenizer function.
 *
 * @param rules The list of rules that the tokenizer uses to read tokens from the input chunks.
 * @param initialStage The initial stage from which tokenization starts.
 *
 * @template Type The type of tokens emitted by the tokenizer.
 * @template Stage The type of stages at which rules are applied.
 * @template Context The context that rules may consume.
 */
export function createTokenizer<Type, Stage, Context = void>(rules: Rule<Type, Stage, Context>[], initialStage: Stage): Tokenizer<Type, Stage, Context>;
export function createTokenizer(rules: Rule[], initialStage?: any) {
  const ruleIterator = compileRuleIterator(createRuleTree(rules));

  const tokenizer: Tokenizer = (input, handler, context, state) => {
    // Reuse the caller-provided state (for example, to resume from a particular
    // stage), otherwise start from scratch.
    if (state) {
      state.chunk = input;
      state.chunkOffset = state.offset = 0;
    } else {
      state = {stage: initialStage, chunk: input, chunkOffset: 0, offset: 0};
    }

    // `false` means non-streaming: all tokens are emitted, confirmed or not.
    ruleIterator(state, handler, context, false);
    return state;
  };
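
  // How the streaming state evolves across writes (illustrative numbers, assuming
  // the rule set confirmed the first 4 characters of the previous chunk; not part
  // of the original file):
  //
  //   after write('foo bar'):  {chunk: 'foo bar', chunkOffset: 0, offset: 4}
  //   write(' baz') then keeps the unconfirmed tail: chunk = 'bar baz',
  //   chunkOffset = 4, offset = 0, so absolute token offsets stay consistent.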
  tokenizer.write = (chunk, handler, state, context) => {
    if (state) {
      // Prepend the unconfirmed tail of the previous chunk to the new chunk and
      // advance chunkOffset, so absolute token offsets remain correct.
      state.chunk = state.chunk.slice(state.offset) + chunk;
      state.chunkOffset += state.offset;
      state.offset = 0;
    } else {
      state = {stage: initialStage, chunk, chunkOffset: 0, offset: 0};
    }

    // `true` means streaming: only confirmed tokens are emitted.
    ruleIterator(state, handler, context, true);
    return state;
  };
  tokenizer.end = (handler, state, context) => {
    // Flush the remaining unconfirmed tokens, since no more input is expected.
    ruleIterator(state, handler, context, false);
    return state;
  };

  return tokenizer;
}
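
// A minimal usage sketch (not part of the original file). The `rules` and
// `handler` values are placeholders: their exact shapes are defined by the
// sibling './rules' module, so treat this as an assumption about that API.
//
//   const tokenize = createTokenizer(rules);
//
//   // Non-streaming: the whole input is tokenized in one call.
//   tokenize(input, handler, undefined);
//
//   // Streaming: unconfirmed tokens are withheld until write/end confirms them.
//   let state = tokenize.write(chunk1, handler, undefined, undefined);
//   state = tokenize.write(chunk2, handler, state, undefined);
//   tokenize.end(handler, state, undefined);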