-
Notifications
You must be signed in to change notification settings - Fork 312
/
GlobalsHelper.ts
49 lines (40 loc) · 1.32 KB
/
GlobalsHelper.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import { encodingForModel } from "js-tiktoken";
/**
 * Identifiers of the tokenizer encodings that callers can ask for by name.
 */
export enum Tokenizers {
  /** The `cl100k_base` encoding. */
  CL100K_BASE = "cl100k_base",
}
/**
 * @internal Helper class singleton
 *
 * Builds one tokenizer from js-tiktoken when constructed and hands out its
 * `encode` / `decode` functions on request.
 */
class GlobalsHelper {
  /** Encode/decode pair backed by the encoding created in the constructor. */
  defaultTokenizer: {
    encode: (text: string) => Uint32Array;
    decode: (tokens: Uint32Array) => string;
  };

  constructor() {
    const encoding = encodingForModel("text-embedding-ada-002"); // cl100k_base

    this.defaultTokenizer = {
      // Wrap the token ids in a Uint32Array to match the declared signature.
      encode: (text: string) => new Uint32Array(encoding.encode(text)),
      // The underlying decoder is handed a plain array of token ids and
      // already yields a string, so that string is returned as-is; pushing it
      // through a TextEncoder/TextDecoder round trip would only redo UTF-8
      // work on text that is already decoded.
      decode: (tokens: Uint32Array) => encoding.decode(Array.from(tokens)),
    };
  }

  /**
   * Rejects any explicitly requested encoding other than `cl100k_base`.
   *
   * @param encoding - Requested encoding; a falsy value means "use the default".
   * @throws Error when `encoding` is set to an unsupported value.
   */
  private assertSupported(encoding?: Tokenizers): void {
    if (encoding && encoding !== Tokenizers.CL100K_BASE) {
      throw new Error(`Tokenizer encoding ${encoding} not yet supported`);
    }
  }

  /**
   * Returns the function that turns text into token ids.
   *
   * @param encoding - Optional encoding name; only `cl100k_base` is accepted.
   * @returns The default tokenizer's `encode`, bound to the tokenizer object.
   * @throws Error when `encoding` is set to an unsupported value.
   */
  tokenizer(encoding?: Tokenizers): (text: string) => Uint32Array {
    this.assertSupported(encoding);
    return this.defaultTokenizer.encode.bind(this.defaultTokenizer);
  }

  /**
   * Returns the function that turns token ids back into text.
   *
   * @param encoding - Optional encoding name; only `cl100k_base` is accepted.
   * @returns The default tokenizer's `decode`, bound to the tokenizer object.
   * @throws Error when `encoding` is set to an unsupported value.
   */
  tokenizerDecoder(encoding?: Tokenizers): (tokens: Uint32Array) => string {
    this.assertSupported(encoding);
    return this.defaultTokenizer.decode.bind(this.defaultTokenizer);
  }
}
/**
 * Shared `GlobalsHelper` instance, constructed once when this module is
 * evaluated; the constructor builds the default tokenizer at that point.
 */
export const globalsHelper = new GlobalsHelper();