-
Notifications
You must be signed in to change notification settings - Fork 2.1k
/
regex.ts
122 lines (105 loc) · 3.35 KB
/
regex.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import { SerializedFields } from "../load/map_keys.js";
import {
BaseOutputParser,
OutputParserException,
} from "../schema/output_parser.js";
/**
 * Plain-object description of a regular expression, given as its pattern
 * source and optional flags, both as strings.
 */
export interface RegExpFields {
/** Pattern source; used as the first argument to `new RegExp`. */
pattern: string;
/** Optional flags string; used as the second argument to `new RegExp`. */
flags?: string;
}
/**
 * Interface for the fields required to create a RegexParser instance.
 */
export interface RegexParserFields {
/**
 * The regular expression to apply, given as a pattern string, a `RegExp`
 * instance, or a `{ pattern, flags }` object.
 */
regex: string | RegExp | RegExpFields;
/**
 * Keys of the parsed dictionary, in order: the key at index `i` receives
 * capture group `i + 1` of the match.
 */
outputKeys: string[];
/**
 * Key that receives the whole input text when the regex does not match
 * (every other output key is then set to the empty string). When omitted,
 * a failed match makes `parse` throw instead.
 */
defaultOutputKey?: string;
}
/**
 * Class to parse the output of an LLM call into a dictionary by applying a
 * regular expression and mapping its capture groups onto `outputKeys`.
 * @augments BaseOutputParser
 */
export class RegexParser extends BaseOutputParser<Record<string, string>> {
  static lc_name() {
    return "RegexParser";
  }

  lc_namespace = ["langchain", "output_parsers", "regex"];

  lc_serializable = true;

  /**
   * Exposes the `regex` value held in `this.lc_kwargs` as an attribute.
   */
  get lc_attributes(): SerializedFields | undefined {
    return {
      regex: this.lc_kwargs.regex,
    };
  }

  /** Regular expression applied to the text; the constructor always stores a `RegExp`-like value here. */
  regex: string | RegExp;

  /** Keys of the parsed dictionary; key `i` receives capture group `i + 1`. */
  outputKeys: string[];

  /** Key that receives the raw text when the regex does not match. */
  defaultOutputKey?: string;

  constructor(fields: RegexParserFields);

  constructor(
    regex: string | RegExp,
    outputKeys: string[],
    defaultOutputKey?: string
  );

  /**
   * Creates a parser either from a single fields object or from positional
   * `(regex, outputKeys, defaultOutputKey)` arguments.
   *
   * The fields object passed by the caller is never modified: a shallow copy
   * is built in which a `RegExp` instance is replaced by its
   * `{ pattern, flags }` description, and that copy is handed to the base
   * class constructor.
   * @param fields Fields object, or the regex when using the positional form.
   * @param outputKeys Output keys (positional form only).
   * @param defaultOutputKey Fallback key (positional form only).
   */
  constructor(
    fields: string | RegExp | RegexParserFields,
    outputKeys?: string[],
    defaultOutputKey?: string
  ) {
    // Fold the positional overload into the object form.
    let given: RegexParserFields;
    // eslint-disable-next-line no-instanceof/no-instanceof
    if (typeof fields === "string" || fields instanceof RegExp) {
      // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
      given = { regex: fields, outputKeys: outputKeys!, defaultOutputKey };
    } else {
      given = fields;
    }

    // Describe a RegExp instance as plain strings (presumably so the kwargs
    // passed to the base class stay serializable — confirm against the base
    // class).
    const serializableRegex: string | RegExpFields =
      // eslint-disable-next-line no-instanceof/no-instanceof
      given.regex instanceof RegExp
        ? { pattern: given.regex.source, flags: given.regex.flags }
        : given.regex;

    // Copy instead of assigning onto `given`, so the caller's object keeps
    // the RegExp it passed in.
    const normalized: RegexParserFields = {
      ...given,
      regex: serializableRegex,
    };

    super(normalized);

    if (typeof serializableRegex === "string") {
      this.regex = new RegExp(serializableRegex);
    } else if ("pattern" in serializableRegex) {
      this.regex = new RegExp(
        serializableRegex.pattern,
        serializableRegex.flags
      );
    } else {
      // Not reachable according to the declared types; kept so that any other
      // regex-like object that slipped past the `instanceof` check is still
      // used as is.
      this.regex = serializableRegex;
    }
    this.outputKeys = normalized.outputKeys;
    this.defaultOutputKey = normalized.defaultOutputKey;
  }

  _type() {
    return "regex_parser";
  }

  /**
   * Parses the given text using the regex pattern and returns a dictionary
   * with the parsed output. If the regex pattern does not match the text
   * and no defaultOutputKey is provided, throws an OutputParserException.
   *
   * On a match, the key at position `i` of `outputKeys` receives capture
   * group `i + 1`. Without a match, the `defaultOutputKey` entry receives the
   * whole text and every other key is set to the empty string.
   *
   * NOTE: the groups are read from `String.prototype.match`, which returns
   * whole matches rather than capture groups when the regex carries the
   * global (`g`) flag.
   * @param text The text to be parsed.
   * @returns A dictionary with the parsed output.
   * @throws OutputParserException if the text does not match and no
   * defaultOutputKey is set.
   */
  async parse(text: string): Promise<Record<string, string>> {
    const parsed: Record<string, string> = {};
    const match = text.match(this.regex);

    if (match) {
      // Index 0 of the match is the full match, so groups start at 1.
      this.outputKeys.forEach((key, index) => {
        parsed[key] = match[index + 1];
      });
      return parsed;
    }

    if (this.defaultOutputKey === undefined) {
      throw new OutputParserException(`Could not parse output: ${text}`, text);
    }

    for (const key of this.outputKeys) {
      parsed[key] = key === this.defaultOutputKey ? text : "";
    }
    return parsed;
  }

  /**
   * Returns a string with instructions on how the LLM output should be
   * formatted to match the regex pattern.
   * @returns A string with formatting instructions.
   */
  getFormatInstructions(): string {
    return `Your response should match the following regex: ${this.regex}`;
  }
}