CondenseQuestionChatEngine.ts
import type { ChatHistory } from "../../ChatHistory.js";
import { getHistory } from "../../ChatHistory.js";
import type { CondenseQuestionPrompt } from "../../Prompt.js";
import {
defaultCondenseQuestionPrompt,
messagesToHistoryStr,
} from "../../Prompt.js";
import type { Response } from "../../Response.js";
import type { ServiceContext } from "../../ServiceContext.js";
import { llmFromSettingsOrContext } from "../../Settings.js";
import { wrapEventCaller } from "../../internal/context/EventCaller.js";
import type { ChatMessage, LLM } from "../../llm/index.js";
import { extractText, streamReducer } from "../../llm/utils.js";
import { PromptMixin } from "../../prompts/index.js";
import type { QueryEngine } from "../../types.js";
import type {
ChatEngine,
ChatEngineParamsNonStreaming,
ChatEngineParamsStreaming,
} from "./types.js";
/**
 * CondenseQuestionChatEngine is used in conjunction with an Index (for example, a VectorStoreIndex).
 * It performs two steps for each user chat message: first, it condenses the message
 * together with the previous chat history into a standalone question that carries its own context.
 * Then, it queries the underlying Index with that condensed question and returns
 * the response.
 * CondenseQuestionChatEngine performs well when the input consists primarily of questions about
 * the underlying data. It performs less well when the messages are not questions about the
 * data, or depend heavily on references to the previous conversation.
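 *
 * @example
 * A minimal usage sketch; it assumes an `index` (e.g. a VectorStoreIndex) has
 * already been built elsewhere and exposes `asQueryEngine()`:
 * ```ts
 * const chatEngine = new CondenseQuestionChatEngine({
 *   queryEngine: index.asQueryEngine(),
 *   chatHistory: [],
 * });
 *
 * const response = await chatEngine.chat({
 *   message: "What does the document say about pricing?",
 * });
 * console.log(response.response);
 * ```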
*/
export class CondenseQuestionChatEngine
extends PromptMixin
implements ChatEngine
{
queryEngine: QueryEngine;
chatHistory: ChatHistory;
llm: LLM;
condenseMessagePrompt: CondenseQuestionPrompt;
constructor(init: {
queryEngine: QueryEngine;
chatHistory: ChatMessage[];
serviceContext?: ServiceContext;
condenseMessagePrompt?: CondenseQuestionPrompt;
}) {
super();
this.queryEngine = init.queryEngine;
    this.chatHistory = getHistory(init.chatHistory);
    this.llm = llmFromSettingsOrContext(init.serviceContext);
    this.condenseMessagePrompt =
      init.condenseMessagePrompt ?? defaultCondenseQuestionPrompt;
}
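
  // PromptMixin hooks: expose the condense prompt so callers can inspect it
  // and override it at runtime.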
protected _getPrompts(): { condenseMessagePrompt: CondenseQuestionPrompt } {
return {
condenseMessagePrompt: this.condenseMessagePrompt,
};
}
protected _updatePrompts(promptsDict: {
condenseMessagePrompt: CondenseQuestionPrompt;
}): void {
if (promptsDict.condenseMessagePrompt) {
this.condenseMessagePrompt = promptsDict.condenseMessagePrompt;
}
}
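
  // Rewrite the latest user message into a standalone question by combining it
  // with the serialized chat history via the condense prompt.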
private async condenseQuestion(chatHistory: ChatHistory, question: string) {
const chatHistoryStr = messagesToHistoryStr(
await chatHistory.requestMessages(),
);
return this.llm.complete({
prompt: this.condenseMessagePrompt({
question: question,
chatHistory: chatHistoryStr,
}),
});
}
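
  // Two-step chat: condense the incoming message against the history, then
  // answer the condensed question with the underlying query engine.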
chat(params: ChatEngineParamsStreaming): Promise<AsyncIterable<Response>>;
chat(params: ChatEngineParamsNonStreaming): Promise<Response>;
@wrapEventCaller
async chat(
params: ChatEngineParamsStreaming | ChatEngineParamsNonStreaming,
): Promise<Response | AsyncIterable<Response>> {
const { message, stream } = params;
const chatHistory = params.chatHistory
? getHistory(params.chatHistory)
: this.chatHistory;
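    // Condense first, so the history used for condensing does not yet include
    // the message being asked.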
const condensedQuestion = (
await this.condenseQuestion(chatHistory, extractText(message))
).text;
chatHistory.addMessage({ content: message, role: "user" });
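    // Streaming path: accumulate the streamed chunks so the full response can
    // be recorded in the chat history once the stream finishes.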
if (stream) {
const stream = await this.queryEngine.query({
query: condensedQuestion,
stream: true,
});
return streamReducer({
stream,
initialValue: "",
        reducer: (accumulator, part) => accumulator + part.response,
finished: (accumulator) => {
chatHistory.addMessage({ content: accumulator, role: "assistant" });
},
});
}
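    // Non-streaming path: run the query once and record the full response.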
const response = await this.queryEngine.query({
query: condensedQuestion,
});
chatHistory.addMessage({ content: response.response, role: "assistant" });
return response;
}
reset() {
this.chatHistory.reset();
}
}