forked from mongodb/chatbot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
conversationsRouter.ts
351 lines (317 loc) · 11.3 KB
/
conversationsRouter.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
import { Request, Router, RequestHandler, Response } from "express";
import { rateLimit, Options as RateLimitOptions } from "express-rate-limit";
import slowDown, { Options as SlowDownOptions } from "express-slow-down";
import validateRequestSchema from "../../middleware/validateRequestSchema";
import { ChatLlm, SystemPrompt } from "../../services/ChatLlm";
import {
ConversationCustomData,
ConversationsService,
} from "../../services/ConversationsService";
import {
CommentMessageRequest,
makeCommentMessageRoute,
} from "./commentMessage";
import { RateMessageRequest, makeRateMessageRoute } from "./rateMessage";
import {
CreateConversationRequest,
makeCreateConversationRoute,
} from "./createConversation";
import {
AddMessageRequest,
makeAddMessageToConversationRoute,
} from "./addMessageToConversation";
import { requireRequestOrigin } from "../../middleware/requireRequestOrigin";
import { NextFunction, ParamsDictionary } from "express-serve-static-core";
import { requireValidIpAddress } from "../../middleware";
import {
FilterPreviousMessages,
GenerateUserPromptFunc,
} from "../../processors";
import {
GetConversationRequest,
makeGetConversationRoute,
} from "./getConversation";
/**
Configuration for rate limiting on the /conversations/* routes.
Every field is optional. The values provided here are spread over the defaults
that {@link makeConversationsRouter} passes to the rate limit and slow down
middleware, so any option set here replaces the default with the same name.
*/
export interface ConversationsRateLimitConfig {
/**
Configuration for rate limiting on ALL /conversations/* routes.
The router's defaults are a 5 minute window and a maximum of 5000 requests.
*/
routerRateLimitConfig?: Partial<RateLimitOptions>;
/**
Configuration for rate limiting on the POST /conversations/:conversationId/messages route.
Since this is the most "expensive" route as it calls the LLM,
it could be more restrictive than the global rate limit.
The router's defaults are a 5 minute window and a maximum of 2500 requests.
*/
addMessageRateLimitConfig?: Partial<RateLimitOptions>;
/**
Configuration for slow down on ALL /conversations/* routes.
The router's defaults are a 1 minute window, `delayAfter: 20`, and `delayMs: 500`.
*/
routerSlowDownConfig?: Partial<SlowDownOptions>;
/**
Configuration for slow down on the POST /conversations/:conversationId/messages route.
Since this is the most "expensive" route as it calls the LLM,
it could be more restrictive than the global slow down.
The router's defaults are a 1 minute window, `delayAfter: 10`, and `delayMs: 1500`.
*/
addMessageSlowDownConfig?: Partial<SlowDownOptions>;
}
/**
Function that produces custom data to persist to the database.
The same function type is used for the conversation-level and the
message-level custom data options of {@link ConversationsRouterParams}.
Has access to the Express.js request and response plus the {@link ConversationsRouterLocals}
from the {@link Response.locals} object.
@param request - The incoming Express.js request.
@param response - The Express.js response, whose `locals` are typed as {@link ConversationsRouterLocals}.
@returns Promise that resolves to the custom data.
*/
export type AddCustomDataFunc = (
request: Request,
response: ConversationsRouterResponse
) => Promise<ConversationCustomData>;
/**
Shape of {@link Response.locals} for requests handled by the conversations router.
It exposes the app's {@link ConversationsService}, which is useful
if you want to do authentication or dynamic data validation in middleware.
*/
export interface ConversationsRouterLocals {
/**
The app's {@link ConversationsService}.
*/
conversations: ConversationsService;
/**
Free-form data that middleware can attach while handling a request.
{@link makeConversationsRouter} resets this to an empty object
before any other middleware runs.
*/
customData: Record<string, unknown>;
}
/**
Express.js Response whose {@link Response.locals} object
is typed as {@link ConversationsRouterLocals}.
The response body type is left as `any`.
*/
export type ConversationsRouterResponse = Response<
// eslint-disable-next-line @typescript-eslint/no-explicit-any
any,
ConversationsRouterLocals
>;
/**
Type of the middleware that can be put in front of all the routes in the conversationsRouter.
This middleware is useful for things like authentication, data validation, etc.
Its response `locals` are typed as {@link ConversationsRouterLocals}, so it can
reach the app's {@link ConversationsService} via {@link Response.locals}
([docs](https://expressjs.com/en/api.html#res.locals)).
You can use the locals in other middleware or persist them when you create the conversation
with the `POST /conversations` endpoint with the {@link AddCustomDataFunc}.
Route params use Express's `ParamsDictionary`; the response body, request body,
and query types are left as `any`.
*/
export type ConversationsMiddleware = RequestHandler<
ParamsDictionary,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
any,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
any,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
any,
ConversationsRouterLocals
>;
/**
Configuration for the /conversations/* routes.
Passed to {@link makeConversationsRouter}.
*/
export interface ConversationsRouterParams {
/**
LLM handed to the route that adds a message to a conversation.
*/
llm: ChatLlm;
/**
Service exposed on `res.locals.conversations` and handed to every route in the router.
*/
conversations: ConversationsService;
/**
System prompt handed to the route that creates a conversation.
*/
systemPrompt: SystemPrompt;
/**
Function to generate the user prompt sent to the {@link ChatLlm}.
You can perform any preprocessing of the user's message
including retrieval augmented generation here.
*/
generateUserPrompt?: GenerateUserPromptFunc;
/**
Maximum number of characters in user input.
Server returns 400 error if user input is longer than this.
*/
maxInputLengthCharacters?: number;
/**
Function to filter which previous messages are sent to the {@link ChatLlm}.
For example, you may only want to send the system prompt to the LLM
with the user message or the system prompt and X prior messages.
Defaults to sending only the system prompt.
*/
filterPreviousMessages?: FilterPreviousMessages;
/**
Maximum number of user-sent messages in a conversation.
Server returns 400 error if user tries to add a message to a conversation
that has this many messages.
*/
maxUserMessagesInConversation?: number;
/**
Overrides for the router's default rate limit and slow down settings.
See {@link ConversationsRateLimitConfig}.
*/
rateLimitConfig?: ConversationsRateLimitConfig;
/**
Middleware to put in front of all the routes in the conversationsRouter.
You can use this to do things like authentication, data validation, etc.
If you want the middleware to run only on certain routes,
you can add conditional logic inside the middleware. For example:
```ts
const someMiddleware: ConversationsMiddleware = (req, res, next) => {
if (req.path === "/conversations") {
// Do something
}
next();
}
```
Defaults to `[requireValidIpAddress(), requireRequestOrigin()]`.
Passing your own array replaces this default rather than extending it.
The middleware runs before the router's rate limit and slow down middleware.
*/
middleware?: ConversationsMiddleware[];
/**
Function that takes the request + response and returns any custom data you want to include
in the {@link Conversation} persisted to the database.
For example, you might want to store the user's email address with the conversation.
The custom data is persisted to the database with the Conversation in the
{@link Conversation.customData} field.
Defaults to a function that returns the origin from `res.locals.customData.origin`
together with the request IP, or `undefined` if no origin is set.
*/
createConversationCustomData?: AddCustomDataFunc;
/**
Function that takes the request + response and returns any custom data you want to include
in the {@link Message} persisted to the database.
For example, you might want to store details about what LLM was used to generate the response.
The custom data is persisted to the database with the `Message` in the
{@link Message.customData} field inside of the {@link Conversation.messages} array.
Defaults to a function that returns the origin from `res.locals.customData.origin`,
or `undefined` if no origin is set.
*/
addMessageToConversationCustomData?: AddCustomDataFunc;
/**
Maximum number of characters allowed in a user's comment on an assistant {@link Message}.
If not specified, user comments may be of any length.
*/
maxUserCommentLength?: number;
}
/**
Body passed as the `message` option to the rate limiters
created in {@link makeConversationsRouter}.
*/
export const rateLimitResponse = {
error: "Too many requests, please try again later.",
};
/**
  Returns the request's IP address.
  Passed as the `keyGenerator` option to the rate limit and slow down middleware.
  @param request - The incoming Express.js request.
  @returns The value of `request.ip`.
  @throws Error if `request.ip` is missing or empty.
 */
function keyGenerator(request: Request) {
  const { ip } = request;
  if (ip) {
    return ip;
  }
  throw new Error("Request IP is not defined");
}
/**
  Default custom data for a new conversation.
  Resolves to the origin stored on `res.locals.customData.origin` together with
  the request IP, or to `undefined` when no origin has been set.
 */
const addOriginAndIpToCustomData: AddCustomDataFunc = async (req, res) => {
  const { origin } = res.locals.customData;
  if (!origin) {
    return undefined;
  }
  return { origin, ip: req.ip };
};
/**
  Default custom data for a message added to a conversation.
  Resolves to the origin stored on `res.locals.customData.origin`,
  or to `undefined` when no origin has been set.
 */
const addOriginToCustomData: AddCustomDataFunc = async (_, res) => {
  const { origin } = res.locals.customData;
  if (!origin) {
    return undefined;
  }
  return { origin };
};
/**
  Constructor function to make the /conversations/* Express.js router.

  Handlers are registered in this order:
  1. A handler that puts `conversations` and an empty `customData` object on `res.locals`.
  2. The provided `middleware`, in array order.
  3. The router-wide rate limit, then the router-wide slow down.
  4. The route handlers.

  @returns The configured Express.js router.
 */
export function makeConversationsRouter({
  llm,
  conversations,
  systemPrompt,
  maxInputLengthCharacters,
  maxUserMessagesInConversation,
  filterPreviousMessages,
  rateLimitConfig,
  generateUserPrompt,
  middleware = [requireValidIpAddress(), requireRequestOrigin()],
  createConversationCustomData = addOriginAndIpToCustomData,
  addMessageToConversationCustomData = addOriginToCustomData,
  maxUserCommentLength,
}: ConversationsRouterParams) {
  const router = Router();

  const fiveMinutesMs = 5 * 60 * 1000;
  const oneMinuteMs = 60 * 1000;

  // Options common to both rate limiters. Each limiter adds its own `max`
  // and then applies the caller's overrides on top.
  const sharedRateLimitOptions: Partial<RateLimitOptions> = {
    windowMs: fiveMinutesMs,
    standardHeaders: "draft-7", // draft-6: RateLimit-* headers; draft-7: combined RateLimit header
    legacyHeaders: true, // X-RateLimit-* headers
    message: rateLimitResponse,
    keyGenerator,
  };

  // Make the conversations service and a fresh customData object available
  // on the response locals for every later middleware and route.
  const attachLocals: RequestHandler = (_req, res, next) => {
    res.locals.conversations = conversations;
    res.locals.customData = {};
    next();
  };
  router.use(attachLocals);

  // Caller-supplied middleware runs next, in the order given.
  for (const handler of middleware ?? []) {
    router.use(handler);
  }

  // Rate limit applied to every request that reaches the router.
  const routerRateLimiter = rateLimit({
    ...sharedRateLimitOptions,
    max: 5000,
    ...rateLimitConfig?.routerRateLimitConfig,
  });
  router.use(routerRateLimiter);

  // Slow down applied to every request that reaches the router.
  const routerSlowDown = slowDown({
    windowMs: oneMinuteMs,
    delayAfter: 20,
    delayMs: 500,
    keyGenerator,
    ...rateLimitConfig?.routerSlowDownConfig,
  });
  router.use(routerSlowDown);

  // POST / : create a new conversation.
  const createConversationRoute = makeCreateConversationRoute({
    conversations,
    createConversationCustomData,
    systemPrompt,
  });
  router.post(
    "/",
    validateRequestSchema(CreateConversationRequest),
    createConversationRoute
  );

  // The add-message route calls the LLM, so its default limits are stricter
  // than the router-wide ones (2500 instead of 5000 requests per window).
  const addMessageRateLimiter = rateLimit({
    ...sharedRateLimitOptions,
    max: 2500,
    ...rateLimitConfig?.addMessageRateLimitConfig,
  });

  // Likewise a lower `delayAfter` and a higher `delayMs` than the router-wide slow down.
  const addMessageSlowDownHandler = slowDown({
    windowMs: oneMinuteMs,
    delayAfter: 10,
    delayMs: 1500,
    keyGenerator,
    ...rateLimitConfig?.addMessageSlowDownConfig,
  });

  // POST /:conversationId/messages : add a user message and get the LLM response.
  const addMessageRoute = makeAddMessageToConversationRoute({
    conversations,
    llm,
    maxInputLengthCharacters,
    maxUserMessagesInConversation,
    addMessageToConversationCustomData,
    generateUserPrompt,
    filterPreviousMessages,
  });
  router.post(
    "/:conversationId/messages",
    addMessageRateLimiter,
    addMessageSlowDownHandler,
    validateRequestSchema(AddMessageRequest),
    addMessageRoute
  );

  // GET /:conversationId : fetch a conversation by its ID.
  const getConversationRoute = makeGetConversationRoute({ conversations });
  router.get(
    "/:conversationId",
    validateRequestSchema(GetConversationRequest),
    getConversationRoute
  );

  // POST /:conversationId/messages/:messageId/rating : rate a message.
  const rateMessageRoute = makeRateMessageRoute({ conversations });
  router.post(
    "/:conversationId/messages/:messageId/rating",
    validateRequestSchema(RateMessageRequest),
    rateMessageRoute
  );

  // POST /:conversationId/messages/:messageId/comment : comment on a message.
  const commentMessageRoute = makeCommentMessageRoute({
    conversations,
    maxCommentLength: maxUserCommentLength,
  });
  router.post(
    "/:conversationId/messages/:messageId/comment",
    validateRequestSchema(CommentMessageRequest),
    commentMessageRoute
  );

  return router;
}