-
Notifications
You must be signed in to change notification settings - Fork 2.2k
/
chatgpt.ts
109 lines (97 loc) · 2.84 KB
/
chatgpt.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import { Document } from "@langchain/core/documents";
import { TextLoader } from "./text.js";
import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
// Module-load call that passes this entrypoint's old name and the package that
// now hosts it to the imported helper.
// NOTE(review): presumably this emits a migration/deprecation warning — confirm
// in ../../util/entrypoint_deprecation.js. The `#__PURE__` annotation tells
// bundlers/minifiers the call may be dropped when its result is unused.
/* #__PURE__ */ logVersion020MigrationWarning({
oldEntrypointName: "document_loaders/fs/chatgpt",
newPackageName: "@langchain/community",
});
/**
 * Shape of a single message as read from the parsed export JSON.
 *
 * Only the fields this file actually reads are declared.
 */
interface ChatGPTMessage {
// Sender information; `role` is printed verbatim in the transcript and compared
// against "system" when filtering.
author: {
role: string;
};
// Message body. Only the first entry of `parts` is read by this file.
content: {
parts: string[];
};
// Multiplied by 1000 before being passed to `Date`, i.e. treated as seconds
// since the Unix epoch.
create_time: number;
}
/**
 * Shape of one conversation log as read from the parsed export JSON.
 */
interface ChatGPTLog {
// Conversation title; prefixed to every formatted message.
title: string;
// Entries whose values are iterated in `Object.values` order; the keys are not
// read in this file (presumably node ids — verify against the export format).
mapping: Record<string, { message: ChatGPTMessage }>;
}
/**
 * Formats one ChatGPT message as a human-readable transcript entry.
 *
 * The entry has the shape
 * `<title> - <role> on <YYYY-MM-DD HH:MM:SS>: <text>` followed by a blank line.
 * The timestamp is rendered in UTC. Only the first element of
 * `message.content.parts` is used as the text.
 *
 * Malformed messages are tolerated rather than aborting the whole load:
 * - a missing `author` is reported as `unknown`;
 * - a missing `content`/`parts`, or an empty `parts`, yields an empty text;
 * - a missing or non-numeric `create_time` is reported as `unknown date`.
 *
 * @param message - Message to format; a falsy value produces an empty string.
 * @param title - Title of the conversation the message belongs to.
 * @returns The formatted entry, or `""` when there is no message.
 */
function concatenateRows(message: ChatGPTMessage, title: string): string {
  if (!message) {
    return "";
  }

  const sender = message.author ? message.author.role : "unknown";

  // Messages without text content may lack `content.parts` entirely; fall back
  // to an empty body instead of throwing or printing the string "undefined".
  const parts =
    message.content && Array.isArray(message.content.parts)
      ? message.content.parts
      : [];
  const firstPart = parts[0];
  const text = firstPart == null ? "" : firstPart;

  // `create_time` is expressed in seconds. `toISOString()` throws a RangeError
  // on an invalid date, and `null * 1000` would silently become the epoch, so
  // both cases are reported as an unknown date instead.
  const millis =
    message.create_time == null ? NaN : Number(message.create_time) * 1000;
  const timestamp = new Date(millis);
  const date = Number.isNaN(timestamp.getTime())
    ? "unknown date"
    : timestamp.toISOString().slice(0, 19).replace("T", " ");

  return `${title} - ${sender} on ${date}: ${text}\n\n`;
}
/**
 * Loads a ChatGPT conversation export (a JSON array of conversation logs) and
 * produces one `Document` per conversation.
 *
 * Each document's `pageContent` is the conversation's messages, formatted by
 * `concatenateRows` and joined together. Its metadata carries the source
 * information plus a 1-based `logIndex`.
 */
export class ChatGPTLoader extends TextLoader {
  /** Number of leading conversations to load; `0` or less loads all of them. */
  public numLogs: number;

  /**
   * @param filePathOrBlob - Path of the export file, or a Blob holding its contents.
   * @param numLogs - Number of leading conversations to keep. Values `<= 0`
   *   (the default is `0`) keep every conversation.
   */
  constructor(filePathOrBlob: string | Blob, numLogs = 0) {
    super(filePathOrBlob);
    this.numLogs = numLogs;
  }

  /**
   * Parses the raw export text into one transcript string per conversation.
   *
   * @param raw - JSON text of the export.
   * @returns One formatted transcript per (kept) conversation, in input order.
   * @throws Error with message "Failed to parse JSON" when `raw` is not valid JSON.
   * @throws TypeError when the parsed JSON is not an array.
   */
  protected async parse(raw: string): Promise<string[]> {
    let data: unknown;
    try {
      data = JSON.parse(raw);
    } catch (e) {
      console.error(e);
      throw new Error("Failed to parse JSON");
    }

    // Fail with a descriptive message instead of the opaque
    // "slice/map is not a function" TypeError a non-array payload used to cause.
    if (!Array.isArray(data)) {
      throw new TypeError(
        "Expected the ChatGPT export to be a JSON array of conversation logs"
      );
    }

    const logs: ChatGPTLog[] =
      this.numLogs > 0 ? data.slice(0, this.numLogs) : data;

    return logs.map((log) =>
      Object.values(log.mapping)
        .filter((node, idx) => {
          // Only the very first entry is dropped, and only when it is a system
          // message. The message may be missing (concatenateRows tolerates
          // that), so it must not be dereferenced unguarded here.
          if (idx !== 0) {
            return true;
          }
          const first = node.message;
          return !(first && first.author && first.author.role === "system");
        })
        .map((node) => concatenateRows(node.message, log.title))
        .join("")
    );
  }

  /**
   * Reads the export from the configured file path or Blob and wraps every
   * parsed conversation in a `Document`.
   *
   * @returns One document per conversation. Metadata contains `source` (the
   *   file path, or `"blob"` together with `blobType`) and a 1-based `logIndex`.
   * @throws Error with message "Failed to read file" or "Failed to read blob"
   *   when the input cannot be read, plus any error raised by `parse`.
   */
  public async load(): Promise<Document[]> {
    let text: string;
    let metadata: Record<string, string>;

    if (typeof this.filePathOrBlob === "string") {
      const { readFile } = await TextLoader.imports();
      try {
        text = await readFile(this.filePathOrBlob, "utf8");
      } catch (e) {
        console.error(e);
        throw new Error("Failed to read file");
      }
      metadata = { source: this.filePathOrBlob };
    } else {
      try {
        text = await this.filePathOrBlob.text();
      } catch (e) {
        console.error(e);
        throw new Error("Failed to read blob");
      }
      metadata = { source: "blob", blobType: this.filePathOrBlob.type };
    }

    const parsed = await this.parse(text);
    return parsed.map(
      (pageContent, i) =>
        new Document({
          pageContent,
          metadata: {
            ...metadata,
            logIndex: i + 1,
          },
        })
    );
  }
}