-
Notifications
You must be signed in to change notification settings - Fork 2.2k
/
confluence.ts
241 lines (213 loc) · 6.37 KB
/
confluence.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
import { htmlToText } from "html-to-text";
import { Document } from "@langchain/core/documents";
import { BaseDocumentLoader } from "../base.js";
import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
// Runs once when this module is evaluated, handing the deprecation helper the
// old entrypoint name and the package name it should report.
// NOTE(review): presumably this logs a warning steering users to
// "@langchain/community" — confirm in util/entrypoint_deprecation.
// The `#__PURE__` annotation tells bundlers/minifiers the call may be treated
// as side-effect free; it must stay directly in front of the call expression.
/* #__PURE__ */ logVersion020MigrationWarning({
oldEntrypointName: "document_loaders/web/confluence",
newPackageName: "@langchain/community",
});
/**
 * Constructor parameters for ConfluencePagesLoader.
 *
 * Authentication is optional: when `personalAccessToken` is set it is used;
 * otherwise `username` and `accessToken` are used together; when neither is
 * available the loader sends no Authorization header.
 */
export interface ConfluencePagesLoaderParams {
/**
 * Prefix of every REST request (`${baseUrl}/rest/api/content`) and of the
 * page URL stored in each document's metadata.
 */
baseUrl: string;
/** Sent as the `spaceKey` query parameter and embedded in each page URL. */
spaceKey: string;
/**
 * User name for HTTP Basic authentication. Only used when `accessToken` is
 * also set and `personalAccessToken` is not.
 */
username?: string;
/**
 * Secret paired with `username`; the two are joined as
 * `username:accessToken` and base64-encoded into a `Basic` header.
 */
accessToken?: string;
/**
 * Token sent as an `Authorization: Bearer …` header. Takes precedence over
 * `username`/`accessToken` when set.
 */
personalAccessToken?: string;
/**
 * Value of the `limit` query parameter for each request. The loader defaults
 * it to 25; `load({ limit })` can override it per call.
 */
limit?: number;
/**
 * Value of the `expand` query parameter. The loader defaults it to
 * "body.storage,version".
 */
expand?: string;
}
/**
 * Shape of a single page entry as the loader reads it from the `results`
 * array of a Confluence content response.
 */
export interface ConfluencePage {
/** Page identifier; copied to metadata and used to build the page URL. */
id: string;
/** Page title; copied to document metadata. */
title: string;
/** Content type reported by the API; copied to document metadata. */
type: string;
/**
 * Page body. The loader reads `body.storage.value` and converts it from
 * HTML to plain text.
 * NOTE(review): presumably only present when `expand` includes
 * "body.storage" — confirm against the Confluence API.
 */
body: {
storage: {
value: string;
};
};
/** Page status reported by the API; copied to document metadata. */
status: string;
/**
 * Version details; optional, and read with optional chaining by the loader.
 * Mapped to the `version`, `updated_at` and `updated_by` metadata fields.
 */
version?: {
/** Version number; stored as `version` in metadata. */
number: number;
/** Stored as `updated_at` in metadata; the string format is not established here. */
when: string;
by: {
/** Stored as `updated_by` in metadata. */
displayName: string;
};
};
}
/**
 * Subset of the Confluence content-listing response that the loader uses.
 */
export interface ConfluenceAPIResponse {
/**
 * Number of results in this response. The loader adds it to the `start`
 * offset to request the next batch and stops paginating when it is 0.
 */
size: number;
/** Pages returned in this batch. */
results: ConfluencePage[];
}
/**
 * Document loader that reads the pages of a Confluence space through the
 * REST content endpoint and converts each page into a Document.
 *
 * Authentication: a personal access token is sent as a `Bearer` header;
 * otherwise a username/access-token pair is sent as a `Basic` header;
 * otherwise no Authorization header is sent.
 * @example
 * ```typescript
 * const loader = new ConfluencePagesLoader({
 *   baseUrl: "https://example.atlassian.net/wiki",
 *   spaceKey: "~EXAMPLE362906de5d343d49dcdbae5dEXAMPLE",
 *   username: "your-username",
 *   accessToken: "your-access-token",
 * });
 * const documents = await loader.load();
 * console.log(documents);
 * ```
 */
export class ConfluencePagesLoader extends BaseDocumentLoader {
  public readonly baseUrl: string;

  public readonly spaceKey: string;

  public readonly username?: string;

  public readonly accessToken?: string;

  /** Default page size sent as the `limit` query parameter. */
  public readonly limit: number;

  /**
   * expand parameter for confluence rest api
   * description can be found at https://developer.atlassian.com/server/confluence/expansions-in-the-rest-api/
   */
  public readonly expand?: string;

  public readonly personalAccessToken?: string;

  /**
   * @param params Loader configuration. `limit` defaults to 25 and `expand`
   * defaults to "body.storage,version".
   */
  constructor({
    baseUrl,
    spaceKey,
    username,
    accessToken,
    limit = 25,
    expand = "body.storage,version",
    personalAccessToken,
  }: ConfluencePagesLoaderParams) {
    super();
    this.baseUrl = baseUrl;
    this.spaceKey = spaceKey;
    this.username = username;
    this.accessToken = accessToken;
    this.limit = limit;
    this.expand = expand;
    this.personalAccessToken = personalAccessToken;
  }

  /**
   * Returns the authorization header for the request.
   * A personal access token wins over the username/access-token pair.
   * @returns The authorization header as a string, or undefined if no credentials were provided.
   */
  private get authorizationHeader(): string | undefined {
    if (this.personalAccessToken) {
      return `Bearer ${this.personalAccessToken}`;
    }
    if (this.username && this.accessToken) {
      const authToken = Buffer.from(
        `${this.username}:${this.accessToken}`
      ).toString("base64");
      return `Basic ${authToken}`;
    }
    return undefined;
  }

  /**
   * Fetches all the pages in the specified space and converts each page to
   * a Document instance.
   *
   * Failures are not propagated: any error raised while fetching or
   * converting is logged with `console.error` and an empty array is returned.
   * @param options the extra options of the load function
   * @param options.limit The limit parameter to overwrite the size to fetch pages.
   * @param options.start The start parameter to set initial offset to fetch pages.
   * @returns Promise resolving to an array of Document instances.
   */
  public async load(options?: {
    start?: number;
    limit?: number;
  }): Promise<Document[]> {
    try {
      const pages = await this.fetchAllPagesInSpace(
        options?.start,
        options?.limit
      );
      return pages.map((page) => this.createDocumentFromPage(page));
    } catch (error) {
      console.error("Error:", error);
      return [];
    }
  }

  /**
   * Fetches data from the Confluence API using the provided URL.
   * @param url The URL to fetch data from.
   * @returns Promise resolving to the JSON response from the API.
   * @throws Error when the request fails, the body cannot be parsed as JSON,
   * or the response status is not OK. The message always has the form
   * `Failed to fetch <url> from Confluence: <detail>`.
   */
  protected async fetchConfluenceData(
    url: string
  ): Promise<ConfluenceAPIResponse> {
    const headers: Record<string, string> = {
      "Content-Type": "application/json",
      Accept: "application/json",
    };
    const authHeader = this.authorizationHeader;
    if (authHeader) {
      headers.Authorization = authHeader;
    }

    let failedStatus: number | undefined;
    try {
      const response = await fetch(url, { headers });
      if (response.ok) {
        return await response.json();
      }
      // Remember the status and raise after the try block, so the catch
      // below does not wrap the status error a second time.
      failedStatus = response.status;
    } catch (error) {
      throw new Error(`Failed to fetch ${url} from Confluence: ${error}`);
    }
    throw new Error(`Failed to fetch ${url} from Confluence: ${failedStatus}`);
  }

  /**
   * Fetches all the pages in the specified space, one batch at a time.
   * @param start The offset of the first result to request.
   * @param limit The batch size sent with every request; defaults to the
   * loader's `limit`.
   * @returns Promise resolving to an array of ConfluencePage objects.
   */
  private async fetchAllPagesInSpace(
    start = 0,
    limit = this.limit
  ): Promise<ConfluencePage[]> {
    const pages: ConfluencePage[] = [];
    let offset = start;
    for (;;) {
      const url = `${this.baseUrl}/rest/api/content?spaceKey=${this.spaceKey}&limit=${limit}&start=${offset}&expand=${this.expand}`;
      const data = await this.fetchConfluenceData(url);
      // Stop on an empty batch. A missing or non-numeric `size` also ends the
      // loop, since the offset could otherwise never advance.
      if (!(data.size > 0)) {
        break;
      }
      for (const page of data.results) {
        pages.push(page);
      }
      offset += data.size;
    }
    return pages;
  }

  /**
   * Creates a Document instance from a ConfluencePage object.
   * @param page The ConfluencePage object to convert.
   * @returns A Document instance whose content is the page body as plain
   * text and whose metadata carries the page id, status, title, type, URL
   * and version details.
   */
  private createDocumentFromPage(page: ConfluencePage): Document {
    // `expand` is caller-configurable, so the storage body may be absent from
    // the response; fall back to empty content instead of throwing.
    const html = page.body?.storage?.value ?? "";
    // Convert the HTML content to plain text
    const plainTextContent = htmlToText(html, {
      wordwrap: false,
      preserveNewlines: false,
    });
    // Remove empty lines
    const textWithoutEmptyLines = plainTextContent.replace(/^\s*[\r\n]/gm, "");
    // Generate the URL
    const pageUrl = `${this.baseUrl}/spaces/${this.spaceKey}/pages/${page.id}`;
    // Return a langchain document
    return new Document({
      pageContent: textWithoutEmptyLines,
      metadata: {
        id: page.id,
        status: page.status,
        title: page.title,
        type: page.type,
        url: pageUrl,
        version: page.version?.number,
        updated_by: page.version?.by?.displayName,
        updated_at: page.version?.when,
      },
    });
  }
}