-
Notifications
You must be signed in to change notification settings - Fork 2.2k
/
browserbase.ts
82 lines (72 loc) · 2.09 KB
/
browserbase.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import { Document, type DocumentInterface } from "@langchain/core/documents";
import Browserbase, { BrowserbaseLoadOptions } from "@browserbasehq/sdk";
import { BaseDocumentLoader } from "../base.js";
import type { DocumentLoader } from "../base.js";
/**
 * Options accepted by `BrowserbaseLoader`: every field of
 * `BrowserbaseLoadOptions` from `@browserbasehq/sdk`, plus an optional API key.
 */
interface BrowserbaseLoaderOptions extends BrowserbaseLoadOptions {
/** API key that the loader passes to the `Browserbase` client constructor. */
apiKey?: string;
}
/**
 * Load pre-rendered web pages using a headless browser hosted on Browserbase.
 *
 * Depends on `@browserbasehq/sdk` package.
 * Get your API key from https://browserbase.com
 *
 * Each loaded page becomes one document whose `pageContent` is the page
 * returned by the SDK and whose `metadata.url` is the URL at the same
 * position in `urls`.
 *
 * @example
 * ```typescript
 * import { BrowserbaseLoader } from "langchain/document_loaders/web/browserbase";
 *
 * const loader = new BrowserbaseLoader(["https://example.com"], {
 *   apiKey: process.env.BROWSERBASE_API_KEY,
 *   textContent: true,
 * });
 *
 * const docs = await loader.load();
 * ```
 *
 * @param {string[]} urls - The URLs of the web pages to load.
 * @param {BrowserbaseLoaderOptions} [options] - Browserbase client options.
 */
export class BrowserbaseLoader
  extends BaseDocumentLoader
  implements DocumentLoader
{
  /** URLs to load, in the order they were supplied. */
  urls: string[];

  /** Options forwarded unchanged to `Browserbase.loadURLs`. */
  options: BrowserbaseLoaderOptions;

  /** SDK client created from `options.apiKey`. */
  browserbase: Browserbase;

  /**
   * Store the inputs and create the Browserbase client.
   *
   * @param urls - The URLs of the web pages to load.
   * @param options - Load options; `apiKey` is handed to the SDK client
   *   constructor. Defaults to an empty object.
   */
  constructor(urls: string[], options: BrowserbaseLoaderOptions = {}) {
    super();
    this.urls = urls;
    this.options = options;
    this.browserbase = new Browserbase(options.apiKey);
  }

  /**
   * Load pages from URLs, collecting everything `lazyLoad()` yields.
   *
   * @returns {Promise<DocumentInterface[]>} - A promise which resolves to a list of documents.
   */
  async load(): Promise<DocumentInterface[]> {
    const documents: DocumentInterface[] = [];
    for await (const doc of this.lazyLoad()) {
      documents.push(doc);
    }
    return documents;
  }

  /**
   * Load pages from URLs lazily, yielding one document per page.
   *
   * @returns {AsyncGenerator<DocumentInterface>} - An async generator that yields documents.
   */
  async *lazyLoad() {
    const pages = await this.browserbase.loadURLs(this.urls, this.options);
    // Position of the current page; used to look up its source URL.
    // NOTE(review): assumes the SDK yields exactly one page per URL, in the
    // same order as `this.urls` - confirm against the SDK.
    let index = 0;
    for await (const page of pages) {
      yield new Document({
        pageContent: page,
        metadata: {
          url: this.urls[index],
        },
      });
      // Advance by exactly one. The previous `index += index + 1` produced
      // 0, 1, 3, 7, ... and attached the wrong (or an undefined) URL from the
      // third page onwards.
      index += 1;
    }
  }
}