-
Notifications
You must be signed in to change notification settings - Fork 2.2k
/
gcs.ts
100 lines (83 loc) · 2.92 KB
/
gcs.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import { Storage, File } from "@google-cloud/storage";
import { Document } from "@langchain/core/documents";
import { Docstore } from "langchain/stores/doc/base";
/**
 * Options accepted by the GoogleCloudStorageDocstore constructor: the
 * target bucket and, optionally, a prefix applied to object names.
 */
export interface GoogleCloudStorageDocstoreConfiguration {
  /** Name of the GCS bucket in which documents are stored. */
  bucket: string;

  /**
   * Optional string placed in front of every object name. A value ending
   * in "/" is a common way to emulate a folder inside the bucket.
   */
  prefix?: string;
}
/**
 * Docstore backed by a Google Cloud Storage (GCS) bucket. Each document is
 * stored as one object: the object body holds the page content and the
 * object's custom metadata holds the document metadata. Provides methods
 * to look up a document by name, add a batch of documents, and add a
 * single document.
 */
export class GoogleCloudStorageDocstore extends Docstore {
  /** Name of the GCS bucket that holds the documents. */
  bucket: string;

  /** String prepended to every document name to form the object name. */
  prefix = "";

  /** GCS client; constructed without explicit options. */
  storage: Storage;

  /**
   * @param config Bucket name and optional object-name prefix. When no
   * prefix is given, the empty string is used.
   */
  constructor(config: GoogleCloudStorageDocstoreConfiguration) {
    super();
    this.bucket = config.bucket;
    this.prefix = config.prefix ?? this.prefix;
    this.storage = new Storage();
  }

  /**
   * Fetches the object stored under the given name and returns it as a
   * Document whose page content is the object body and whose metadata is
   * the object's custom metadata.
   *
   * Any rejection from the underlying GCS requests propagates to the
   * caller.
   * @param search The name of the document to look up (without prefix)
   * @returns A Promise that resolves to the Document built from the object
   */
  async search(search: string): Promise<Document> {
    const file = this.getFile(search);
    // The metadata lookup and the content download do not depend on each
    // other, so issue both requests concurrently instead of back to back.
    const [[fileMetadata], [dataBuffer]] = await Promise.all([
      file.getMetadata(),
      file.download(),
    ]);
    return new Document({
      pageContent: dataBuffer.toString(),
      // Objects without custom metadata yield an empty metadata record.
      metadata: fileMetadata?.metadata ?? {},
    });
  }

  /**
   * Adds multiple documents to the GCS bucket. All uploads are started
   * together; the returned Promise rejects as soon as any of them fails.
   * @param texts An object where each key is the name of a document and the value is the Document instance to be added
   * @returns A Promise that resolves when all documents have been added
   */
  async add(texts: Record<string, Document>): Promise<void> {
    await Promise.all(
      Object.entries(texts).map(([name, document]) =>
        this.addDocument(name, document)
      )
    );
  }

  /**
   * Adds a single document to the GCS bucket.
   * @param name The name of the document to be added (without prefix)
   * @param document The Document instance to be added
   * @returns A Promise that resolves when the document has been added
   */
  async addDocument(name: string, document: Document): Promise<void> {
    // Send the content and the custom metadata in a single upload so the
    // object is never stored without its metadata (a separate follow-up
    // metadata call could fail after the content was already written).
    await this.getFile(name).save(document.pageContent, {
      metadata: { metadata: document.metadata },
    });
  }

  /**
   * Builds a File handle for the object that corresponds to a document
   * name. No request is made here; the handle is only a reference.
   * @param name The document name; the configured prefix is prepended
   * @returns A File instance referencing the object in the bucket
   */
  private getFile(name: string): File {
    const filename = this.prefix + name;
    return this.storage.bucket(this.bucket).file(filename);
  }
}