-
Notifications
You must be signed in to change notification settings - Fork 2.1k
/
azure_blob_storage_file.ts
132 lines (116 loc) · 3.93 KB
/
azure_blob_storage_file.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import * as fs from "node:fs";
import * as path from "node:path";
import * as os from "node:os";
import { BlobServiceClient } from "@azure/storage-blob";
import { BaseDocumentLoader } from "../base.js";
import {
UnstructuredLoader,
UnstructuredLoaderOptions,
} from "../fs/unstructured.js";
import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
// Registers the 0.2.0 migration notice for this deprecated entrypoint
// (presumably surfaced as a warning by the helper; see entrypoint_deprecation).
// The entrypoint name mirrors this file's name, azure_blob_storage_file;
// it was previously misspelled as "azure_blog_storage_file".
/* #__PURE__ */ logVersion020MigrationWarning({
  oldEntrypointName: "document_loaders/web/azure_blob_storage_file",
  newPackageName: "@langchain/community",
});
/**
 * Connection details identifying a single blob (file) in Azure Blob Storage.
 * All three fields are required.
 */
interface AzureBlobStorageFileConfig {
/** Connection string handed to `BlobServiceClient.fromConnectionString`. */
connectionString: string;
/** Container name, passed to `getContainerClient`. */
container: string;
/**
 * Blob name, passed to `getBlobClient`. It is also joined onto the temporary
 * directory to form the local path the blob is downloaded to.
 */
blobName: string;
}
/**
 * Constructor argument for the AzureBlobStorageFileLoader: the location of
 * the blob to load and, optionally, the options for the UnstructuredLoader
 * that parses the downloaded file.
 */
interface AzureBlobStorageFileLoaderConfig {
/** Location of the blob to download. */
azureConfig: AzureBlobStorageFileConfig;
/**
 * Optional options passed through unchanged as the second argument of the
 * `UnstructuredLoader` constructor.
 */
unstructuredConfig?: UnstructuredLoaderOptions;
}
/**
 * @deprecated - Import from "@langchain/community/document_loaders/web/azure_blob_storage_file" instead. This entrypoint will be removed in 0.3.0.
 *
 * Document loader that downloads one specific blob from Azure Blob Storage
 * into a temporary directory and parses it with the UnstructuredLoader.
 * It extends the BaseDocumentLoader class.
 * @example
 * ```typescript
 * const loader = new AzureBlobStorageFileLoader({
 *   azureConfig: {
 *     connectionString: "{connectionString}",
 *     container: "{containerName}",
 *     blobName: "{blobName}",
 *   },
 * });
 * const docs = await loader.load();
 * ```
 */
export class AzureBlobStorageFileLoader extends BaseDocumentLoader {
  private readonly connectionString: string;

  private readonly container: string;

  private readonly blobName: string;

  private readonly unstructuredConfig?: UnstructuredLoaderOptions;

  /**
   * @param config.azureConfig Connection string, container and blob name of
   * the file to load.
   * @param config.unstructuredConfig Optional options forwarded to the
   * UnstructuredLoader that parses the downloaded file.
   */
  constructor({
    azureConfig,
    unstructuredConfig,
  }: AzureBlobStorageFileLoaderConfig) {
    super();
    this.connectionString = azureConfig.connectionString;
    this.container = azureConfig.container;
    this.blobName = azureConfig.blobName;
    this.unstructuredConfig = unstructuredConfig;
  }

  /**
   * Turns an arbitrary thrown value into a human-readable message, so that
   * non-Error throwables do not render as "undefined".
   */
  private static describeError(e: unknown): string {
    return e instanceof Error ? e.message : String(e);
  }

  /**
   * Downloads the configured blob into a fresh temporary directory, parses it
   * with the UnstructuredLoader and returns the resulting documents. The
   * temporary directory is removed before this method settles, whether the
   * download or the parsing succeeded or failed.
   * @returns An array of documents loaded from the file in Azure Blob Storage.
   * @throws Error if the blob cannot be downloaded, or if the
   * UnstructuredLoader fails to load the downloaded file. The underlying
   * error message is appended in both cases.
   */
  public async load() {
    const tempDir = fs.mkdtempSync(
      path.join(os.tmpdir(), "azureblobfileloader-")
    );
    // NOTE(review): blobName is joined in unvalidated; a name containing ".."
    // segments would resolve outside tempDir. Confirm whether callers can
    // supply such names.
    const filePath = path.join(tempDir, this.blobName);

    try {
      try {
        const blobServiceClient = BlobServiceClient.fromConnectionString(
          this.connectionString,
          {
            userAgentOptions: {
              userAgentPrefix: "langchainjs-blob-storage-file",
            },
          }
        );
        const containerClient = blobServiceClient.getContainerClient(
          this.container
        );
        const blobClient = containerClient.getBlobClient(this.blobName);

        // Blob names may contain "/" separators, so the parent directories of
        // the local file have to exist before downloading into it.
        fs.mkdirSync(path.dirname(filePath), { recursive: true });
        await blobClient.downloadToFile(filePath);
      } catch (e: unknown) {
        throw new Error(
          `Failed to download file ${
            this.blobName
          } from Azure Blob Storage container ${
            this.container
          }: ${AzureBlobStorageFileLoader.describeError(e)}`
        );
      }

      try {
        const unstructuredLoader = new UnstructuredLoader(
          filePath,
          this.unstructuredConfig
        );
        // Await here (rather than returning the promise) so a rejection is
        // caught below and the cleanup runs only after loading has finished.
        return await unstructuredLoader.load();
      } catch (e: unknown) {
        throw new Error(
          `Failed to load file ${filePath} using unstructured loader: ${AzureBlobStorageFileLoader.describeError(
            e
          )}`
        );
      }
    } finally {
      // Remove the temp root itself (not just the file's parent directory) so
      // nested blob names leave nothing behind, and do so on every exit path,
      // including a failed download.
      fs.rmSync(tempDir, { recursive: true, force: true });
    }
  }
}