diff --git a/docs/core_docs/docs/integrations/document_loaders/web_loaders/couchbase.mdx b/docs/core_docs/docs/integrations/document_loaders/web_loaders/couchbase.mdx new file mode 100644 index 00000000000..950a01fa354 --- /dev/null +++ b/docs/core_docs/docs/integrations/document_loaders/web_loaders/couchbase.mdx @@ -0,0 +1,104 @@ +--- +hide_table_of_contents: true +sidebar_class_name: node-only +--- + +# Couchbase + +[Couchbase](http://couchbase.com/) is an award-winning distributed NoSQL cloud database that delivers unmatched versatility, performance, scalability, and financial value for all of your cloud, mobile, AI, and edge computing applications. + +This guide shows how to load documents from a Couchbase database. + +## Installation + +```bash npm2yarn +npm install couchbase +``` + +## Usage + +### Querying for Documents from Couchbase + +For more details on connecting to a Couchbase cluster, please check the [Node.js SDK documentation](https://docs.couchbase.com/nodejs-sdk/current/howtos/managing-connections.html#connection-strings). + +For help with querying for documents using SQL++ (SQL for JSON), please check the [documentation](https://docs.couchbase.com/server/current/n1ql/n1ql-language-reference/index.html). 
+ +```typescript +import { CouchbaseDocumentLoader } from "langchain/document_loaders/web/couchbase"; +import { Cluster } from "couchbase"; + +const connectionString = "couchbase://localhost"; // valid couchbase connection string +const dbUsername = "Administrator"; // valid database user with read access to the bucket being queried +const dbPassword = "Password"; // password for the database user + +// query is a valid SQL++ query +const query = ` + SELECT h.* FROM \`travel-sample\`.inventory.hotel h + WHERE h.country = 'United States' + LIMIT 1 +`; +``` + +### Connect to Couchbase Cluster + +```typescript +const couchbaseClient = await Cluster.connect(connectionString, { + username: dbUsername, + password: dbPassword, + configProfile: "wanDevelopment", +}); +``` + +### Create the Loader + +```typescript +const loader = new CouchbaseDocumentLoader( + couchbaseClient, // The connected couchbase cluster client + query // A valid SQL++ query which will return the required data +); +``` + +### Load Documents + +You can fetch the documents by calling the `load` method of the loader. It will return a list with all the documents. If you want to avoid this blocking call, you can call the `lazyLoad` method, which returns an async iterator. + +```typescript +// using load method +const docs = await loader.load(); +console.log(docs); +``` + +```typescript +// using lazyLoad +for await (const doc of loader.lazyLoad()) { + console.log(doc); + break; // break based on required condition +} +``` + +### Specifying Fields with Content and Metadata + +The fields that are part of the Document content can be specified using the `pageContentFields` parameter. +The metadata fields for the Document can be specified using the `metadataFields` parameter. 
+ +```typescript +const loaderWithSelectedFields = new CouchbaseDocumentLoader( + couchbaseClient, + query, + // pageContentFields + [ + "address", + "name", + "city", + "phone", + "country", + "geo", + "description", + "reviews", + ], + ["id"] // metadataFields +); + +const filtered_docs = await loaderWithSelectedFields.load(); +console.log(filtered_docs); +``` diff --git a/langchain/.gitignore b/langchain/.gitignore index fe3c2a67a82..3bf13f3a9c3 100644 --- a/langchain/.gitignore +++ b/langchain/.gitignore @@ -602,6 +602,10 @@ document_loaders/web/confluence.cjs document_loaders/web/confluence.js document_loaders/web/confluence.d.ts document_loaders/web/confluence.d.cts +document_loaders/web/couchbase.cjs +document_loaders/web/couchbase.js +document_loaders/web/couchbase.d.ts +document_loaders/web/couchbase.d.cts document_loaders/web/searchapi.cjs document_loaders/web/searchapi.js document_loaders/web/searchapi.d.ts diff --git a/langchain/langchain.config.js b/langchain/langchain.config.js index 6875556bca3..6f8d0df5067 100644 --- a/langchain/langchain.config.js +++ b/langchain/langchain.config.js @@ -199,6 +199,7 @@ export const config = { "document_loaders/web/sitemap": "document_loaders/web/sitemap", "document_loaders/web/sonix_audio": "document_loaders/web/sonix_audio", "document_loaders/web/confluence": "document_loaders/web/confluence", + "document_loaders/web/couchbase": "document_loaders/web/couchbase", "document_loaders/web/searchapi": "document_loaders/web/searchapi", "document_loaders/web/serpapi": "document_loaders/web/serpapi", "document_loaders/web/sort_xyz_blockchain": @@ -643,6 +644,7 @@ export const config = { "document_loaders/web/sitemap", "document_loaders/web/sonix_audio", "document_loaders/web/confluence", + "document_loaders/web/couchbase", "document_loaders/web/youtube", "document_loaders/fs/directory", "document_loaders/fs/buffer", diff --git a/langchain/package.json b/langchain/package.json index 649f22f3cbc..3c1548e121a 100644 --- 
a/langchain/package.json +++ b/langchain/package.json @@ -614,6 +614,10 @@ "document_loaders/web/confluence.js", "document_loaders/web/confluence.d.ts", "document_loaders/web/confluence.d.cts", + "document_loaders/web/couchbase.cjs", + "document_loaders/web/couchbase.js", + "document_loaders/web/couchbase.d.ts", + "document_loaders/web/couchbase.d.cts", "document_loaders/web/searchapi.cjs", "document_loaders/web/searchapi.js", "document_loaders/web/searchapi.d.ts", @@ -1247,6 +1251,7 @@ "cheerio": "^1.0.0-rc.12", "chromadb": "^1.5.3", "convex": "^1.3.1", + "couchbase": "^4.2.10", "d3-dsv": "^2.0.0", "dotenv": "^16.0.3", "dpdm": "^3.12.0", @@ -1317,6 +1322,7 @@ "cheerio": "^1.0.0-rc.12", "chromadb": "*", "convex": "^1.3.1", + "couchbase": "^4.2.10", "d3-dsv": "^2.0.0", "epub2": "^3.0.1", "fast-xml-parser": "^4.2.7", @@ -1411,6 +1417,9 @@ "convex": { "optional": true }, + "couchbase": { + "optional": true + }, "d3-dsv": { "optional": true }, @@ -2899,6 +2908,15 @@ "import": "./document_loaders/web/confluence.js", "require": "./document_loaders/web/confluence.cjs" }, + "./document_loaders/web/couchbase": { + "types": { + "import": "./document_loaders/web/couchbase.d.ts", + "require": "./document_loaders/web/couchbase.d.cts", + "default": "./document_loaders/web/couchbase.d.ts" + }, + "import": "./document_loaders/web/couchbase.js", + "require": "./document_loaders/web/couchbase.cjs" + }, "./document_loaders/web/searchapi": { "types": { "import": "./document_loaders/web/searchapi.d.ts", diff --git a/langchain/src/document_loaders/tests/couchbase.int.test.ts b/langchain/src/document_loaders/tests/couchbase.int.test.ts new file mode 100644 index 00000000000..7147955df9a --- /dev/null +++ b/langchain/src/document_loaders/tests/couchbase.int.test.ts @@ -0,0 +1,36 @@ +import { test, expect } from "@jest/globals"; +import { Cluster } from "couchbase"; +import { CouchbaseDocumentLoader } from "../web/couchbase.js"; + +test("Test Couchbase Cluster connection ", async () => { + 
const connectionString = ""; + const databaseUsername = ""; + const databasePassword = ""; + const query = ` + SELECT h.* FROM \`travel-sample\`.inventory.hotel h + WHERE h.country = 'United States' + LIMIT 10 + `; + const validPageContentFields = ["country", "name", "description"]; + const validMetadataFields = ["id"]; + + const couchbaseClient = await Cluster.connect(connectionString, { + username: databaseUsername, + password: databasePassword, + configProfile: "wanDevelopment", + }); + const loader = new CouchbaseDocumentLoader( + couchbaseClient, + query, + validPageContentFields, + validMetadataFields + ); + const docs = await loader.load(); + expect(docs.length).toBeGreaterThan(0); + + for (const doc of docs) { + expect(doc.pageContent).not.toBe(""); // Assuming valid page content fields + expect(doc.metadata).toHaveProperty("id"); // Assuming metadata has id field + expect(doc.metadata.id).not.toBe(""); + } +}); diff --git a/langchain/src/document_loaders/web/couchbase.ts b/langchain/src/document_loaders/web/couchbase.ts new file mode 100644 index 00000000000..a18ccc4c1bb --- /dev/null +++ b/langchain/src/document_loaders/web/couchbase.ts @@ -0,0 +1,88 @@ +import { Cluster, QueryResult } from "couchbase"; +import { Document } from "@langchain/core/documents"; +import { BaseDocumentLoader, DocumentLoader } from "../base.js"; + +/** + * Document loader that runs a SQL++ query on a connected Couchbase cluster and yields one Document per result row + */ +export class CouchbaseDocumentLoader + extends BaseDocumentLoader + implements DocumentLoader +{ + private cluster: Cluster; + + private query: string; + + private pageContentFields?: string[]; + + private metadataFields?: string[]; + + /** + * Construct the loader; throws an Error when no Couchbase cluster client is provided + * @param client { Cluster } [ connected Couchbase cluster client used to run the query ] + * @param query { string } [ SQL++ query whose result rows are turned into documents ] + * @param pageContentFields { Array<string> } [ row fields written to pageContent; defaults to every key of the row ] + * 
@param metadataFields { Array<string> } [ row fields copied into metadata; defaults to none ] + */ + constructor( + client: Cluster, + query: string, + pageContentFields?: string[], + metadataFields?: string[] + ) { + super(); + if (!client) { + throw new Error("Couchbase client cluster must be provided."); + } + this.cluster = client; + this.query = query; + this.pageContentFields = pageContentFields; + this.metadataFields = metadataFields; + } + + /** + * Load every document produced by the query by draining lazyLoad() + * @returns {Promise<Document[]>} [ Returns a promise of all the documents as array ] + */ + async load(): Promise<Document[]> { + const documents: Document[] = []; + for await (const doc of this.lazyLoad()) { + documents.push(doc); + } + return documents; + } + + /** + * Run the query and yield the documents one at a time instead of collecting them first + * @returns {AsyncIterable<Document>} [ Returns an async iterator over the documents ] + */ + async *lazyLoad(): AsyncIterable<Document> { + // Run SQL++ Query; result.rows is a plain array, so a synchronous for...of is enough + const result: QueryResult = await this.cluster.query(this.query); + for (const row of result.rows) { + let { metadataFields, pageContentFields } = this; + + if (!pageContentFields) { + pageContentFields = Object.keys(row); + } + + if (!metadataFields) { + metadataFields = []; + } + + const metadata = metadataFields.reduce( + (obj, field) => ({ ...obj, [field]: row[field] }), + {} + ); + + const document = pageContentFields + .map((k) => `${k}: ${JSON.stringify(row[k])}`) + .join("\n"); + + yield new Document({ + pageContent: document, + metadata, + }); + } + } +} diff --git a/langchain/src/load/import_constants.ts b/langchain/src/load/import_constants.ts index 1adaaaa2510..81b09d272a0 100644 --- a/langchain/src/load/import_constants.ts +++ b/langchain/src/load/import_constants.ts @@ -100,6 +100,7 @@ export const optionalImportEntrypoints: string[] = [ "langchain/document_loaders/web/sitemap", "langchain/document_loaders/web/sonix_audio", "langchain/document_loaders/web/confluence", + "langchain/document_loaders/web/couchbase", 
"langchain/document_loaders/web/youtube", "langchain/document_loaders/fs/directory", "langchain/document_loaders/fs/buffer", diff --git a/yarn.lock b/yarn.lock index 173063f17bb..bcfd2dcc5ea 100644 --- a/yarn.lock +++ b/yarn.lock @@ -6645,6 +6645,48 @@ __metadata: languageName: node linkType: hard +"@couchbase/couchbase-darwin-arm64-napi@npm:4.2.10": + version: 4.2.10 + resolution: "@couchbase/couchbase-darwin-arm64-napi@npm:4.2.10" + conditions: os=darwin & cpu=arm64 + languageName: node + linkType: hard + +"@couchbase/couchbase-darwin-x64-napi@npm:4.2.10": + version: 4.2.10 + resolution: "@couchbase/couchbase-darwin-x64-napi@npm:4.2.10" + conditions: os=darwin & cpu=x64 + languageName: node + linkType: hard + +"@couchbase/couchbase-linux-arm64-napi@npm:4.2.10": + version: 4.2.10 + resolution: "@couchbase/couchbase-linux-arm64-napi@npm:4.2.10" + conditions: os=linux & cpu=arm64 + languageName: node + linkType: hard + +"@couchbase/couchbase-linux-x64-napi@npm:4.2.10": + version: 4.2.10 + resolution: "@couchbase/couchbase-linux-x64-napi@npm:4.2.10" + conditions: os=linux & cpu=x64 + languageName: node + linkType: hard + +"@couchbase/couchbase-linuxmusl-x64-napi@npm:4.2.10": + version: 4.2.10 + resolution: "@couchbase/couchbase-linuxmusl-x64-napi@npm:4.2.10" + conditions: os=linux & cpu=x64 + languageName: node + linkType: hard + +"@couchbase/couchbase-win32-x64-napi@npm:4.2.10": + version: 4.2.10 + resolution: "@couchbase/couchbase-win32-x64-napi@npm:4.2.10" + conditions: os=win32 & cpu=x64 + languageName: node + linkType: hard + "@crawlee/types@npm:^3.3.0": version: 3.3.1 resolution: "@crawlee/types@npm:3.3.1" @@ -18500,6 +18542,35 @@ __metadata: languageName: node linkType: hard +"couchbase@npm:^4.2.10": + version: 4.2.10 + resolution: "couchbase@npm:4.2.10" + dependencies: + "@couchbase/couchbase-darwin-arm64-napi": 4.2.10 + "@couchbase/couchbase-darwin-x64-napi": 4.2.10 + "@couchbase/couchbase-linux-arm64-napi": 4.2.10 + "@couchbase/couchbase-linux-x64-napi": 
4.2.10 + "@couchbase/couchbase-linuxmusl-x64-napi": 4.2.10 + "@couchbase/couchbase-win32-x64-napi": 4.2.10 + cmake-js: ^7.2.1 + node-addon-api: ^7.0.0 + dependenciesMeta: + "@couchbase/couchbase-darwin-arm64-napi": + optional: true + "@couchbase/couchbase-darwin-x64-napi": + optional: true + "@couchbase/couchbase-linux-arm64-napi": + optional: true + "@couchbase/couchbase-linux-x64-napi": + optional: true + "@couchbase/couchbase-linuxmusl-x64-napi": + optional: true + "@couchbase/couchbase-win32-x64-napi": + optional: true + checksum: 1cc4725c5f16c3173691a9e4f702e479df545473deac694f7a8627f58a63a92718824d018730b51a7d4d6a0a8e125b0ef5f3f81cf995a831b8a3adfa05e9ecc7 + languageName: node + linkType: hard + "create-langchain-integration@workspace:libs/create-langchain-integration": version: 0.0.0-use.local resolution: "create-langchain-integration@workspace:libs/create-langchain-integration" @@ -25789,6 +25860,7 @@ __metadata: cheerio: ^1.0.0-rc.12 chromadb: ^1.5.3 convex: ^1.3.1 + couchbase: ^4.2.10 d3-dsv: ^2.0.0 dotenv: ^16.0.3 dpdm: ^3.12.0 @@ -25871,6 +25943,7 @@ __metadata: cheerio: ^1.0.0-rc.12 chromadb: "*" convex: ^1.3.1 + couchbase: ^4.2.10 d3-dsv: ^2.0.0 epub2: ^3.0.1 fast-xml-parser: ^4.2.7 @@ -25943,6 +26016,8 @@ __metadata: optional: true convex: optional: true + couchbase: + optional: true d3-dsv: optional: true epub2: