From 65639460469bec12635c64e84df2aede3e482539 Mon Sep 17 00:00:00 2001 From: Lokesh Goel Date: Thu, 8 Feb 2024 13:54:19 +0530 Subject: [PATCH 1/8] added couchbase document loader --- langchain/package.json | 13 ++++ .../document_loaders/tests/couchbase.test.ts | 12 +++ .../src/document_loaders/web/couchbase.ts | 70 +++++++++++++++++ yarn.lock | 75 +++++++++++++++++++ 4 files changed, 170 insertions(+) create mode 100644 langchain/src/document_loaders/tests/couchbase.test.ts create mode 100644 langchain/src/document_loaders/web/couchbase.ts diff --git a/langchain/package.json b/langchain/package.json index 359579835be..30aba5dba64 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -460,6 +460,9 @@ "document_loaders/web/confluence.cjs", "document_loaders/web/confluence.js", "document_loaders/web/confluence.d.ts", + "document_loaders/web/couchbase.cjs", + "document_loaders/web/couchbase.js", + "document_loaders/web/couchbase.d.ts", "document_loaders/web/searchapi.cjs", "document_loaders/web/searchapi.js", "document_loaders/web/searchapi.d.ts", @@ -952,6 +955,7 @@ "cheerio": "^1.0.0-rc.12", "chromadb": "^1.5.3", "convex": "^1.3.1", + "couchbase": "^4.2.10", "d3-dsv": "^2.0.0", "dotenv": "^16.0.3", "dpdm": "^3.12.0", @@ -1021,6 +1025,7 @@ "cheerio": "^1.0.0-rc.12", "chromadb": "*", "convex": "^1.3.1", + "couchbase": "^4.2.10", "d3-dsv": "^2.0.0", "epub2": "^3.0.1", "fast-xml-parser": "^4.2.7", @@ -1116,6 +1121,9 @@ "convex": { "optional": true }, + "couchbase": { + "optional": true + }, "d3-dsv": { "optional": true }, @@ -1997,6 +2005,11 @@ "types": "./document_loaders/web/confluence.d.ts", "import": "./document_loaders/web/confluence.js", "require": "./document_loaders/web/confluence.cjs" + }, + "./document_loaders/web/couchbase": { + "types": "./document_loaders/web/couchbase.d.ts", + "import": "./document_loaders/web/couchbase.js", + "require": "./document_loaders/web/couchbase.cjs" }, "./document_loaders/web/searchapi": { "types": "./document_loaders/web/searchapi.d.ts", diff --git a/langchain/src/document_loaders/tests/couchbase.test.ts b/langchain/src/document_loaders/tests/couchbase.test.ts new file mode 100644 index 00000000000..7a45ec90e2b --- /dev/null +++ b/langchain/src/document_loaders/tests/couchbase.test.ts @@ -0,0 +1,12 @@ +import { test, expect } from "@jest/globals"; +import { Cluster } from "couchbase"; +import { CouchbaseDocumentLoader } from "../web/couchbase.js"; + + +test("Test Couchbase Cluster connection ", async ()=>{ + const couchbaseClient = await Cluster.connect(""); + const loader = new CouchbaseDocumentLoader(couchbaseClient, ""); + const doc = await loader.load(); + console.log(doc); + expect(doc.length).toBeGreaterThan(0); +}) \ No newline at end of file diff --git a/langchain/src/document_loaders/web/couchbase.ts b/langchain/src/document_loaders/web/couchbase.ts new file mode 100644 index 00000000000..580fd707b59 --- /dev/null +++ b/langchain/src/document_loaders/web/couchbase.ts @@ -0,0 +1,70 @@ +import { Cluster, QueryResult } from "couchbase"; +import { Document } from "../../document.js"; +import { BaseDocumentLoader, DocumentLoader } from "../base.js"; + +export class CouchbaseDocumentLoader + extends BaseDocumentLoader + implements DocumentLoader +{ + private cluster: Cluster; + + private query: string; + + private pageContentFields?: string[]; + + private metadataFields?: string[]; + + constructor( + client: Cluster, + query: string, + pageContentFields?: string[], + metadataFields?: string[] + ) { + super(); + if (!client) { + throw new Error("Couchbase client cluster must be provided."); + } + this.cluster = client; + this.query = query; + this.pageContentFields = pageContentFields; + this.metadataFields = metadataFields; + } + + async load(): Promise { + const documents: Document[] = []; + for await (const doc of this.lazyLoad()) { + documents.push(doc); + } + return documents; + } + + async *lazyLoad(): AsyncIterable { + // Run SQL++ Query + const result: QueryResult = await this.cluster.query(this.query); + for await (const row of result.rows) { + let { metadataFields, pageContentFields } = this; + + if (!pageContentFields) { + pageContentFields = Object.keys(row); + } + + if (!metadataFields) { + metadataFields = []; + } + + const metadata = metadataFields.reduce( + (obj, field) => ({ ...obj, [field]: row[field] }), + {} + ); + + const document = pageContentFields + .map((k) => `${k}: ${row[k]}`) + .join("\n"); + + yield new Document({ + pageContent: document, + metadata, + }); + } + } +} diff --git a/yarn.lock b/yarn.lock index d05202de300..6174564e68e 100644 --- a/yarn.lock +++ b/yarn.lock @@ -6558,6 +6558,48 @@ __metadata: languageName: node linkType: hard +"@couchbase/couchbase-darwin-arm64-napi@npm:4.2.10": + version: 4.2.10 + resolution: "@couchbase/couchbase-darwin-arm64-napi@npm:4.2.10" + conditions: os=darwin & cpu=arm64 + languageName: node + linkType: hard + +"@couchbase/couchbase-darwin-x64-napi@npm:4.2.10": + version: 4.2.10 + resolution: "@couchbase/couchbase-darwin-x64-napi@npm:4.2.10" + conditions: os=darwin & cpu=x64 + languageName: node + linkType: hard + +"@couchbase/couchbase-linux-arm64-napi@npm:4.2.10": + version: 4.2.10 + resolution: "@couchbase/couchbase-linux-arm64-napi@npm:4.2.10" + conditions: os=linux & cpu=arm64 + languageName: node + linkType: hard + +"@couchbase/couchbase-linux-x64-napi@npm:4.2.10": + version: 4.2.10 + resolution: "@couchbase/couchbase-linux-x64-napi@npm:4.2.10" + conditions: os=linux & cpu=x64 + languageName: node + linkType: hard + +"@couchbase/couchbase-linuxmusl-x64-napi@npm:4.2.10": + version: 4.2.10 + resolution: "@couchbase/couchbase-linuxmusl-x64-napi@npm:4.2.10" + conditions: os=linux & cpu=x64 + languageName: node + linkType: hard + +"@couchbase/couchbase-win32-x64-napi@npm:4.2.10": + version: 4.2.10 + resolution: "@couchbase/couchbase-win32-x64-napi@npm:4.2.10" + conditions: os=win32 & cpu=x64 + languageName: node + linkType: hard + "@crawlee/types@npm:^3.3.0": version: 3.3.1 resolution: "@crawlee/types@npm:3.3.1" @@ -18005,6 +18047,35 @@ __metadata: languageName: node linkType: hard +"couchbase@npm:^4.2.10": + version: 4.2.10 + resolution: "couchbase@npm:4.2.10" + dependencies: + "@couchbase/couchbase-darwin-arm64-napi": 4.2.10 + "@couchbase/couchbase-darwin-x64-napi": 4.2.10 + "@couchbase/couchbase-linux-arm64-napi": 4.2.10 + "@couchbase/couchbase-linux-x64-napi": 4.2.10 + "@couchbase/couchbase-linuxmusl-x64-napi": 4.2.10 + "@couchbase/couchbase-win32-x64-napi": 4.2.10 + cmake-js: ^7.2.1 + node-addon-api: ^7.0.0 + dependenciesMeta: + "@couchbase/couchbase-darwin-arm64-napi": + optional: true + "@couchbase/couchbase-darwin-x64-napi": + optional: true + "@couchbase/couchbase-linux-arm64-napi": + optional: true + "@couchbase/couchbase-linux-x64-napi": + optional: true + "@couchbase/couchbase-linuxmusl-x64-napi": + optional: true + "@couchbase/couchbase-win32-x64-napi": + optional: true + checksum: 1cc4725c5f16c3173691a9e4f702e479df545473deac694f7a8627f58a63a92718824d018730b51a7d4d6a0a8e125b0ef5f3f81cf995a831b8a3adfa05e9ecc7 + languageName: node + linkType: hard + "create-langchain-integration@workspace:libs/create-langchain-integration": version: 0.0.0-use.local resolution: "create-langchain-integration@workspace:libs/create-langchain-integration" @@ -25203,6 +25274,7 @@ __metadata: cheerio: ^1.0.0-rc.12 chromadb: ^1.5.3 convex: ^1.3.1 + couchbase: ^4.2.10 d3-dsv: ^2.0.0 dotenv: ^16.0.3 dpdm: ^3.12.0 @@ -25284,6 +25356,7 @@ __metadata: cheerio: ^1.0.0-rc.12 chromadb: "*" convex: ^1.3.1 + couchbase: ^4.2.10 d3-dsv: ^2.0.0 epub2: ^3.0.1 fast-xml-parser: ^4.2.7 @@ -25357,6 +25430,8 @@ __metadata: optional: true convex: optional: true + couchbase: + optional: true d3-dsv: optional: true epub2: From 65974da61a6d7dd47210d69dafb962da584ca637 Mon Sep 17 00:00:00 2001 From: Lokesh Goel Date: Thu, 8 Feb 2024 18:21:03 +0530 Subject: [PATCH 2/8] fixed loader to use stringify --- langchain/package.json | 8 ++++---- langchain/src/document_loaders/web/couchbase.ts | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/langchain/package.json b/langchain/package.json index 30aba5dba64..80a9e78a18b 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -2006,10 +2006,10 @@ "import": "./document_loaders/web/confluence.js", "require": "./document_loaders/web/confluence.cjs" }, - "./document_loaders/web/couchbase": { - "types": "./document_loaders/web/couchbase.d.ts", - "import": "./document_loaders/web/couchbase.js", - "require": "./document_loaders/web/couchbase.cjs" + "./document_loaders/web/couchbase": { + "types": "./document_loaders/web/couchbase.d.ts", + "import": "./document_loaders/web/couchbase.js", + "require": "./document_loaders/web/couchbase.cjs" }, "./document_loaders/web/searchapi": { "types": "./document_loaders/web/searchapi.d.ts", diff --git a/langchain/src/document_loaders/web/couchbase.ts b/langchain/src/document_loaders/web/couchbase.ts index 580fd707b59..4c90cee5be0 100644 --- a/langchain/src/document_loaders/web/couchbase.ts +++ b/langchain/src/document_loaders/web/couchbase.ts @@ -1,5 +1,5 @@ import { Cluster, QueryResult } from "couchbase"; -import { Document } from "../../document.js"; +import { Document } from "@langchain/core/documents"; import { BaseDocumentLoader, DocumentLoader } from "../base.js"; export class CouchbaseDocumentLoader @@ -58,7 +58,7 @@ export class CouchbaseDocumentLoader ); const document = pageContentFields - .map((k) => `${k}: ${row[k]}`) + .map((k) => `${k}: ${JSON.stringify(row[k])}`) .join("\n"); yield new Document({ From ba71e7e92f70e21ec2cbb2126b92e170fbb9d949 Mon Sep 17 00:00:00 2001 From: Lokesh Goel Date: Sun, 11 Feb 2024 10:48:03 +0530 Subject: [PATCH 3/8] add doc file --- .../web_loaders/couchbase.mdx | 102 ++++++++++++++++++ langchain/langchain.config.js | 2 + .../tests/couchbase.int.test.ts | 18 ++++ .../src/document_loaders/web/couchbase.ts | 16 +++ langchain/src/load/import_constants.ts | 1 + langchain/src/load/import_type.d.ts | 3 + 6 files changed, 142 insertions(+) create mode 100644 docs/core_docs/docs/integrations/document_loaders/web_loaders/couchbase.mdx create mode 100644 langchain/src/document_loaders/tests/couchbase.int.test.ts diff --git a/docs/core_docs/docs/integrations/document_loaders/web_loaders/couchbase.mdx b/docs/core_docs/docs/integrations/document_loaders/web_loaders/couchbase.mdx new file mode 100644 index 00000000000..acf0ed077ad --- /dev/null +++ b/docs/core_docs/docs/integrations/document_loaders/web_loaders/couchbase.mdx @@ -0,0 +1,102 @@ +--- +hide_table_of_contents: true +sidebar_class_name: node-only +--- + +# Couchbase + +[Couchbase](http://couchbase.com/) is an award-winning distributed NoSQL cloud database that delivers unmatched versatility, performance, scalability, and financial value for all of your cloud, mobile, AI, and edge computing applications. + +This guide shows how to use load documents from couchbase database. + +# Installation + +```bash npm2yarn +npm install couchbase +``` + + +## Usage + +### Querying for Documents from Couchbase +For more details on connecting to a Couchbase cluster, please check the [Node.js SDK documentation](https://docs.couchbase.com/nodejs-sdk/current/howtos/managing-connections.html#connection-strings). + +For help with querying for documents using SQL++ (SQL for JSON), please check the [documentation](https://docs.couchbase.com/server/current/n1ql/n1ql-language-reference/index.html). + +```typescript +import { CouchbaseDocumentLoader } from "langchain/document_loaders/web/couchbase"; + +const connectionString = "couchbase://localhost"; // valid couchbase connection string +const dbUsername = "Administrator" // valid database user with read access to the bucket being queried +const dbPassword = "Password" // password for the database user + +// query is a valid SQL++ query +const query = ` + SELECT h.* FROM `travel-sample`.inventory.hotel h + WHERE h.country = 'United States' + LIMIT 1 + `; +``` + +### Connect to Couchbase Cluster + +```typescript + const couchbaseClient = await Cluster.connect(connectionString, { + username: dbUsername, + password: dbPassword, + }); + +``` + +### Create the Loader + +```typescript +const loader = new CouchbaseDocumentLoader( + couchbaseClient, // The connected couchbase cluster client + query, // A valid SQL++ query which will return the required data +); +``` + +### Load Documents + +You can fetch the documents by calling the `load` method of the loader. It will return a list with all the documents. If you want to avoid this blocking call, you can call `lazy_load` method that returns an Iterator. + +```typescript +// using load method +docs = await loader.load(); +console.log(docs) +``` + +```typescript +// using lazy_load +for await (const doc of this.lazyLoad()){ + console.log(doc); + break; // break based on required condition +} +``` + +### Specifying Fields with Content and Metadata +The fields that are part of the Document content can be specified using the `pageContentFields` parameter. +The metadata fields for the Document can be specified using the `metadataFields` parameter. + +```typescript +const loaderWithSelectedFields = new CouchbaseDocumentLoader( + couchbaseClient, + query, + // pageContentFields + [ + "address", + "name", + "city", + "phone", + "country", + "geo", + "description", + "reviews", + ], + ["id"] // metadataFields +); + +const filtered_docs = await loaderWithSelectedFields.load() +console.log(filtered_docs) +``` \ No newline at end of file diff --git a/langchain/langchain.config.js b/langchain/langchain.config.js index 34693bb0251..2b88a7e0399 100644 --- a/langchain/langchain.config.js +++ b/langchain/langchain.config.js @@ -183,6 +183,7 @@ export const config = { "document_loaders/web/azure_blob_storage_file": "document_loaders/web/azure_blob_storage_file", "document_loaders/web/cheerio": "document_loaders/web/cheerio", + "document_loaders/web/couchbase": "document_loaders/web/couchbase", "document_loaders/web/puppeteer": "document_loaders/web/puppeteer", "document_loaders/web/playwright": "document_loaders/web/playwright", "document_loaders/web/college_confidential": @@ -627,6 +628,7 @@ export const config = { "document_loaders/web/azure_blob_storage_container", "document_loaders/web/azure_blob_storage_file", "document_loaders/web/cheerio", + "document_loaders/web/couchbase", "document_loaders/web/puppeteer", "document_loaders/web/playwright", "document_loaders/web/college_confidential", diff --git a/langchain/src/document_loaders/tests/couchbase.int.test.ts b/langchain/src/document_loaders/tests/couchbase.int.test.ts new file mode 100644 index 00000000000..573e54a5a5f --- /dev/null +++ b/langchain/src/document_loaders/tests/couchbase.int.test.ts @@ -0,0 +1,18 @@ +import { test, expect } from "@jest/globals"; +import { Cluster } from "couchbase"; +import { CouchbaseDocumentLoader } from "../web/couchbase.js"; + +test("Test Couchbase Cluster connection ", async () => { + const couchbaseClient = await Cluster.connect("couchbase://localhost", { + username: "Administrator", + password: "password", + }); + const loader = new CouchbaseDocumentLoader( + couchbaseClient, + "Select r.* from `travel-sample`.`inventory`.`route` as r limit 10", + ["airline", "sourceairport"] + ); + const doc = await loader.load(); + console.log(doc); + expect(doc.length).toBeGreaterThan(0); +}); diff --git a/langchain/src/document_loaders/web/couchbase.ts b/langchain/src/document_loaders/web/couchbase.ts index 4c90cee5be0..805e5fdac01 100644 --- a/langchain/src/document_loaders/web/couchbase.ts +++ b/langchain/src/document_loaders/web/couchbase.ts @@ -2,6 +2,9 @@ import { Cluster, QueryResult } from "couchbase"; import { Document } from "@langchain/core/documents"; import { BaseDocumentLoader, DocumentLoader } from "../base.js"; +/** + * loader for couchbase document + */ export class CouchbaseDocumentLoader extends BaseDocumentLoader implements DocumentLoader @@ -14,6 +17,13 @@ export class CouchbaseDocumentLoader private metadataFields?: string[]; + /** + * construct Couchbase document loader with a requirement for couchbase cluster client + * @param client { Cluster } [couchbase connected client to connect to database] + * @param query { string } [query to get results from while loading the data] + * @param pageContentFields { Array } [filters fields of the document and shows these only] + * @param metadataFields { Array } [metadata fields required] + */ constructor( client: Cluster, query: string, @@ -30,6 +40,9 @@ export class CouchbaseDocumentLoader this.metadataFields = metadataFields; } + /** + * Function to load document based on query from couchbase + */ async load(): Promise { const documents: Document[] = []; for await (const doc of this.lazyLoad()) { @@ -38,6 +51,9 @@ export class CouchbaseDocumentLoader return documents; } + /** + * Helper function to load each document + */ async *lazyLoad(): AsyncIterable { // Run SQL++ Query const result: QueryResult = await this.cluster.query(this.query); diff --git a/langchain/src/load/import_constants.ts b/langchain/src/load/import_constants.ts index e2a54f81c42..2216953a83d 100644 --- a/langchain/src/load/import_constants.ts +++ b/langchain/src/load/import_constants.ts @@ -99,6 +99,7 @@ export const optionalImportEntrypoints = [ "langchain/document_loaders/web/s3", "langchain/document_loaders/web/sonix_audio", "langchain/document_loaders/web/confluence", + "langchain/document_loadrs/web/couchbase", "langchain/document_loaders/web/youtube", "langchain/document_loaders/fs/directory", "langchain/document_loaders/fs/buffer", diff --git a/langchain/src/load/import_type.d.ts b/langchain/src/load/import_type.d.ts index d9295b9e5f8..0fbaf3a75ee 100644 --- a/langchain/src/load/import_type.d.ts +++ b/langchain/src/load/import_type.d.ts @@ -295,6 +295,9 @@ export interface OptionalImportMap { "langchain/document_loaders/web/confluence"?: | typeof import("../document_loaders/web/confluence.js") | Promise; + "langchain/document_loaders/web/couchbase"?: + | typeof import("../document_loaders/web/couchbase.js") + | Promise; "langchain/document_loaders/web/youtube"?: | typeof import("../document_loaders/web/youtube.js") | Promise; From a3b5262fd5475a3ed04a4075b52d0fad2cace611 Mon Sep 17 00:00:00 2001 From: Lokesh Goel Date: Sun, 11 Feb 2024 11:10:14 +0530 Subject: [PATCH 4/8] updated tests --- .../web_loaders/couchbase.mdx | 2 ++ .../tests/couchbase.int.test.ts | 36 ++++++++++++++----- .../document_loaders/tests/couchbase.test.ts | 12 ------- 3 files changed, 30 insertions(+), 20 deletions(-) delete mode 100644 langchain/src/document_loaders/tests/couchbase.test.ts diff --git a/docs/core_docs/docs/integrations/document_loaders/web_loaders/couchbase.mdx b/docs/core_docs/docs/integrations/document_loaders/web_loaders/couchbase.mdx index acf0ed077ad..c3fbe5be44f 100644 --- a/docs/core_docs/docs/integrations/document_loaders/web_loaders/couchbase.mdx +++ b/docs/core_docs/docs/integrations/document_loaders/web_loaders/couchbase.mdx @@ -25,6 +25,7 @@ For help with querying for documents using SQL++ (SQL for JSON), please check th ```typescript import { CouchbaseDocumentLoader } from "langchain/document_loaders/web/couchbase"; +import { Cluster } from "couchbase" const connectionString = "couchbase://localhost"; // valid couchbase connection string const dbUsername = "Administrator" // valid database user with read access to the bucket being queried @@ -44,6 +45,7 @@ const query = ` const couchbaseClient = await Cluster.connect(connectionString, { username: dbUsername, password: dbPassword, + configProfile: "wanDevelopment" }); ``` diff --git a/langchain/src/document_loaders/tests/couchbase.int.test.ts b/langchain/src/document_loaders/tests/couchbase.int.test.ts index 573e54a5a5f..ea4f147cdec 100644 --- a/langchain/src/document_loaders/tests/couchbase.int.test.ts +++ b/langchain/src/document_loaders/tests/couchbase.int.test.ts @@ -3,16 +3,36 @@ import { Cluster } from "couchbase"; import { CouchbaseDocumentLoader } from "../web/couchbase.js"; test("Test Couchbase Cluster connection ", async () => { - const couchbaseClient = await Cluster.connect("couchbase://localhost", { - username: "Administrator", - password: "password", + const connectionString = ""; + const databaseUsername = ""; + const databasePassword = ""; + const query = ` + SELECT h.* FROM \`travel-sample\`.inventory.hotel h + WHERE h.country = 'United States' + LIMIT 10 + `; + const validPageContentFields = ["country", "name", "description"]; + const validMetadataFields = ["id"] + + const couchbaseClient = await Cluster.connect(connectionString, { + username: databaseUsername, + password: databasePassword, + configProfile: "wanDevelopment" }); const loader = new CouchbaseDocumentLoader( couchbaseClient, - "Select r.* from `travel-sample`.`inventory`.`route` as r limit 10", - ["airline", "sourceairport"] + query, + validPageContentFields, + validMetadataFields ); - const doc = await loader.load(); - console.log(doc); - expect(doc.length).toBeGreaterThan(0); + const docs = await loader.load(); + console.log(docs); + expect(docs.length).toBeGreaterThan(0); + + for (const doc of docs) { + console.log(doc); + expect(doc.pageContent).not.toBe(""); // Assuming valid page content fields + expect(doc.metadata).toHaveProperty('id'); // Assuming metadata has id field + expect(doc.metadata.id).not.toBe(""); + } }); diff --git a/langchain/src/document_loaders/tests/couchbase.test.ts b/langchain/src/document_loaders/tests/couchbase.test.ts deleted file mode 100644 index 7a45ec90e2b..00000000000 --- a/langchain/src/document_loaders/tests/couchbase.test.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { test, expect } from "@jest/globals"; -import { Cluster } from "couchbase"; -import { CouchbaseDocumentLoader } from "../web/couchbase.js"; - - -test("Test Couchbase Cluster connection ", async ()=>{ - const couchbaseClient = await Cluster.connect(""); - const loader = new CouchbaseDocumentLoader(couchbaseClient, ""); - const doc = await loader.load(); - console.log(doc); - expect(doc.length).toBeGreaterThan(0); -}) \ No newline at end of file From 4cb8591c01a44032504c1bb291c133a7818f99a5 Mon Sep 17 00:00:00 2001 From: Lokesh Goel Date: Sun, 11 Feb 2024 11:20:42 +0530 Subject: [PATCH 5/8] update types as per new requirement --- langchain/.gitignore | 4 ++++ langchain/langchain.config.js | 4 ++-- langchain/package.json | 7 ++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/langchain/.gitignore b/langchain/.gitignore index fe3c2a67a82..3bf13f3a9c3 100644 --- a/langchain/.gitignore +++ b/langchain/.gitignore @@ -602,6 +602,10 @@ document_loaders/web/confluence.cjs document_loaders/web/confluence.js document_loaders/web/confluence.d.ts document_loaders/web/confluence.d.cts +document_loaders/web/couchbase.cjs +document_loaders/web/couchbase.js +document_loaders/web/couchbase.d.ts +document_loaders/web/couchbase.d.cts document_loaders/web/searchapi.cjs document_loaders/web/searchapi.js document_loaders/web/searchapi.d.ts diff --git a/langchain/langchain.config.js b/langchain/langchain.config.js index bb29a1c25c1..6f8d0df5067 100644 --- a/langchain/langchain.config.js +++ b/langchain/langchain.config.js @@ -182,7 +182,6 @@ export const config = { "document_loaders/web/azure_blob_storage_file": "document_loaders/web/azure_blob_storage_file", "document_loaders/web/cheerio": "document_loaders/web/cheerio", - "document_loaders/web/couchbase": "document_loaders/web/couchbase", "document_loaders/web/puppeteer": "document_loaders/web/puppeteer", "document_loaders/web/playwright": "document_loaders/web/playwright", "document_loaders/web/college_confidential": @@ -200,6 +199,7 @@ export const config = { "document_loaders/web/sitemap": "document_loaders/web/sitemap", "document_loaders/web/sonix_audio": "document_loaders/web/sonix_audio", "document_loaders/web/confluence": "document_loaders/web/confluence", + "document_loaders/web/couchbase": "document_loaders/web/couchbase", "document_loaders/web/searchapi": "document_loaders/web/searchapi", "document_loaders/web/serpapi": "document_loaders/web/serpapi", "document_loaders/web/sort_xyz_blockchain": @@ -628,7 +628,6 @@ export const config = { "document_loaders/web/azure_blob_storage_container", "document_loaders/web/azure_blob_storage_file", "document_loaders/web/cheerio", - "document_loaders/web/couchbase", "document_loaders/web/puppeteer", "document_loaders/web/playwright", "document_loaders/web/college_confidential", @@ -645,6 +644,7 @@ export const config = { "document_loaders/web/sitemap", "document_loaders/web/sonix_audio", "document_loaders/web/confluence", + "document_loaders/web/couchbase", "document_loaders/web/youtube", "document_loaders/fs/directory", "document_loaders/fs/buffer", diff --git a/langchain/package.json b/langchain/package.json index a5a63fbd9f6..002c7113f0b 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -617,6 +617,7 @@ "document_loaders/web/couchbase.cjs", "document_loaders/web/couchbase.js", "document_loaders/web/couchbase.d.ts", + "document_loaders/web/couchbase.d.cts", "document_loaders/web/searchapi.cjs", "document_loaders/web/searchapi.js", "document_loaders/web/searchapi.d.ts", @@ -2908,7 +2909,11 @@ "require": "./document_loaders/web/confluence.cjs" }, "./document_loaders/web/couchbase": { - "types": "./document_loaders/web/couchbase.d.ts", + "types": { + "import": "./document_loaders/web/couchbase.d.ts", + "require": "./document_loaders/web/couchbase.d.cts", + "default": "./document_loaders/web/couchbase.d.ts" + }, "import": "./document_loaders/web/couchbase.js", "require": "./document_loaders/web/couchbase.cjs" }, From 6852c2289ed7e7f0321848ce9780a57252ffee3c Mon Sep 17 00:00:00 2001 From: Lokesh Goel Date: Sun, 11 Feb 2024 11:47:08 +0530 Subject: [PATCH 6/8] update comments for typedoc --- langchain/src/document_loaders/web/couchbase.ts | 12 +++++++----- langchain/src/load/import_constants.ts | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/langchain/src/document_loaders/web/couchbase.ts b/langchain/src/document_loaders/web/couchbase.ts index 805e5fdac01..a18ccc4c1bb 100644 --- a/langchain/src/document_loaders/web/couchbase.ts +++ b/langchain/src/document_loaders/web/couchbase.ts @@ -19,10 +19,10 @@ export class CouchbaseDocumentLoader /** * construct Couchbase document loader with a requirement for couchbase cluster client - * @param client { Cluster } [couchbase connected client to connect to database] - * @param query { string } [query to get results from while loading the data] - * @param pageContentFields { Array } [filters fields of the document and shows these only] - * @param metadataFields { Array } [metadata fields required] + * @param client { Cluster } [ couchbase connected client to connect to database ] + * @param query { string } [ query to get results from while loading the data ] + * @param pageContentFields { Array } [ filters fields of the document and shows these only ] + * @param metadataFields { Array } [ metadata fields required ] */ constructor( client: Cluster, @@ -42,6 +42,7 @@ export class CouchbaseDocumentLoader /** * Function to load document based on query from couchbase + * @returns {Promise} [ Returns a promise of all the documents as array ] */ async load(): Promise { const documents: Document[] = []; @@ -52,7 +53,8 @@ export class CouchbaseDocumentLoader } /** - * Helper function to load each document + * Function to load documents based on iterator rather than full load + * @returns {AsyncIterable} [ Returns an iterator to fetch documents ] */ async *lazyLoad(): AsyncIterable { // Run SQL++ Query diff --git a/langchain/src/load/import_constants.ts b/langchain/src/load/import_constants.ts index 26d81dfbb12..81b09d272a0 100644 --- a/langchain/src/load/import_constants.ts +++ b/langchain/src/load/import_constants.ts @@ -100,7 +100,7 @@ export const optionalImportEntrypoints: string[] = [ "langchain/document_loaders/web/sitemap", "langchain/document_loaders/web/sonix_audio", "langchain/document_loaders/web/confluence", - "langchain/document_loadrs/web/couchbase", + "langchain/document_loaders/web/couchbase", "langchain/document_loaders/web/youtube", "langchain/document_loaders/fs/directory", "langchain/document_loaders/fs/buffer", From 54fb39e118c12024fe12a8411d46ae939d118d04 Mon Sep 17 00:00:00 2001 From: Lokesh Goel Date: Tue, 13 Feb 2024 15:22:35 +0530 Subject: [PATCH 7/8] fix formatting issues and remove print in tests --- .../document_loaders/web_loaders/couchbase.mdx | 12 ++++++------ .../tests/couchbase.int.test.ts | 18 ++++++++---------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/docs/core_docs/docs/integrations/document_loaders/web_loaders/couchbase.mdx b/docs/core_docs/docs/integrations/document_loaders/web_loaders/couchbase.mdx index c3fbe5be44f..9cad48ed03a 100644 --- a/docs/core_docs/docs/integrations/document_loaders/web_loaders/couchbase.mdx +++ b/docs/core_docs/docs/integrations/document_loaders/web_loaders/couchbase.mdx @@ -33,20 +33,20 @@ const dbPassword = "Password" // password for the database user // query is a valid SQL++ query const query = ` - SELECT h.* FROM `travel-sample`.inventory.hotel h - WHERE h.country = 'United States' - LIMIT 1 - `; + SELECT h.* FROM \`travel-sample\`.inventory.hotel h + WHERE h.country = 'United States' + LIMIT 1 +`; ``` ### Connect to Couchbase Cluster ```typescript - const couchbaseClient = await Cluster.connect(connectionString, { +const couchbaseClient = await Cluster.connect(connectionString, { username: dbUsername, password: dbPassword, configProfile: "wanDevelopment" - }); +}); ``` diff --git a/langchain/src/document_loaders/tests/couchbase.int.test.ts b/langchain/src/document_loaders/tests/couchbase.int.test.ts index ea4f147cdec..7147955df9a 100644 --- a/langchain/src/document_loaders/tests/couchbase.int.test.ts +++ b/langchain/src/document_loaders/tests/couchbase.int.test.ts @@ -7,17 +7,17 @@ test("Test Couchbase Cluster connection ", async () => { const databaseUsername = ""; const databasePassword = ""; const query = ` - SELECT h.* FROM \`travel-sample\`.inventory.hotel h - WHERE h.country = 'United States' - LIMIT 10 - `; + SELECT h.* FROM \`travel-sample\`.inventory.hotel h + WHERE h.country = 'United States' + LIMIT 10 + `; const validPageContentFields = ["country", "name", "description"]; - const validMetadataFields = ["id"] - + const validMetadataFields = ["id"]; + const couchbaseClient = await Cluster.connect(connectionString, { username: databaseUsername, password: databasePassword, - configProfile: "wanDevelopment" + configProfile: "wanDevelopment", }); const loader = new CouchbaseDocumentLoader( couchbaseClient, @@ -26,13 +26,11 @@ test("Test Couchbase Cluster connection ", async () => { validMetadataFields ); const docs = await loader.load(); - console.log(docs); expect(docs.length).toBeGreaterThan(0); for (const doc of docs) { - console.log(doc); expect(doc.pageContent).not.toBe(""); // Assuming valid page content fields - expect(doc.metadata).toHaveProperty('id'); // Assuming metadata has id field + expect(doc.metadata).toHaveProperty("id"); // Assuming metadata has id field expect(doc.metadata.id).not.toBe(""); } }); From 9c71b504fabd96fdb33950fb6c6f85177904a28c Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Thu, 15 Feb 2024 16:46:22 -0800 Subject: [PATCH 8/8] Format --- .../web_loaders/couchbase.mdx | 66 +++++++++---------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/docs/core_docs/docs/integrations/document_loaders/web_loaders/couchbase.mdx b/docs/core_docs/docs/integrations/document_loaders/web_loaders/couchbase.mdx index 9cad48ed03a..950a01fa354 100644 --- a/docs/core_docs/docs/integrations/document_loaders/web_loaders/couchbase.mdx +++ b/docs/core_docs/docs/integrations/document_loaders/web_loaders/couchbase.mdx @@ -15,21 +15,21 @@ This guide shows how to use load documents from couchbase database. npm install couchbase ``` - ## Usage ### Querying for Documents from Couchbase + For more details on connecting to a Couchbase cluster, please check the [Node.js SDK documentation](https://docs.couchbase.com/nodejs-sdk/current/howtos/managing-connections.html#connection-strings). For help with querying for documents using SQL++ (SQL for JSON), please check the [documentation](https://docs.couchbase.com/server/current/n1ql/n1ql-language-reference/index.html). -```typescript +```typescript import { CouchbaseDocumentLoader } from "langchain/document_loaders/web/couchbase"; -import { Cluster } from "couchbase" +import { Cluster } from "couchbase"; -const connectionString = "couchbase://localhost"; // valid couchbase connection string -const dbUsername = "Administrator" // valid database user with read access to the bucket being queried -const dbPassword = "Password" // password for the database user +const connectionString = "couchbase://localhost"; // valid couchbase connection string +const dbUsername = "Administrator"; // valid database user with read access to the bucket being queried +const dbPassword = "Password"; // password for the database user // query is a valid SQL++ query const query = ` @@ -43,19 +43,18 @@ const query = ` ```typescript const couchbaseClient = await Cluster.connect(connectionString, { - username: dbUsername, - password: dbPassword, - configProfile: "wanDevelopment" + username: dbUsername, + password: dbPassword, + configProfile: "wanDevelopment", }); - ``` ### Create the Loader ```typescript const loader = new CouchbaseDocumentLoader( - couchbaseClient, // The connected couchbase cluster client - query, // A valid SQL++ query which will return the required data + couchbaseClient, // The connected couchbase cluster client + query // A valid SQL++ query which will return the required data ); ``` @@ -66,39 +65,40 @@ You can fetch the documents by calling the `load` method of the loader. It will ```typescript // using load method docs = await loader.load(); -console.log(docs) +console.log(docs); ``` ```typescript // using lazy_load -for await (const doc of this.lazyLoad()){ - console.log(doc); - break; // break based on required condition +for await (const doc of this.lazyLoad()) { + console.log(doc); + break; // break based on required condition } ``` ### Specifying Fields with Content and Metadata + The fields that are part of the Document content can be specified using the `pageContentFields` parameter. The metadata fields for the Document can be specified using the `metadataFields` parameter. ```typescript const loaderWithSelectedFields = new CouchbaseDocumentLoader( - couchbaseClient, - query, - // pageContentFields - [ - "address", - "name", - "city", - "phone", - "country", - "geo", - "description", - "reviews", - ], - ["id"] // metadataFields + couchbaseClient, + query, + // pageContentFields + [ + "address", + "name", + "city", + "phone", + "country", + "geo", + "description", + "reviews", + ], + ["id"] // metadataFields ); -const filtered_docs = await loaderWithSelectedFields.load() -console.log(filtered_docs) -``` \ No newline at end of file +const filtered_docs = await loaderWithSelectedFields.load(); +console.log(filtered_docs); +```