Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Improved ChromaDB docs and tests #766

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
32 changes: 30 additions & 2 deletions docs/modules/chromadb.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,37 @@
npm install @testcontainers/chromadb --save-dev
```

## Example
## Resources

* [GitHub](https://github.com/chroma-core/chroma)
* [Node.js Client](https://www.npmjs.com/package/chromadb)
* [Docs](https://docs.trychroma.com)
* [Discord](https://discord.gg/MMeYNTmh3x)
* [Cookbook](https://cookbook.chromadb.dev)

## Examples

<!--codeinclude-->
[Connect to Chroma:](../../packages/modules/chromadb/src/chromadb-container.test.ts)
inside_block:simpleConnect
<!--/codeinclude-->

<!--codeinclude-->
[Create Collection:](../../packages/modules/chromadb/src/chromadb-container.test.ts)
inside_block:createCollection
<!--/codeinclude-->

<!--codeinclude-->
[Query Collection with Embedding Function:](../../packages/modules/chromadb/src/chromadb-container.test.ts)
inside_block:queryCollectionWithEmbeddingFunction
<!--/codeinclude-->

<!--codeinclude-->
[Work with persistent directory:](../../packages/modules/chromadb/src/chromadb-container.test.ts)
inside_block:persistentData
<!--/codeinclude-->

<!--codeinclude-->
[](../../packages/modules/chromadb/src/chromadb-container.test.ts) inside_block:docs
[Work with authentication:](../../packages/modules/chromadb/src/chromadb-container.test.ts) inside_block:auth
<!--/codeinclude-->

2 changes: 1 addition & 1 deletion packages/modules/chromadb/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"build": "tsc --project tsconfig.build.json"
},
"devDependencies": {
"chromadb": "^1.8.1"
"chromadb": "^1.9.1"
},
"dependencies": {
"testcontainers": "^10.9.0"
Expand Down
170 changes: 121 additions & 49 deletions packages/modules/chromadb/src/chromadb-container.test.ts
Original file line number Diff line number Diff line change
@@ -1,23 +1,133 @@
import { ChromaClient, AdminClient } from "chromadb";
import { ChromaDBContainer } from "./chromadb-container";

import { ChromaClient, AdminClient, OllamaEmbeddingFunction } from "chromadb";
import { ChromaDBContainer, StartedChromaDBContainer } from "./chromadb-container";
import * as path from "node:path";
import { GenericContainer } from "testcontainers";
import * as os from "node:os";
import * as fs from "node:fs";
// run tests with NODE_OPTIONS=--experimental-vm-modules jest packages/modules/chromadb/src/chromadb-container.test.ts
describe("ChromaDB", () => {
jest.setTimeout(180_000);
jest.setTimeout(240_000);

// startContainer {
// Smoke test: starts a ChromaDB container, connects a client to it and checks
// that the heartbeat endpoint answers.
it("should connect", async () => {
  const container = await new ChromaDBContainer().start();
  try {
    const client = await connectTo(container);
    expect(await client.heartbeat()).toBeDefined();
    // Do something with the client
  } finally {
    // Stop the container even when an assertion above throws, so a failing
    // run does not leave it behind.
    await container.stop();
  }
});
// }

// simpleConnect {
/**
 * Creates a ChromaClient that targets the HTTP URL exposed by the given
 * started container, asserts that a heartbeat call yields a defined value,
 * and returns the client.
 */
async function connectTo(container: StartedChromaDBContainer) {
  const chroma = new ChromaClient({ path: container.getHttpUrl() });
  const heartbeat = await chroma.heartbeat();
  expect(heartbeat).toBeDefined();
  return chroma;
}
// }

// docs {
it("should connect and return a query result", async () => {
// createCollection {
// Creates a collection with cosine distance, adds one record with an explicit
// embedding, document and metadata, then reads it back by id.
it("should create collection and get data", async () => {
  const container = await new ChromaDBContainer().start();
  try {
    const client = await connectTo(container);
    const collection = await client.createCollection({ name: "test", metadata: { "hnsw:space": "cosine" } });
    expect(collection.name).toBe("test");
    expect(collection.metadata).toBeDefined();
    // Optional chaining keeps the type checker active here instead of
    // silencing it with @ts-ignore; the assertion fails the same way if
    // metadata is missing.
    expect(collection.metadata?.["hnsw:space"]).toBe("cosine");
    await collection.add({ ids: ["1"], embeddings: [[1, 2, 3]], documents: ["my doc"], metadatas: [{ key: "value" }] });
    const getResults = await collection.get({ ids: ["1"] });
    expect(getResults.ids[0]).toBe("1");
    expect(getResults.documents[0]).toStrictEqual("my doc");
    expect(getResults.metadatas).toBeDefined();
    expect(getResults.metadatas[0]?.key).toStrictEqual("value");
  } finally {
    // Always release the container, including when an assertion fails.
    await container.stop();
  }
});
// }

// queryCollectionWithEmbeddingFunction {
// Uses an Ollama container as the embedding backend: documents are added
// without explicit embeddings and the collection is queried by text.
it("should create collection and query", async () => {
  const container = await new ChromaDBContainer().start();
  try {
    const ollama = await new GenericContainer("ollama/ollama").withExposedPorts(11434).start();
    try {
      // Pull the embedding model inside the Ollama container and fail early
      // if the pull did not succeed, rather than failing later on add/query.
      const pull = await ollama.exec(["ollama", "pull", "nomic-embed-text"]);
      expect(pull.exitCode).toBe(0);

      const client = await connectTo(container);
      const embedder = new OllamaEmbeddingFunction({
        url: `http://${ollama.getHost()}:${ollama.getMappedPort(11434)}/api/embeddings`,
        model: "nomic-embed-text",
      });
      const collection = await client.createCollection({
        name: "test",
        metadata: { "hnsw:space": "cosine" },
        embeddingFunction: embedder,
      });
      expect(collection.name).toBe("test");
      await collection.add({
        ids: ["1", "2"],
        documents: [
          "This is a document about dogs. Dogs are awesome.",
          "This is a document about cats. Cats are awesome.",
        ],
      });
      const results = await collection.query({ queryTexts: ["Tell me about dogs"], nResults: 1 });
      expect(results).toBeDefined();
      // With nResults: 1 the single query must return exactly the "dogs" document.
      expect(results.ids[0]).toEqual(["1"]);
    } finally {
      // The Ollama container was previously never stopped.
      await ollama.stop();
    }
  } finally {
    await container.stop();
  }
});
// }

// persistentData {
// Bind-mounts a fresh host temp directory at /chroma/chroma, writes one record
// and checks that chroma.sqlite3 exists in that directory after the container stops.
// NOTE(review): despite the test name, no second container is started against
// the same directory; only the presence of the database file is verified.
it("should reconnect with volume and persistence data", async () => {
  const sourcePath = fs.mkdtempSync(path.join(os.tmpdir(), "chroma-temp"));
  try {
    const container = await new ChromaDBContainer()
      .withBindMounts([{ source: sourcePath, target: "/chroma/chroma" }])
      .start();
    try {
      const client = await connectTo(container);
      const collection = await client.createCollection({ name: "test", metadata: { "hnsw:space": "cosine" } });
      expect(collection.name).toBe("test");
      expect(collection.metadata).toBeDefined();
      expect(collection.metadata?.["hnsw:space"]).toBe("cosine");
      await collection.add({ ids: ["1"], embeddings: [[1, 2, 3]], documents: ["my doc"] });
      const getResults = await collection.get({ ids: ["1"] });
      expect(getResults.ids[0]).toBe("1");
      expect(getResults.documents[0]).toStrictEqual("my doc");
    } finally {
      // Stop the container before inspecting the host directory, and also
      // when an assertion above fails.
      await container.stop();
    }
    expect(fs.existsSync(path.join(sourcePath, "chroma.sqlite3"))).toBe(true);
  } finally {
    try {
      fs.rmSync(sourcePath, { force: true, recursive: true });
    } catch (e) {
      // Cleanup is best-effort: removal can fail when the test process has no
      // access to the files in the mounted directory. Log and carry on.
      console.log(e);
    }
  }
});
// }

// auth {
it("should use auth", async () => {
const tenant = "test-tenant";
const key = "test-key";
const database = "test-db";
const container = await new ChromaDBContainer()
.withEnvironment({
CHROMA_SERVER_AUTHN_CREDENTIALS: key,
CHROMA_SERVER_AUTHN_PROVIDER: "chromadb.auth.token_authn.TokenAuthenticationServerProvider",
CHROMA_AUTH_TOKEN_TRANSPORT_HEADER: "X_CHROMA_TOKEN",
})
.start();

const adminClient = new AdminClient({
tenant: tenant,
auth: {
provider: "token",
credentials: key,
providerOptions: {
headerType: "X_CHROMA_TOKEN",
},
tokenHeaderType: "X_CHROMA_TOKEN",
},
path: container.getHttpUrl(),
});
Expand All @@ -30,52 +140,14 @@ describe("ChromaDB", () => {
auth: {
provider: "token",
credentials: key,
providerOptions: {
headerType: "X_CHROMA_TOKEN",
},
tokenHeaderType: "X_CHROMA_TOKEN",
},
path: container.getHttpUrl(),
database,
});

const collection = await dbClient.createCollection({ name: "test-collection" });

await collection.add({
ids: ["1", "2", "3"],
documents: ["apple", "oranges", "pineapple"],
embeddings: [
[1, 2, 3],
[4, 5, 6],
[7, 8, 9],
],
});

const result = await collection.get({ ids: ["1", "2", "3"] });

expect(result).toMatchInlineSnapshot(`
{
"data": null,
"documents": [
"apple",
"oranges",
"pineapple",
],
"embeddings": null,
"ids": [
"1",
"2",
"3",
],
"metadatas": [
null,
null,
null,
],
"uris": null,
}
`);

await container.stop();
expect(collection.name).toBe("test-collection");
});
// }
});
2 changes: 1 addition & 1 deletion packages/modules/chromadb/src/chromadb-container.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { AbstractStartedContainer, GenericContainer, StartedTestContainer, Wait
const CHROMADB_PORT = 8000;

export class ChromaDBContainer extends GenericContainer {
constructor(image = "chromadb/chroma:0.4.22") {
constructor(image = "chromadb/chroma:0.5.0") {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC, this could bring a breaking change with the env var for auth and also for those relying on version 0.4.22.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @eddumelendez, indeed Auth is a breaking change in 0.5.0

super(image);
this.withExposedPorts(CHROMADB_PORT)
.withWaitStrategy(Wait.forHttp("/api/v1/heartbeat", CHROMADB_PORT))
Expand Down