Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions bin/commands/init.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,23 +63,26 @@ export async function init(configPath) {
type: 'select',
name: 'provider',
message: 'Select an Embedding Provider:',
choices: ['openai', 'deepseek', 'ollama'],
choices: ['voyage','openai', 'deepseek', 'ollama'],
helpMessage: "Available embedding providers:\n" +
"- Voyage: Best MongoDB compatible, requires API key\n" +
"- OpenAI: Most popular, requires API key\n" +
"- DeepSeek: Alternative provider, requires API key\n" +
"- Ollama: Local deployment, no API key needed"
});

// Provider-specific configuration
if (responses.provider === 'openai' || responses.provider === 'deepseek') {
if (responses.provider === 'openai' || responses.provider === 'deepseek' || responses.provider === 'voyage') {
responses.apiKey = await promptWithValidation({
type: 'password',
name: 'apiKey',
message: `Enter your ${responses.provider === 'openai' ? 'OpenAI' : 'DeepSeek'} API Key:`,
message: `Enter your ${responses.provider === 'openai' ? 'OpenAI' : responses.provider === 'voyage' ? 'Voyage' : 'DeepSeek'} API Key:`,
validate: (input) => input && input.length > 0 ? true : 'API key is required',
helpMessage: responses.provider === 'openai'
? "OpenAI API key format: sk-....\n- Get your key from: https://platform.openai.com/api-keys"
: "DeepSeek API key format: dk-....\n- Get your key from DeepSeek's platform"
: (responses.provider === 'voyage'
? "VoyageAI API key format: pa-....\n- Get your key from VoyageAI's platform"
: "DeepSeek API key format: dk-....\n- Get your key from DeepSeek's platform")
});
} else if (responses.provider === 'ollama') {
const availableModels = getOllamaModels();
Expand All @@ -104,7 +107,7 @@ export async function init(configPath) {
provider: responses.provider,
...(responses.apiKey && { apiKey: responses.apiKey }),
...(responses.model && { model: responses.model }),
dimensions: 1536,
dimensions: responses.provider === 'voyage' ? 1024 : 1536,
batchSize: 100
},
search: {
Expand Down
17 changes: 12 additions & 5 deletions bin/utils/prompts.js
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ export async function promptForConfigEdits(currentConfig) {
type: 'select',
name: 'provider',
message: 'Select an Embedding Provider:',
choices: ['openai', 'deepseek', 'ollama'],
choices: ['voyage','openai', 'deepseek', 'ollama'],
initial: currentConfig.embedding.provider
}
]);
Expand Down Expand Up @@ -140,7 +140,7 @@ export async function promptForProviderConfig() {
type: 'select',
name: 'provider',
message: 'Select an embedding provider:',
choices: ['openai', 'ollama', 'anthropic', 'deepseek'],
choices: ['voyage','openai', 'ollama', 'anthropic', 'deepseek'],
initial: 'openai'
});

Expand All @@ -165,7 +165,8 @@ export async function promptForProviderConfig() {
modelChoices = {
'openai': ['text-embedding-3-small', 'text-embedding-3-large'],
'anthropic': ['claude-3-opus-20240229', 'claude-3-sonnet-20240229'],
'deepseek': ['deepseek-coder', 'deepseek-chat']
'deepseek': ['deepseek-coder', 'deepseek-chat'],
'voyage': ['voyage-3', 'voyage-3-large', 'voyage-3-lite', 'voyage-code-3', 'voyage-finance-2', 'voyage-law-2']
}[providerResponse.provider] || [];
defaultModel = modelChoices[0];
}
Expand Down Expand Up @@ -204,7 +205,13 @@ export async function promptForProviderConfig() {
'llama2': '4096',
'mistral': '4096',
'mixtral': '4096',
'codellama': '4096'
'codellama': '4096',
'voyage-3': '1024',
'voyage-3-large': '1024',
'voyage-3-lite': '1024',
'voyage-code-3': '1024',
'voyage-finance-2': '1024',
'voyage-law-2': '1024'
}[answers.model] || '4096';
return dims.toString();
},
Expand All @@ -227,4 +234,4 @@ export async function promptForProviderConfig() {
baseUrl: config.baseUrl,
dimensions: config.dimensions
};
}
}
7 changes: 6 additions & 1 deletion bin/utils/providers.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export async function testProvider(config) {
return await testOllamaProvider(config);
case 'openai':
case 'deepseek':
case 'voyage':
return await testApiProvider(config);
default:
throw new Error(`Unknown provider: ${config.embedding.provider}`);
Expand Down Expand Up @@ -80,6 +81,8 @@ export function getDefaultDimensions(provider) {
return 1536; // For text-embedding-3-small
case 'deepseek':
return 1024;
case 'voyage':
return 1024; // For voyage models
case 'ollama':
return 4096; // For llama2 models
default:
Expand All @@ -93,9 +96,11 @@ export function getProviderModels(provider) {
return ['text-embedding-3-small', 'text-embedding-3-large'];
case 'deepseek':
return ['deepseek-embedding'];
case 'voyage':
return ['voyage-3', 'voyage-3-large', 'voyage-3-lite', 'voyage-code-3', 'voyage-finance-2', 'voyage-law-2'];
case 'ollama':
return getOllamaModels();
default:
return [];
}
}
}
4 changes: 2 additions & 2 deletions examples/basic-usage.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ async function runExample() {
embedding: {
provider: process.env.EMBEDDING_PROVIDER || 'openai',
apiKey: process.env.EMBEDDING_API_KEY,
dimensions: 1536,
dimensions: 1024,
model: process.env.EMBEDDING_MODEL || 'text-embedding-3-small'
}
});
Expand Down Expand Up @@ -65,7 +65,7 @@ async function runExample() {

console.log('\nSearch results:');
results.forEach((result, i) => {
console.log(`\n${i + 1}. Score: ${result.score.toFixed(3)}`);
//console.log(`\n${i + 1}. Score: ${result.score.toFixed(3)}`);
console.log(`Content: ${result.content}`);
console.log(`Metadata: ${JSON.stringify(result.metadata)}`);
});
Expand Down
12 changes: 8 additions & 4 deletions mongodb-rag-docs/docs/api-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,13 @@ const rag = new MongoRAG({
- `config.database` (string, required): Default MongoDB database name.
- `config.collection` (string, required): Default MongoDB collection name.
- `config.embedding` (object, required):
- `provider` (string, required): Embedding provider (`openai` is supported).
- `apiKey` (string, required): API key for the embedding provider.
- `model` (string, optional): Model name (default: `'text-embedding-3-small'`).
- `provider` (string, required): Embedding provider. Supported values: `openai`, `ollama`, and `voyage`.
- `apiKey` (string, required unless `provider` is `ollama`): API key for the embedding provider.
- `model` (string, optional): Model name. Defaults depend on provider:
- OpenAI: `'text-embedding-3-small'`
- Voyage: `'voyage-3'` (other options: `voyage-3-large`, `voyage-3-lite`, `voyage-code-3`, `voyage-finance-2`, `voyage-law-2`)
- Ollama: no default; a model name must be specified
- `baseUrl` (string, optional): Base URL for Ollama API (default: `'http://localhost:11434'`).
- `batchSize` (number, optional): Batch size for embedding generation (default: `100`).
- `dimensions` (number, optional): Number of dimensions in the embedding space (default: `1536`).
- `config.search` (object, optional):
Expand Down Expand Up @@ -171,4 +175,4 @@ try {

For more detailed examples and use cases, refer to:
- [Basic Example](./examples/basic-example.md)
- [Advanced Example](./examples/advanced-example.md)
- [Advanced Example](./examples/advanced-example.md)
191 changes: 191 additions & 0 deletions mongodb-rag-docs/docs/examples/voyage-example.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
---
id: voyage-example
title: Using VoyageAI Embeddings
sidebar_position: 3
---

# Using VoyageAI Embeddings with MongoDB-RAG

This example demonstrates how to use VoyageAI's embedding models with MongoDB-RAG for vector search.

## Prerequisites

1. Install the `mongodb-rag` and `voyageai` packages:
```bash
npm install mongodb-rag voyageai
```

2. Get a VoyageAI API key from [VoyageAI's website](https://www.voyageai.com/).

3. Set up your environment variables:
```bash
export VOYAGE_API_KEY=your_api_key_here
```

## Basic Usage

```javascript
import MongoRAG from 'mongodb-rag';
import dotenv from 'dotenv';

// Load environment variables
dotenv.config();

async function main() {
// Initialize MongoRAG with VoyageAI provider
const rag = new MongoRAG({
mongoUrl: 'mongodb+srv://your-connection-string',
database: 'ragdb',
collection: 'documents',
embedding: {
provider: 'voyage',
apiKey: process.env.VOYAGE_API_KEY,
model: 'voyage-3' // This is the default model
}
});

// Connect to MongoDB
await rag.connect();

// Ingest some documents
await rag.ingestBatch([
{
documentId: 'doc1',
content: 'MongoDB is a document database with the scalability and flexibility that you want with the querying and indexing that you need.',
metadata: { source: 'MongoDB Website', category: 'Database' }
},
{
documentId: 'doc2',
content: 'Vector search in MongoDB allows you to search for documents based on semantic similarity using vector embeddings.',
metadata: { source: 'MongoDB Documentation', category: 'Search' }
}
]);

// Perform a search
const results = await rag.search('How does vector search work?');
console.log(results);

// Close the connection
await rag.close();
}

main().catch(console.error);
```

## Using Different VoyageAI Models

VoyageAI offers several embedding models optimized for different use cases:

```javascript
// For general purpose (default)
const rag = new MongoRAG({
// MongoDB configuration...
embedding: {
provider: 'voyage',
apiKey: process.env.VOYAGE_API_KEY,
model: 'voyage-3' // Default model
}
});

// For higher quality embeddings
const ragLarge = new MongoRAG({
// MongoDB configuration...
embedding: {
provider: 'voyage',
apiKey: process.env.VOYAGE_API_KEY,
model: 'voyage-3-large' // Higher quality, larger model
}
});

// For faster, more efficient embeddings
const ragLite = new MongoRAG({
// MongoDB configuration...
embedding: {
provider: 'voyage',
apiKey: process.env.VOYAGE_API_KEY,
model: 'voyage-3-lite' // Faster, more efficient
}
});

// For code-specific embeddings
const ragCode = new MongoRAG({
// MongoDB configuration...
embedding: {
provider: 'voyage',
apiKey: process.env.VOYAGE_API_KEY,
model: 'voyage-code-3' // Optimized for code
}
});

// For finance-specific embeddings
const ragFinance = new MongoRAG({
// MongoDB configuration...
embedding: {
provider: 'voyage',
apiKey: process.env.VOYAGE_API_KEY,
model: 'voyage-finance-2' // Optimized for finance
}
});

// For legal-specific embeddings
const ragLaw = new MongoRAG({
// MongoDB configuration...
embedding: {
provider: 'voyage',
apiKey: process.env.VOYAGE_API_KEY,
model: 'voyage-law-2' // Optimized for legal content
}
});
```

## Advanced Configuration

You can combine VoyageAI embeddings with advanced search options:

```javascript
const rag = new MongoRAG({
mongoUrl: 'mongodb+srv://your-connection-string',
database: 'ragdb',
collection: 'documents',
embedding: {
provider: 'voyage',
apiKey: process.env.VOYAGE_API_KEY,
model: 'voyage-3',
batchSize: 50 // Process 50 documents at a time
},
search: {
maxResults: 10,
minScore: 0.75,
similarityMetric: 'cosine'
}
});

// Search with metadata filtering
const results = await rag.search('vector search techniques', {
filter: { category: 'Search' }
});
```

## Error Handling

```javascript
try {
const rag = new MongoRAG({
// MongoDB configuration...
embedding: {
provider: 'voyage',
apiKey: process.env.VOYAGE_API_KEY,
model: 'voyage-3'
}
});

await rag.connect();
const results = await rag.search('vector search');
} catch (error) {
if (error.message.includes('VoyageAI API error')) {
console.error('Error with VoyageAI API:', error.message);
// Handle VoyageAI specific errors
} else {
console.error('General error:', error);
}
}
```
Loading
Loading