# 03 — Multi-Modal Embeddings

`voyage-multimodal-3.5` embeds **text and images into the same vector space**, enabling:
- Text query → find matching images
- Image query → find similar images

All search is done via MongoDB's `$vectorSearch` aggregation stage.

In [None]:
import { MongoClient } from 'mongodb';

// ← Paste your VoyageAI API key here (get one at https://dash.voyageai.com)
const VOYAGE_API_KEY = 'pa-...';
const MM_MODEL       = 'voyage-multimodal-3.5';
const DIMS           = 1024;  // passed as `numDimensions` when the vector index is created
const INDEX_NAME     = 'multimodal_vector_index';

// Fail early with an actionable message instead of handing `undefined` to the driver.
const mongoUri = process.env.MONGODB_URI;
if (!mongoUri) {
  throw new Error('MONGODB_URI is not set. Export your MongoDB connection string before running this notebook.');
}

const client = new MongoClient(mongoUri);
await client.connect();
const db  = client.db('voyage_lab');
// Loose document shape: only `_id` is typed, every other field is `unknown`.
const col = db.collection<{ _id: string; [key: string]: unknown }>('listings');

console.log('Connected. Model:', MM_MODEL);

In [None]:
/** One part of a multimodal input: literal text or a remote image referenced by URL. */
type ContentItem =
  | { type: 'text';      text: string }
  | { type: 'image_url'; url: string  };

/**
 * Converts caller-facing inputs into the request shape documented for the
 * Voyage multimodal endpoint: every input is an object with a `content` array,
 * and an image part carries its URL under the `image_url` key.
 */
function toVoyageInputs(inputs: ContentItem[][]) {
  return inputs.map(parts => ({
    content: parts.map(part =>
      part.type === 'text'
        ? { type: 'text' as const, text: part.text }
        : { type: 'image_url' as const, image_url: part.url },
    ),
  }));
}

/**
 * Embeds a batch of multimodal inputs through the Voyage AI REST API.
 *
 * @param inputs    One entry per item to embed; each entry is an ordered list of text/image parts.
 * @param inputType Sent to the API as `input_type`; defaults to `'document'`.
 * @returns One embedding per input, in the same order as `inputs`.
 * @throws Error when the HTTP response is not OK (the message contains the status and
 *         response body) or when the number of embeddings differs from the number of inputs.
 */
async function embedMultimodal(
  inputs: ContentItem[][],
  inputType: 'document' | 'query' = 'document',
): Promise<number[][]> {
  // Nothing to embed — skip the network round trip.
  if (inputs.length === 0) return [];

  const res = await fetch('https://api.voyageai.com/v1/multimodalembeddings', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${VOYAGE_API_KEY}` },
    body: JSON.stringify({ inputs: toVoyageInputs(inputs), model: MM_MODEL, input_type: inputType }),
  });
  if (!res.ok) throw new Error(`Voyage API request failed (${res.status}): ${await res.text()}`);

  const json = await res.json() as { data?: { embedding: number[]; index?: number }[] };
  const data = json.data ?? [];
  if (data.length !== inputs.length) {
    throw new Error(`Voyage API returned ${data.length} embeddings for ${inputs.length} inputs`);
  }

  // Order by the reported `index` so callers can pair results with inputs positionally.
  // Entries without an index keep their relative order (Array.prototype.sort is stable).
  return [...data]
    .sort((a, b) => (a.index ?? 0) - (b.index ?? 0))
    .map(d => d.embedding);
}

console.log('Helper defined.');

## Embed listing images and store in MongoDB

Each listing stores its cover image URL at `images.picture_url`. We send that URL directly to the API — VoyageAI fetches and encodes the image.

In [None]:
// ── Embed cover images for all listings ──────────────────────────────────────
// `$type: 'string'` restricts the match to documents that actually hold a string URL.
// A bare `$ne: ''` also matches documents where the field is missing, and those
// would break the `.picture_url` read below.
const listings = await col
  .find(
    { 'images.picture_url': { $type: 'string', $ne: '' } },
    { projection: { _id: 1, name: 1, images: 1, description: 1 } },
  )
  .toArray();

console.log(`Embedding images for ${listings.length} listings...`);

const BATCH = 10;  // multimodal requests are heavier
let done = 0;

for (let i = 0; i < listings.length; i += BATCH) {
  const batch  = listings.slice(i, i + BATCH);
  const inputs = batch.map(l => [
    { type: 'image_url' as const, url: (l.images as { picture_url: string }).picture_url },
  ]);
  const vecs = await embedMultimodal(inputs, 'document');

  // Vectors are paired with listings by position, so a count mismatch would store wrong data.
  if (vecs.length !== batch.length) {
    throw new Error(`Expected ${batch.length} embeddings but received ${vecs.length}`);
  }

  // One write round trip per batch instead of one per listing.
  await col.bulkWrite(
    batch.map((l, j) => ({
      updateOne: {
        filter: { _id: l._id },
        update: { $set: { embedding_mm: vecs[j] } },
      },
    })),
  );

  done += batch.length;
  console.log(`Stored ${done}/${listings.length}`);
}
console.log('Image embeddings stored.');

## Create a Vector Search index on `embedding_mm`

In [None]:
const sleep = (ms: number) => new Promise<void>(resolve => setTimeout(resolve, ms));
const POLL_MS   = 2000;
const MAX_POLLS = 30;

// Drop a previous index of the same name only if one is listed, then wait until it
// is no longer reported. Checking first avoids a blanket `catch` that would also
// hide unrelated failures (permissions, connectivity).
const [existingIdx] = await col.listSearchIndexes(INDEX_NAME).toArray();
if (existingIdx) {
  await col.dropSearchIndex(INDEX_NAME);
  for (let i = 0; i < MAX_POLLS; i++) {
    await sleep(POLL_MS);
    const remaining = await col.listSearchIndexes(INDEX_NAME).toArray();
    if (remaining.length === 0) break;
  }
}

await col.createSearchIndex({
  name: INDEX_NAME,
  type: 'vectorSearch',
  definition: {
    fields: [
      { type: 'vector', path: 'embedding_mm', numDimensions: DIMS, similarity: 'cosine' },
      { type: 'filter', path: 'property_type' },
    ],
  },
});

console.log('Waiting for index...');
let indexReady = false;
for (let i = 0; i < MAX_POLLS && !indexReady; i++) {
  await sleep(POLL_MS);
  const [idx] = await col.listSearchIndexes(INDEX_NAME).toArray();
  console.log(' status:', idx?.status);
  // A failed build will never become READY; stop instead of polling until the timeout.
  if (idx?.status === 'FAILED') {
    throw new Error(`Search index "${INDEX_NAME}" failed to build.`);
  }
  indexReady = idx?.status === 'READY';
}
if (!indexReady) {
  // Keep going (as before), but say so: searches against a non-ready index may return nothing.
  console.warn(`Index "${INDEX_NAME}" was not READY after ${(MAX_POLLS * POLL_MS) / 1000}s; searches may return no results yet.`);
}

## Text → image cross-modal search


In [None]:
// Embed a free-text description as a query vector, then rank the stored image
// embeddings against it with $vectorSearch.
const textQuery = 'bright open space with natural light and minimalist decor';
const textQueryEmbeddings = await embedMultimodal([[{ type: 'text', text: textQuery }]], 'query');
const qVec = textQueryEmbeddings[0];

// Stage 1: nearest-neighbour lookup on the multimodal embedding field.
const textSearchStage = {
  $vectorSearch: {
    index:         INDEX_NAME,
    path:          'embedding_mm',
    queryVector:   qVec,
    numCandidates: 50,
    limit:         5,
  },
};

// Stage 2: keep only the display fields plus the similarity score.
const textProjectStage = {
  $project: {
    name:          1,
    property_type: 1,
    picture_url:   '$images.picture_url',
    score:         { $meta: 'vectorSearchScore' },
  },
};

const results = await col.aggregate([textSearchStage, textProjectStage]).toArray();

console.log(`Cross-modal results for: "${textQuery}"\n`);
const textResultRows = Array.from(results, hit => ({
  name:  hit.name,
  type:  hit.property_type,
  score: (hit.score as number).toFixed(4),
}));
console.table(textResultRows);

## Image → image search

In [None]:
const anchor = listings[0];  // use the first listing as the query image
if (!anchor) {
  throw new Error('No listings were loaded, so there is no image to use as the query.');
}
const anchorUrl = (anchor.images as { picture_url: string }).picture_url;

const [imgQueryVec] = await embedMultimodal([[{ type: 'image_url', url: anchorUrl }]], 'query');

const similar = await col.aggregate([
  {
    $vectorSearch: {
      index:         INDEX_NAME,
      path:          'embedding_mm',
      queryVector:   imgQueryVec,
      numCandidates: 50,
      limit:         6,  // one extra slot, since the query listing itself is expected among the hits
    },
  },
  {
    // `_id` is returned by default and is used below to recognise the query listing.
    $project: {
      name:  1,
      score: { $meta: 'vectorSearchScore' },
    },
  },
]).toArray();

console.log(`Visually similar to: "${anchor.name}"\n`);

// Identify the query listing by `_id` rather than assuming it is the first hit:
// the search is approximate and the query vector is produced with input_type
// 'query' while the stored vectors use 'document', so its rank is not guaranteed.
const anchorId = String(anchor._id);
let rank = 0;
for (const r of similar) {
  const label = String(r._id) === anchorId ? '→ (query)' : `  ${++rank}.    `;
  console.log(`  ${label} [${(r.score as number).toFixed(4)}] ${r.name}`);
}

In [None]:
// ── Cleanup ───────────────────────────────────────────────────────────────────
// Close the MongoClient connection that was opened at the top of the notebook.
await client.close();
console.log('Done.');