Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,57 @@ static RegexSearchOperator regex(final Iterable<? extends SearchPath> paths, fin
.append("query", queryIterator.hasNext() ? queries : firstQuery));
}

/**
 * Returns a {@link SearchOperator} that performs vector search within the {@code $search} pipeline stage.
 * This is the approximate (ANN) variant with {@code numCandidates}.
 *
 * @param path The indexed vector field to search.
 * @param queryVector The query vector. The number of dimensions must match the index field.
 * @param limit The number of results to return. Must be positive.
 * @param numCandidates The number of nearest neighbors to consider during ANN search.
 * Must be greater than or equal to {@code limit}. The server may impose an upper bound.
 * @return The requested {@link VectorSearchOperator}.
 * @throws IllegalArgumentException If {@code path} or {@code queryVector} is {@code null},
 * if {@code limit} is not positive, or if {@code numCandidates} is less than {@code limit}.
 * @mongodb.atlas.manual atlas-search/vector-search/ vectorSearch operator
 * @since 5.8
 */
static VectorSearchOperator vectorSearch(
        final FieldSearchPath path,
        final Iterable<Double> queryVector,
        final int limit,
        final int numCandidates) {
    notNull("path", path);
    notNull("queryVector", queryVector);
    // Without this check a pair such as limit = -5, numCandidates = -1 would satisfy
    // the numCandidates >= limit comparison below and produce a meaningless query.
    isTrueArgument("limit must be > 0", limit > 0);
    isTrueArgument("numCandidates must be >= limit", numCandidates >= limit);
    return new VectorSearchOperatorConstructibleBsonElement("vectorSearch",
            new Document("path", path.toValue())
                    .append("queryVector", queryVector)
                    .append("limit", limit)
                    .append("numCandidates", numCandidates));
}

/**
 * Returns a {@link SearchOperator} that performs exact (ENN) vector search within the {@code $search} pipeline stage.
 * The rendered operator carries {@code exact: true} and no {@code numCandidates} field.
 *
 * @param path The indexed vector field to search.
 * @param queryVector The query vector. The number of dimensions must match the index field.
 * @param limit The number of results to return. Must be positive.
 * @return The requested {@link VectorSearchOperator}.
 * @throws IllegalArgumentException If {@code path} or {@code queryVector} is {@code null},
 * or if {@code limit} is not positive.
 * @mongodb.atlas.manual atlas-search/vector-search/ vectorSearch operator
 * @since 5.8
 */
static VectorSearchOperator vectorSearchExact(
        final FieldSearchPath path,
        final Iterable<Double> queryVector,
        final int limit) {
    notNull("path", path);
    notNull("queryVector", queryVector);
    // A zero or negative result count can never be satisfied; fail fast on the client.
    isTrueArgument("limit must be > 0", limit > 0);
    return new VectorSearchOperatorConstructibleBsonElement("vectorSearch",
            new Document("path", path.toValue())
                    .append("queryVector", queryVector)
                    .append("limit", limit)
                    .append("exact", true));
}

/**
* Creates a {@link SearchOperator} from a {@link Bson} in situations when there is no builder method that better satisfies your needs.
* This method cannot be used to validate the syntax.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* Copyright 2008-present MongoDB, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.mongodb.client.model.search;

import com.mongodb.annotations.Beta;
import com.mongodb.annotations.Reason;
import com.mongodb.annotations.Sealed;

/**
* A {@link SearchOperator} that performs vector search within the {@code $search} pipeline stage.
*
* @mongodb.atlas.manual atlas-search/operators-and-collectors/#operators Search operators
* @since 5.8
*/
@Sealed
@Beta(Reason.CLIENT)
public interface VectorSearchOperator extends SearchOperator {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about knnVector? From the doc, I understood that an index could be created with a knnVector field:

{
  "mappings": {
    "dynamic": false,
    "fields": {
      "embeddings": {
        "type": "knnVector",

Should we add a fluent index builder that can set the type to knnVector?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The index work was covered in #1960; this change is for creating the query, e.g.:

{
  $search: {
    "index": "<index name>", // optional, defaults to "default"
    "vectorSearch": {
      "exact": true | false,
      "path": "<field-to-search>",
      "queryVector": [<array-of-numbers>],
      "filter": {<filter-specification>},
      "limit": <number-of-results>,
      "numCandidates": <number-of-candidates>,
      "score": {<options>}
    }
  }
}

Which can be done via:

  SearchOperator.vectorSearch(
          fieldPath("embedding"),
          asList(1.0, 2.0),
          10,
          50
  ).filter(SearchOperator.text(fieldPath("title"), "hello"))
          .score(boost(2f))

I added VectorSearchOperator to allow for overrides that vector search can do but aren't general for all SearchOperator instances. VectorSearchOperatorConstructibleBsonElement is the actual implementation for creating the bson.

This follows the existing SearchOperator conventions and builds upon them for vector search.


/**
* Creates a new {@link VectorSearchOperator} with the filter specified.
* This option is specific to vector search and is not declared on {@link SearchOperator}.
*
* @param filter A search operator to filter documents.
* @return A new {@link VectorSearchOperator}.
*/
VectorSearchOperator filter(SearchOperator filter);

/**
* Creates a new {@link VectorSearchOperator} with the scoring modifier specified.
* Overrides the inherited {@code score} method so that the result is typed as
* {@link VectorSearchOperator}, which keeps {@link #filter(SearchOperator)} available for chaining.
*
* @param modifier The scoring modifier.
* @return A new {@link VectorSearchOperator}.
*/
@Override
VectorSearchOperator score(SearchScore modifier);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Copyright 2008-present MongoDB, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.mongodb.client.model.search;

import com.mongodb.internal.client.model.AbstractConstructibleBsonElement;
import org.bson.conversions.Bson;

import static com.mongodb.assertions.Assertions.notNull;

/**
 * Package-private {@link VectorSearchOperator} implementation backed by
 * {@link AbstractConstructibleBsonElement}. Each option method validates its argument
 * and delegates to {@code newWithAppendedValue} under the corresponding field name.
 */
final class VectorSearchOperatorConstructibleBsonElement
        extends AbstractConstructibleBsonElement<VectorSearchOperatorConstructibleBsonElement>
        implements VectorSearchOperator {

    /** Creates the element from its name and initial value. */
    VectorSearchOperatorConstructibleBsonElement(final String name, final Bson value) {
        super(name, value);
    }

    /** Used only by {@link #newSelf(Bson, Bson)}. */
    private VectorSearchOperatorConstructibleBsonElement(final Bson baseElement, final Bson appendedElementValue) {
        super(baseElement, appendedElementValue);
    }

    @Override
    public VectorSearchOperator filter(final SearchOperator filter) {
        // notNull returns its argument, so validating first and passing the parameter is equivalent.
        notNull("filter", filter);
        return newWithAppendedValue("filter", filter);
    }

    @Override
    public VectorSearchOperatorConstructibleBsonElement score(final SearchScore modifier) {
        final SearchScore checkedModifier = notNull("modifier", modifier);
        return newWithAppendedValue("score", checkedModifier);
    }

    @Override
    protected VectorSearchOperatorConstructibleBsonElement newSelf(final Bson baseElement, final Bson appendedElementValue) {
        return new VectorSearchOperatorConstructibleBsonElement(baseElement, appendedElementValue);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
package com.mongodb.client.model.search;

import com.mongodb.MongoClientSettings;
import com.mongodb.client.model.Aggregates;
import com.mongodb.client.model.geojson.Point;
import com.mongodb.client.model.geojson.Position;
import org.bson.BsonArray;
Expand Down Expand Up @@ -1002,6 +1003,129 @@ void regex() {
);
}

@Test
void vectorSearch() {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

one of the acceptance criteria from the doc is to

Implement all necessary testing (unit, integration, e2e) and metrics.
We don't have e2e tests. Is that because the feature is not available in Atlas?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't have e2e testing at the drivers level yet; I expect this workflow to be covered by a DRIVERS ticket rather than by the doc.

I'll make sure to add a DRIVERS ticket to ensure it's documented in the Specs repo and/or unified tests are added.

// Covers argument validation and BSON rendering of the approximate (ANN) vectorSearch operator.
assertAll(
() -> assertThrows(IllegalArgumentException.class, () ->
// path must not be null
SearchOperator.vectorSearch(null, asList(1.0), 10, 50)
),
() -> assertThrows(IllegalArgumentException.class, () ->
// queryVector must not be null
SearchOperator.vectorSearch(fieldPath("embedding"), null, 10, 50)
),
() -> assertThrows(IllegalArgumentException.class, () ->
// numCandidates must be >= limit
SearchOperator.vectorSearch(fieldPath("embedding"), asList(1.0), 100, 50)
),
// base operator: renders path, queryVector, limit and numCandidates, in that order
() -> assertEquals(
new BsonDocument("vectorSearch",
new BsonDocument("path", new BsonString("embedding"))
.append("queryVector", new BsonArray(asList(
new BsonDouble(1.0), new BsonDouble(2.0), new BsonDouble(3.0))))
.append("limit", new BsonInt32(10))
.append("numCandidates", new BsonInt32(100))),
SearchOperator.vectorSearch(
fieldPath("embedding"),
asList(1.0, 2.0, 3.0),
10,
100
).toBsonDocument()
),
// chained options: filter and score are appended after the base fields, in call order
() -> assertEquals(
new BsonDocument("vectorSearch",
new BsonDocument("path", new BsonString("embedding"))
.append("queryVector", new BsonArray(asList(
new BsonDouble(1.0), new BsonDouble(2.0))))
.append("limit", new BsonInt32(10))
.append("numCandidates", new BsonInt32(50))
.append("filter", new BsonDocument("text",
new BsonDocument("query", new BsonString("hello"))
.append("path", new BsonString("title"))))
.append("score", new BsonDocument("boost",
new BsonDocument("value", new BsonDouble(2.0))))),
SearchOperator.vectorSearch(
fieldPath("embedding"),
asList(1.0, 2.0),
10,
50
).filter(SearchOperator.text(fieldPath("title"), "hello"))
.score(boost(2f))
.toBsonDocument()
)
);
}

@Test
void vectorSearchExact() {
    // Expected rendering of the bare exact (ENN) operator.
    final BsonDocument expectedBase = new BsonDocument("vectorSearch",
            new BsonDocument("path", new BsonString("embedding"))
                    .append("queryVector", new BsonArray(asList(
                            new BsonDouble(1.0), new BsonDouble(2.0), new BsonDouble(3.0))))
                    .append("limit", new BsonInt32(5))
                    .append("exact", BsonBoolean.TRUE));

    // Expected rendering once filter and score have been chained on.
    final BsonDocument expectedFilter = new BsonDocument("text",
            new BsonDocument("query", new BsonString("hello"))
                    .append("path", new BsonString("title")));
    final BsonDocument expectedScore = new BsonDocument("boost",
            new BsonDocument("value", new BsonDouble(2.0)));
    final BsonDocument expectedWithOptions = new BsonDocument("vectorSearch",
            new BsonDocument("path", new BsonString("embedding"))
                    .append("queryVector", new BsonArray(asList(
                            new BsonDouble(1.0), new BsonDouble(2.0))))
                    .append("limit", new BsonInt32(10))
                    .append("exact", BsonBoolean.TRUE)
                    .append("filter", expectedFilter)
                    .append("score", expectedScore));

    assertAll(
            // path must not be null
            () -> assertThrows(IllegalArgumentException.class, () ->
                    SearchOperator.vectorSearchExact(null, asList(1.0), 10)),
            // queryVector must not be null
            () -> assertThrows(IllegalArgumentException.class, () ->
                    SearchOperator.vectorSearchExact(fieldPath("embedding"), null, 10)),
            () -> assertEquals(
                    expectedBase,
                    SearchOperator.vectorSearchExact(fieldPath("embedding"), asList(1.0, 2.0, 3.0), 5)
                            .toBsonDocument()),
            () -> assertEquals(
                    expectedWithOptions,
                    SearchOperator.vectorSearchExact(fieldPath("embedding"), asList(1.0, 2.0), 10)
                            .filter(SearchOperator.text(fieldPath("title"), "hello"))
                            .score(boost(2f))
                            .toBsonDocument())
    );
}

@Test
void vectorSearchInsideSearchStage() {
    // The operator document that should appear under the "vectorSearch" key of the stage.
    final BsonDocument expectedOperator = new BsonDocument("path", new BsonString("embedding"))
            .append("queryVector", new BsonArray(asList(
                    new BsonDouble(1.0), new BsonDouble(2.0), new BsonDouble(3.0))))
            .append("limit", new BsonInt32(10))
            .append("numCandidates", new BsonInt32(100));
    // The index option is rendered first, followed by the operator.
    final BsonDocument expectedStage = new BsonDocument("$search",
            new BsonDocument("index", new BsonString("myIndex"))
                    .append("vectorSearch", expectedOperator));

    final SearchOperator operator = SearchOperator.vectorSearch(
            fieldPath("embedding"),
            asList(1.0, 2.0, 3.0),
            10,
            100);
    final SearchOptions options = SearchOptions.searchOptions().index("myIndex");

    assertEquals(expectedStage, Aggregates.search(operator, options).toBsonDocument());
}

private static SearchOperator docExamplePredefined() {
return SearchOperator.exists(
fieldPath("fieldName"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,44 @@ object SearchOperator {
def regex(paths: Iterable[_ <: SearchPath], queries: Iterable[String]): RegexSearchOperator =
JSearchOperator.regex(paths.asJava, queries.asJava)

/**
 * Returns a `SearchOperator` that performs vector search within the `\$search` pipeline stage.
 * This is the approximate (ANN) variant with `numCandidates`.
 *
 * @param path The indexed vector field to search.
 * @param queryVector The query vector. The number of dimensions must match the index field.
 * @param limit The number of results to return.
 * @param numCandidates The number of nearest neighbors to consider during ANN search.
 * Must be greater than or equal to `limit`. The server may impose an upper bound.
 * @return The requested `VectorSearchOperator`.
 * @see [[https://www.mongodb.com/docs/atlas/atlas-search/vector-search/ vectorSearch operator]]
 * @since 5.8
 */
def vectorSearch(
    path: FieldSearchPath,
    queryVector: Iterable[Double],
    limit: Int,
    numCandidates: Int
): VectorSearchOperator = {
  // The Java builder takes boxed doubles, so box each element before converting the collection.
  val boxedQueryVector = queryVector.map(component => Double.box(component)).asJava
  JSearchOperator.vectorSearch(path, boxedQueryVector, limit, numCandidates)
}

/**
 * Returns a `SearchOperator` that performs exact (ENN) vector search within the `\$search` pipeline stage.
 *
 * @param path The indexed vector field to search.
 * @param queryVector The query vector. The number of dimensions must match the index field.
 * @param limit The number of results to return.
 * @return The requested `VectorSearchOperator`.
 * @see [[https://www.mongodb.com/docs/atlas/atlas-search/vector-search/ vectorSearch operator]]
 * @since 5.8
 */
def vectorSearchExact(
    path: FieldSearchPath,
    queryVector: Iterable[Double],
    limit: Int
): VectorSearchOperator = {
  // The Java builder takes boxed doubles, so box each element before converting the collection.
  val boxedQueryVector = queryVector.map(component => Double.box(component)).asJava
  JSearchOperator.vectorSearchExact(path, boxedQueryVector, limit)
}

/**
* Creates a `SearchOperator` from a `Bson` in situations when there is no builder method that better satisfies your needs.
* This method cannot be used to validate the syntax.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,16 @@ package object search {
@Beta(Array(Reason.CLIENT))
type QueryStringSearchOperator = com.mongodb.client.model.search.QueryStringSearchOperator

/**
* A `SearchOperator` that performs vector search within the `\$search` pipeline stage.
* This is a type alias for the Java driver's `com.mongodb.client.model.search.VectorSearchOperator`.
*
* @see `SearchOperator.vectorSearch`
* @see `SearchOperator.vectorSearchExact`
* @since 5.8
*/
@Sealed
@Beta(Array(Reason.CLIENT))
type VectorSearchOperator = com.mongodb.client.model.search.VectorSearchOperator

/**
* Fuzzy search options that may be used with some [[SearchOperator]]s.
*
Expand Down
Loading