Skip to content

Commit

Permalink
Vector query and cosine similarity
Browse files Browse the repository at this point in the history
1. Dense vector

PUT dindex
{
  "mappings": {
    "_doc": {
      "properties": {
        "my_vector": {
          "type": "dense_vector"
        },
        "my_text" : {
          "type" : "keyword"
        }
      }
    }
  }
}

PUT dindex/_doc/1
{
  "my_text" : "text1",
  "my_vector" : [ 0.5, 10, 6 ]
}

PUT dindex/_doc/2
{
  "my_text" : "text2",
  "my_vector" : [ 0.5, 10, 10]
}

GET dindex/_search
{
  "query" : {
        "vector" : {
            "field" : "my_vector",
            "query_vector": [ 0.5, 10, 10]
        }
    }
}

Result:
....
"hits": [
    {
        "_index": "dindex",
        "_type": "_doc",
        "_id": "2",
        "_score": 1.0000001,
        "_source": {
            "my_text": "text2",
            "my_vector": [
                0.5,
                10,
                10
            ]
        }
    },
    {
        "_index": "dindex",
        "_type": "_doc",
        "_id": "1",
        "_score": 0.97016037,
        "_source": {
            "my_text": "text1",
            "my_vector": [
                0.5,
                10,
                6
            ]
        }
    }
]

2. Sparse vector

PUT sindex
{
  "mappings": {
    "_doc": {
      "properties": {
        "my_vector": {
          "type": "sparse_vector"
        },
        "my_text" : {
          "type" : "keyword"
        }
      }
    }
  }
}

PUT sindex/_doc/1
{
  "my_text" : "text1",
  "my_vector" : {"1": 0.5, "99": -0.5,  "5": 1}
}

PUT sindex/_doc/2
{
  "my_text" : "text2",
  "my_vector" : {"103": 0.5, "4": -0.5,  "5": 1}
}

GET sindex/_search
{
  "query" : {
        "vector" : {
            "field" : "my_vector",
            "query_vector": {"99": -0.5,  "1": 0.5,  "5": 1}
        }
    }
}

Result:
"hits": [
    {
        "_index": "sindex",
        "_type": "_doc",
        "_id": "1",
        "_score": 0.99999994,
        "_source": {
            "my_text": "text1",
            "my_vector": {
                "1": 0.5,
                "99": -0.5,
                "5": 1
            }
        }
    },
    {
        "_index": "sindex",
        "_type": "_doc",
        "_id": "2",
        "_score": 0.6666666,
        "_source": {
            "my_text": "text2",
            "my_vector": {
                "103": 0.5,
                "4": -0.5,
                "5": 1
            }
        }
    }
]

Search with filter:

GET sindex/_search
{
  "query": {
    "bool": {
      "must" : {
        "match": {
          "my_text": "text2"
        }
      },
      "should" : {
        "vector" : {
            "field" : "my_vector",
            "query_vector": {"99": -0.5,  "1": 0.5,  "5": 1}
        }
      }
    }
  }
}

Result:
"hits": [
    {
        "_index": "sindex",
        "_type": "_doc",
        "_id": "2",
        "_score": 0.6931472,
        "_source": {
            "my_text": "text2",
            "my_vector": {
                "103": 0.5,
                "4": -0.5,
                "5": 1
            }
        }
    }
]

3. Implementation details

3.1 Dense Vector
- BinaryDocValuesField
- byte array ->
    - integer (number of dimensions)
    - array of integers (encoded array of float values)

3.2 Sparse Vector
- BinaryDocValuesField
- byte array ->
    - integer (number of dimensions)
    - array of integers (encoded array of float values)
    - array of integers (array of integer dimensions)

Relates to elastic#31615
  • Loading branch information
mayya-sharipova committed Aug 21, 2018
1 parent 2acb191 commit ddfb50e
Showing 1 changed file with 0 additions and 1 deletion.
Expand Up @@ -54,7 +54,6 @@ public final class VectorQueryBuilder extends AbstractQueryBuilder<VectorQueryBu
public static final String NAME = "vector";
private static final ParseField FIELD_FIELD = new ParseField("field");
private static final ParseField QUERY_VECTOR_FIELD = new ParseField("query_vector");
private static final ParseField FILTER_FIELD = new ParseField("filter");

private static final ObjectParser<VectorQueryBuilder, Void> PARSER = new ObjectParser<>(NAME, true, VectorQueryBuilder::new);
static {
Expand Down

1 comment on commit ddfb50e

@LiuGangR
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked out the branch origin/vector-fied-query and built a new zip in 'distribution/archives/zip/build/distributions', but this branch still prints the error:
{"error":{"root_cause":[{"type":"parsing_exception","reason":"no [query] registered for [vector]","line":4,"col":20}],"type":"parsing_exception","reason":"no [query] registered for [vector]","line":4,"col":20},"status":400}

Please sign in to comment.