In [5]:
from elasticsearch import Elasticsearch
from elasticsearch import helpers

from FlagEmbedding import FlagModel
import json

In [22]:
# Load the BGE embedding model used to turn query text into vectors.
# use_fp16 asks FlagEmbedding for half-precision inference.
# NOTE(review): the "zh" in the checkpoint name suggests the Chinese model, while the
# chunks shown in this notebook are English. Before switching checkpoints, confirm which
# model produced the vectors stored in the index: query and index models must match.
model = FlagModel(
    "BAAI/bge-small-zh-v1.5",
    use_fp16=True,
)

In [2]:
# Create the Elasticsearch Python client (plain HTTP, host "elasticsearch", port 9200).
client = Elasticsearch(hosts="http://elasticsearch:9200")

In [3]:
# Sanity check: the index queried by the searches in this notebook must already exist.
client.indices.exists(index="text_embeddings_index")

HeadApiResponse(True)

In [14]:
# Lexical search: a chunk has to match BOTH names (bool/must = logical AND).
resp = client.search(
    index="text_embeddings_index",
    query={
        "bool": {
            "must": [{"match": {"chunk": name}} for name in ("gavroche", "javert")],
        }
    },
    source=["chunk"],  # return only the chunk text in each hit's _source
)

In [21]:
# Show just the list of matching documents, without the response metadata.
resp.body["hits"]["hits"]

[{'_index': 'text_embeddings_index',
  '_id': 'NyXj0pMBJCf2OsYvxsYT',
  '_score': 7.365013,
  '_source': {'chunk': 'When the corpse passed near Javert, who was still impassive, Enjolras said to the spy:— “It will be your turn presently!” During all this time, Little Gavroche, who alone had not quitted his post, but had remained on guard, thought he espied some men stealthily approaching the barricade.'}},
 {'_index': 'text_embeddings_index',
  '_id': 'USXi0pMBJCf2OsYvjMRL',
  '_score': 7.2688136,
  '_source': {'chunk': 'The search ended, they lifted Javert to his feet, bound his arms behind his back, and fastened him to that celebrated post in the middle of the room which had formerly given the wine-shop its name. Gavroche, who had looked on at the whole of this scene and had approved of everything with a silent toss of his head, stepped up to Javert and said to him:— “It’s the mouse who has caught the cat.”'}},
 {'_index': 'text_embeddings_index',
  '_id': 'UCXi0pMBJCf2OsYvjMRL',
  '_

In [16]:
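# print(json.dumps(resp.body, indent = 3))  # NOTE: dump resp.body (a plain dict); the response wrapper itself is presumably not JSON-serialisable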

In [20]:
# Full response body: timing, shard info and total hit count alongside the hits.
resp.body

{'took': 6,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 4, 'relation': 'eq'},
  'max_score': 7.365013,
  'hits': [{'_index': 'text_embeddings_index',
    '_id': 'NyXj0pMBJCf2OsYvxsYT',
    '_score': 7.365013,
    '_source': {'chunk': 'When the corpse passed near Javert, who was still impassive, Enjolras said to the spy:— “It will be your turn presently!” During all this time, Little Gavroche, who alone had not quitted his post, but had remained on guard, thought he espied some men stealthily approaching the barricade.'}},
   {'_index': 'text_embeddings_index',
    '_id': 'USXi0pMBJCf2OsYvjMRL',
    '_score': 7.2688136,
    '_source': {'chunk': 'The search ended, they lifted Javert to his feet, bound his arms behind his back, and fastened him to that celebrated post in the middle of the room which had formerly given the wine-shop its name. Gavroche, who had looked on at the whole of this scene and had approved o

In [53]:
# Natural-language questions to embed for the vector searches.
queries = [
    "who is valjean?",
]

In [54]:
# Embed the queries with the BGE model loaded earlier.
# Presumably yields one embedding row per query; row 0 is later used as the kNN query vector.
q_embeddings = model.encode_queries(queries)

In [55]:
# q_embeddings.tolist()[0]  # uncomment to inspect the first query's embedding as a plain Python list

In [58]:
# Semantic search: approximate kNN over the stored chunk embeddings.
resp_knn = client.search(
    index = "text_embeddings_index",
    # size = 3,
    query = {
        "knn": {
            "field": "embedding_vector",
            # there is a single query, so its embedding (row 0) is the query vector
            "query_vector": q_embeddings.tolist()[0],
            "k": 10,
            "num_candidates": 100,
        }
    },
    # Only the chunk text is read from the hits; restricting _source keeps the stored
    # embedding vectors out of the response (same filter as the lexical search).
    source = ["chunk"],
)

In [59]:
# One hit per paragraph: similarity score, then the chunk text, then a blank line.
for match in resp_knn["hits"]["hits"]:
    print(match["_score"], match["_source"]["chunk"], end="\n\n")

0.8136349 “Where?” “Into the vault.” “What vault?”

0.80684566 more! more! Empty the vase!

0.80605316 Did those Thénardiers keep her clean? How have they fed her? Oh!

0.8045044 “Into the vault.” “What vault?” “Under the altar.”

0.8040962 With whom? She did not know. Whither?

0.8023195 “Well! What is it? What is the matter, Javert?”

0.8011675 What mattered it to them? They were a whirlwind. Their valor was something indescribable.

0.80007267 I am tracked! By whom? By myself.

0.79997444 But then, what? In whom can we trust? _

0.79955006 more! Empty the vase! tip the urn!



In [60]:
# Hybrid search: combine a lexical match on "valjean" with a kNN clause in one bool/should
# query. The kNN clause carries twice the boost of the match clause.
resp_hybrid_knn = client.search(
    index = "text_embeddings_index",
    # size = 3,
    query = {
        "bool":
        {
            "should":
            [
                {
                    "match": {
                        "chunk": {
                            "query": "valjean",
                            "boost": 1
                        }
                    }
                },
                {
                    "knn": {
                        "field": "embedding_vector",
                        # there is a single query, so its embedding (row 0) is the query vector
                        "query_vector": q_embeddings.tolist()[0],
                        "k": 10,
                        # NOTE(review): num_candidates is left at the server default here,
                        # unlike the pure kNN search in this notebook, which passes 100.
                        "boost": 2
                    }
                }
            ]
        }
    },
    # Only the chunk text is read from the hits; restricting _source keeps the stored
    # embedding vectors out of the response (same filter as the lexical search).
    source = ["chunk"],
)

In [61]:
# One hit per paragraph: combined score, then the chunk text, then a blank line.
for match in resp_hybrid_knn["hits"]["hits"]:
    print(match["_score"], match["_source"]["chunk"], end="\n\n")

4.933772 You are Jean Valjean!’ ‘Jean Valjean! who’s Jean Valjean?’

4.6556225 ‘Jean Valjean! who’s Jean Valjean?’ Champmathieu feigns astonishment.

4.5801044 CHAPTER VI—JEAN VALJEAN Towards the middle of the night Jean Valjean woke. Jean Valjean came from a poor peasant family of Brie.

4.549079 Javert followed Jean Valjean. They reached No. 7. Jean Valjean knocked.

4.533677 Having exhausted these considerations, he passed on to Jean Valjean himself. Who was this Jean Valjean? Description of Jean Valjean: a monster spewed forth, etc.

4.414382 “What is your name?” said Jean Valjean. “Little Gervais, sir.” “Go away,” said Jean Valjean.

4.414382 Who could that stranger be? Could it be Jean Valjean? But Jean Valjean was dead.

4.414382 “Yes,” said Jean Valjean, “I am content!” “Well, then, laugh.” Jean Valjean began to laugh.

4.414382 “They are no longer here,” replied Jean Valjean. “This is too much!” Jean Valjean stammered:

4.414382 Jean Valjean said to him. Marius could find no w