In [None]:
import os

from elasticsearch import Elasticsearch
from sentence_transformers import SentenceTransformer


def _require_env(name):
    """Return the value of environment variable `name`.

    Raises:
        RuntimeError: if the variable is not set, so a missing setting is
            reported by name instead of surfacing later as an error raised
            from inside the client.
    """
    value = os.getenv(name)
    if value is None:
        raise RuntimeError(
            f"Environment variable {name} must be set before running this notebook"
        )
    return value


# Sentence-embedding model; later cells call model.encode() on the query text.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Connection settings come from the environment so no credentials live in the
# notebook.
# NOTE(review): verify_certs=False turns off TLS certificate verification.
# Kept as in the original so existing setups keep working, but confirm this
# notebook is never pointed at a cluster where that matters.
es = Elasticsearch(
    _require_env("ELASTICSEARCH_URL"),
    basic_auth=(
        _require_env("ELASTICSEARCH_USER"),
        _require_env("ELASTICSEARCH_PASSWORD"),
    ),
    verify_certs=False,
)

# Index that search() queries.
INDEX_NAME = "flipkart_products"

In [None]:
def search(body, type="search", agg_name="categories"):
    """Run `body` against INDEX_NAME and print the results, one item per line.

    Args:
        body: request body (dict) forwarded unchanged to ``es.search``.
        type: ``"search"`` prints every entry of ``hits.hits``; any other
            value prints the buckets of the aggregation named ``agg_name``.
            The name shadows the ``type`` builtin; it is kept because callers
            pass it by keyword.
        agg_name: name of the aggregation whose buckets are printed when
            ``type`` is not ``"search"``. Defaults to ``"categories"``, the
            name that was previously hard-coded here.

    Returns:
        None. Output goes to stdout via ``print``.
    """
    res = es.search(index=INDEX_NAME, body=body)

    # Pick the list to print; indexing fails loudly if the response does not
    # contain the requested section (e.g. the body had no such aggregation).
    if type == "search":
        rows = res["hits"]["hits"]
    else:
        rows = res["aggregations"][agg_name]["buckets"]

    for row in rows:
        print(row)

In [None]:
# general search
# body = {
#     "_source": ["product_id", "name", "embedding_text"],
#     "query": {
#         "match_all": {}
#     },
#     "size": 10
# }

# agg search: terms aggregation named "categories" over the `category` field.
# Stored under its own name because this cell assigns `body` again for the
# item search below, which used to overwrite this request before it could be
# used. Run it with: search(agg_body, type="agg")
agg_body = {
    "aggs": {
        "categories": {
            "terms": {
                "field": "category",
                "size": 1000
            }
        }
    },
    # Top-level size of 0: only the aggregation output is wanted, not hits.
    "size": 0
}

# item search: a kNN clause on the `embedding` field combined with a
# bool/should query that boosts matching category levels and product name.
query = "Item *cricket ball* is of category hierarchy Sports & Fitness and Team Sports and Cricket and Cricket Balls "
query_vector = model.encode(query).tolist()

# One (field, value, boost) row per category level; the boost increases as the
# level gets more specific.
category_boosts = [
    ("category", "Sports & Fitness", 1.5),
    ("sub_category", "Team Sports", 2.0),
    ("sub_sub_category", "Cricket", 3.0),
    ("sub_sub_sub_category", "Cricket Balls", 4.0),
]

# Exact-value `term` clauses for the category fields, followed by a single
# full-text `match` clause on the product name.
should_clauses = [
    {"term": {field: {"value": value, "boost": boost}}}
    for field, value, boost in category_boosts
]
should_clauses.append(
    {"match": {"name": {"query": "cricket ball", "boost": 3.0}}}
)

knn_clause = {
    "field": "embedding",
    "query_vector": query_vector,
    "k": 75,
    "num_candidates": 100,
}

body = {
    # Only `embedding_text` is requested from each hit's _source.
    "_source": {"includes": ["embedding_text"]},
    "size": 20,
    "knn": knn_clause,
    "query": {
        "bool": {
            "should": should_clauses,
            # No should clause is required to match.
            "minimum_should_match": 0,
        }
    },
}

In [None]:
# Send the request currently held in `body` and print each returned hit.
search(body, type="search")