# Connect to Elasticsearch

In [2]:
import os
from getpass import getpass
from pprint import pprint
from elasticsearch import Elasticsearch

# Connection settings are read from the environment so that no credential is
# stored in the notebook. If ES_PASSWORD is not set, prompt for it instead.
ES_URL = os.environ.get("ES_URL", "https://localhost:9200")
ES_USERNAME = os.environ.get("ES_USERNAME", "elastic")
ES_PASSWORD = os.environ.get("ES_PASSWORD") or getpass(f"Password for {ES_USERNAME}: ")

es = Elasticsearch(
    ES_URL,
    basic_auth=(ES_USERNAME, ES_PASSWORD),
    # NOTE(review): certificate verification is disabled; presumably this is
    # for a local cluster with a self-signed certificate. Pass `ca_certs=...`
    # and drop this flag for anything that is not a throwaway local setup.
    verify_certs=False,
)

# Round-trip to the cluster to confirm that the URL and credentials work.
client_info = es.info()
print("Connected to Elasticsearch!")
pprint(client_info.body)

  _transport = transport_class(


Connected to Elasticsearch!
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'mIJwhjTmStW54eKFEwQnMA',
 'name': 'f12c85f397e4',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2026-01-29T10:05:46.708397977Z',
             'build_flavor': 'default',
             'build_hash': '17b451d8979a29e31935fe1eb901310350b30e62',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '10.3.2',
             'minimum_index_compatibility_version': '8.0.0',
             'minimum_wire_compatibility_version': '8.19.0',
             'number': '9.3.0'}}


# Index data 

In [3]:
import json
from pprint import pprint

# Recreate the index so that re-running this cell does not index the same
# documents a second time.
es.indices.delete(index='my_index', ignore_unavailable=True)
es.indices.create(index='my_index')

# Read the documents with a context manager so the file handle is closed,
# and decode explicitly as UTF-8 (the standard JSON encoding) instead of
# relying on the platform's default locale encoding.
with open('./data/clothes.json', encoding='utf-8') as clothes_file:
    clothes_documents = json.load(clothes_file)

# Build the bulk payload: one action entry followed by the document itself.
operations = []
for document in clothes_documents:
    operations.append({'index': {'_index': 'my_index'}})
    operations.append(document)

# Wait for a refresh before returning so the documents are searchable (and
# counted) by the cells that follow, even when the notebook is run top to
# bottom without pauses.
response = es.bulk(operations=operations, refresh='wait_for')
pprint(response.body)



{'errors': False,
 'items': [{'index': {'_id': 'ykM5cJwBtWFRoivzME-C',
                      '_index': 'my_index',
                      '_primary_term': 1,
                      '_seq_no': 0,
                      '_shards': {'failed': 0, 'successful': 1, 'total': 2},
                      '_version': 1,
                      'result': 'created',
                      'status': 201}},
           {'index': {'_id': 'y0M5cJwBtWFRoivzME-D',
                      '_index': 'my_index',
                      '_primary_term': 1,
                      '_seq_no': 1,
                      '_shards': {'failed': 0, 'successful': 1, 'total': 2},
                      '_version': 1,
                      'result': 'created',
                      'status': 201}},
           {'index': {'_id': 'zEM5cJwBtWFRoivzME-D',
                      '_index': 'my_index',
                      '_primary_term': 1,
                      '_seq_no': 2,
                      '_shards': {'failed': 0, 'successful': 1, '

In [4]:
# Ask the cluster how many documents the index currently holds.
count = es.count(index='my_index')
total_documents = count.body['count']
print('Number of documents in index: ', total_documents)

Number of documents in index:  100




In [6]:
# Single filter clause: keep only documents whose `brand` term is "adidas".
brand_filter = {"term": {"brand": "adidas"}}

# `size` is set to 100 so that up to 100 matching hits are returned.
search_body = {
    "query": {"bool": {"filter": [brand_filter]}},
    "size": 100,
}
response = es.search(index="my_index", body=search_body)

hits = response.body['hits']['hits']
print(f"Found {len(hits)} documents")

Found 23 documents




In [7]:

# Combine two filter clauses: a document must match both the color and the
# brand term to be returned.
response = es.search(
    index="my_index",
    body={
        "query": {
            "bool": {
                "filter": [
                    {
                        "term": {
                            "color": "yellow"
                        }
                    },
                    {
                        "term": {
                            "brand": "adidas"
                        }
                    }
                ]
            }
        },
    },
)

# `hits` holds only the returned page of results (no `size` is given, so the
# server default applies). Report the match count from `hits.total` instead
# of `len(hits)`, which would under-report once matches exceed the page size.
hits = response.body['hits']['hits']
total_matches = response.body['hits']['total']['value']
print(f"Found {total_matches} documents")

Found 6 documents




# Post Filters

In [8]:
# Main query: restrict both the hits and the aggregations to gucci documents.
gucci_query = {"bool": {"filter": {"term": {"brand": "gucci"}}}}

# Two aggregations computed over the query results:
#   - "colors": document counts per color
#   - "color_red": the red documents only, broken down per model
aggregation_requests = {
    "colors": {"terms": {"field": "color.keyword"}},
    "color_red": {
        "filter": {"term": {"color.keyword": "red"}},
        "aggs": {"models": {"terms": {"field": "model.keyword"}}},
    },
}

# The post filter narrows the returned hits to red items; the "colors"
# aggregation above still reports every color of the brand.
red_post_filter = {"term": {"color": "red"}}

response = es.search(
    index="my_index",
    body={
        "query": gucci_query,
        "aggs": aggregation_requests,
        "post_filter": red_post_filter,
        "size": 20,
    },
)
pprint(response.body)



{'_shards': {'failed': 0, 'skipped': 0, 'successful': 1, 'total': 1},
 'aggregations': {'color_red': {'doc_count': 12,
                                'models': {'buckets': [{'doc_count': 3,
                                                        'key': 'model_1'},
                                                       {'doc_count': 1,
                                                        'key': 'model_14'},
                                                       {'doc_count': 1,
                                                        'key': 'model_16'},
                                                       {'doc_count': 1,
                                                        'key': 'model_2'},
                                                       {'doc_count': 1,
                                                        'key': 'model_26'},
                                                       {'doc_count': 1,
                                                        'key': 'model_2

In [9]:

# Per-color buckets produced by the "colors" terms aggregation.
colors_result = response.body['aggregations']['colors']
colors_aggregation = colors_result['buckets']
pprint(colors_aggregation)

[{'doc_count': 12, 'key': 'red'},
 {'doc_count': 8, 'key': 'blue'},
 {'doc_count': 6, 'key': 'green'},
 {'doc_count': 4, 'key': 'yellow'}]


In [10]:

# Per-model buckets nested under the "color_red" filter aggregation.
red_result = response.body['aggregations']['color_red']
color_red_aggregation = red_result['models']['buckets']
pprint(color_red_aggregation)

[{'doc_count': 3, 'key': 'model_1'},
 {'doc_count': 1, 'key': 'model_14'},
 {'doc_count': 1, 'key': 'model_16'},
 {'doc_count': 1, 'key': 'model_2'},
 {'doc_count': 1, 'key': 'model_26'},
 {'doc_count': 1, 'key': 'model_28'},
 {'doc_count': 1, 'key': 'model_3'},
 {'doc_count': 1, 'key': 'model_4'},
 {'doc_count': 1, 'key': 'model_6'},
 {'doc_count': 1, 'key': 'model_8'}]


In [11]:

# Print one summary line per returned hit, using its stored source document.
hits = response.body['hits']['hits']
for hit in hits:
    shirt = hit['_source']
    print(f"Shirt brand: {shirt['brand']}, color: {shirt['color']}, and model: {shirt['model']}")

Shirt brand: gucci, color: red, and model: model_1
Shirt brand: gucci, color: red, and model: model_3
Shirt brand: gucci, color: red, and model: model_1
Shirt brand: gucci, color: red, and model: model_4
Shirt brand: gucci, color: red, and model: model_1
Shirt brand: gucci, color: red, and model: model_2
Shirt brand: gucci, color: red, and model: model_28
Shirt brand: gucci, color: red, and model: model_6
Shirt brand: gucci, color: red, and model: model_14
Shirt brand: gucci, color: red, and model: model_26
Shirt brand: gucci, color: red, and model: model_8
Shirt brand: gucci, color: red, and model: model_16
