## Documentation

To read more about the common options in Elasticsearch, check out the docs [here](https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#date-math).

![common_options_docs](../images/common_options_docs.png)

## Connect to Elasticsearch

In [1]:
from pprint import pprint
from elasticsearch import Elasticsearch

# Address of the Elasticsearch node this notebook talks to.
HOST = "http://localhost:9200"

es = Elasticsearch(hosts=HOST)

# info() performs a request against the cluster, so reaching the print below
# means the node at HOST actually answered.
client_info = es.info()
print("Connected to Elasticsearch!")
pprint(client_info.body)

Connected tp Elasticsearch!
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'iugjHCt8SwCWRVd35xnJ0A',
 'name': '5013781c82bc',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2025-02-05T22:10:57.067596412Z',
             'build_flavor': 'default',
             'build_hash': '747663ddda3421467150de0e4301e8d4bc636b0c',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '9.12.0',
             'minimum_index_compatibility_version': '7.0.0',
             'minimum_wire_compatibility_version': '7.17.0',
             'number': '8.17.2'}}


## 1. Human-readable output

In [3]:
# human=True adds readable companions (e.g. 'heap_max': '3gb') next to the raw
# numeric fields of the statistics.
response = es.cluster.stats(human=True)
jvm_stats = response.body['nodes']["jvm"]
pprint(jvm_stats)

{'max_uptime': '2h',
 'max_uptime_in_millis': 7426653,
 'mem': {'heap_max': '3gb',
         'heap_max_in_bytes': 3221225472,
         'heap_used': '109.8mb',
         'heap_used_in_bytes': 115235328},
 'threads': 91,
 'versions': [{'bundled_jdk': True,
               'count': 1,
               'using_bundled_jdk': True,
               'version': '23',
               'vm_name': 'OpenJDK 64-Bit Server VM',
               'vm_vendor': 'Oracle Corporation',
               'vm_version': '23+37-2369'}]}


In [4]:
# human=False keeps only the machine-oriented fields (milliseconds, bytes).
response = es.cluster.stats(human=False)
jvm_stats = response.body['nodes']["jvm"]
pprint(jvm_stats)

{'max_uptime_in_millis': 7501564,
 'mem': {'heap_max_in_bytes': 3221225472, 'heap_used_in_bytes': 119429632},
 'threads': 91,
 'versions': [{'bundled_jdk': True,
               'count': 1,
               'using_bundled_jdk': True,
               'version': '23',
               'vm_name': 'OpenJDK 64-Bit Server VM',
               'vm_vendor': 'Oracle Corporation',
               'vm_version': '23+37-2369'}]}


## 2. Date math

In [5]:
index_name = "my_index"

# One primary shard and no replicas for this demo index.
settings = {"index": {"number_of_shards": 1, "number_of_replicas": 0}}

# Start from a clean slate so the cell can be re-run: drop the index first
# (ignore_unavailable=True keeps the delete from failing when it is missing),
# then create it again with the settings above.
es.indices.delete(index=index_name, ignore_unavailable=True)
es.indices.create(index=index_name, settings=settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

In [6]:
import json

# Read the sample documents. The context manager closes the file handle as soon
# as parsing is done (the bare open() call used before never closed it), and the
# explicit encoding avoids depending on the platform's default text encoding.
with open("../data/dummy_data.json", encoding="utf-8") as data_file:
    dummy_data = json.load(data_file)

# The bulk API takes alternating entries: an action describing what to do,
# immediately followed by the document that action applies to.
operations = []
for document in dummy_data:
    operations.append({'index': {'_index': index_name}})
    operations.append(document)

es.bulk(operations=operations)

ObjectApiResponse({'errors': False, 'took': 0, 'items': [{'index': {'_index': 'my_index', '_id': '_PpwPJUBq90DZqGOFGYI', '_version': 1, 'result': 'created', '_shards': {'total': 1, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'my_index', '_id': '_fpwPJUBq90DZqGOFGYI', '_version': 1, 'result': 'created', '_shards': {'total': 1, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'my_index', '_id': '_vpwPJUBq90DZqGOFGYI', '_version': 1, 'result': 'created', '_shards': {'total': 1, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1, 'status': 201}}]})

In [9]:
# Date math: the part before "||" is the anchor date, the part after it is the
# expression applied to that anchor.
created_on_range = {
    "gte": "2024-09-22||+1d/d",  # anchor + 1 day, rounded to the day -> 2024-09-23
    "lte": "now/d",  # the current date at query time, rounded to the day
}

response = es.search(
    index=index_name,
    body={"query": {"range": {"created_on": created_on_range}}},
)

hits = response["hits"]["hits"]
print(f"Found {len(hits)} documents")

Found 2 documents


In [10]:
# Same range query as before, but the lower bound is shifted by a whole month.
created_on_range = {
    "gte": "2024-09-22||+1M/d",  # anchor + 1 month, rounded to the day
    "lte": "now/d",  # the current date at query time, rounded to the day
}

response = es.search(
    index=index_name,
    body={"query": {"range": {"created_on": created_on_range}}},
)

hits = response["hits"]["hits"]
print(f"Found {len(hits)} documents")

Found 0 documents


## 3. Response filtering

### 3.1 Inclusive filtering

In [12]:
# Baseline for the filtering examples: a match-all search with the complete,
# unfiltered response body.
match_all_query = {"query": {"match_all": {}}}
response = es.search(index=index_name, body=match_all_query)

pprint(response.body)

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 1, 'total': 1},
 'hits': {'hits': [{'_id': '_PpwPJUBq90DZqGOFGYI',
                    '_index': 'my_index',
                    '_score': 1.0,
                    '_source': {'created_on': '2024-09-22',
                                'text': 'This is the first sample document '
                                        'text.',
                                'title': 'Sample Title 1'}},
                   {'_id': '_fpwPJUBq90DZqGOFGYI',
                    '_index': 'my_index',
                    '_score': 1.0,
                    '_source': {'created_on': '2024-09-24',
                                'text': 'Here is another example of a '
                                        'document.',
                                'title': 'Sample Title 2'}},
                   {'_id': '_vpwPJUBq90DZqGOFGYI',
                    '_index': 'my_index',
                    '_score': 1.0,
                    '_source': {'created_on': '2024-0

In [13]:
match_all_query = {"query": {"match_all": {}}}

response = es.search(
    index=index_name,
    body=match_all_query,
    # Inclusive filter: only the listed paths (_id and _source of every hit)
    # are kept in the response.
    filter_path="hits.hits._id,hits.hits._source",
)

pprint(response.body)

{'hits': {'hits': [{'_id': '_PpwPJUBq90DZqGOFGYI',
                    '_source': {'created_on': '2024-09-22',
                                'text': 'This is the first sample document '
                                        'text.',
                                'title': 'Sample Title 1'}},
                   {'_id': '_fpwPJUBq90DZqGOFGYI',
                    '_source': {'created_on': '2024-09-24',
                                'text': 'Here is another example of a '
                                        'document.',
                                'title': 'Sample Title 2'}},
                   {'_id': '_vpwPJUBq90DZqGOFGYI',
                    '_source': {'created_on': '2024-09-24',
                                'text': 'The content of the third document '
                                        'goes here.',
                                'title': 'Sample Title 3'}}]}}


### 3.2 Exclusive filtering

In [14]:
match_all_query = {"query": {"match_all": {}}}

response = es.search(
    index=index_name,
    body=match_all_query,
    # Exclusive filter: the leading "-" drops the hits key and keeps the rest.
    filter_path="-hits",
)
pprint(response.body)

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 1, 'total': 1},
 'timed_out': False,
 'took': 3}


### 3.3 Combined filtering

In [15]:
match_all_query = {"query": {"match_all": {}}}

response = es.search(
    index=index_name,
    body=match_all_query,
    # Combined filter: an inclusive path (hits.hits._id) and an exclusive one
    # (-hits.hits._score, note the leading "-") in the same expression.
    filter_path="hits.hits._id,-hits.hits._score",
)
pprint(response.body)

{'hits': {'hits': [{'_id': '_PpwPJUBq90DZqGOFGYI'},
                   {'_id': '_fpwPJUBq90DZqGOFGYI'},
                   {'_id': '_vpwPJUBq90DZqGOFGYI'}]}}


## 4. Flat settings

In [16]:
# flat_settings=True returns every setting as a single dotted key
# (e.g. 'index.number_of_shards').
response = es.indices.get_settings(index=index_name, flat_settings=True)

flat_index_settings = response.body
pprint(flat_index_settings)

{'my_index': {'settings': {'index.creation_date': '1740475728860',
                           'index.number_of_replicas': '0',
                           'index.number_of_shards': '1',
                           'index.provided_name': 'my_index',
                           'index.routing.allocation.include._tier_preference': 'data_content',
                           'index.uuid': 'Br3wGAAWSoOFh-ZmPujA1w',
                           'index.version.created': '8521000'}}}
