## Documentation

To read more about the common options in Elasticsearch, check out the docs [here](https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#date-math).

![common_options_docs](../images/common_options_docs.png)

## Connect to Elasticsearch

In [1]:
from pprint import pprint
from elasticsearch import Elasticsearch

# Create the client used by every later cell, pointing at the local node.
es = Elasticsearch(hosts='http://localhost:9200')

# Fetch the cluster/node metadata and show it as a readable dict.
client_info = es.info()
print('Connected to Elasticsearch!')
pprint(client_info.body)

Connected to Elasticsearch!
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'iNEgsrfzSs-A5IWMvnKk8w',
 'name': '5af1aab6c380',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2024-08-05T10:05:34.233336849Z',
             'build_flavor': 'default',
             'build_hash': '1a77947f34deddb41af25e6f0ddb8e830159c179',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '9.11.1',
             'minimum_index_compatibility_version': '7.0.0',
             'minimum_wire_compatibility_version': '7.17.0',
             'number': '8.15.0'}}


## 1. Human-readable output

In [2]:
# human=True adds readable values (e.g. '3.8gb') next to the raw *_in_bytes /
# *_in_millis fields.
response = es.cluster.stats(human=True)
jvm_stats = response["nodes"]["jvm"]
pprint(jvm_stats)

{'max_uptime': '7.5h',
 'max_uptime_in_millis': 27044242,
 'mem': {'heap_max': '3.8gb',
         'heap_max_in_bytes': 4110417920,
         'heap_used': '1.7gb',
         'heap_used_in_bytes': 1880053496},
 'threads': 149,
 'versions': [{'bundled_jdk': True,
               'count': 1,
               'using_bundled_jdk': True,
               'version': '22.0.1',
               'vm_name': 'OpenJDK 64-Bit Server VM',
               'vm_vendor': 'Oracle Corporation',
               'vm_version': '22.0.1+8-16'}]}


In [3]:
# human=False: request the same statistics without the human-readable variants.
response = es.cluster.stats(human=False)
jvm_stats = response["nodes"]["jvm"]
pprint(jvm_stats)

{'max_uptime_in_millis': 27066645,
 'mem': {'heap_max_in_bytes': 4110417920, 'heap_used_in_bytes': 1884247800},
 'threads': 148,
 'versions': [{'bundled_jdk': True,
               'count': 1,
               'using_bundled_jdk': True,
               'version': '22.0.1',
               'vm_name': 'OpenJDK 64-Bit Server VM',
               'vm_vendor': 'Oracle Corporation',
               'vm_version': '22.0.1+8-16'}]}


## 2. Date math

In [5]:
# Name the demo index once, at first use, so the delete/create calls here and
# the indexing/search cells below all refer to the same index.
index_name = 'my_index'

# Drop any index left over from a previous run, then create it afresh so the
# notebook can be re-run from the top.
es.indices.delete(index=index_name, ignore_unavailable=True)
es.indices.create(index=index_name)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

In [6]:
import json

index_name = 'my_index'

# Read the sample documents; the context manager closes the file handle as soon
# as parsing is done instead of leaving it to the garbage collector.
with open("../data/dummy_data.json", encoding="utf-8") as data_file:
    dummy_data = json.load(data_file)

# The bulk API expects an action line followed by the document itself, so each
# document contributes two consecutive entries.
operations = []
for document in dummy_data:
    operations.append({'index': {'_index': index_name}})
    operations.append(document)

# refresh=True makes the new documents visible to search before the call
# returns, so the query cells below give the same result under "Run All".
es.bulk(operations=operations, refresh=True)

ObjectApiResponse({'errors': False, 'took': 30299362, 'items': [{'index': {'_index': 'my_index', '_id': 'hEkUNJUB6odEtf1MkYPJ', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'my_index', '_id': 'hUkUNJUB6odEtf1MkYPJ', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'my_index', '_id': 'hkkUNJUB6odEtf1MkYPJ', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1, 'status': 201}}]})

In [7]:
# Date math syntax: an anchor date, "||", then the math expression.
created_on_range = {
    "gte": "2024-09-22||+1d/d",  # anchor plus one day, rounded to the day: 2024-09-23
    "lte": "now/d",  # the current day
}

response = es.search(
    index=index_name,
    body={"query": {"range": {"created_on": created_on_range}}},
)

hits = response['hits']['hits']
hit_count = len(hits)
print(f"Found {hit_count} documents")

Found 2 documents


In [8]:
# Same range query, but the lower bound is shifted by a whole month.
created_on_range = {
    "gte": "2024-09-22||+1M/d",  # anchor plus one month, rounded to the day
    "lte": "now/d",
}

response = es.search(
    index=index_name,
    body={"query": {"range": {"created_on": created_on_range}}},
)

hits = response['hits']['hits']
hit_count = len(hits)
print(f"Found {hit_count} documents")

Found 0 documents


## 3. Response filtering

### 3.1 Inclusive filtering

In [9]:
# Baseline for the filtering examples: a match_all search with no filter_path,
# so the complete response body is printed.
match_all_body = {"query": {"match_all": {}}}
response = es.search(index=index_name, body=match_all_body)
pprint(response.body)

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 1, 'total': 1},
 'hits': {'hits': [{'_id': 'hEkUNJUB6odEtf1MkYPJ',
                    '_index': 'my_index',
                    '_score': 1.0,
                    '_source': {'created_on': '2024-09-22',
                                'text': 'This is the first sample document '
                                        'text.',
                                'title': 'Sample Title 1'}},
                   {'_id': 'hUkUNJUB6odEtf1MkYPJ',
                    '_index': 'my_index',
                    '_score': 1.0,
                    '_source': {'created_on': '2024-09-24',
                                'text': 'Here is another example of a '
                                        'document.',
                                'title': 'Sample Title 2'}},
                   {'_id': 'hkkUNJUB6odEtf1MkYPJ',
                    '_index': 'my_index',
                    '_score': 1.0,
                    '_source': {'created_on': '2024-0

In [10]:
# Inclusive filtering: list the response paths to keep (only _id and _source of
# each hit); they are sent as a single comma-separated filter_path value.
kept_paths = ["hits.hits._id", "hits.hits._source"]
match_all_body = {"query": {"match_all": {}}}

response = es.search(
    index=index_name,
    body=match_all_body,
    filter_path=",".join(kept_paths),
)
pprint(response.body)

{'hits': {'hits': [{'_id': 'hEkUNJUB6odEtf1MkYPJ',
                    '_source': {'created_on': '2024-09-22',
                                'text': 'This is the first sample document '
                                        'text.',
                                'title': 'Sample Title 1'}},
                   {'_id': 'hUkUNJUB6odEtf1MkYPJ',
                    '_source': {'created_on': '2024-09-24',
                                'text': 'Here is another example of a '
                                        'document.',
                                'title': 'Sample Title 2'}},
                   {'_id': 'hkkUNJUB6odEtf1MkYPJ',
                    '_source': {'created_on': '2024-09-24',
                                'text': 'The content of the third document '
                                        'goes here.',
                                'title': 'Sample Title 3'}}]}}


### 3.2 Exclusive filtering

In [None]:
# Exclusive filtering: a leading "-" removes a path from the response. With
# _shards, timed_out and took dropped, only the hits section is left.
dropped_paths = ["-_shards", "-timed_out", "-took"]
match_all_body = {"query": {"match_all": {}}}

response = es.search(
    index=index_name,
    body=match_all_body,
    filter_path=",".join(dropped_paths),
)
pprint(response.body)

{'hits': {'hits': [{'_id': 'hEkUNJUB6odEtf1MkYPJ',
                    '_index': 'my_index',
                    '_score': 1.0,
                    '_source': {'created_on': '2024-09-22',
                                'text': 'This is the first sample document '
                                        'text.',
                                'title': 'Sample Title 1'}},
                   {'_id': 'hUkUNJUB6odEtf1MkYPJ',
                    '_index': 'my_index',
                    '_score': 1.0,
                    '_source': {'created_on': '2024-09-24',
                                'text': 'Here is another example of a '
                                        'document.',
                                'title': 'Sample Title 2'}},
                   {'_id': 'hkkUNJUB6odEtf1MkYPJ',
                    '_index': 'my_index',
                    '_score': 1.0,
                    '_source': {'created_on': '2024-09-24',
                                'text': 'The content of the thi

### 3.3 Combined filtering

In [18]:
# Combined filtering: one inclusive path and one exclusive ("-" prefixed) path
# in the same filter_path value.
combined_paths = ["hits.hits._id", "-hits.hits._score"]
match_all_body = {"query": {"match_all": {}}}

response = es.search(
    index=index_name,
    body=match_all_body,
    filter_path=",".join(combined_paths),
)
pprint(response.body)

{'hits': {'hits': [{'_id': 'hEkUNJUB6odEtf1MkYPJ'},
                   {'_id': 'hUkUNJUB6odEtf1MkYPJ'},
                   {'_id': 'hkkUNJUB6odEtf1MkYPJ'}]}}


## 4. Flat settings

In [19]:
# Default representation: settings come back as nested dictionaries.
response = es.indices.get_settings(index=index_name)
nested_settings = response.body
pprint(nested_settings)

{'my_index': {'settings': {'index': {'creation_date': '1740335513268',
                                     'number_of_replicas': '1',
                                     'number_of_shards': '1',
                                     'provided_name': 'my_index',
                                     'routing': {'allocation': {'include': {'_tier_preference': 'data_content'}}},
                                     'uuid': 'z1hyDhAnRf6xt8NDTP3TbQ',
                                     'version': {'created': '8512000'}}}}}


In [20]:
# flat_settings=True collapses the nesting into dotted keys such as
# 'index.number_of_shards'.
response = es.indices.get_settings(index=index_name, flat_settings=True)
flat_settings_body = response.body
pprint(flat_settings_body)

{'my_index': {'settings': {'index.creation_date': '1740335513268',
                           'index.number_of_replicas': '1',
                           'index.number_of_shards': '1',
                           'index.provided_name': 'my_index',
                           'index.routing.allocation.include._tier_preference': 'data_content',
                           'index.uuid': 'z1hyDhAnRf6xt8NDTP3TbQ',
                           'index.version.created': '8512000'}}}
