## Documentation

To read more about the common options in Elasticsearch, check out the docs [here](https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#date-math).

![common_options_docs](../images/common_options_docs.png)

## Connect to Elasticsearch

In [1]:
from pprint import pprint
from elasticsearch import Elasticsearch

# Create the client for the local node; the host is passed by keyword.
es = Elasticsearch(hosts='http://localhost:9200')

# info() makes a request to the cluster, so the confirmation message below is
# only printed once that request has returned without raising.
client_info = es.info()
print('Connected to Elasticsearch!')
pprint(client_info.body)

Connected to Elasticsearch!
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'DlYG5m9gR3upn7qgaYyAJA',
 'name': '3d37442d2591',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2024-08-05T10:05:34.233336849Z',
             'build_flavor': 'default',
             'build_hash': '1a77947f34deddb41af25e6f0ddb8e830159c179',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '9.11.1',
             'minimum_index_compatibility_version': '7.0.0',
             'minimum_wire_compatibility_version': '7.17.0',
             'number': '8.15.0'}}


## 1. Human-readable output

In [2]:
# With human=True the stats carry readable values (e.g. 'heap_max': '7.6gb')
# next to the raw numeric fields.
response = es.cluster.stats(human=True)
jvm_stats = response["nodes"]["jvm"]
pprint(jvm_stats)

{'max_uptime': '6.9m',
 'max_uptime_in_millis': 417648,
 'mem': {'heap_max': '7.6gb',
         'heap_max_in_bytes': 8183087104,
         'heap_used': '644.3mb',
         'heap_used_in_bytes': 675663848},
 'threads': 80,
 'versions': [{'bundled_jdk': True,
               'count': 1,
               'using_bundled_jdk': True,
               'version': '22.0.1',
               'vm_name': 'OpenJDK 64-Bit Server VM',
               'vm_vendor': 'Oracle Corporation',
               'vm_version': '22.0.1+8-16'}]}


In [4]:
# With human=False only the raw numeric fields (millis / bytes) are returned.
response = es.cluster.stats(human=False)
jvm_stats = response["nodes"]["jvm"]
pprint(jvm_stats)

{'max_uptime_in_millis': 439500,
 'mem': {'heap_max_in_bytes': 8183087104, 'heap_used_in_bytes': 684052456},
 'threads': 81,
 'versions': [{'bundled_jdk': True,
               'count': 1,
               'using_bundled_jdk': True,
               'version': '22.0.1',
               'vm_name': 'OpenJDK 64-Bit Server VM',
               'vm_vendor': 'Oracle Corporation',
               'vm_version': '22.0.1+8-16'}]}


## 2. Date math

In [7]:
import json

index_name = 'my_index'

# Read the sample documents through a context manager so the file handle is
# closed as soon as parsing finishes instead of being left open. The encoding
# is pinned to UTF-8 so the result does not depend on the platform default.
with open("../data/dummy_data.json", encoding="utf-8") as data_file:
    dummy_data = json.load(data_file)

# Build the bulk payload: each document is preceded by an 'index' action entry
# naming the target index.
operations = []
for document in dummy_data:
    operations.extend(({'index': {'_index': index_name}}, document))

# refresh="wait_for" makes the call return only after the new documents are
# visible to search, so the query cells that follow see them even when the
# notebook is executed top to bottom without pauses.
es.bulk(operations=operations, refresh="wait_for")

ObjectApiResponse({'errors': False, 'took': 9248868, 'items': [{'index': {'_index': 'my_index', '_id': 'r_zdMZMBT6aY_-5Ln0JZ', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 10, '_primary_term': 10, 'status': 201}}, {'index': {'_index': 'my_index', '_id': 'sPzdMZMBT6aY_-5Ln0Ja', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 11, '_primary_term': 10, 'status': 201}}, {'index': {'_index': 'my_index', '_id': 'sfzdMZMBT6aY_-5Ln0Ja', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 12, '_primary_term': 10, 'status': 201}}]})

In [None]:
# Bounds for the created_on range; the lower bound is a date-math expression
# anchored on a fixed date.
created_on_bounds = {
    "gte": "2024-09-22||+1d/d",  # 2024-09-23
    "lte": "now/d",
}
range_query = {"range": {"created_on": created_on_bounds}}

response = es.search(index=index_name, body={"query": range_query})
hits = response['hits']['hits']
print(f"Found {len(hits)} documents")

Found 2 documents


In [None]:
# Same range search as before, but the lower bound is shifted by one month
# instead of one day.
created_on_bounds = {
    "gte": "2024-09-22||+1M/d",  # 2024-09-22 + 1 month
    "lte": "now/d",
}
range_query = {"range": {"created_on": created_on_bounds}}

response = es.search(index=index_name, body={"query": range_query})
hits = response['hits']['hits']
print(f"Found {len(hits)} documents")

Found 0 documents


## 3. Response filtering

### 3.1 Inclusive filtering

In [23]:
# Baseline: a match_all search with no filter_path, printed in full so it can
# be compared with the filtered responses in the following cells.
search_body = {"query": {"match_all": {}}}
response = es.search(index=index_name, body=search_body)
pprint(response.body)

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 1, 'total': 1},
 'hits': {'hits': [{'_id': '3Eu8ApMB770u3lhz-QPW',
                    '_index': 'my_index',
                    '_score': 1.0,
                    '_source': {'content': 'The Solar System consists of the '
                                           'Sun and the objects that orbit it, '
                                           'including eight planets, their '
                                           'moons, dwarf planets, and '
                                           'countless small bodies like '
                                           'asteroids and comets.',
                                'id': 1,
                                'title': 'The Solar System'}},
                   {'_id': '3Uu8ApMB770u3lhz-QPX',
                    '_index': 'my_index',
                    '_score': 1.0,
                    '_source': {'content': 'A black hole is a region of space '
                                       

In [None]:
search_body = {"query": {"match_all": {}}}

# Keep only _id and _source fields
kept_paths = "hits.hits._id,hits.hits._source"

response = es.search(index=index_name, body=search_body, filter_path=kept_paths)
pprint(response.body)

{'hits': {'hits': [{'_id': '3Eu8ApMB770u3lhz-QPW',
                    '_source': {'content': 'The Solar System consists of the '
                                           'Sun and the objects that orbit it, '
                                           'including eight planets, their '
                                           'moons, dwarf planets, and '
                                           'countless small bodies like '
                                           'asteroids and comets.',
                                'id': 1,
                                'title': 'The Solar System'}},
                   {'_id': '3Uu8ApMB770u3lhz-QPX',
                    '_source': {'content': 'A black hole is a region of space '
                                           'where the gravitational pull is so '
                                           'strong that nothing, not even '
                                           'light, can escape from it. They '
                            

### 3.2 Exclusive filtering

In [None]:
search_body = {"query": {"match_all": {}}}

# Remove the hits key; the leading "-" marks the path as an exclusion
dropped_paths = "-hits"

response = es.search(index=index_name, body=search_body, filter_path=dropped_paths)
pprint(response.body)

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 1, 'total': 1},
 'timed_out': False,
 'took': 2}


### 3.3 Combined filtering

In [28]:
search_body = {"query": {"match_all": {}}}

# One inclusive path (hits.hits._id) combined with one exclusive path
# (-hits.hits._score) in the same comma-separated filter.
mixed_paths = "hits.hits._id,-hits.hits._score"

response = es.search(index=index_name, body=search_body, filter_path=mixed_paths)
pprint(response.body)

{'hits': {'hits': [{'_id': '3Eu8ApMB770u3lhz-QPW'},
                   {'_id': '3Uu8ApMB770u3lhz-QPX'},
                   {'_id': '3ku8ApMB770u3lhz-QPX'},
                   {'_id': '30u8ApMB770u3lhz-QPY'},
                   {'_id': '4Eu8ApMB770u3lhz-QPY'},
                   {'_id': '4Uu8ApMB770u3lhz-QPY'},
                   {'_id': '4ku8ApMB770u3lhz-QPY'},
                   {'_id': '40u8ApMB770u3lhz-QPY'},
                   {'_id': '5Eu8ApMB770u3lhz-QPY'},
                   {'_id': '5Uu8ApMB770u3lhz-QPY'}]}}


## 4. Flat settings

In [30]:
# Fetch the index settings without flat_settings; they come back as nested
# dictionaries.
response = es.indices.get_settings(index=index_name)
pprint(response.body)

{'my_index': {'settings': {'index': {'creation_date': '1730917759224',
                                     'number_of_replicas': '1',
                                     'number_of_shards': '1',
                                     'provided_name': 'my_index',
                                     'routing': {'allocation': {'include': {'_tier_preference': 'data_content'}}},
                                     'uuid': 'fUD9O2YHS7SQVN_M0-0hiQ',
                                     'version': {'created': '8512000'}}}}}


In [29]:
# With flat_settings=True every setting is returned under a single dotted key
# (e.g. 'index.number_of_shards') instead of nested dictionaries.
settings_kwargs = {"index": index_name, "flat_settings": True}
response = es.indices.get_settings(**settings_kwargs)
pprint(response.body)

{'my_index': {'settings': {'index.creation_date': '1730917759224',
                           'index.number_of_replicas': '1',
                           'index.number_of_shards': '1',
                           'index.provided_name': 'my_index',
                           'index.routing.allocation.include._tier_preference': 'data_content',
                           'index.uuid': 'fUD9O2YHS7SQVN_M0-0hiQ',
                           'index.version.created': '8512000'}}}
