# Elasticsearch Low-Level Python API
## Index Information
### Assumes Star Wars indices are loaded

In [7]:
# import
import json
from elasticsearch import Elasticsearch

## Connect to Cluster

In [2]:
es_conn1 = Elasticsearch([{'host': '10.0.2.15', 'port': 9200}])
# print the health just because...
print(es_conn1.cluster.health())

{'cluster_name': 'parrot_elk', 'status': 'yellow', 'timed_out': False, 'number_of_nodes': 1, 'number_of_data_nodes': 1, 'active_primary_shards': 9, 'active_shards': 9, 'relocating_shards': 0, 'initializing_shards': 0, 'unassigned_shards': 8, 'delayed_unassigned_shards': 0, 'number_of_pending_tasks': 0, 'number_of_in_flight_fetch': 0, 'task_max_waiting_in_queue_millis': 0, 'active_shards_percent_as_number': 52.94117647058824}


### Function Definition

In [35]:
# useful json print function
# json print function
def json_print(json_obj):
    # create a formatted string of the Python JSON object
    text = json.dumps(json_obj, sort_keys=True, indent=3)
    print(text)

In [36]:
# function to return the storage size of an index
# this needs work with passing the args
def get_index_store_size(es_connection, index, byte_measurement):
    """
    returns the storage size of an index
    """
    #mid1 = '.cat.indices(\''
    #temp = str(es_connection) + mid1
    #print(temp)
    bytes_index = 'bytes=' + '\'' + byte_measurement + '\''
    #bytes_index = '\'' + byte_measurement + '\''
    #bytes_index = '=' + '\'' + byte_measurement + '\''
    #print(bytes_index)
    # works as expected
    #store_size = es_connection.cat.indices('sw_people', bytes='k', h='store.size')
    store_size = es_connection.cat.indices(index, bytes='k', h='store.size')
    # seems to ignore the bytes arg
    #store_size = es_connection.cat.indices(index, bytes='byte_measurement', h='store.size')
    #store_size = es_connection.cat.indices(index, bytes=bytes_index, h='store.size')
    
    #store_size = es_connection.cat.indices(index, bytes='( %s)' % byte_measurement , h='store.size')
    # err - TypeError: indices() got multiple values for argument 'params'
    #store_size = es_connection.cat.indices(index, bytes_index, h='store.size')
    # parsing error
    #store_size = es_connection.cat.indices(index, bytes=\'byte_measurement\', h='store.size')
    #err TypeError: indices() got multiple values for argument 'params'
    #store_size = es_connection.cat.indices(index, byte_measurement, h='store.size')
    #store_size = es_connection.cat.indices(index, str(byte_measurement), h='store.size')
    # store_size = es_connection.cat.indices(index, str(bytes_index), h='store.size')
    
    # err = TypeError: unsupported operand type(s) for +: 'type' and 'str'
    #store_size = es_connection.cat.indices(index, bytes+bytes_index, h='store.size')
    
    # it's the '=' sign
    #store_size = es_connection.cat.indices(bytes=byte_measurement, h='store.size')
    return(store_size)



## Index Information

In [28]:
# get all the indices
index_list = es_conn1.cat.indices()
print(index_list)

yellow open test-index   _j9S4I3zS9q8vFOvL0wZSQ 1 1  0 0    284b    284b
yellow open sw_people    0FSqKsdMQJ-m9sjP6nPiCQ 1 1 87 0  61.3kb  61.3kb
yellow open sw_vehicles  UTWNdxikQMuEcoq3Bofx2A 1 1 39 0    61kb    61kb
yellow open sw_films     142h1U3rQK61nqx18JYMcA 1 1  7 0 101.2kb 101.2kb
yellow open test_index1  0E4bdk9QTSSRVGPI8JXVIQ 1 1  2 1  17.1kb  17.1kb
yellow open sw_species   WZdBoaYvQOeJyDd0mg6P4A 1 1 37 0  45.8kb  45.8kb
yellow open sw_starships 3sFHgxqRR5mWpPwkAfqgtw 1 1 37 0  69.4kb  69.4kb
yellow open sw_planets   oNe7CZCfRkmZf_9b-8PgwA 1 1 61 0    63kb    63kb
green  open .kibana_1    QXZbXyTwR8KOKyBOryzy7Q 1 0 11 0  21.6kb  21.6kb



In [12]:
# get specific index info
index_info = es_conn1.cat.indices('sw_people')
json_print(index_info)

"yellow open sw_people 0FSqKsdMQJ-m9sjP6nPiCQ 1 1 87 0 61.3kb 61.3kb\n"


In [20]:
# get the doc count
index_doc_count = es_conn1.cat.count(index='sw_people')
print(index_doc_count)

1578181297 23:41:37 87



In [23]:
# get size of an index
# bytes arg is k -> KB, m -> MB, g -> GB, t ->tb
index_size = es_conn1.cat.indices('sw_people', bytes='k', h='store.size')
print(index_size)

61



In [5]:
es_client = es_conn1.cat.indices('sw_people', bytes='k', h='store.size')
es_count = es_conn1.cat.count('sw_people')
# es_conn2.cat.indices()
print('size in kb: ', es_client)
print(es_count)

size in kb:  61

1578180883 23:34:43 87



In [33]:
# tuned document count
print('base document count: ', es_conn1.cat.count())
print('tuned document count: ', es_conn1.cat.count(h='count'))
print('sw_people index document count: ', es_conn1.cat.count(index='sw_people', h='count'))

base document count:  1578181863 23:51:03 281

tuned document count:  281

sw_people index document count:  87



In [38]:
# calculate the average size of each document
# need to validate size format
# i.e. KB/MB/GB/TB

# get the size
index = 'sw_people'
bytes_in = 'k'
conn = es_conn1
size = get_index_store_size(conn, index, bytes_in)

# get the doc count
doc_count = es_conn1.cat.count(index='sw_people', h='count')

# calculate the average document size
avg_size = int(doc_count) / int(size)
print('average size per doc" ', avg_size)

average size per doc"  1.4262295081967213
