# Performance test: Elasticsearch index create / mapping / delete loop

## Setup

In [1]:
from pprint import pprint
import json
from datetime import date, datetime

def json_serial(obj):
    """
    Fallback serializer for ``json.dumps(..., default=json_serial)``.

    Converts ``datetime`` / ``date`` objects to ISO 8601 strings to avoid
    'TypeError: Object of type datetime is not JSON serializable'.

    Parameters
    ----------
    obj : object
        A value that ``json`` could not serialize natively.

    Returns
    -------
    str
        ``obj.isoformat()`` when ``obj`` is a ``datetime`` or ``date``.

    Raises
    ------
    TypeError
        For any other type, so ``json.dumps`` reports the unsupported value.
    """
    # Resolve the types through a function-local alias. A later cell runs
    # `import datetime`, which rebinds the global name `datetime` from the
    # class to the module; an isinstance() check against that global would
    # then fail with an unrelated TypeError.
    import datetime as _dt

    if isinstance(obj, (_dt.datetime, _dt.date)):
        return obj.isoformat()
    raise TypeError("Type %s not serializable" % type(obj))


def pp_json(d):
    """
    Pretty-print ``d`` as JSON on stdout.

    The value is serialized with a two-space indent; anything ``json`` cannot
    encode natively is handed to ``json_serial``.
    """
    rendered = json.dumps(d, default=json_serial, indent=2)
    print(rendered)

In [2]:
!pip install elasticsearch==7.9.1



In [3]:
from elasticsearch import Elasticsearch

# Client shared by every cell below; it points at the single node "elasticsearch:9200".
es = Elasticsearch(hosts=['elasticsearch:9200'])

In [4]:
# Display basic cluster information (node name, cluster name, version) to confirm the connection works.
es.info()

{'name': '0f828e83b923',
 'cluster_name': 'docker-cluster',
 'cluster_uuid': '94Dc9H8SQZ2KyUBol9P5Kg',
 'version': {'number': '7.9.1',
  'build_flavor': 'default',
  'build_type': 'docker',
  'build_hash': '083627f112ba94dffc1232e8b42b73492789ef91',
  'build_date': '2020-09-01T21:22:21.964974Z',
  'build_snapshot': False,
  'lucene_version': '8.6.2',
  'minimum_wire_compatibility_version': '6.8.0',
  'minimum_index_compatibility_version': '6.0.0-beta1'},
 'tagline': 'You Know, for Search'}

In [5]:
# List the installed plugins; the Vietnamese analysis plugin under test should appear in the output.
es.cat.plugins()

'0f828e83b923 elasticsearch-analysis-vietnamese 7.9.1\n'

# Performance test

### Loop over the following steps
1. PUT *test-N*
1. PUT *test-N/_mapping*
1. DELETE *test-N*


### Expected result
- The stdout message **Loading unigram model...OK** appears more and more often in the timber log
    - This might be caused by the [Vietnamese Analysis plugin](https://docs.aws.amazon.com/elasticsearch-service/latest/developerguide/aes-supported-plugins.html), which has been supported since 7.7.
- The Vietnamese Analysis plugin consumes a lot of heap space

In [6]:
import time
import datetime

def test_suite(es, index_name, mapping, doc_type):
    """
    1. PUT index
    2. PUT index/_mapping
    3. DELETE index
    """
    es.indices.create(index=index_name)
    time.sleep(0.02)

    if doc_type is None:
        es.indices.put_mapping(index=index_name, body=mapping)
    else:
        es.indices.put_mapping(index=index_name, body=mapping, doc_type=doc_type)
    time.sleep(0.02)
    
    es.indices.delete(index=index_name)
    time.sleep(0.02)


def execute_test(description, es, mapping, doc_type=None, time=10):
    """
    Run ``test_suite`` repeatedly and print start/end timestamps.

    Parameters
    ----------
    description : str
        Label included in the start and end messages.
    es : object
        Client passed through to ``test_suite``; its ``indices.delete`` is
        also called once up front.
    mapping : dict
        Mapping body passed through to ``test_suite``.
    doc_type : str or None, optional
        Passed through to ``test_suite``.
    time : int, optional
        Number of iterations (not a duration). Inside this function the name
        shadows the ``time`` module; it is kept because callers pass ``time=``.
    """
    # Use a function-local alias so the timestamps keep working whether the
    # global name `datetime` currently refers to the module (`import datetime`)
    # or to the class (`from datetime import datetime`); both forms are
    # executed by different cells of this notebook.
    import datetime as _dt

    print(f"Start test {description} at {_dt.datetime.now().isoformat()}")

    # Delete indices before test
    es.indices.delete(index="test-*")

    # Indices are named test-0 .. test-(time-1), one full cycle each.
    for i in range(time):
        test_suite(es, f"test-{i}", mapping, doc_type)

    print(f"End test {description} at {_dt.datetime.now().isoformat()}")

## Sample test

In [7]:
%%time
mapping = {
    "properties": {
      "foo": {
        "type": "text"
      },
      "bar": {
        "type": "long"
      }
    }
  }
execute_test("sample", es=es, mapping=mapping)

Start test sample at 2021-04-18T14:18:54.481661
End test sample at 2021-04-18T14:19:08.030331
CPU times: user 56.5 ms, sys: 14.1 ms, total: 70.6 ms
Wall time: 13.6 s


## Loop test

In [8]:
%%time
mapping = {
    "properties": {
      "foo": {
        "type": "text"
      },
      "bar": {
        "type": "long"
      }
    }
  }
execute_test("sample", es=es, mapping=mapping, time=300)

Start test sample at 2021-04-18T14:19:23.600186
End test sample at 2021-04-18T14:21:51.763159
CPU times: user 989 ms, sys: 312 ms, total: 1.3 s
Wall time: 2min 28s
