# Elasticsearch 

# setup

In [1]:
# edit conf/kibana.yml, uncomment elasticsearch.hosts: ["http://localhost:9200"]
# run bin/elasticsearch
# run bin/kibana 

In [None]:
# elasticsearch: http://localhost:9200/
# kibana: http://localhost:5601/

In [37]:
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, MultiSearch

In [2]:
es = Elasticsearch('http://localhost:9200/')

In [3]:
es.ping()

True

In [4]:
es.info()

{'name': 'FVFYQ09LHV2G',
 'cluster_name': 'elasticsearch',
 'cluster_uuid': 'R0BO34yDRzmjtzeWhI0GNw',
 'version': {'number': '7.7.1',
  'build_flavor': 'default',
  'build_type': 'tar',
  'build_hash': 'ad56dce891c901a492bb1ee393f12dfff473a423',
  'build_date': '2020-05-28T16:30:01.040088Z',
  'build_snapshot': False,
  'lucene_version': '8.5.1',
  'minimum_wire_compatibility_version': '6.8.0',
  'minimum_index_compatibility_version': '6.0.0-beta1'},
 'tagline': 'You Know, for Search'}

# view indices

In [5]:
all_indices = es.indices.get_alias("*")
[each for each in all_indices if not '.' in each[0]]

['friends', 'customers', 'autos', 'emplyees', 'courses', 'vehicles']

# view mappings

In [6]:
es.indices.get_mapping("vehicles")

{'vehicles': {'mappings': {'properties': {'color': {'type': 'text',
     'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
    'condition': {'type': 'text',
     'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
    'make': {'type': 'text',
     'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
    'price': {'type': 'long'},
    'sold': {'type': 'date'}}}}}

In [7]:
es.indices.get_mapping("customers")

{'customers': {'mappings': {'dynamic': 'strict',
   'properties': {'age': {'type': 'integer'},
    'gender': {'type': 'text', 'analyzer': 'standard'},
    'is_new': {'type': 'boolean'},
    'name': {'type': 'text', 'analyzer': 'standard'},
    'total_spent': {'type': 'float'}}}}}

In [8]:
es.indices.get_mapping("courses")

{'courses': {'mappings': {'properties': {'course_description': {'type': 'text',
     'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
    'course_publish_date': {'type': 'date'},
    'name': {'type': 'text',
     'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
    'professor': {'properties': {'department': {'type': 'text',
       'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
      'email': {'type': 'text',
       'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
      'facutly_type': {'type': 'text',
       'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
      'name': {'type': 'text',
       'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}}},
    'room': {'type': 'text',
     'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
    'students_enrolled': {'type': 'long'}}}}}

# get document

In [9]:
es.get(index="autos", id=101)

{'_index': 'autos',
 '_type': '_doc',
 '_id': '101',
 '_version': 4,
 '_seq_no': 5,
 '_primary_term': 1,
 'found': True,
 '_source': {'make': 'ford', 'model': 'escape'}}

# search

In [10]:
res = es.search(index="vehicles",  body={"query": {"match_all": {}}})

In [11]:
res['hits']['total']['value']

32

In [12]:
res

{'took': 0,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 32, 'relation': 'eq'},
  'max_score': 1.0,
  'hits': [{'_index': 'vehicles',
    '_type': '_doc',
    '_id': 'm2HMhnIBZukbdG7-7pbh',
    '_score': 1.0,
    '_source': {'price': 10000,
     'color': 'white',
     'make': 'honda',
     'sold': '2016-10-28',
     'condition': 'okay'}},
   {'_index': 'vehicles',
    '_type': '_doc',
    '_id': 'nGHMhnIBZukbdG7-7pbh',
    '_score': 1.0,
    '_source': {'price': 20000,
     'color': 'white',
     'make': 'honda',
     'sold': '2016-11-05',
     'condition': 'new'}},
   {'_index': 'vehicles',
    '_type': '_doc',
    '_id': 'nWHMhnIBZukbdG7-7pbh',
    '_score': 1.0,
    '_source': {'price': 30000,
     'color': 'green',
     'make': 'ford',
     'sold': '2016-05-18',
     'condition': 'new'}},
   {'_index': 'vehicles',
    '_type': '_doc',
    '_id': 'nmHMhnIBZukbdG7-7pbh',
    '_score': 1.0,
    '_source': {'pri

In [34]:
query_json = { 
    "query": {
        "bool": {
            "filter": {
                "bool" : {
                    "must" : [
                        {"match" : {"professor.name" : "bill"}},
                        {"match" : {"name" : "accounting"}}
                    ],
                    "must_not":[
                        {"match" : {"room" : "e7"}}
                    ]
                }
            }
        }
    }
}

In [14]:
res = es.search(index="", body=query_json)

In [15]:
res

{'took': 290,
 'timed_out': False,
 '_shards': {'total': 21, 'successful': 21, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 1, 'relation': 'eq'},
  'max_score': 0.0,
  'hits': [{'_index': 'courses',
    '_type': '_doc',
    '_id': '8',
    '_score': 0.0,
    '_source': {'name': 'Accounting Info Systems 350',
     'room': 'E3',
     'professor': {'name': 'Bill Cage',
      'department': 'accounting',
      'facutly_type': 'full-time',
      'email': 'cageb@onuni.com'},
     'students_enrolled': 19,
     'course_publish_date': '2014-05-15',
     'course_description': 'Act Sys 350 is an advanced course providing students a practical understanding of an accounting system in database technology. Students will use MS Access to build a transaction ledger system'}}]}}

# create index and add/delete documents

In [16]:
doc = {
    "first_name" : "dave",
    "last_name" : "jones",
    "age" : 27,
    "about" : "best friend",
    "interests" : ['sports','music'],
}

In [17]:
es.index(index='friends', body=doc, id=1001)

{'_index': 'friends',
 '_type': '_doc',
 '_id': '1001',
 '_version': 3,
 'result': 'updated',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 4,
 '_primary_term': 1}

In [18]:
es.search(index="friends",  body={"query": {"match_all": {}}})

{'took': 0,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 1, 'relation': 'eq'},
  'max_score': 1.0,
  'hits': [{'_index': 'friends',
    '_type': '_doc',
    '_id': '1001',
    '_score': 1.0,
    '_source': {'first_name': 'dave',
     'last_name': 'jones',
     'age': 27,
     'about': 'best friend',
     'interests': ['sports', 'music']}}]}}

In [19]:
doc_to_delete = {
    "first_name" : "jeff",
    "last_name" : "smith",
    "age" : 27,
    "about" : "neighbor",
    "interests" : ['video games'],
}

In [20]:
es.index(index='friends', body=doc_to_delete, id=1002)

{'_index': 'friends',
 '_type': '_doc',
 '_id': '1002',
 '_version': 1,
 'result': 'created',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 5,
 '_primary_term': 1}

In [21]:
es.search(index="friends",  body={"query": {"match_all": {}}})

{'took': 0,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 1, 'relation': 'eq'},
  'max_score': 1.0,
  'hits': [{'_index': 'friends',
    '_type': '_doc',
    '_id': '1001',
    '_score': 1.0,
    '_source': {'first_name': 'dave',
     'last_name': 'jones',
     'age': 27,
     'about': 'best friend',
     'interests': ['sports', 'music']}}]}}

In [22]:
es.delete(index="friends", id=1002)

{'_index': 'friends',
 '_type': '_doc',
 '_id': '1002',
 '_version': 2,
 'result': 'deleted',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 6,
 '_primary_term': 1}

In [23]:
es.search(index="friends",  body={"query": {"match_all": {}}})

{'took': 0,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 1, 'relation': 'eq'},
  'max_score': 1.0,
  'hits': [{'_index': 'friends',
    '_type': '_doc',
    '_id': '1001',
    '_score': 1.0,
    '_source': {'first_name': 'dave',
     'last_name': 'jones',
     'age': 27,
     'about': 'best friend',
     'interests': ['sports', 'music']}}]}}

# search with pythonic API

In [24]:
Search(using=es, index="courses").query("match", name="accounting").execute()

<Response: [<Hit(courses/1): {'name': 'Accounting 101', 'room': 'E3', 'professor': {'name...}>, <Hit(courses/6): {'name': 'Cost Accounting 400', 'room': 'E7', 'professor': {...}>, <Hit(courses/9): {'name': 'Tax Accounting 200', 'room': 'E7', 'professor': {'...}>, <Hit(courses/8): {'name': 'Accounting Info Systems 350', 'room': 'E3', 'profe...}>]>

In [25]:
response = Search(using=es, index="courses") \
    .filter("term", name = "accounting") \
    .query("match", room = "e3") \
    .exclude("match", name="201") \
    .execute()

In [26]:
response.hits.total

{'value': 2, 'relation': 'eq'}

In [27]:
response.hits.hits

[{'_index': 'courses', '_type': '_doc', '_id': '1', '_score': 1.2321435, '_source': {'name': 'Accounting 101', 'room': 'E3', 'professor': {'name': 'Thomas Baszo', 'department': 'finance', 'facutly_type': 'part-time', 'email': 'baszot@onuni.com'}, 'students_enrolled': 27, 'course_publish_date': '2015-01-19', 'course_description': 'Act 101 is a course from the business school on the introduction to accounting that teaches students how to read and compose basic financial statements'}}, {'_index': 'courses', '_type': '_doc', '_id': '8', '_score': 1.2321435, '_source': {'name': 'Accounting Info Systems 350', 'room': 'E3', 'professor': {'name': 'Bill Cage', 'department': 'accounting', 'facutly_type': 'full-time', 'email': 'cageb@onuni.com'}, 'students_enrolled': 19, 'course_publish_date': '2014-05-15', 'course_description': 'Act Sys 350 is an advanced course providing students a practical understanding of an accounting system in database technology. Students will use MS Access to build a tra

In [28]:
for hit in response:
    print(hit.meta.score, hit.name)

1.2321435 Accounting 101
1.2321435 Accounting Info Systems 350


In [29]:
s = Search(using=es, index="vehicles") \
    .query("match", color = "red") 

In [30]:
s.aggs.bucket("popular_cars", "terms", field= "make.keyword") \
    .metric("stats_on_price", "stats", field="price")

Terms(aggs={'stats_on_price': Stats(field='price')}, field='make.keyword')

In [31]:
response = s.execute()

In [32]:
response.hits.hits

[{'_index': 'vehicles', '_type': '_doc', '_id': 'oGHMhnIBZukbdG7-7pbh', '_score': 1.1451323, '_source': {'price': 18000, 'color': 'red', 'make': 'dodge', 'sold': '2016-11-05', 'condition': 'good'}}, {'_index': 'vehicles', '_type': '_doc', '_id': 'oWHMhnIBZukbdG7-7pbh', '_score': 1.1451323, '_source': {'price': 80000, 'color': 'red', 'make': 'bmw', 'sold': '2016-01-01', 'condition': 'new'}}, {'_index': 'vehicles', '_type': '_doc', '_id': 'pGHMhnIBZukbdG7-7pbh', '_score': 1.1451323, '_source': {'price': 19000, 'color': 'red', 'make': 'dodge', 'sold': '2016-02-12', 'condition': 'good'}}, {'_index': 'vehicles', '_type': '_doc', '_id': 'pWHMhnIBZukbdG7-7pbh', '_score': 1.1451323, '_source': {'price': 20000, 'color': 'red', 'make': 'chevrolet', 'sold': '2016-08-15', 'condition': 'good'}}, {'_index': 'vehicles', '_type': '_doc', '_id': 'qGHMhnIBZukbdG7-7pbh', '_score': 1.1451323, '_source': {'price': 35000, 'color': 'red', 'make': 'dodge', 'sold': '2016-04-10', 'condition': 'new'}}, {'_index'

In [33]:
for each_make in response.aggregations.popular_cars.buckets:
    print(each_make.key, each_make.stats_on_price)

dodge {'count': 6, 'min': 18000.0, 'max': 35000.0, 'avg': 24000.0,...}
bmw {'count': 2, 'min': 80000.0, 'max': 80000.0, 'avg': 80000.0,...}
chevrolet {'count': 2, 'min': 20000.0, 'max': 20000.0, 'avg': 20000.0,...}
