
# Creating and deleting indices with Elasticsearch

First start up the Elasticsearch and Kibana environments.  Then execute commands below.  Make sure you have installed elasticsearch in the Python environment with pip prior to running this.

In [7]:
from elasticsearch import Elasticsearch                    # starts elasticsearch
es = Elasticsearch(HOST="http://localhost", PORT=9200)     # create elasticsearch connection; port 9200 is the default
es = Elasticsearch()                                       # creates the object to hold the data that we want to index

In [3]:
es.indices.create(index="first_index", ignore=400)        # creates an index in the es object
                                                          # we are ignoring the 400 error code

{'acknowledged': True, 'index': 'first_index', 'shards_acknowledged': True}

In [4]:
es.indices.exists(index="first_index")                   # checks to see if the index exists

True

In [5]:
es.indices.delete(index="first_index")                  # deletes the index

{'acknowledged': True}

In [6]:
es.indices.exists(index="first_index")                 # checks to see if an index exists

False

# Second Tutorial
In starting this one, the es.Elasticsearch() was not necessary as elasticsearch will create the object

In [8]:
doc1 = {"city":"New Delhi", "country":"India"}
doc2 = {"city":"London", "country":"England"}
doc3 = {"city":"Los Angeles", "country":"USA"}
        
es.index(index='cities', doc_type="places", id=1, body=doc1)   # these are the 4 required elements to create an index


{'_id': '1',
 '_index': 'cities',
 '_primary_term': 1,
 '_seq_no': 0,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_type': 'places',
 '_version': 1,
 'result': 'created'}

In [9]:
es.index(index='cities', doc_type="places", id=2, body=doc2)

{'_id': '2',
 '_index': 'cities',
 '_primary_term': 1,
 '_seq_no': 0,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_type': 'places',
 '_version': 1,
 'result': 'created'}

In [10]:
es.index(index='cities', doc_type="places", id=3, body=doc3)

{'_id': '3',
 '_index': 'cities',
 '_primary_term': 1,
 '_seq_no': 0,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_type': 'places',
 '_version': 1,
 'result': 'created'}

All three documents have now been successfully indexed 

In [12]:
res = es.get(index='cities', doc_type='places', id=2)    # Lets see whats in id 2
res

{'_id': '2',
 '_index': 'cities',
 '_source': {'city': 'London', 'country': 'England'},
 '_type': 'places',
 '_version': 1,
 'found': True}

The data is in the source key.  Let's go to the source key

In [13]:
res['_source']

{'city': 'London', 'country': 'England'}

How to query to match a document.

In [32]:
doc11 = {"sentence" : "Today is a sunny day."}
doc22 = {"sentence" : "Today is a bright-sunny day."}
doc33 = {"sentence" : "Today is a rainy day."}

es.index(index="english", doc_type="sentences", id=1, body=doc11)


{'_id': '1',
 '_index': 'english',
 '_primary_term': 1,
 '_seq_no': 1,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_type': 'sentences',
 '_version': 2,
 'result': 'updated'}

In [33]:
es.index(index="english", doc_type="sentences", id=2, body=doc22)

{'_id': '2',
 '_index': 'english',
 '_primary_term': 1,
 '_seq_no': 1,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_type': 'sentences',
 '_version': 2,
 'result': 'updated'}

In [34]:
es.index(index="english", doc_type="sentences", id=3, body=doc33)

{'_id': '3',
 '_index': 'english',
 '_primary_term': 1,
 '_seq_no': 0,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_type': 'sentences',
 '_version': 1,
 'result': 'created'}

Provide index and body keys in the search command.  The search is not case sensitive.

In [35]:
res = es.search(index="english", body={"from":0,"size":0,"query":{"match":{"sentence":"SUNNY"}}})
res

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [], 'max_score': 0.0, 'total': 2},
 'timed_out': False,
 'took': 1}

Changing the size to 2 will show the sentence instead of the counts

In [36]:
res = es.search(index="english", body={"from":0,"size":2,"query":{"match":{"sentence":"SUNNY"}}})
res

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '2',
    '_index': 'english',
    '_score': 0.2876821,
    '_source': {'sentence': 'Today is a bright-sunny day.'},
    '_type': 'sentences'},
   {'_id': '1',
    '_index': 'english',
    '_score': 0.2876821,
    '_source': {'sentence': 'Today is a sunny day.'},
    '_type': 'sentences'}],
  'max_score': 0.2876821,
  'total': 2},
 'timed_out': False,
 'took': 3}

Changing to match_phrase with 0 will match the phrase

In [37]:
res = es.search(index="english", body={"from":0,"size":0,"query":{"match_phrase":{"sentence":"SUNNY"}}})
res

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [], 'max_score': 0.0, 'total': 2},
 'timed_out': False,
 'took': 2}

The previous search found 2 instances, but when we add the word bright, there is only one match for both words. Again, this is case and punctuation insensitive.


In [38]:
res = es.search(index="english", body={"from":0,"size":2,"query":{"match":{"sentence":"bright SUNNY"}}})
res

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '2',
    '_index': 'english',
    '_score': 0.5753642,
    '_source': {'sentence': 'Today is a bright-sunny day.'},
    '_type': 'sentences'},
   {'_id': '1',
    '_index': 'english',
    '_score': 0.2876821,
    '_source': {'sentence': 'Today is a sunny day.'},
    '_type': 'sentences'}],
  'max_score': 0.5753642,
  'total': 2},
 'timed_out': False,
 'took': 6}

term matches the exact phrase and is case sensitive.  No documents matched.


In [39]:
res = es.search(index="english", body={"from":0,"size":2,"query":{"term":{"sentence":"SUNNY"}}})
res

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [], 'max_score': None, 'total': 0},
 'timed_out': False,
 'took': 2}

Combining search terms

must - has to
must_not - the opposite of must
should - when used with must, not necessary that the should clause matches; if no must clauses present, one of the should clauses should match - can also set the minimum should clauses that should match.

In the example below, the sentence must not have bright in it, but should have sunny in it.  So expect, and receive, one.  The command below, with the size changed, shows the 1 sentence found.

In [40]:
res = es.search(index="english", body={ "from": 0, "size": 0, "query": { "bool": { "must_not": 
                { "match": { "sentence": "bright" } }, "should": { "match":{"sentence": "sunny"} } } } } )
res

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [], 'max_score': 0.0, 'total': 1},
 'timed_out': False,
 'took': 6}

In [41]:
res = es.search(index="english", body={ "from": 0, "size": 2, "query": { "bool": { "must_not": 
                { "match": { "sentence": "bright" } }, "should": { "match":{"sentence": "sunny"} } } } } )
res

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '1',
    '_index': 'english',
    '_score': 0.2876821,
    '_source': {'sentence': 'Today is a sunny day.'},
    '_type': 'sentences'}],
  'max_score': 0.2876821,
  'total': 1},
 'timed_out': False,
 'took': 6}

In [42]:
es.get(index="english", doc_type="sentences", id=1)

{'_id': '1',
 '_index': 'english',
 '_source': {'sentence': 'Today is a sunny day.'},
 '_type': 'sentences',
 '_version': 2,
 'found': True}

In [43]:
es.get(index="english", doc_type="sentences", id=2)

{'_id': '2',
 '_index': 'english',
 '_source': {'sentence': 'Today is a bright-sunny day.'},
 '_type': 'sentences',
 '_version': 2,
 'found': True}

In [44]:
es.get(index="english", doc_type="sentences", id=3)

{'_id': '3',
 '_index': 'english',
 '_source': {'sentence': 'Today is a rainy day.'},
 '_type': 'sentences',
 '_version': 1,
 'found': True}

Let's use regex to search now.  Below looks for everything .*

In [45]:
res = es.search(index="english", body={"from":0,"size":0,"query":{"regexp":{"sentence":".*"}}})
res

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [], 'max_score': 0.0, 'total': 3},
 'timed_out': False,
 'took': 18}

In [46]:
res = es.search(index="english", body={"from":0,"size":3,"query":{"regexp":{"sentence":".*"}}})
res

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '2',
    '_index': 'english',
    '_score': 1.0,
    '_source': {'sentence': 'Today is a bright-sunny day.'},
    '_type': 'sentences'},
   {'_id': '1',
    '_index': 'english',
    '_score': 1.0,
    '_source': {'sentence': 'Today is a sunny day.'},
    '_type': 'sentences'},
   {'_id': '3',
    '_index': 'english',
    '_score': 1.0,
    '_source': {'sentence': 'Today is a rainy day.'},
    '_type': 'sentences'}],
  'max_score': 1.0,
  'total': 3},
 'timed_out': False,
 'took': 2}

In [47]:
res = es.search(index="english", body={"from":0,"size":0,"query":{"regexp":{"sentence":"sun.*"}}})
res

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [], 'max_score': 0.0, 'total': 2},
 'timed_out': False,
 'took': 25}

In [48]:
res = es.search(index="english", body={"from":0,"size":2,"query":{"regexp":{"sentence":"sun.*"}}})
res

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '2',
    '_index': 'english',
    '_score': 1.0,
    '_source': {'sentence': 'Today is a bright-sunny day.'},
    '_type': 'sentences'},
   {'_id': '1',
    '_index': 'english',
    '_score': 1.0,
    '_source': {'sentence': 'Today is a sunny day.'},
    '_type': 'sentences'}],
  'max_score': 1.0,
  'total': 2},
 'timed_out': False,
 'took': 10}

# What is mapping?

As per Elasticsearch Reference, "Mapping is the process of defining how a document, and the fields it contains, are stored and indexed."

# How does it help?

It enablesfaster search retrieval and aggregations.  Hence, your mapping defines how effectively you can handle your data.  A bad mapping can have severe consequences on the performance of your system.


In [50]:
# documents to insert in the elasticsearch index "cities"

doca = {"city":"Bangalore", "country":"India","datetime":"2018,01,01,10,20,00"} #datetime format: yyy,MM,dd,hh,mm,ss
docb = {"city":"London", "country":"England","datetime":"2018,01,02,03,12,00"}
docc = {"city":"Los Angeles", "country":"USA","datetime":"2018,04,19,21,02,00"}


Got the error below because we didn't create the index.  Create it in the box below.

In [52]:
es.indices.get_mapping(index='travel', doc_type='cities') #POSTMAN: http://127.0.0.1:9200/travel/_mapping/cities


GET http://localhost:9200/travel/_mapping/cities [status:404 request:0.004s]


NotFoundError: NotFoundError(404, 'index_not_found_exception', 'no such index')

In [53]:
es.index(index='travel', doc_type='cities', id=1, body=doca)

{'_id': '1',
 '_index': 'travel',
 '_primary_term': 1,
 '_seq_no': 0,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_type': 'cities',
 '_version': 1,
 'result': 'created'}

In [54]:
es.indices.get_mapping(index='travel', doc_type='cities') # Now we don't get an error, we get a JSON response.

{'travel': {'mappings': {'cities': {'properties': {'city': {'fields': {'keyword': {'ignore_above': 256,
        'type': 'keyword'}},
      'type': 'text'},
     'country': {'fields': {'keyword': {'ignore_above': 256,
        'type': 'keyword'}},
      'type': 'text'},
     'datetime': {'fields': {'keyword': {'ignore_above': 256,
        'type': 'keyword'}},
      'type': 'text'}}}}}}

When Elasticsearch did the indexing, it did not recognize the datetime field as a datetime.  It assigned it as a string keyword.  So we want to change the mapping manually.  You can't just change the index.  You have to delete it and then create it programmatically.

In [62]:
es.indices.delete(index='travel')

{'acknowledged': True}

In [64]:
es.indices.create(index='travel')

{'acknowledged': True, 'index': 'travel', 'shards_acknowledged': True}

In [65]:
#http://127.0.0.1:9200/travel/_mapping/places
es.indices.put_mapping(
    index='travel', 
    doc_type='cities',
    body = 
    {
        
        'properties': {
            'city': {
                "type": "text",
                'fields': {
                    'keyword': {
                        'type': 'keyword',
                         'ignore_above': 256
                    }
                }
            },
        'country': {
            'type': 'text',
            'fields': {
                'keyword': {
                   'type': 'keyword',
                   'ignore_above': 256
                     }
                }
            },
        'datetime': {
            'type':'date',
            'format':'yyy,MM,dd,hh,mm,ss'
            }
        }
    }
)

{'acknowledged': True}

In [66]:
es.indices.get_mapping(index='travel', doc_type='cities')  # now the date is the right format.

{'travel': {'mappings': {'cities': {'properties': {'city': {'fields': {'keyword': {'ignore_above': 256,
        'type': 'keyword'}},
      'type': 'text'},
     'country': {'fields': {'keyword': {'ignore_above': 256,
        'type': 'keyword'}},
      'type': 'text'},
     'datetime': {'format': 'yyy,MM,dd,hh,mm,ss', 'type': 'date'}}}}}}

To learn more about mappings:  http://www.elastic.co/guide/en/elasticsearch/reference/current/mappimg.html