In [4]:
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import reader
# Connect to the elastic cluster
es=Elasticsearch([{'host':'localhost','port':9200}])

In [121]:
# Get all indices   =>   es.indices.get_alias("*")
# Delete an index   =>   es.indices.delete(index='.monitoring-es-7-2019.06.21')
# Create an index   =>   es.indices.create(index='opal_dataset')
es.indices.create(index='bank')

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'bank'}

In [124]:
url='https://raw.githubusercontent.com/elastic/elasticsearch/master/docs/src/test/resources/accounts.json'
gen_data=reader.get_sample_data(url,index_name='bank')

In [125]:
helpers.bulk(es, gen_data)

(1000, [])

In [5]:
# Number of documents indexed
es.count(index='bank')

{'count': 1001,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}}

## Introducting the Query Language

In [6]:
def elastic_query(q):
    hits=es.search(index='bank',body=q)['hits']['hits']
    return [i['_source'] for i in hits]

In [7]:
# Get first  document
q={"query": { "match_all": {} },
   "size": 1}
elastic_query(q)

[{'account_number': 6,
  'balance': 5686,
  'firstname': 'Hattie',
  'lastname': 'Bond',
  'age': 36,
  'gender': 'M',
  'address': '671 Bristol Street',
  'employer': 'Netagy',
  'email': 'hattiebond@netagy.com',
  'city': 'Dante',
  'state': 'TN'}]

In [8]:
# Get 11th  document
q={"query": { "match_all": {} },
   "from": 10,
   "size": 1}
elastic_query(q)

[{'account_number': 56,
  'balance': 14992,
  'firstname': 'Josie',
  'lastname': 'Nelson',
  'age': 32,
  'gender': 'M',
  'address': '857 Tabor Court',
  'employer': 'Emtrac',
  'email': 'josienelson@emtrac.com',
  'city': 'Sunnyside',
  'state': 'UT'}]

In [9]:
order='asc' # ascending ordered
field="account_number"
query={"query":
       {"match_all":{}},
       "sort":[{field:order}],
       "size":"2"}

elastic_query(query)

[{'account_number': 0,
  'balance': 16623,
  'firstname': 'Bradshaw',
  'lastname': 'Mckenzie',
  'age': 29,
  'gender': 'F',
  'address': '244 Columbus Place',
  'employer': 'Euron',
  'email': 'bradshawmckenzie@euron.com',
  'city': 'Hobucken',
  'state': 'CO'},
 {'account_number': 1,
  'balance': 39225,
  'firstname': 'Amber',
  'lastname': 'Duke',
  'age': 32,
  'gender': 'M',
  'address': '880 Holmes Lane',
  'employer': 'Pyrami',
  'email': 'amberduke@pyrami.com',
  'city': 'Brogan',
  'state': 'IL',
  'Facked': True}]

In [10]:
# Get documents whose account_number fields satisfy such query. 
# "Expectedly" only one document returned

q={"query": { "match": {"account_number":20} }}
elastic_query(q)

[{'account_number': 20,
  'balance': 16418,
  'firstname': 'Elinor',
  'lastname': 'Ratliff',
  'age': 36,
  'gender': 'M',
  'address': '282 Kings Place',
  'employer': 'Scentric',
  'email': 'elinorratliff@scentric.com',
  'city': 'Ribera',
  'state': 'WA'}]

In [11]:
# Get document whose address field satisfies such query.
q={"query": { "match": { "address": "mill" }}}
elastic_query(q)

[{'account_number': 970,
  'balance': 19648,
  'firstname': 'Forbes',
  'lastname': 'Wallace',
  'age': 28,
  'gender': 'M',
  'address': '990 Mill Road',
  'employer': 'Pheast',
  'email': 'forbeswallace@pheast.com',
  'city': 'Lopezo',
  'state': 'AK'},
 {'account_number': 136,
  'balance': 45801,
  'firstname': 'Winnie',
  'lastname': 'Holland',
  'age': 38,
  'gender': 'M',
  'address': '198 Mill Lane',
  'employer': 'Neteria',
  'email': 'winnieholland@neteria.com',
  'city': 'Urie',
  'state': 'IL'},
 {'account_number': 345,
  'balance': 9812,
  'firstname': 'Parker',
  'lastname': 'Hines',
  'age': 38,
  'gender': 'M',
  'address': '715 Mill Avenue',
  'employer': 'Baluba',
  'email': 'parkerhines@baluba.com',
  'city': 'Blackgum',
  'state': 'KY'},
 {'account_number': 472,
  'balance': 25571,
  'firstname': 'Lee',
  'lastname': 'Long',
  'age': 32,
  'gender': 'F',
  'address': '288 Mill Street',
  'employer': 'Comverges',
  'email': 'leelong@comverges.com',
  'city': 'Movico',

In [12]:
# Get all document whose address field contains either mill OR lane
q={"query": { "match": { "address": "mill lane" }}}
or_res=elastic_query(q)
or_res

[{'account_number': 136,
  'balance': 45801,
  'firstname': 'Winnie',
  'lastname': 'Holland',
  'age': 38,
  'gender': 'M',
  'address': '198 Mill Lane',
  'employer': 'Neteria',
  'email': 'winnieholland@neteria.com',
  'city': 'Urie',
  'state': 'IL'},
 {'account_number': 970,
  'balance': 19648,
  'firstname': 'Forbes',
  'lastname': 'Wallace',
  'age': 28,
  'gender': 'M',
  'address': '990 Mill Road',
  'employer': 'Pheast',
  'email': 'forbeswallace@pheast.com',
  'city': 'Lopezo',
  'state': 'AK'},
 {'account_number': 345,
  'balance': 9812,
  'firstname': 'Parker',
  'lastname': 'Hines',
  'age': 38,
  'gender': 'M',
  'address': '715 Mill Avenue',
  'employer': 'Baluba',
  'email': 'parkerhines@baluba.com',
  'city': 'Blackgum',
  'state': 'KY'},
 {'account_number': 472,
  'balance': 25571,
  'firstname': 'Lee',
  'lastname': 'Long',
  'age': 32,
  'gender': 'F',
  'address': '288 Mill Street',
  'employer': 'Comverges',
  'email': 'leelong@comverges.com',
  'city': 'Movico',

In [13]:
# Get all document whose address field contains  mill or lane
q={"query": { 
    "bool":{
        "should":[ 
            {"match": { "address": "mill" }},
            {"match": { "address": "lane" }}]
        }
    }}
should_res=elastic_query(q)
should_res

[{'account_number': 136,
  'balance': 45801,
  'firstname': 'Winnie',
  'lastname': 'Holland',
  'age': 38,
  'gender': 'M',
  'address': '198 Mill Lane',
  'employer': 'Neteria',
  'email': 'winnieholland@neteria.com',
  'city': 'Urie',
  'state': 'IL'},
 {'account_number': 970,
  'balance': 19648,
  'firstname': 'Forbes',
  'lastname': 'Wallace',
  'age': 28,
  'gender': 'M',
  'address': '990 Mill Road',
  'employer': 'Pheast',
  'email': 'forbeswallace@pheast.com',
  'city': 'Lopezo',
  'state': 'AK'},
 {'account_number': 345,
  'balance': 9812,
  'firstname': 'Parker',
  'lastname': 'Hines',
  'age': 38,
  'gender': 'M',
  'address': '715 Mill Avenue',
  'employer': 'Baluba',
  'email': 'parkerhines@baluba.com',
  'city': 'Blackgum',
  'state': 'KY'},
 {'account_number': 472,
  'balance': 25571,
  'firstname': 'Lee',
  'lastname': 'Long',
  'age': 32,
  'gender': 'F',
  'address': '288 Mill Street',
  'employer': 'Comverges',
  'email': 'leelong@comverges.com',
  'city': 'Movico',

In [14]:
# Both queries return same result.
should_res==or_res

True

In [15]:
# Get all document whose address field contains  mill AND lane
q={"query": { 
    "bool":{
        "must":[ 
            {"match": { "address": "mill" }},
            {"match": { "address": "lane" }}]
        }
    }}
elastic_query(q)

[{'account_number': 136,
  'balance': 45801,
  'firstname': 'Winnie',
  'lastname': 'Holland',
  'age': 38,
  'gender': 'M',
  'address': '198 Mill Lane',
  'employer': 'Neteria',
  'email': 'winnieholland@neteria.com',
  'city': 'Urie',
  'state': 'IL'}]

In [16]:
# Get all document whose address field  contain neither mill nor lane
q={"query": { 
    "bool":{
        "must_not":[ 
            {"match": { "address": "mill" }},
            {"match": { "address": "lane" }}]
        }
    }}
nor=elastic_query(q)

In [17]:
q={ "query": {
    "bool": {
      "must": [
        { "match": { "age": "40" } }
      ],
      "must_not": [
        { "match": { "state": "ID" } }
      ]
    }
  }}
elastic_query(q)

[{'account_number': 474,
  'balance': 35896,
  'firstname': 'Obrien',
  'lastname': 'Walton',
  'age': 40,
  'gender': 'F',
  'address': '192 Ide Court',
  'employer': 'Suremax',
  'email': 'obrienwalton@suremax.com',
  'city': 'Crucible',
  'state': 'UT'},
 {'account_number': 479,
  'balance': 31865,
  'firstname': 'Cameron',
  'lastname': 'Ross',
  'age': 40,
  'gender': 'M',
  'address': '904 Bouck Court',
  'employer': 'Telpod',
  'email': 'cameronross@telpod.com',
  'city': 'Nord',
  'state': 'MO'},
 {'account_number': 549,
  'balance': 1932,
  'firstname': 'Jacqueline',
  'lastname': 'Maxwell',
  'age': 40,
  'gender': 'M',
  'address': '444 Schenck Place',
  'employer': 'Fuelworks',
  'email': 'jacquelinemaxwell@fuelworks.com',
  'city': 'Oretta',
  'state': 'OR'},
 {'account_number': 878,
  'balance': 49159,
  'firstname': 'Battle',
  'lastname': 'Blackburn',
  'age': 40,
  'gender': 'F',
  'address': '234 Hendrix Street',
  'employer': 'Zilphur',
  'email': 'battleblackburn@zi

# Executing Queries with filters

In [18]:
# Get all documents whose balance between 29920-30000
q={"query": {
    "bool": {
      "must": { "match_all": {} },
      "filter": {
        "range": {
          "balance": {
            "gte": 29920,
            "lte": 30000
          }}}}}}

elastic_query(q)

[{'account_number': 359,
  'balance': 29927,
  'firstname': 'Vanessa',
  'lastname': 'Harvey',
  'age': 28,
  'gender': 'F',
  'address': '679 Rutledge Street',
  'employer': 'Zentime',
  'email': 'vanessaharvey@zentime.com',
  'city': 'Williston',
  'state': 'IL'},
 {'account_number': 196,
  'balance': 29931,
  'firstname': 'Caldwell',
  'lastname': 'Daniel',
  'age': 28,
  'gender': 'F',
  'address': '405 Oliver Street',
  'employer': 'Furnigeer',
  'email': 'caldwelldaniel@furnigeer.com',
  'city': 'Zortman',
  'state': 'NE'},
 {'account_number': 781,
  'balance': 29961,
  'firstname': 'Sanford',
  'lastname': 'Mullen',
  'age': 26,
  'gender': 'F',
  'address': '879 Dover Street',
  'employer': 'Zanity',
  'email': 'sanfordmullen@zanity.com',
  'city': 'Martinez',
  'state': 'TX'}]

In [20]:
# Get all documents whose city is exactly martinez
q={"query": {
    "bool": {
      "must": { "match_all": {}},
      "filter": {
        "term": {
          "city.raw": "martinez"
        }}}}}

elastic_query(q)

[]

In [21]:
# Get all documents whose city is martinez #caseinsensitive
q={"query": {
    "bool": {
      "must": { "match_all": {}},
      "filter": {
        "term": {
          "city": "martinez"
        }}}}}

elastic_query(q)

[{'account_number': 781,
  'balance': 29961,
  'firstname': 'Sanford',
  'lastname': 'Mullen',
  'age': 26,
  'gender': 'F',
  'address': '879 Dover Street',
  'employer': 'Zanity',
  'email': 'sanfordmullen@zanity.com',
  'city': 'Martinez',
  'state': 'TX'}]

# Indexing via update

Upsert!

In [147]:
# we manually update some documents to add additional information
UPDATES = [{"_type": "_doc",
            "_id": "1",
            "_op_type": "update",
            "doc": {"Facked": True},
            "doc_as_upsert":True
           }]

In [148]:
helpers.bulk(es, UPDATES, index="bank")

(1, [])

In [149]:
es.get(index="bank", id=1)

{'_index': 'bank',
 '_type': '_doc',
 '_id': '1',
 '_version': 2,
 '_seq_no': 1001,
 '_primary_term': 1,
 'found': True,
 '_source': {'account_number': 1,
  'balance': 39225,
  'firstname': 'Amber',
  'lastname': 'Duke',
  'age': 32,
  'gender': 'M',
  'address': '880 Holmes Lane',
  'employer': 'Pyrami',
  'email': 'amberduke@pyrami.com',
  'city': 'Brogan',
  'state': 'IL',
  'Facked': True}}