In [4]:
from pprint import pprint
from elasticsearch import Elasticsearch

es = Elasticsearch('http://localhost:9200')
client_info = es.info()
print("Connected to Elastic Search")
pprint(client_info.body)


Connected to Elastic Search
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'dwZJ7aR-Rk-TNTwNsmuV-w',
 'name': '8c40224e1107',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2024-08-05T10:05:34.233336849Z',
             'build_flavor': 'default',
             'build_hash': '1a77947f34deddb41af25e6f0ddb8e830159c179',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '9.11.1',
             'minimum_index_compatibility_version': '7.0.0',
             'minimum_wire_compatibility_version': '7.17.0',
             'number': '8.15.0'}}


# Create index

1. Simplest Way

In this method, the mappings which define the structure of documents within an index are infered automatically

In [None]:
es.indices.delete(index='my_index', ignore_unavailable=True)
es.indices.create(index="my_index")

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

# 2. Specify the number of replicas and shards
## Shards: Elasticsearch divides the data in an index into multiple shards. Each shard is a self-contained index that Elasticsearch can distribute across multiple nodes in a cluster. 

## Replicas: For fault tolerance and high availability, an index can have replica shards, which are copies of the primary shards.

In [None]:
es.indices.delete(index='my_index', ignore_unavailable=True)

es.indices.create(index="my_index",settings={"index":{
    "number_of_shards":3, 
    "number_of_replicas":2
}
    }
)


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

# Insert one document

## Create a dummy index just to test inserting one document

In [None]:
es.indices.delete(index='my_index',ignore_unavailable=True)
es.indices.create(index='my_index')

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

In [None]:
document = {
    'title':'title',
    'text':'text',
    'created_on':'2024-09-22',
}
response = es.index(index='my_index',body=document)
response

ObjectApiResponse({'_index': 'my_index', '_id': '2FUlLJwBELqBbuH6BWly', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

In [None]:
print(response["result"])

created


In [None]:
print(response["_shards"])

{'total': 2, 'successful': 1, 'failed': 0}


In [None]:
print(response)

{'_index': 'my_index', '_id': '2FUlLJwBELqBbuH6BWly', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1}


# Insert multiple documents

In [None]:
import json

dummy_data = json.load(open("./dummy_data.json"))
dummy_data

[{'title': 'Sample Title 1',
  'text': 'This is the first sample document text.',
  'created_on': '2024-09-22'},
 {'title': 'Sample Title 2',
  'text': 'Here is another example of a document.',
  'created_on': '2024-09-24'},
 {'title': 'Sample Title 3',
  'text': 'The content of the third document goes here.',
  'created_on': '2024-09-24'}]

In [None]:
def insert_document(document):
    response = es.index(index='my_index',body=document)
    return response

def print_def(response):
    print(f"Document Id is {response['_id']} is {response['result']} and is split into {response['_shards']['total']} shards")

for document in dummy_data:
    response = insert_document(document)
    print_def(response)


Document Id is 2VUlLJwBELqBbuH6aGmy is created and is split into 2 shards
Document Id is 2lUlLJwBELqBbuH6aGm_ is created and is split into 2 shards
Document Id is 21UlLJwBELqBbuH6aGnI is created and is split into 2 shards


In [None]:
from pprint import pprint

index_mapping = es.indices.get_mapping(index='my_index')
pprint(index_mapping["my_index"]["mappings"]["properties"])

{'created_on': {'type': 'date'},
 'text': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
          'type': 'text'},
 'title': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
           'type': 'text'}}


# Common Types


## Binary Types

In [None]:
es.indices.delete(index='binary_index',ignore_unavailable=True)
es.indices.create(
    index='binary_index',
    mappings={"properties":{
        "image_data":{
            "type":"binary" # Type Binary
        }
    }}
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'binary_index'})

In [None]:
import base64

image_path = "field_data_types_docs.png"

with open(image_path,"rb") as image_file:
    image_bytes = image_file.read()
    image_base64 = base64.b64encode(image_bytes).decode("utf-8")

image_base64[:100]
len(image_base64)

271328

In [None]:
document = {
    "image_data":image_base64
}

response = es.index(index="binary_index", body=document)
response

ObjectApiResponse({'_index': 'binary_index', '_id': '3VVvLJwBELqBbuH66mkF', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1})

In [None]:
es.indices.delete(index='other_common_data_types_index',ignore_unavailable=True)

es.indices.create(
    index='other_common_data_types_index',
    mappings={
        "properties":{
            "book_reference":{
                "type":"keyword"
            },
            "price":{
                "type":"float"
            },
            "publish_date":{
                "type":"date"
            },
            "is_available":{
                "type":"boolean"
            }

        }
    }
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'other_common_data_types_index'})

In [None]:
document = {
    "book_reference":"978-1617294433",
    "price": 44.99,
    "publish_date":"2021-06-09",
    "is_available":True
}

response = es.index(index='other_common_data_types_index', body=document)
response

ObjectApiResponse({'_index': 'other_common_data_types_index', '_id': '31WDLJwBELqBbuH6qmnF', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1})

In [None]:
document = {
    "author":{
        "first_name":"Imad",
        "last_name":"Saddik"
    }
} 

response = es.index(index="object_index",body=document)
response

ObjectApiResponse({'_index': 'object_index', '_id': '4FV_LZwBELqBbuH6LGlk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

# Nested Object

In [None]:
es.indices.delete(index='flattened_object_index', ignore_unavailable=True)
es.indices.create(
    index = 'flattened_object_index',
    mappings={
        "properties":{
            "author":{
            "type": "flattened"

            }
        }
    }
)


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'flattened_object_index'})

In [None]:

document = {
    "author":{
        "first_name":"Imad",
        "last_name":"Saddik"
    }
} 

response = es.index(index="flattened_object_index",body=document)
response

ObjectApiResponse({'_index': 'flattened_object_index', '_id': '4VWWLZwBELqBbuH6MmnN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

In [None]:
es.indices.delete(index='nested_object_index', ignore_unavailable=True)
es.indices.create(
    index='nested_object_index',
    mappings={
        "properties":{
            "author":{
                "type":"nested"
            }
        }
    }
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'nested_object_index'})

In [None]:
document = [
    {
          "first_name":"Imad",
          "last_name":"Saddik"
    },
    {
         "first_name":"cs",
        "last_name":"sc"
    }
]

response = es.index(index="nested_object_index",body={"user":document})
response

ObjectApiResponse({'_index': 'nested_object_index', '_id': '5FWqLZwBELqBbuH6uWld', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

# Text

In [None]:
es.indices.delete(index='text_index',ignore_unavailable=True)
es.indices.create(
    index="text-index",
    mappings={
        "properties":{
            "email_body":{
                "type":"text"
            }
        }
    }
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'text-index'})

In [None]:
document = {
    "email_body":"Hello this is a test email"
}

response = es.index(index="text-index",body=document)

response

ObjectApiResponse({'_index': 'text-index', '_id': '5VXELZwBELqBbuH6bWm3', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

In [5]:
document_1 = {
    "suggest":{
        "input":["Mars","Planet"]
    }
}

document_2 = {
    "suggest":{
        "input":["Andromeda","Galaxy"]
    }
}

es.index(index='text-completion-text',body=document_1)
es.index(index='text-completion-text',body=document_2)


ObjectApiResponse({'_index': 'text-completion-text', '_id': '4WZRMZwBCyfgMRbzLMTD', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1})

# Geo Point

In [None]:
es.indices.create(
    index='geo_point_index',
    mappings={
        "properties":{
            "location":{
                "type":"geo_point"
            }
        }
    }
)



ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'geo_point_index'})

In [19]:
document = {
    "text":"Geopoint as an object using GeoJSON format",
    "location":{
        "type":"Point",
        "coordiantes":[
            -71.34,
            41.12
        ]
    }
}

response = es.index(index='geo_point_index', body= document)
response

ObjectApiResponse({'_index': 'geo_point_index', '_id': '5WZ1MZwBCyfgMRbz-MRM', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1})

In [21]:
es.indices.delete(index='geo_shape_index', ignore_unavailable=True)
es.indices.create(index='geo_shape_index',mappings={
    "properties":{
        "location":{
            "type":"geo_shape"
        }
    }
})

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'geo_shape_index'})

In [None]:
# document_1 = {
#     "location":{
#         "type":"LineString",
#         "coordinates": [
#         [
#             -77.03653,
#             38.897676
#         ],
#         [
#             -77.009051,
#             38.889939
#         ]
#      ]
    
#     }
# }

# document_2 = {
#     "location":{
#         "type":"Polygon",
#         "coordinates":[
#             [
#                 [100,0],
#                 [101,0],
#                 [101,1],
#                 [100,0],
#             ],
#             [
#                 [100.2,0.2],
#                 [100.8,0.2],
#                 [100.8,0.8],
#                 [100.2,0.8],
#                 [100.2,0.2],
#             ]
#         ]
#     }
# }

# es.index(index='geo_shape_index',body=document_1)
# es.index(index='geo_shape_index',body=document_2)





In [32]:
es.indices.delete(index='my_index',ignore_unavailable=True)
es.indices.create(index='my_index')

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

# Delete documents

In [55]:
import json
from tqdm import tqdm

document_ids = []
with open("dummy_data.json","r") as f:
    dummy_data = json.load(f)
    for document in tqdm(dummy_data,total=len(dummy_data)):
        response = es.index(index='my_index',body=document)
        document_ids.append(response['_id'])



100%|██████████| 3/3 [00:00<00:00, 81.62it/s]


In [51]:
document_ids

['_2aoMZwBCyfgMRbzs8Qu', 'AGaoMZwBCyfgMRbzs8U2', 'AWaoMZwBCyfgMRbzs8U9']

# Delete Operation

In [52]:
try:
    response = es.delete(index="my_index",id=document_ids[0])
    response.body
except Exception as e:
    print(e)


# Get Operation

In [57]:
try:
    response = es.get(index="my_index", id=document_ids[0])
    print(response)

except Exception as e:
    print(e)

{'_index': 'my_index', '_id': 'AmazMZwBCyfgMRbz-sX6', '_version': 1, '_seq_no': 23, '_primary_term': 1, 'found': True, '_source': {'title': 'Sample Title 1', 'text': 'This is the first sample document text.', 'created_on': '2024-09-22'}}


In [58]:
response = es.count(index='my_index')
count = response["count"]
print(f"The number of documents in the index is {count}")

The number of documents in the index is 14


In [68]:
query = {
    "range":{
        "created_on":{
            "gte":"2024-09-02",
            "lte":"2024-09-22",
            "format":"yyyy-MM-dd"
        }
    }
}

response = es.count(index='my_index',query=query)
count = response["count"]

print(f"The number of documents in the index is {count}")

The number of documents in the index is 2


## Exists API
# 1. Check index existence

In [70]:
response = es.indices.exists(index="my_index")
response.body

True

# 2. Check document existence

In [71]:
response = es.exists(index='my_index', id= document_ids[0])
response.body

True

In [76]:
from pprint import pprint
response = es.update(
    index="my_index",
    id=document_ids[0],
    script={
        "source": "ctx._source.title= params.title",
        "params":{
            "title":"New Title"
        }
    }
)

pprint(response.body)

{'_id': 'AmazMZwBCyfgMRbz-sX6',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 30,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 6,
 'result': 'updated'}


In [81]:
response = es.get(index="my_index",id=document_ids[0])
response

ObjectApiResponse({'_index': 'my_index', '_id': 'AmazMZwBCyfgMRbz-sX6', '_version': 6, '_seq_no': 30, '_primary_term': 1, 'found': True, '_source': {'title': 'New Title', 'text': 'This is the first sample document text.', 'created_on': '2024-09-22'}})

# Add a new field to the document

## To add a new field you can you script argument or doc argument

In [87]:
response = es.update(
    index="my_index",
    id=document_ids[0],
    script={
        "source": "ctx._source.new_field = 'dummy_value'"
    },
)

pprint(response.body)

{'_id': 'AmazMZwBCyfgMRbz-sX6',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 31,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 7,
 'result': 'updated'}


In [90]:
response = es.update(
    index="my_index",
    id = document_ids[0],
    doc={
         "new_value_2":"dummy_value_2"
    }
)

try:
    response = es.get(index="my_index",id=document_ids[0])
    print(response)

except Exception as e:
    print(e)



{'_index': 'my_index', '_id': 'AmazMZwBCyfgMRbz-sX6', '_version': 8, '_seq_no': 32, '_primary_term': 1, 'found': True, '_source': {'title': 'New Title', 'text': 'This is the first sample document text.', 'created_on': '2024-09-22', 'new_field': 'dummy_value', 'new_value_2': 'dummy_value_2'}}


# Remove a field

In [91]:
response = es.update(
    index='my_index',
    id = document_ids[0],
    script={
        "source" : "ctx._source.remove('new_field')"
    },
)

pprint(response)

ObjectApiResponse({'_index': 'my_index', '_id': 'AmazMZwBCyfgMRbz-sX6', '_version': 9, 'result': 'updated', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 33, '_primary_term': 1})
