In [3]:
from pprint import pprint
from elasticsearch import Elasticsearch

# Open a client against the local node. `info()` performs a real request,
# so reaching the print below means the cluster actually answered.
es = Elasticsearch(hosts='http://localhost:9200')
client_info = es.info()

print("Connected to Elastic Search")
pprint(client_info.body)


Connected to Elastic Search
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'dwZJ7aR-Rk-TNTwNsmuV-w',
 'name': '8c40224e1107',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2024-08-05T10:05:34.233336849Z',
             'build_flavor': 'default',
             'build_hash': '1a77947f34deddb41af25e6f0ddb8e830159c179',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '9.11.1',
             'minimum_index_compatibility_version': '7.0.0',
             'minimum_wire_compatibility_version': '7.17.0',
             'number': '8.15.0'}}


# Create index

1. Simplest Way

In this method, the mappings, which define the structure of documents within an index, are inferred automatically.

In [4]:
# Drop any previous copy so the cell can be re-run, then create the index
# without explicit mappings or settings.
index_name = "my_index"
es.indices.delete(index=index_name, ignore_unavailable=True)
es.indices.create(index=index_name)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

# 2. Specify the number of replicas and shards
**Shards:** Elasticsearch divides the data in an index into multiple shards. Each shard is a self-contained index that Elasticsearch can distribute across multiple nodes in a cluster.

**Replicas:** For fault tolerance and high availability, an index can have replica shards, which are copies of the primary shards.

In [5]:
# Recreate the index, this time with explicit shard and replica counts.
es.indices.delete(index='my_index', ignore_unavailable=True)

index_settings = {
    "index": {
        "number_of_shards": 3,
        "number_of_replicas": 2,
    }
}
es.indices.create(index="my_index", settings=index_settings)


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

# Insert one document

## Create a dummy index just to test inserting one document

In [6]:
# Reset `my_index` to a default-configured index for the insert examples.
target_index = 'my_index'
es.indices.delete(index=target_index, ignore_unavailable=True)
es.indices.create(index=target_index)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

In [7]:
# A minimal document: two text fields and an ISO-formatted date string.
document = dict(
    title='title',
    text='text',
    created_on='2024-09-22',
)

# Index it without an explicit id; the response is displayed as the cell output.
response = es.index(index='my_index', body=document)
response

ObjectApiResponse({'_index': 'my_index', '_id': '2FUlLJwBELqBbuH6BWly', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

In [8]:
# `result` reports what the index call did with the document.
index_result = response["result"]
print(index_result)

created


In [9]:
# `_shards` carries the total / successful / failed counters for the write.
shard_summary = response["_shards"]
print(shard_summary)

{'total': 2, 'successful': 1, 'failed': 0}


In [10]:
# Print the full response body returned by the earlier `es.index` call.
print(response)

{'_index': 'my_index', '_id': '2FUlLJwBELqBbuH6BWly', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1}


# Insert multiple documents

In [11]:
import json

# Load the sample documents. The context manager closes the file handle
# deterministically instead of leaving it open until garbage collection,
# and the explicit encoding avoids depending on the platform default.
with open("./dummy_data.json", encoding="utf-8") as data_file:
    dummy_data = json.load(data_file)

dummy_data

[{'title': 'Sample Title 1',
  'text': 'This is the first sample document text.',
  'created_on': '2024-09-22'},
 {'title': 'Sample Title 2',
  'text': 'Here is another example of a document.',
  'created_on': '2024-09-24'},
 {'title': 'Sample Title 3',
  'text': 'The content of the third document goes here.',
  'created_on': '2024-09-24'}]

In [12]:
def insert_document(document):
    """Index ``document`` into ``my_index`` and return the client's response."""
    return es.index(index='my_index', body=document)

def print_def(response):
    """Print a one-line summary of an index response.

    Reads ``_id``, ``result`` and ``_shards.total`` from ``response`` and
    writes them to stdout in a single sentence.
    """
    doc_id = response['_id']
    outcome = response['result']
    shard_total = response['_shards']['total']
    # NOTE(review): per the Elasticsearch Index API docs, `_shards.total` appears
    # to count the shard copies (primary + replicas) the write targets, not
    # pieces a document is split into -- confirm and consider rewording.
    print(f"Document Id is {doc_id} is {outcome} and is split into {shard_total} shards")

# Index each sample record in order and report the outcome of every write.
for document in dummy_data:
    print_def(insert_document(document))


Document Id is 2VUlLJwBELqBbuH6aGmy is created and is split into 2 shards
Document Id is 2lUlLJwBELqBbuH6aGm_ is created and is split into 2 shards
Document Id is 21UlLJwBELqBbuH6aGnI is created and is split into 2 shards


In [15]:
from pprint import pprint

# Inspect the field types stored for `my_index` after the inserts above.
index_mapping = es.indices.get_mapping(index='my_index')
field_properties = index_mapping["my_index"]["mappings"]["properties"]
pprint(field_properties)

{'created_on': {'type': 'date'},
 'text': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
          'type': 'text'},
 'title': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
           'type': 'text'}}


# Common Types


## Binary Types

In [None]:
# Declare `image_data` as a `binary` field before any document is indexed.
binary_mappings = {
    "properties": {
        "image_data": {"type": "binary"},  # Type Binary
    }
}

es.indices.delete(index='binary_index', ignore_unavailable=True)
es.indices.create(index='binary_index', mappings=binary_mappings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'binary_index'})

In [29]:
import base64

image_path = "field_data_types_docs.png"

# Read the raw bytes; only the read itself needs the open file handle.
with open(image_path, "rb") as image_file:
    image_bytes = image_file.read()

# Base64-encode the bytes and decode to text so the value can travel inside
# the JSON request body for the `binary` field.
image_base64 = base64.b64encode(image_bytes).decode("utf-8")

# Only the last expression of a cell is displayed, so the former
# `image_base64[:100]` preview was evaluated and thrown away; it is removed.
len(image_base64)

271328

In [28]:
# Wrap the Base64 string in a document and index it into `binary_index`.
document = dict(image_data=image_base64)
response = es.index(index="binary_index", body=document)
response

ObjectApiResponse({'_index': 'binary_index', '_id': '3VVvLJwBELqBbuH66mkF', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1})

In [31]:
# Field name -> Elasticsearch type for the scalar-type example.
field_types = {
    "book_reference": "keyword",
    "price": "float",
    "publish_date": "date",
    "is_available": "boolean",
}

es.indices.delete(index='other_common_data_types_index', ignore_unavailable=True)

# Expand the compact table into the `{"field": {"type": ...}}` mapping shape.
es.indices.create(
    index='other_common_data_types_index',
    mappings={
        "properties": {
            field: {"type": es_type} for field, es_type in field_types.items()
        }
    },
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'other_common_data_types_index'})

In [33]:
# One value per mapped field: keyword, float, date and boolean.
document = dict(
    book_reference="978-1617294433",
    price=44.99,
    publish_date="2021-06-09",
    is_available=True,
)

response = es.index(index='other_common_data_types_index', body=document)
response

ObjectApiResponse({'_index': 'other_common_data_types_index', '_id': '31WDLJwBELqBbuH6qmnF', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1})

In [35]:
# A document whose `author` field is itself a JSON object.
# NOTE(review): no create call for `object_index` is visible in this notebook,
# so this presumably relies on the index being auto-created -- confirm.
author_fields = {"first_name": "Imad", "last_name": "Saddik"}
document = {"author": author_fields}

response = es.index(index="object_index", body=document)
response

ObjectApiResponse({'_index': 'object_index', '_id': '4FV_LZwBELqBbuH6LGlk', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

# Nested Object

In [38]:
# Map the whole `author` object as a single `flattened` field.
flattened_mappings = {
    "properties": {
        "author": {"type": "flattened"},
    }
}

es.indices.delete(index='flattened_object_index', ignore_unavailable=True)
es.indices.create(index='flattened_object_index', mappings=flattened_mappings)


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'flattened_object_index'})

In [39]:

# Same author object as before, now sent to the index with the `flattened` mapping.
document = {
    "author": dict(first_name="Imad", last_name="Saddik"),
}

response = es.index(index="flattened_object_index", body=document)
response

ObjectApiResponse({'_index': 'flattened_object_index', '_id': '4VWWLZwBELqBbuH6MmnN', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

In [40]:
# Map `author` as `nested`.
nested_mappings = {
    "properties": {
        "author": {"type": "nested"},
    }
}

es.indices.delete(index='nested_object_index', ignore_unavailable=True)
es.indices.create(index='nested_object_index', mappings=nested_mappings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'nested_object_index'})

In [44]:
# Two author objects to store in the `nested` field.
authors = [
    {
        "first_name": "Imad",
        "last_name": "Saddik",
    },
    {
        "first_name": "cs",
        "last_name": "sc",
    },
]

# The mapping of `nested_object_index` declares the nested field as `author`.
# The list was previously sent under the key `user`, which is not in that
# mapping, so the `nested` type was never exercised. Use the mapped field name.
document = {"author": authors}

response = es.index(index="nested_object_index", body=document)
response

ObjectApiResponse({'_index': 'nested_object_index', '_id': '5FWqLZwBELqBbuH6uWld', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

# Text

In [47]:
# The delete must target the same name that is created below. It previously
# deleted 'text_index' (underscore) while creating "text-index" (hyphen), so
# re-running the cell failed because "text-index" already existed.
es.indices.delete(index="text-index", ignore_unavailable=True)
es.indices.create(
    index="text-index",
    mappings={
        "properties": {
            "email_body": {
                "type": "text"
            }
        }
    }
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'text-index'})

In [48]:
# Index a short free-text body into the `text`-mapped `email_body` field.
document = dict(email_body="Hello this is a test email")
response = es.index(index="text-index", body=document)
response

ObjectApiResponse({'_index': 'text-index', '_id': '5VXELZwBELqBbuH6bWm3', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

In [None]:
# document_1 = {
#     "suggest":{
#         "input":["Mars","Planet"]
#     }
# }

# document_2 = {
#     "suggest":{
#         "input":["Andromeda","Galaxy"]
#     }
# }

# es.index(index=)