## Documentation

To read more about the index API, visit the [docs](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html).

![index_api_docs](../images/index_api_docs.png)

## Connect to Elasticsearch

In [1]:
from pprint import pprint
from elasticsearch import Elasticsearch

# Endpoint of the Elasticsearch node that every cell in this notebook talks to.
HOST = "http://localhost:9200"

# Build the client, then call the info API as a connectivity check:
# it fails here if the node is unreachable.
es = Elasticsearch(HOST)
client_info = es.info()
print("Connected to Elasticsearch!")
pprint(client_info.body)

Connected to Elasticsearch!
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'a-_W5fVITCO39W2cUo2r2Q',
 'name': 'e3e67a410071',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2025-01-09T14:09:01.578835424Z',
             'build_flavor': 'default',
             'build_hash': '0f88dde84795b30ca0d2c0c4796643ec5938aeb5',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '8.11.3',
             'minimum_index_compatibility_version': '6.0.0-beta1',
             'minimum_wire_compatibility_version': '6.8.0',
             'number': '7.17.27'}}


  client_info = es.info()


## Insert one document

Create a dummy index to test inserting a single document.

In [3]:
INDEX = "my_index"

# Drop any previous copy of the index first (a missing index is not an
# error thanks to ignore_unavailable), then recreate it with a single
# primary shard and no replicas.
es.indices.delete(index=INDEX, ignore_unavailable=True)
es.indices.create(
    index=INDEX,
    settings={"index": {"number_of_shards": 1, "number_of_replicas": 0}},
)

  es.indices.delete(index=INDEX, ignore_unavailable=True)
  es.indices.create(index=INDEX, settings={


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

In [4]:
# A minimal document with two text fields and an ISO-formatted date.
document = {
    'title': 'title',
    'text': 'text',
    'created_on': '2024-09-22',
}
# Pass the source through the dedicated `document` keyword instead of the
# generic `body`. No `id` is given, so Elasticsearch generates one.
response = es.index(index=INDEX, document=document)
response

  response = es.index(index=INDEX, body=document)


ObjectApiResponse({'_index': 'my_index', '_type': '_doc', '_id': 'pafRIpUB6zEJPjitRKR4', '_version': 1, 'result': 'created', '_shards': {'total': 1, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

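The `response` object contains the result of the operation. If the document was inserted successfully, `result` is `created`. Each document is assigned an `_id`, and `_shards` reports on how many shard copies the write was attempted (`total`) and on how many it succeeded (`successful`) or failed (`failed`).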

In [5]:
# Outcome of the index operation reported by Elasticsearch.
print(response["result"])

created


In [6]:
# Shard statistics for this write: total / successful / failed counts.
print(response["_shards"])

{'total': 1, 'successful': 1, 'failed': 0}


In [7]:
# ID under which the document was stored.
print(response["_id"])

pafRIpUB6zEJPjitRKR4


In [8]:
# Name of the index that received the document.
print(response["_index"])

my_index


## Insert multiple documents

Repeat the same step inside a `for` loop, once per document.

In [9]:
import json

# Load the sample documents. The context manager closes the file handle as
# soon as parsing is done instead of leaving it open until garbage
# collection, and the explicit encoding avoids depending on the platform
# default.
with open("../data/dummy_data.json", encoding="utf-8") as data_file:
    dummy_data = json.load(data_file)
dummy_data

[{'title': 'Sample Title 1',
  'text': 'This is the first sample document text.',
  'created_on': '2024-09-22'},
 {'title': 'Sample Title 2',
  'text': 'Here is another example of a document.',
  'created_on': '2024-09-24'},
 {'title': 'Sample Title 3',
  'text': 'The content of the third document goes here.',
  'created_on': '2024-09-24'}]

In [11]:
def insert_document(document):
    """Index one document into ``INDEX`` and return the client's response.

    Args:
        document: The document source to store. No ID is passed along,
            so Elasticsearch assigns one.

    Returns:
        The response of ``es.index``, a mapping with entries such as
        ``_id``, ``result`` and ``_shards``.
    """
    # Use the dedicated `document` keyword rather than the generic `body`.
    return es.index(index=INDEX, document=document)

def print_info(response):
    """Print a short summary of an index response to stdout.

    Reads the ``_id``, ``result`` and ``_shards["total"]`` entries of
    ``response``; returns nothing.
    """
    summary = f"""
          Document ID: {response["_id"]} is '{response["result"]}' and is split into {response["_shards"]["total"]} shards 
          """
    print(summary)
    

# Index every sample document, one request per document, and print a
# summary of each response as it comes back.
for document in dummy_data:
    response = insert_document(document)
    print_info(response)


          Document ID: pqfZIpUB6zEJPjitF6Sc is 'created' and is split into 1 shards 
          

          Document ID: p6fZIpUB6zEJPjitF6S4 is 'created' and is split into 1 shards 
          

          Document ID: qKfZIpUB6zEJPjitF6TL is 'created' and is split into 1 shards 
          


  response = es.index(index=INDEX, body=document)


## Print mapping

In [13]:
from pprint import pprint

# Fetch the mapping of the index and show only its per-field definitions.
index_mapping = es.indices.get_mapping(index=INDEX)
inferred_properties = index_mapping[INDEX]["mappings"]["properties"]
pprint(inferred_properties)

{'created_on': {'type': 'date'},
 'text': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
          'type': 'text'},
 'title': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
           'type': 'text'}}


  index_mapping = es.indices.get_mapping(index=INDEX)


## Manual mapping

In [15]:
# Recreate the index empty, so it carries no mapping from earlier inserts.
es.indices.delete(index=INDEX, ignore_unavailable=True)
es.indices.create(index=INDEX, settings={
    "index": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    }
})

# Explicit field definitions: a date field plus two full-text fields, each
# with a `keyword` sub-field that ignores values longer than 256 characters.
mapping = {
    'properties': {
        'created_on': {'type': 'date'},
        'text': {
            'type': 'text',
            'fields': {
                'keyword': {
                    'type': 'keyword',
                    'ignore_above': 256
                }
            }
        },
        'title': {
            'type': 'text',
            'fields': {
                'keyword': {
                    'type': 'keyword',
                    'ignore_above': 256
                }
            }
        }
    }
}

# Apply the mapping through the dedicated `properties` keyword.
es.indices.put_mapping(index=INDEX, properties=mapping["properties"])

# Re-fetch before printing: any `index_mapping` obtained earlier describes
# the index that was just deleted, not the one configured above.
index_mapping = es.indices.get_mapping(index=INDEX)
pprint(index_mapping[INDEX]["mappings"]["properties"])

{'created_on': {'type': 'date'},
 'text': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
          'type': 'text'},
 'title': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
           'type': 'text'}}


  es.indices.delete(index=INDEX, ignore_unavailable=True)
  es.indices.create(index=INDEX, settings={
  es.indices.put_mapping(index=INDEX, body=mapping)



Alternative: pass the mapping directly when creating the index.

In [16]:
# Explicit field definitions: a date field plus two full-text fields, each
# with a `keyword` sub-field that ignores values longer than 256 characters.
mapping = {
    'properties': {
        'created_on': {'type': 'date'},
        'text': {
            'type': 'text',
            'fields': {
                'keyword': {
                    'type': 'keyword',
                    'ignore_above': 256
                }
            }
        },
        'title': {
            'type': 'text',
            'fields': {
                'keyword': {
                    'type': 'keyword',
                    'ignore_above': 256
                }
            }
        }
    }
}

# Recreate the index, supplying mapping and settings in a single request.
es.indices.delete(index=INDEX, ignore_unavailable=True)
es.indices.create(index=INDEX, mappings=mapping, settings={
    "index": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    }
})

# Re-fetch before printing: any `index_mapping` obtained earlier describes
# the index that was just deleted, not the one created above.
index_mapping = es.indices.get_mapping(index=INDEX)
pprint(index_mapping[INDEX]["mappings"]["properties"])

{'created_on': {'type': 'date'},
 'text': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
          'type': 'text'},
 'title': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
           'type': 'text'}}


  es.indices.delete(index=INDEX, ignore_unavailable=True)
  es.indices.create(index=INDEX, mappings=mapping, settings={
