# Update Documents

In [1]:
import os
from getpass import getpass
from pprint import pprint

from elasticsearch import Elasticsearch

# Connection settings are read from the environment so that no secret is stored
# in the notebook. When ES_PASSWORD is unset, the password is prompted for.
ES_URL = os.environ.get("ES_URL", "https://localhost:9200")
ES_USERNAME = os.environ.get("ES_USERNAME", "elastic")
ES_PASSWORD = os.environ.get("ES_PASSWORD") or getpass("Elasticsearch password: ")

es = Elasticsearch(
    ES_URL,
    basic_auth=(ES_USERNAME, ES_PASSWORD),
    # NOTE(review): TLS certificate verification is disabled. Presumably the
    # local cluster uses a self-signed certificate; do not keep this setting
    # for anything other than a local development cluster.
    verify_certs=False,
)

# Calling info() doubles as a connectivity check: it raises if the cluster
# cannot be reached or the credentials are rejected.
client_info = es.info()
print("Connected to Elasticsearch!")
pprint(client_info.body)

Connected to Elasticsearch!
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'mIJwhjTmStW54eKFEwQnMA',
 'name': 'f12c85f397e4',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2026-01-29T10:05:46.708397977Z',
             'build_flavor': 'default',
             'build_hash': '17b451d8979a29e31935fe1eb901310350b30e62',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '10.3.2',
             'minimum_index_compatibility_version': '8.0.0',
             'minimum_wire_compatibility_version': '8.19.0',
             'number': '9.3.0'}}


  _transport = transport_class(


# Index Documents

In [2]:
# Start from an empty index: drop "my_index" if it is there (a missing index is
# ignored), then create it again. The create response is the cell's output.
index_name = "my_index"
es.indices.delete(index=index_name, ignore_unavailable=True)
es.indices.create(index=index_name)



ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

In [6]:
import json
from tqdm import tqdm

# Load the sample documents. The context manager closes the file handle as soon
# as the JSON has been parsed.
with open("./data/dummy_data2.json", encoding="utf-8") as data_file:
    dummy_data = json.load(data_file)

# Index every document and remember the ID Elasticsearch assigns to it.
# NOTE: no explicit id is passed, so each call creates a new document. Re-running
# this cell without re-running the index reset cell indexes the data again.
document_ids = []
for document in tqdm(dummy_data):
    # `document=` is the current keyword for the payload (`body=` is deprecated).
    response = es.index(index='my_index', document=document)
    document_ids.append(response['_id'])

  0%|          | 0/3 [00:00<?, ?it/s]

100%|██████████| 3/3 [00:00<00:00,  7.41it/s]


In [7]:
# Display the IDs collected while indexing; the cells below update document_ids[0].
document_ids

['_u1iZpwB8rl3oy5msZez', '_-1iZpwB8rl3oy5ms5cG', 'AO1iZpwB8rl3oy5ms5hN']

# Update API 

In [8]:
# Case 1: the target document is already in the index.
# Overwrite its existing `title` field using a script that takes the new
# value from `params`.

from pprint import pprint

title_script = {
    "source": "ctx._source.title = params.title",
    "params": {"title": "New Title"},
}
response = es.update(index="my_index", id=document_ids[0], script=title_script)
pprint(response.body)



{'_id': '_u1iZpwB8rl3oy5msZez',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 9,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 2,
 'result': 'updated'}


In [11]:
# Fetch the first document again to confirm that its title was changed.
response = es.get(id=document_ids[0], index="my_index")
response.body



{'_index': 'my_index',
 '_id': '_u1iZpwB8rl3oy5msZez',
 '_version': 2,
 '_seq_no': 9,
 '_primary_term': 1,
 'found': True,
 '_source': {'title': 'New Title',
  'text': 'This is the first sample document text.',
  'created_on': '2024-09-22'}}

# Add a new field
To add a new field, you can use either the `script` argument or the `doc` argument of `es.update`.


In [13]:
# Method 1: add the field with a script.
# The value is supplied through `params` instead of being embedded in the script
# source, matching the title-update cell above; Elasticsearch can then reuse the
# compiled script when only the value changes.
response = es.update(
    index='my_index',
    id=document_ids[0],
    script={
        "source": "ctx._source.new_field = params.new_field",
        "params": {"new_field": "dummy_value"},
    },
)
pprint(response.body)



{'_id': '_u1iZpwB8rl3oy5msZez',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 10,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 3,
 'result': 'updated'}


In [15]:
# Read the document back; `new_field` should now be part of its source.
first_doc_id = document_ids[0]
response = es.get(index="my_index", id=first_doc_id)
pprint(response.body)

{'_id': '_u1iZpwB8rl3oy5msZez',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 10,
 '_source': {'created_on': '2024-09-22',
             'new_field': 'dummy_value',
             'text': 'This is the first sample document text.',
             'title': 'New Title'},
 '_version': 3,
 'found': True}




In [16]:
# Method 2: pass a partial document via `doc`; its fields are merged into the
# stored document.
partial_doc = {"new_value_2": "dummy_value_2"}
response = es.update(index="my_index", id=document_ids[0], doc=partial_doc)
pprint(response.body)



{'_id': '_u1iZpwB8rl3oy5msZez',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 11,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 4,
 'result': 'updated'}


In [17]:
# Read the document back; `new_value_2` should now be part of its source.
response = es.get(id=document_ids[0], index="my_index")
pprint(response.body)

{'_id': '_u1iZpwB8rl3oy5msZez',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 11,
 '_source': {'created_on': '2024-09-22',
             'new_field': 'dummy_value',
             'new_value_2': 'dummy_value_2',
             'text': 'This is the first sample document text.',
             'title': 'New Title'},
 '_version': 4,
 'found': True}




In [18]:
# Remove a field: the script deletes `new_field` from the document's source.

removal_script = {"source": "ctx._source.remove('new_field')"}
response = es.update(index="my_index", id=document_ids[0], script=removal_script)
pprint(response.body)

{'_id': '_u1iZpwB8rl3oy5msZez',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 12,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 5,
 'result': 'updated'}




In [19]:
# Read the document back; `new_field` should no longer be in its source.
response = es.get(id=document_ids[0], index="my_index")
pprint(response.body)

{'_id': '_u1iZpwB8rl3oy5msZez',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 12,
 '_source': {'created_on': '2024-09-22',
             'new_value_2': 'dummy_value_2',
             'text': 'This is the first sample document text.',
             'title': 'New Title'},
 '_version': 5,
 'found': True}




# 2. If the document doesn't exist in the index
We use `doc_as_upsert=True` to tell Elasticsearch that, if the document does not exist, the contents of `doc` should be inserted as a new document.

In [20]:
# Update by an id ("1") that has not been indexed yet. With `doc_as_upsert=True`
# the contents of `doc` are inserted as a new document when the id is not found.
book_fields = {"book_id": 1234, "book_name": "A book"}
response = es.update(
    index="my_index",
    id="1",
    doc=book_fields,
    doc_as_upsert=True,
)



In [21]:
# Print the body of the most recent es.update response (the upsert call);
# its `result` field reports whether the document was created or updated.
pprint(response.body)

{'_id': '1',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 13,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 1,
 'result': 'created'}


In [22]:
# Counting is near-real-time: documents written just before this cell may not be
# visible until the index is refreshed. Refresh explicitly so the count is
# deterministic when the notebook is run top to bottom.
es.indices.refresh(index='my_index')
response = es.count(index='my_index')
response['count']



10

In [23]:
# Read back the upserted document (id "1") and display the raw response body.
response = es.get(id="1", index="my_index")
response.body



{'_index': 'my_index',
 '_id': '1',
 '_version': 1,
 '_seq_no': 13,
 '_primary_term': 1,
 'found': True,
 '_source': {'book_id': 1234, 'book_name': 'A book'}}

In [24]:
# Refresh first so that every write made so far is visible to the count, which
# is otherwise only near-real-time.
es.indices.refresh(index="my_index")
response = es.count(index="my_index")
response["count"]



10