## Iterate over a Elasticsearch INDEX using scroll API and update registers

In this notebook we will access one index using Search - paginate the results with scroll, and update the registers with a new variable.

In [6]:
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import random

es = Elasticsearch([{'host':'localhost', 'port': 9200}])


Create the search function - in this case we will match the registers with one specific value

In [54]:

resp = es.search(index = 'youbora',  
                 #body={"query": {"match_all": {}}} ,
                 body={"query": {"match": {"avg_bitrate_count": 27}}} ,
                 scroll = '2s') # length of time to keep search context)


# keep track of pass scroll _id
old_scroll_id = resp['_scroll_id']

After that, we will iterate over all the document hits, create a new value and update the Elasticsearch index

In [55]:
doc_count = 0

# use a 'while' iterator to loop over document 'hits'
while len(resp['hits']['hits']):
    doc_count += 1
    print('Number of iteration:', doc_count)
    # make a request using the Scroll API
    resp = es.scroll( scroll_id = old_scroll_id, scroll = '2s')

    # check if there's a new scroll ID
    if old_scroll_id != resp['_scroll_id']:
        print ("NEW SCROLL ID:", resp['_scroll_id'])

            # keep track of pass scroll _id
    old_scroll_id = resp['_scroll_id']

    # print the response results
    #print ("\nresponse for index:", 'youbora')
    #print ("_scroll_id:", resp['_scroll_id'])
    

    # iterate over the document hits for each 'scroll'
    for doc in resp['hits']['hits']:
        print ("\n", doc['_id'])
        
        #print ("DOC COUNT:", doc_count)
        doc_id = doc["_id"]
        bucket = random.randint(1, 400)
        #update
        res = es.update(index='youbora',  id=doc_id, body={"doc":{"bucket":bucket}})
        
# print the total time and document count at the end
print ('response["hits"]["total"]["value"]:', resp["hits"]["total"]["value"])

Number of iteration: 1

 o375uH8BFdNqaqwSVUQ5

 hX75uH8BFdNqaqwSUD_D

 MH75uH8BFdNqaqwSUkEr

 Dn75uH8BFdNqaqwSXE0U

 en75uH8BFdNqaqwSSznd

 Sn75uH8BFdNqaqwSTTuZ

 hn75uH8BFdNqaqwSTTuZ

 Jn75uH8BFdNqaqwSTzw3

 m375uH8BFdNqaqwSPSoD

 AH75uH8BFdNqaqwSPy2U
Number of iteration: 2

 MH75uH8BFdNqaqwSPy2U

 IX75uH8BFdNqaqwSLBc8

 nn75uH8BFdNqaqwSLxlj

 4375uH8BFdNqaqwSLxlj

 MX75uH8BFdNqaqwSQi-9

 1H75uH8BFdNqaqwSQi-9

 a375uH8BFdNqaqwSRTBS

 yH75uH8BFdNqaqwSYVIT

 Xn75uH8BFdNqaqwSOCWF

 sX75uH8BFdNqaqwSOicJ
Number of iteration: 3

 mX75uH8BFdNqaqwSOyh8

 oX75uH8BFdNqaqwSHgdB

 Z375uH8BFdNqaqwSHwr3

 nX75uH8BFdNqaqwSHwr3

 9X75uH8BFdNqaqwSKBOj

 DH75uH8BFdNqaqwSKhVY

 Ln75uH8BFdNqaqwSKhVY

 E375uH8BFdNqaqwSKhZY
Number of iteration: 4
response["hits"]["total"]["value"]: 38
