## Documentation

To read more about the search API, visit the docs [here](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-your-data.html) and [here](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-search.html).

![search_api_docs](../images/search_api_docs.png)

## Connect to Elasticsearch

In [1]:
from pprint import pprint
from elasticsearch import Elasticsearch

# Address of the Elasticsearch node this notebook talks to.
HOST = "http://localhost:9200"

# Create the client and request the cluster metadata (cluster name, node
# name, version details); the later cells reuse `es` for every request.
es = Elasticsearch(HOST)
client_info = es.info()
print("Connected to Elasticsearch!")
pprint(client_info.body)

Connected tp Elasticsearch!
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'IzAz_bJfQnS_zfMDjIPmJA',
 'name': 'eb6cd056e782',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2025-01-09T14:09:01.578835424Z',
             'build_flavor': 'default',
             'build_hash': '0f88dde84795b30ca0d2c0c4796643ec5938aeb5',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '8.11.3',
             'minimum_index_compatibility_version': '6.0.0-beta1',
             'minimum_wire_compatibility_version': '6.8.0',
             'number': '7.17.27'}}


  client_info = es.info()


## Inserting documents

In [2]:
# Names of the two indices used throughout the rest of the notebook.
INDEX_1 = "index_1"
INDEX_2 = "index_2"

# Both indices share the same layout: one primary shard and no replicas.
settings = {"index": {"number_of_shards": 1, "number_of_replicas": 0}}

# Recreate each index from scratch. ignore_unavailable=True lets the delete
# succeed when the index does not exist yet (e.g. on the first run).
es.indices.delete(index=INDEX_1, ignore_unavailable=True)
es.indices.create(index=INDEX_1, settings=settings)

es.indices.delete(index=INDEX_2, ignore_unavailable=True)
# Kept as the final bare expression so the notebook displays the response.
es.indices.create(index=INDEX_2, settings=settings)

  es.indices.delete(index=INDEX_1, ignore_unavailable=True)
  es.indices.create(index=INDEX_1, settings=settings)
  es.indices.delete(index=INDEX_2, ignore_unavailable=True)
  es.indices.create(index=INDEX_2, settings=settings)


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'index_2'})

Let's index the documents sequentially in both indices.

In [3]:
import json
from tqdm import tqdm


# Load the sample documents. The context manager closes the file handle as
# soon as the JSON has been parsed instead of leaving it open.
with open("../data/dummy_data.json", encoding="utf-8") as f:
    dummy_data = json.load(f)

# Index the documents one request at a time into the first index ...
for document in tqdm(dummy_data, total=len(dummy_data)):
    response = es.index(index=INDEX_1, body=document)

# ... and then the same documents into the second index.
for document in tqdm(dummy_data, total=len(dummy_data)):
    response = es.index(index=INDEX_2, body=document)

  response = es.index(index=INDEX_1, body=document)
100%|██████████| 3/3 [00:00<00:00, 27.96it/s]
  response = es.index(index=INDEX_2, body=document)
100%|██████████| 3/3 [00:00<00:00, 32.01it/s]


## Searching

We can pass a single index name to the `index` argument to search one index at a time.

In [4]:
# match_all selects every document, so this returns whatever index_1 holds.
match_all_request = {"query": {"match_all": {}}}
response = es.search(index=INDEX_1, body=match_all_request)

# Show the raw response first, then the hit count it reports.
pprint(response.body)
n_hits = response["hits"]["total"]["value"]
print(f"Found {n_hits} documents in index_1")

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 1, 'total': 1},
 'hits': {'hits': [{'_id': 'O3hSJJUBpQvCJGK5g00k',
                    '_index': 'index_1',
                    '_score': 1.0,
                    '_source': {'created_on': '2024-09-22',
                                'text': 'This is the first sample document '
                                        'text.',
                                'title': 'Sample Title 1'},
                    '_type': '_doc'},
                   {'_id': 'PHhSJJUBpQvCJGK5g01q',
                    '_index': 'index_1',
                    '_score': 1.0,
                    '_source': {'created_on': '2024-09-24',
                                'text': 'Here is another example of a '
                                        'document.',
                                'title': 'Sample Title 2'},
                    '_type': '_doc'},
                   {'_id': 'PXhSJJUBpQvCJGK5g018',
                    '_index': 'index_1',
                 

  response = es.search(


In [5]:
# Same match_all search as before, this time against the second index only.
response = es.search(index=INDEX_2, body={"query": {"match_all": {}}})

# The hit count sits under hits -> total -> value in the response.
hit_total = response["hits"]["total"]
n_hits = hit_total["value"]
print(f"Found {n_hits} documents in index_2")

Found 3 documents in index_2


  response = es.search(


Or we can pass multiple indices at once to the `index` argument, as a comma-separated string.

In [6]:
# A comma-separated string of index names targets all of them in one request.
both_indices = ",".join((INDEX_1, INDEX_2))
response = es.search(index=both_indices, body={"query": {"match_all": {}}})

# The reported total covers the hits from both indices combined.
n_hits = response["hits"]["total"]["value"]
print(f"Found {n_hits} documents in index_1 and index_2")

Found 6 documents in index_1 and index_2


  response = es.search(


We can also use wildcards `*` to match multiple indices without listing them individually, such as `"index*"`.

In [7]:
# The * wildcard expands to every index whose name starts with "index".
wildcard_request = {"query": {"match_all": {}}}
response = es.search(index="index*", body=wildcard_request)

n_hits = response["hits"]["total"]["value"]
print(f"Found {n_hits} documents in all indexes with name starting with 'index'")

Found 6 documents in all indexes with name starting with 'index'


  response = es.search(


Or, to search all indices, we use `_all`.

In [8]:
# "_all" targets every index in the cluster, not only the two created in
# this notebook, so the count can exceed the combined total seen earlier.
response = es.search(index="_all", body={"query": {"match_all": {}}})

total_info = response["hits"]["total"]
n_hits = total_info["value"]
print(f"Found {n_hits} documents in all indexes")

Found 8 documents in all indexes


  response = es.search(
