In [1]:
from opensearchpy import OpenSearch
import json
host = 'localhost'
port = 9200
auth = ('admin', 'admin') # For testing only. Don't store credentials in code.

client = OpenSearch(
    hosts = [{'host': host, 'port': port}],
    http_compress = True,
    http_auth = auth,
    use_ssl = True,
    verify_certs = False,
    ssl_assert_hostname = False,
    ssl_show_warn = False,
)

print(client.cat.health())
print(client.cat.indices())

print(client.cat.count("search_fun_test", params={"v":"true"}))

1655773644 01:07:24 docker-cluster yellow 1 1 true 5 5 0 0 2 0 - 71.4%

green  open .kibana_92668751_admin_1     Rkf9tUKPQs-ZmF1m4jdVXg 1 0  1 0     5kb     5kb
yellow open search_fun_test              Dh7VpHvqRyWIZefdp2jFMA 1 1  1 0   5.4kb   5.4kb
green  open .opendistro_security         8pHwERl5T9W39_Qs3_r5bw 1 0  9 0  59.9kb  59.9kb
green  open .kibana_1                    Nj21asyETpCxUSKopgCwXg 1 0  0 0    208b    208b
yellow open security-auditlog-2022.06.21 X9zP8UIrSTC2yrcvfV3uMA 1 1 53 0 160.2kb 160.2kb

epoch      timestamp count
1655773644 01:07:24  1



In [2]:
## Compact and aligned text (CAT) API - https://opensearch.org/docs/latest/opensearch/rest-api/cat/index/
print(client.cat.aliases())

.kibana                .kibana_1                - - - -
.kibana_92668751_admin .kibana_92668751_admin_1 - - - -



In [3]:
print(client.cat.shards())

.opendistro_security         0 p STARTED     9  59.9kb 172.18.0.2 93d0236d0594
.kibana_92668751_admin_1     0 p STARTED     1     5kb 172.18.0.2 93d0236d0594
.kibana_1                    0 p STARTED     0    208b 172.18.0.2 93d0236d0594
security-auditlog-2022.06.21 0 p STARTED    54 174.6kb 172.18.0.2 93d0236d0594
security-auditlog-2022.06.21 0 r UNASSIGNED                       
search_fun_test              0 p STARTED     1   5.4kb 172.18.0.2 93d0236d0594
search_fun_test              0 r UNASSIGNED                       



In [4]:
print(client.cat.nodes())

172.18.0.2 4 98 0 0.00 0.02 0.06 dimr * 93d0236d0594



In [5]:
## https://elasticsearch-py.readthedocs.io/en/v7.10.1/api.html?highlight=cat.#elasticsearch.client.CatClient.help

In [6]:
# Create an Index
# Create an index with non-default settings.
index_name = 'search_fun_revisited'
index_body = {
  'settings': {
    'index': {
      'query':{
          'default_field': "body"
      }
    }
  }
}

response = client.indices.create(index_name, body=index_body)
print('\nCreating index:')
print(response)


Creating index:
{'acknowledged': True, 'shards_acknowledged': True, 'index': 'search_fun_revisited'}


In [7]:
# Add our sample document to the index.
docs = [
    {
        "id": "doc_a",
        "title": "Fox and Hounds",
        "body": "The quick red fox jumped over the lazy brown dogs.",
        "price": "5.99",
        "in_stock": True,
        "category": "childrens"},
    {
        "id": "doc_b",
        "title": "Fox wins championship",
        "body": "Wearing all red, the Fox jumped out to a lead in the race over the Dog.",
        "price": "15.13",
        "in_stock": True,
        "category": "sports"},
    {
        "id": "doc_c",
        "title": "Lead Paint Removal",
        "body": "All lead must be removed from the brown and red paint.",
        "price": "150.21",
        "in_stock": False,
        "category": "instructional"},
    {
        "id": "doc_d",
        "title": "The Three Little Pigs Revisted",
        "price": "3.51",
        "in_stock": True,
        "body": "The big, bad wolf huffed and puffed and blew the house down. The end.",
        "category": "childrens"}
]

for doc in docs:
    doc_id = doc["id"]
    print("Indexing {}".format(doc_id))
    response = client.index(
        index=index_name,
        body=doc,
        id=doc_id,
        refresh=True
    )
    print('\n\tResponse:')
    print(response)

# Verify they are in:
print(client.cat.count(index_name, params={"v": "true"}))

Indexing doc_a

	Response:
{'_index': 'search_fun_revisited', '_type': '_doc', '_id': 'doc_a', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1}
Indexing doc_b

	Response:
{'_index': 'search_fun_revisited', '_type': '_doc', '_id': 'doc_b', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1}
Indexing doc_c

	Response:
{'_index': 'search_fun_revisited', '_type': '_doc', '_id': 'doc_c', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1}
Indexing doc_d

	Response:
{'_index': 'search_fun_revisited', '_type': '_doc', '_id': 'doc_d', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 3, '_primary_term': 1}
epoch      timestamp cou

In [8]:
## BULK Indexing
# Create an index with non-default settings.
index_name = 'search_fun_revisited'
# index_body = {
#   'settings': {
#     'index': {
#       'query':{
#           'default_field': "body"
#       }
#     }
#   }
# }

# response = client.indices.create(index_name, body=index_body)
# print('\nCreating index:')
# print(response)

docs = [
    {
        "id": "doc_a",
        "title": "Fox and Hounds",
        "body": "The quick red fox jumped over the lazy brown dogs.",
        "price": "5.99",
        "in_stock": True,
        "category": "childrens"},
    {
        "id": "doc_b",
        "title": "Fox wins championship",
        "body": "Wearing all red, the Fox jumped out to a lead in the race over the Dog.",
        "price": "15.13",
        "in_stock": True,
        "category": "sports"},
    {
        "id": "doc_c",
        "title": "Lead Paint Removal",
        "body": "All lead must be removed from the brown and red paint.",
        "price": "150.21",
        "in_stock": False,
        "category": "instructional"},
    {
        "id": "doc_d",
        "title": "The Three Little Pigs Revisted",
        "price": "3.51",
        "in_stock": True,
        "body": "The big, bad wolf huffed and puffed and blew the house down. The end.",
        "category": "childrens"}
]

for doc in docs:
    doc_id = doc["id"]
    print("Indexing {}".format(doc_id))
    response = client.index(
        index=index_name,
        body=doc,
        id=doc_id,
        refresh=True
    )
    print('\n\tResponse:')
    print(response)

# Verify they are in:
print(client.cat.count(index_name, params={"v": "true"}))

Indexing doc_a

	Response:
{'_index': 'search_fun_revisited', '_type': '_doc', '_id': 'doc_a', '_version': 2, 'result': 'updated', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 4, '_primary_term': 1}
Indexing doc_b

	Response:
{'_index': 'search_fun_revisited', '_type': '_doc', '_id': 'doc_b', '_version': 2, 'result': 'updated', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 5, '_primary_term': 1}
Indexing doc_c

	Response:
{'_index': 'search_fun_revisited', '_type': '_doc', '_id': 'doc_c', '_version': 2, 'result': 'updated', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 6, '_primary_term': 1}
Indexing doc_d

	Response:
{'_index': 'search_fun_revisited', '_type': '_doc', '_id': 'doc_d', '_version': 2, 'result': 'updated', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 7, '_primary_term': 1}
epoch      timestamp cou

In [9]:
print(client.indices.get_mapping(index_name))
## Use GET /search_fun_revisited in DevTools

{'search_fun_revisited': {'mappings': {'properties': {'body': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'category': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'id': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'in_stock': {'type': 'boolean'}, 'price': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'title': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}}}}}


In [10]:
### Analyzers
index_name = 'search_fun_revisited_custom_mappings'
index_body = {
    'settings': {
        'index': {
            'query': {
                'default_field': "body"
            }
        }
    },
    "mappings": {
        "properties": {
            "title": {"type": "text", "analyzer": "english"},
            "body": {"type": "text", "analyzer": "english"},
            "in_stock": {"type": "boolean"},
            "category": {"type": "keyword", "ignore_above": "256"},
            "price": {"type": "float"}
        }
    }
}

client.indices.create(index_name, body=index_body)



for doc in docs:
    doc_id = doc["id"]
    print("Indexing {}".format(doc_id))
    response = client.index(
        index=index_name,
        body=doc,
        id=doc_id,
        refresh=True
    )
    print('\n\tResponse:')
    print(response)

# Check results of these in DevTools
# GET search_fun_revisited/_search?q=body:dogs
# GET search_fun_revisited_custom_mappings/_search?q=body:dogs


Indexing doc_a

	Response:
{'_index': 'search_fun_revisited_custom_mappings', '_type': '_doc', '_id': 'doc_a', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1}
Indexing doc_b

	Response:
{'_index': 'search_fun_revisited_custom_mappings', '_type': '_doc', '_id': 'doc_b', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1}
Indexing doc_c

	Response:
{'_index': 'search_fun_revisited_custom_mappings', '_type': '_doc', '_id': 'doc_c', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1}
Indexing doc_d

	Response:
{'_index': 'search_fun_revisited_custom_mappings', '_type': '_doc', '_id': 'doc_d', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 1, 'failed'

In [11]:
### Querying for docs with custom mappings
q = 'dogs'
index_name = 'search_fun_revisited_custom_mappings'
query = {
  'size': 5,
  'query': {
    'multi_match': {
      'query': q,
      'fields': ['title^2', 'body']
    }
  }
}

client.search(
    body = query,
    index = index_name
)


{'took': 42,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 2, 'relation': 'eq'},
  'max_score': 0.71833557,
  'hits': [{'_index': 'search_fun_revisited_custom_mappings',
    '_type': '_doc',
    '_id': 'doc_a',
    '_score': 0.71833557,
    '_source': {'id': 'doc_a',
     'title': 'Fox and Hounds',
     'body': 'The quick red fox jumped over the lazy brown dogs.',
     'price': '5.99',
     'in_stock': True,
     'category': 'childrens'}},
   {'_index': 'search_fun_revisited_custom_mappings',
    '_type': '_doc',
    '_id': 'doc_b',
    '_score': 0.6548753,
    '_source': {'id': 'doc_b',
     'title': 'Fox wins championship',
     'body': 'Wearing all red, the Fox jumped out to a lead in the race over the Dog.',
     'price': '15.13',
     'in_stock': True,
     'category': 'sports'}}]}}

In [12]:
# Phrase Query
q = 'fox dog'
query = {
  'size': 5,
  'query': {
    'match_phrase': {
      'body': {"query": q}
    }
  }
}

client.search(
    body = query,
    index = index_name
)

# Notice that we didn’t return any results! This is due to phrase queries requiring that the tokens fox and dog occur next to each other.

{'took': 15,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 0, 'relation': 'eq'},
  'max_score': None,
  'hits': []}}

In [13]:
#try a phrase query with slop - This query says “find all documents where the terms “fox” and “dog” occur within 10 positions of each other.”
q = 'fox dog'
query = {
  'size': 5,
  'query': {
    'match_phrase': {
      'body': {"query": q, "slop":10}
    }
  }
}

client.search(
    body = query,
    index = index_name
)

{'took': 8,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 2, 'relation': 'eq'},
  'max_score': 0.39418244,
  'hits': [{'_index': 'search_fun_revisited_custom_mappings',
    '_type': '_doc',
    '_id': 'doc_a',
    '_score': 0.39418244,
    '_source': {'id': 'doc_a',
     'title': 'Fox and Hounds',
     'body': 'The quick red fox jumped over the lazy brown dogs.',
     'price': '5.99',
     'in_stock': True,
     'category': 'childrens'}},
   {'_index': 'search_fun_revisited_custom_mappings',
    '_type': '_doc',
    '_id': 'doc_b',
    '_score': 0.19532394,
    '_source': {'id': 'doc_b',
     'title': 'Fox wins championship',
     'body': 'Wearing all red, the Fox jumped out to a lead in the race over the Dog.',
     'price': '15.13',
     'in_stock': True,
     'category': 'sports'}}]}}

In [14]:
## Filter and Query
index_name = 'search_fun_revisited_custom_mappings'
query = {
    'size': 5,
    'query': {
        "function_score": {
            "query": {
                "bool": {
                    "must": [
                        {"match_all": {}}
                    ],
                    "filter": [
                        {"term": {"category": "childrens"}}
                    ]
                }
            },
            "field_value_factor": {
                "field": "price",
                "missing": 1
            }
        }
    }
}

client.search(
    body=query,
    index=index_name
)


{'took': 29,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 2, 'relation': 'eq'},
  'max_score': 5.99,
  'hits': [{'_index': 'search_fun_revisited_custom_mappings',
    '_type': '_doc',
    '_id': 'doc_a',
    '_score': 5.99,
    '_source': {'id': 'doc_a',
     'title': 'Fox and Hounds',
     'body': 'The quick red fox jumped over the lazy brown dogs.',
     'price': '5.99',
     'in_stock': True,
     'category': 'childrens'}},
   {'_index': 'search_fun_revisited_custom_mappings',
    '_type': '_doc',
    '_id': 'doc_d',
    '_score': 3.51,
    '_source': {'id': 'doc_d',
     'title': 'The Three Little Pigs Revisted',
     'price': '3.51',
     'in_stock': True,
     'body': 'The big, bad wolf huffed and puffed and blew the house down. The end.',
     'category': 'childrens'}}]}}

In [15]:
# Aggregations
query = {
    'size': 0,
    'query': {
        "match_all": {}
    },
    'aggs': {
        "category": {
            "terms": {
                "field": "category",
                "size": 10,
                "missing": "N/A",
                "min_doc_count": 0
            }
        }
    }
}

response = client.search(
    body=query,
    index=index_name
)
print('\nSearch results:')
print(json.dumps(response, indent=4))


Search results:
{
    "took": 20,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 4,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "category": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {
                    "key": "childrens",
                    "doc_count": 2
                },
                {
                    "key": "instructional",
                    "doc_count": 1
                },
                {
                    "key": "sports",
                    "doc_count": 1
                },
                {
                    "key": "N/A",
                    "doc_count": 0
                }
            ]
        }
    }
}


In [17]:
# Aggregate by price
query = {
    'size': 0,
    'query': {
        "match_all": {}
    },
    'aggs': {
        "price": {
            "terms": {
                "field": "price",
                "size": 10,
                "min_doc_count": 0
            }
        }
    }
}

response = client.search(
    body=query,
    index=index_name
)
print('\nSearch results:')
print(json.dumps(response, indent=4))


Search results:
{
    "took": 25,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 4,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "price": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {
                    "key": 3.509999990463257,
                    "doc_count": 1
                },
                {
                    "key": 5.989999771118164,
                    "doc_count": 1
                },
                {
                    "key": 15.130000114440918,
                    "doc_count": 1
                },
                {
                    "key": 150.2100067138672,
                    "doc_count": 1
                }
            ]
        }
    }
}


In [19]:
# Aggregate by price using range
query = {
    'size': 0,
    'query': {
        "match_all": {}
    },
    'aggs': {
        "price": {
            "range": {
                "field": "price",
                "ranges": [
                    {
                        "to": 5
                    },
                    {
                        "from": 5,
                        "to": 20
                    },
                    {
                        "from": 20,
                    }
                ]
            }
        }
    }
}

response = client.search(
body = query,
index = index_name
)
print('\nSearch results:')
print(json.dumps(response, indent=4))


Search results:
{
    "took": 2,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 4,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "price": {
            "buckets": [
                {
                    "key": "*-5.0",
                    "to": 5.0,
                    "doc_count": 1
                },
                {
                    "key": "5.0-20.0",
                    "from": 5.0,
                    "to": 20.0,
                    "doc_count": 2
                },
                {
                    "key": "20.0-*",
                    "from": 20.0,
                    "doc_count": 1
                }
            ]
        }
    }
}
