In [1]:
from opensearchpy import OpenSearch
host = 'localhost'
port = 9200
auth = ('admin', 'admin')  # For testing only. Don't store credentials in code.

# Create the client with SSL/TLS enabled, but hostname and certificate verification disabled.
client = OpenSearch(
    hosts=[{'host': host, 'port': port}],
    http_compress=True,  # enables gzip compression for request bodies
    http_auth=auth,
    # client_cert = client_cert_path,
    # client_key = client_key_path,
    use_ssl=True,
    verify_certs=False,
    ssl_assert_hostname=False,
    ssl_show_warn=False,
)

In [2]:
# Do a few checks before we start indexing:
print(client.cat.health())
print(client.cat.indices())

1695520014 01:46:54 docker-cluster yellow 1 1 true 11 11 0 0 6 0 - 64.7%

yellow open .plugins-ml-config                        gpaVzMC9Rm-ixr8sC2AMOA 1 1     1 0   3.9kb   3.9kb
green  open .opensearch-observability                 Bfrp1mOqQYanRKHFWrbByA 1 0     0 0    208b    208b
green  open .kibana_92668751_admin_1                  bruHV_IvRymvc8IduVDd_Q 1 0     1 0   5.1kb   5.1kb
yellow open security-auditlog-2023.09.24              BxumpqiRREiQ_i137LxLWg 1 1   232 0 360.8kb 360.8kb
green  open opensearch_dashboards_sample_data_flights 3ipTpd0SSQ-xhjnIQiaIdQ 1 0 13059 0   5.8mb   5.8mb
yellow open search_fun_test                           aPRLpy-eRzG185DHcTQLhg 1 1     4 0   6.9kb   6.9kb
green  open .kibana_1                                 fVVE40BkSEqQvTlA5A1Tng 1 0    61 0    37kb    37kb
green  open .opendistro_security                      88369CGsQ3CShUM32lIe4w 1 0    10 0  75.6kb  75.6kb
yellow open search_fun_revisited                      3qiiDzwgT1GnwjPa-FLV3A 1 1     4

In [3]:
# If you still have your documents from the Dev Tools test, we should be able to check them here:
try:
    print(client.cat.count("search_fun_test", params={"v": "true"}))
except:
    print("search_fun_test doesn't exist, that's OK")

epoch      timestamp count
1695520114 01:48:34  4



In [11]:
import json
def print_json(x):
    print(json.dumps(x, indent=2))

### Create an index with non-default settings.

In [None]:
index_name = 'search_fun_revisited'
index_body = {
  'settings': {
    'index': {
      'query':{
          'default_field': "body"
      }
    }
  }
}

response = client.indices.create(index_name, body=index_body)
print('\nCreating index:')
print_json(response)


### Add our sample document to the index.

In [6]:
docs = [
    {
        "id": "doc_a",
        "title": "Fox and Hounds",
        "body": "The quick red fox jumped over the lazy brown dogs.",
        "price": "5.99",
        "in_stock": True,
        "category": "childrens"},
    {
        "id": "doc_b",
        "title": "Fox wins championship",
        "body": "Wearing all red, the Fox jumped out to a lead in the race over the Dog.",
        "price": "15.13",
        "in_stock": True,
        "category": "sports"},
    {
        "id": "doc_c",
        "title": "Lead Paint Removal",
        "body": "All lead must be removed from the brown and red paint.",
        "price": "150.21",
        "in_stock": False,
        "category": "instructional"},
    {
        "id": "doc_d",
        "title": "The Three Little Pigs Revisted",
        "price": "3.51",
        "in_stock": True,
        "body": "The big, bad wolf huffed and puffed and blew the house down. The end.",
        "category": "childrens"}
]
import pandas as pd
pd.DataFrame(docs)

Unnamed: 0,id,title,body,price,in_stock,category
0,doc_a,Fox and Hounds,The quick red fox jumped over the lazy brown d...,5.99,True,childrens
1,doc_b,Fox wins championship,"Wearing all red, the Fox jumped out to a lead ...",15.13,True,sports
2,doc_c,Lead Paint Removal,All lead must be removed from the brown and re...,150.21,False,instructional
3,doc_d,The Three Little Pigs Revisted,"The big, bad wolf huffed and puffed and blew t...",3.51,True,childrens


In [None]:
for doc in docs:
    doc_id = doc["id"]
    print("Indexing {}".format(doc_id))
    response = client.index(
        index=index_name,
        body=doc,
        id=doc_id,
        refresh=True
    )
    print('\n\tResponse:')
    print_json(response)

In [4]:
index_name = 'search_fun_bulk'
index_body = {
    'settings': {
        'index': {
            'query': {
                'default_field': "body"
            }
        }
    }
}

client.indices.create(index_name, body=index_body)

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'search_fun_bulk'}

In [7]:
docs_bulk = docs
for doc in docs_bulk:
    doc["_index"] = index_name
pd.DataFrame(docs_bulk)

Unnamed: 0,id,title,body,price,in_stock,category,_index
0,doc_a,Fox and Hounds,The quick red fox jumped over the lazy brown d...,5.99,True,childrens,search_fun_bulk
1,doc_b,Fox wins championship,"Wearing all red, the Fox jumped out to a lead ...",15.13,True,sports,search_fun_bulk
2,doc_c,Lead Paint Removal,All lead must be removed from the brown and re...,150.21,False,instructional,search_fun_bulk
3,doc_d,The Three Little Pigs Revisted,"The big, bad wolf huffed and puffed and blew t...",3.51,True,childrens,search_fun_bulk


In [8]:
from opensearchpy.helpers import bulk
bulk(client, docs)

print(client.cat.count(index_name, params={"v": "true"}))

epoch      timestamp count
1695520274 01:51:14  0



In [12]:
print_json(client.indices.get_mapping(index_name))

{
  "search_fun_bulk": {
    "mappings": {
      "properties": {
        "body": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "category": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "id": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "in_stock": {
          "type": "boolean"
        },
        "price": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "title": {
          "type": "text",
          "fields": {
            "keyword": {