In [None]:
from elasticsearch import Elasticsearch
import os

ELASTIC_USERNAME = "elastic"
ELASTIC_PASSWORD = os.getenv("ELASTIC_PASSWORD")

es = Elasticsearch(
    "https://localhost:9200",
    basic_auth=(ELASTIC_USERNAME, ELASTIC_PASSWORD),
    verify_certs=False  # local self-signed cert
)

# Test connection
if es.ping():
    print("‚úÖ Connected to Elasticsearch")
else:
    print("‚ùå Connection failed")


‚úÖ Connected to Elasticsearch


  _transport = transport_class(


In [2]:
info = es.info()
print(info["cluster_name"])
print(info["version"]["number"])


elasticsearch
9.2.4




# create index

In [3]:
index_name = "dummy_products"

mapping = {
    "mappings": {
        "properties": {
            "product_id": {"type": "keyword"},
            "name": {
                "type": "text",
                "fields": {
                    "keyword": {"type": "keyword"}
                }
            },
            "description": {"type": "text"},
            "category": {"type": "keyword"},
            "price": {"type": "float"},
            "rating": {"type": "float"},
            "in_stock": {"type": "boolean"}
        }
    }
}

if not es.indices.exists(index=index_name):
    es.indices.create(index=index_name, body=mapping)
    print("‚úÖ Index created")
else:
    print("‚ÑπÔ∏è Index already exists")




‚úÖ Index created


In [4]:
doc = {
    "product_id": "P100",
    "name": "Apple iPhone 14",
    "description": "Smartphone with A15 chip",
    "category": "electronics",
    "price": 69999,
    "rating": 4.6,
    "in_stock": True
}

es.index(index=index_name, document=doc)
print("‚úÖ Product inserted")


‚úÖ Product inserted




In [5]:
# Bulk insert (recommended for datasets)


from elasticsearch.helpers import bulk

products = [
    {
        "_index": index_name,
        "_source": {
            "product_id": "P101",
            "name": "Samsung Galaxy S23",
            "description": "Android phone with AMOLED display",
            "category": "electronics",
            "price": 74999,
            "rating": 4.5,
            "in_stock": True
        }
    },
    {
        "_index": index_name,
        "_source": {
            "product_id": "P102",
            "name": "Nike Running Shoes",
            "description": "Comfortable running shoes for men",
            "category": "fashion",
            "price": 3999,
            "rating": 4.2,
            "in_stock": False
        }
    }
]

bulk(es, products)
print("‚úÖ Bulk insert done")


‚úÖ Bulk insert done




In [6]:
#  Basic keyword search (MATCH)

query = {
    "query": {
        "match": {
            "name": "iphone"
        }
    }
}

res = es.search(index=index_name, body=query)

for hit in res["hits"]["hits"]:
    print(hit["_source"])




{'product_id': 'P100', 'name': 'Apple iPhone 14', 'description': 'Smartphone with A15 chip', 'category': 'electronics', 'price': 69999, 'rating': 4.6, 'in_stock': True}


In [7]:
res

ObjectApiResponse({'took': 333, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 1, 'relation': 'eq'}, 'max_score': 0.9808291, 'hits': [{'_index': 'dummy_products', '_id': '2fIA35sBnJip5Pfx8stz', '_score': 0.9808291, '_source': {'product_id': 'P100', 'name': 'Apple iPhone 14', 'description': 'Smartphone with A15 chip', 'category': 'electronics', 'price': 69999, 'rating': 4.6, 'in_stock': True}}]}})

In [12]:
res['hits']

{'total': {'value': 1, 'relation': 'eq'},
 'max_score': 0.9808291,
 'hits': [{'_index': 'dummy_products',
   '_id': '2fIA35sBnJip5Pfx8stz',
   '_score': 0.9808291,
   '_source': {'product_id': 'P100',
    'name': 'Apple iPhone 14',
    'description': 'Smartphone with A15 chip',
    'category': 'electronics',
    'price': 69999,
    'rating': 4.6,
    'in_stock': True}}]}

In [10]:
res['hits']['total']

{'value': 1, 'relation': 'eq'}

In [13]:
res['hits']['max_score']

0.9808291

In [16]:
res['hits']['hits'][0]['_source']

{'product_id': 'P100',
 'name': 'Apple iPhone 14',
 'description': 'Smartphone with A15 chip',
 'category': 'electronics',
 'price': 69999,
 'rating': 4.6,
 'in_stock': True}

In [18]:
# Full e-commerce style search (FILTER + SEARCH)


query = {
    "query": {
        "bool": {
            "must": [
                {"match": {"description": "phone"}}
            ],
            "filter": [
                {"term": {"category": "electronics"}},
                {"range": {"price": {"lte": 75000}}},
                {"term": {"in_stock": True}}
            ]
        }
    }
}

res = es.search(index=index_name, body=query)

for hit in res["hits"]["hits"]:
    print(hit["_source"]["name"], hit["_score"])


Samsung Galaxy S23 0.9529822




In [19]:
# Sort results (price / rating)


query = {
    "query": {"match_all": {}},
    "sort": [
        {"price": {"order": "asc"}}
    ]
}

res = es.search(index=index_name, body=query)

for hit in res["hits"]["hits"]:
    print(hit["_source"]["name"], hit["_source"]["price"])


Nike Running Shoes 3999
Apple iPhone 14 69999
Samsung Galaxy S23 74999




In [23]:
# Pagination (VERY IMPORTANT)


query = {
    "from": 0,
    "size": 2,
    "query": {"match_all": {}}
}

res = es.search(index=index_name, body=query)




In [27]:
# Delete / update documents


# es.update(
#     index=index_name,
#     id="P100",
#     doc={"rating": 4.8}
# )




In [None]:
# es.delete(index=index_name, id="P100")


# this will give error because at the time of inserting we did not provide the the id .. that's why it is using autogenerated id 

In [30]:
res = es.search(
    index=index_name,
    body={
        "query": {"match_all": {}}
    }
)

for hit in res["hits"]["hits"]:
    print("ID:", hit["_id"], "‚Üí", hit["_source"]["name"])


ID: 2fIA35sBnJip5Pfx8stz ‚Üí Apple iPhone 14
ID: 2vIC35sBnJip5Pfx58vh ‚Üí Samsung Galaxy S23
ID: 2_IC35sBnJip5Pfx58vi ‚Üí Nike Running Shoes




## hybrid ...

In [32]:
# creating hybrid index

In [33]:
from elasticsearch import Elasticsearch

es = Elasticsearch(
    "https://localhost:9200",
    basic_auth=("elastic", "Ch*nTnE+YqS+Rq9q-eq3"),
    verify_certs=False
)

index_name = "hybrid_products"

mapping = {
    "mappings": {
        "properties": {
            "product_id": {"type": "keyword"},
            "name": {"type": "text"},
            "description": {"type": "text"},
            "category": {"type": "keyword"},
            "price": {"type": "float"},
            "embedding": {
                "type": "dense_vector",
                "dims": 384,
                "index": True,
                "similarity": "cosine"
            }
        }
    }
}

if es.indices.exists(index=index_name):
    es.indices.delete(index=index_name)

es.indices.create(index=index_name, body=mapping)
print("‚úÖ Hybrid index created")


  _transport = transport_class(


‚úÖ Hybrid index created


In [34]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")


  from .autonotebook import tqdm as notebook_tqdm


In [35]:
products = [
    {
        "product_id": "P1",
        "name": "Apple iPhone 14",
        "description": "Smartphone with excellent camera and performance",
        "category": "electronics",
        "price": 69999
    },
    {
        "product_id": "P2",
        "name": "Samsung Galaxy S23",
        "description": "Android phone with AMOLED display",
        "category": "electronics",
        "price": 74999
    },
    {
        "product_id": "P3",
        "name": "Nike Running Shoes",
        "description": "Comfortable sports shoes for running and jogging",
        "category": "fashion",
        "price": 3999
    },
    {
        "product_id": "P4",
        "name": "Sony WH-1000XM5 Headphones",
        "description": "Noise cancelling wireless headphones with premium sound",
        "category": "electronics",
        "price": 29999
    },
    {
        "product_id": "P5",
        "name": "Dell XPS 13 Laptop",
        "description": "Lightweight laptop with powerful performance",
        "category": "electronics",
        "price": 119999
    },
    {
        "product_id": "P6",
        "name": "Adidas Sports T-Shirt",
        "description": "Breathable and lightweight sportswear",
        "category": "fashion",
        "price": 1499
    },
    {
        "product_id": "P7",
        "name": "Puma Track Pants",
        "description": "Comfortable track pants for workouts",
        "category": "fashion",
        "price": 2499
    },
    {
        "product_id": "P8",
        "name": "Apple Watch Series 9",
        "description": "Smartwatch with health tracking features",
        "category": "electronics",
        "price": 41999
    },
    {
        "product_id": "P9",
        "name": "Logitech MX Master 3 Mouse",
        "description": "Advanced wireless mouse for productivity",
        "category": "electronics",
        "price": 9999
    },
    {
        "product_id": "P10",
        "name": "Amazon Echo Dot",
        "description": "Smart speaker with Alexa voice assistant",
        "category": "electronics",
        "price": 4499
    },
    {
        "product_id": "P11",
        "name": "Levi's Men's Jeans",
        "description": "Classic denim jeans with modern fit",
        "category": "fashion",
        "price": 3299
    },
    {
        "product_id": "P12",
        "name": "Canon EOS 1500D DSLR",
        "description": "DSLR camera for photography beginners",
        "category": "electronics",
        "price": 36999
    },
    {
        "product_id": "P13",
        "name": "Philips Air Fryer",
        "description": "Healthy cooking appliance with rapid air technology",
        "category": "home_appliances",
        "price": 10999
    },
    {
        "product_id": "P14",
        "name": "Prestige Induction Cooktop",
        "description": "Energy efficient induction cooktop",
        "category": "home_appliances",
        "price": 3499
    },
    {
        "product_id": "P15",
        "name": "Wooden Study Table",
        "description": "Compact wooden table for home office",
        "category": "furniture",
        "price": 8999
    },
    {
        "product_id": "P16",
        "name": "Office Ergonomic Chair",
        "description": "Comfortable chair with lumbar support",
        "category": "furniture",
        "price": 12999
    },
    {
        "product_id": "P17",
        "name": "Boat Rockerz Bluetooth Speaker",
        "description": "Portable speaker with deep bass",
        "category": "electronics",
        "price": 1999
    },
    {
        "product_id": "P18",
        "name": "HP Wireless Keyboard and Mouse",
        "description": "Wireless keyboard and mouse combo",
        "category": "electronics",
        "price": 2499
    },
    {
        "product_id": "P19",
        "name": "Ray-Ban Aviator Sunglasses",
        "description": "Stylish sunglasses with UV protection",
        "category": "fashion",
        "price": 8999
    },
    {
        "product_id": "P20",
        "name": "Mi 108 cm Smart TV",
        "description": "4K Android smart TV with Dolby Vision",
        "category": "electronics",
        "price": 42999
    }
]


In [36]:
for p in products:
    text = p["name"] + " " + p["description"]
    embedding = model.encode(text).tolist()

    p["embedding"] = embedding

    es.index(
        index=index_name,
        id=p["product_id"],
        document=p
    )

print("‚úÖ Products indexed with embeddings")



‚úÖ Products indexed with embeddings




In [48]:
# test sementic only

query_text = "running shoes"
query_vector = model.encode(query_text).tolist()

vector_query = {
    "knn": {
        "field": "embedding",
        "query_vector": query_vector,
        "k": 3,
        "num_candidates": 10
    }
}

res = es.search(index=index_name, body=vector_query)

print("üîπ Semantic search results:")
for hit in res["hits"]["hits"]:
    print(hit["_source"]["name"], hit["_score"])


üîπ Semantic search results:
Nike Running Shoes 0.88111496
Puma Track Pants 0.7068163
Adidas Sports T-Shirt 0.6770781




In [None]:
# test keyword only

keyword_query = {
    "query": {
        "match": {
            "description": "running shoes"
        }
    }
}

res = es.search(index=index_name, body=keyword_query)

print("üîπ Keyword search results:")
for hit in res["hits"]["hits"]:
    print(hit["_source"]["name"], hit["_score"])


üîπ Keyword search results:
Nike Running Shoes 2.3741708




In [49]:
query_text = "running shoes"
query_vector = model.encode(query_text).tolist()


hybrid_query = {
    "query": {
        "bool": {
            "must": [
                {
                    "match": {
                        "description": "phone"
                    }
                }
            ]
        }
    },
    "knn": {
        "field": "embedding",
        "query_vector": query_vector,
        "k": 3,
        "num_candidates": 10
    }
}

res = es.search(index=index_name, body=hybrid_query)

print("üî• Hybrid search results:")
for hit in res["hits"]["hits"]:
    print(hit["_source"]["name"], hit["_score"])


üî• Hybrid search results:
Samsung Galaxy S23 2.7409952
Nike Running Shoes 0.88111496
Puma Track Pants 0.7068163
Adidas Sports T-Shirt 0.6770781




In [54]:
user_query="give me nike shoes"

user_query_vector = model.encode(user_query).tolist()


keyword_query = {
    "match": {
        "description": user_query
    }
}



hybrid_query = {
    "size": 5,
    "query": {
        "bool": {
            "must": [
                keyword_query
            ]
        }
    },
    "knn": {
        "field": "embedding",
        "query_vector": query_vector,
        "k": 2,
        "num_candidates": 20
    }
}


res = es.search(index=index_name, body=hybrid_query)

print("üî• Hybrid search results:")
for hit in res["hits"]["hits"]:
    print(hit["_source"]["name"], hit["_score"])

üî• Hybrid search results:
Nike Running Shoes 2.3741708
Apple iPhone 14 0.6862087
Samsung Galaxy S23 0.6237334


