In [None]:
from elasticsearch import Elasticsearch
import os

ELASTIC_USERNAME = "elastic"
ELASTIC_PASSWORD = os.getenv("ELASTIC_PASSWORD")

# Fail fast with an actionable message: os.getenv returns None when the
# variable is missing, and passing None as the password only surfaces later
# as an opaque authentication/connection failure.
if not ELASTIC_PASSWORD:
    raise RuntimeError(
        "ELASTIC_PASSWORD environment variable is not set; "
        "export it before running this notebook"
    )

# Client for the local Elasticsearch node, authenticated with HTTP basic auth.
es = Elasticsearch(
    "https://localhost:9200",
    basic_auth=(ELASTIC_USERNAME, ELASTIC_PASSWORD),
    verify_certs=False  # local self-signed cert
)

# Test connection
if es.ping():
    print("✅ Connected to Elasticsearch")
else:
    print("❌ Connection failed")

✅ Connected to Elasticsearch




## Creating the index

In [4]:

INDEX_NAME = "products_hybridd"


def _typed_fields(field_type, names):
    """Return a properties fragment mapping each name to ``{"type": field_type}``."""
    return {name: {"type": field_type} for name in names}


# Index schema. Fields are listed in the same order as before; each helper
# call yields fresh dicts, so no field definition is shared between keys.
mapping = {
    "mappings": {
        "properties": {
            **_typed_fields("keyword", ["product_id"]),

            # UI
            **_typed_fields("keyword", ["image_url", "product_url"]),

            # Keyword Search (BM25)
            **_typed_fields("text", ["title", "product_details"]),

            # Filters
            **_typed_fields(
                "keyword",
                ["brand", "category", "colour", "size", "competitor"],
            ),
            **_typed_fields("float", ["selling_price", "mrp", "star_rating"]),

            # Vector Search
            # NOTE(review): dims presumably matches the output size of the
            # embedding model used elsewhere in this notebook — confirm.
            "embedding": {
                "type": "dense_vector",
                "dims": 384,
                "index": True,
                "similarity": "cosine",
            },
        }
    }
}

# Create the index only when it is missing. When it already exists nothing is
# changed, so edits to `mapping` are not applied to an existing index.
index_exists = es.indices.exists(index=INDEX_NAME)
if index_exists:
    print("⚠️ Index already exists")
else:
    es.indices.create(index=INDEX_NAME, body=mapping)
    print("✅ Index created")


⚠️ Index already exists




## Creating embeddings

In [5]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model shared by indexing and querying.
model = SentenceTransformer("all-MiniLM-L6-v2")


def embed_text(text: str):
    """Encode ``text`` with the shared ``model`` and return ``.tolist()`` of the result."""
    encoded = model.encode(text)
    return encoded.tolist()


  from .autonotebook import tqdm as notebook_tqdm


In [6]:
## Light preprocessing, applied only to the text that gets embedded

import re

def preprocess(text: str) -> str:
    """Normalise ``text`` before it is embedded.

    Lower-cases the input, replaces every character that is not ``a-z``,
    ``0-9`` or whitespace with a space, collapses whitespace runs to a single
    space and trims the ends. Falsy input (``None`` or ``""``) yields ``""``.
    """
    if not text:
        return ""
    alnum_only = re.sub(r"[^a-z0-9\s]", " ", text.lower())
    return re.sub(r"\s+", " ", alnum_only).strip()


In [7]:
## Read data from PostgreSQL

import psycopg2
from psycopg2.extras import execute_values


# Connection to the local PostgreSQL instance. The password is taken from the
# PGPASSWORD environment variable when it is set, mirroring how the
# Elasticsearch password is read; the previous literal remains the default so
# existing local setups keep working. `os` is imported in the first cell.
conn = psycopg2.connect(
    host="127.0.0.1",
    port=5432,
    dbname="ecommerce_db",
    user="postgres",      # or admin
    password=os.getenv("PGPASSWORD", "postgres")
)

# Cursor reused by the query cell that follows.
cur = conn.cursor()



In [8]:
# Column order matters: the indexing loop unpacks each row positionally in
# exactly this order.
PRODUCTS_QUERY = """
    SELECT
        product_id,
        title,
        product_details,
        brand,
        category,
        colour,
        size,
        competitor,
        selling_price,
        mrp,
        star_rating,
        image_url,
        product_url
    FROM products
"""

cur.execute(PRODUCTS_QUERY)

# Load the whole result set into memory as a list of tuples.
rows = cur.fetchall()


In [9]:
# Sanity check: how many product rows the query returned.
len(rows)

100

In [10]:
import math

def safe(v):
    """Return ``v`` unchanged, or ``None`` when it is a missing value.

    ``None`` and NaN are treated as missing. NaN is recognised both as a
    ``float`` and as a ``decimal.Decimal``; psycopg2 returns NUMERIC columns
    as ``Decimal``, whose NaN the float-only check let through.

    Args:
        v: Any column value read from the database.

    Returns:
        ``None`` for ``None`` / NaN inputs, otherwise ``v`` itself.
    """
    # Local import keeps this cell self-contained without touching the
    # notebook's import lines.
    from decimal import Decimal

    if v is None:
        return None
    if isinstance(v, float) and math.isnan(v):
        return None
    if isinstance(v, Decimal) and v.is_nan():
        return None
    return v


# Index every fetched product row into Elasticsearch, one document per row,
# using product_id as the document id so re-running overwrites in place.
for row in rows:
    (
        product_id, title, product_details, brand, category, colour,
        size, competitor, selling_price, mrp, star_rating,
        image_url, product_url
    ) = row

    # Build the text to embed from populated fields only. Interpolating
    # safe(...) directly into an f-string renders missing values as the
    # literal word "None", which would then be embedded as if it were content.
    text_parts = [
        str(value)
        for value in (safe(title), safe(product_details))
        if value not in (None, "")
    ]
    text_parts += [
        f"{label} {value}"
        for label, value in (
            ("Brand", safe(brand)),
            ("Category", safe(category)),
            ("Colour", safe(colour)),
        )
        if value not in (None, "")
    ]
    embedding_text = preprocess(" ".join(text_parts))

    doc = {
        "product_id": product_id,

        "title": safe(title),
        "product_details": safe(product_details),

        "brand": safe(brand),
        "category": safe(category),
        "colour": safe(colour),
        "size": safe(size),
        "competitor": safe(competitor),

        "selling_price": safe(selling_price),
        "mrp": safe(mrp),
        "star_rating": safe(star_rating),

        "image_url": safe(image_url),
        "product_url": safe(product_url),

        "embedding": embed_text(embedding_text)
    }

    es.index(index=INDEX_NAME, id=product_id, document=doc)

print("✅ Indexing completed")




✅ Indexing completed


In [17]:
# Hybrid search query


def hybrid_search(
    query: str,
    max_price: float | None = None,
    brand: str | None = None
):
    query_vector = embed_text(query)

    filters = []
    if max_price:
        filters.append({"range": {"selling_price": {"lte": max_price}}})
    if brand:
        filters.append({"term": {"brand": brand}})

    body = {
        "size": 10,
        "query": {
            "bool": {
                "must": [
                    {
                        "multi_match": {
                            "query": query,
                            "fields": ["title", "product_details"]
                        }
                    }
                ],
                "filter": filters,
                "should": [
                    {
                        "knn": {
                            "field": "embedding",
                            "query_vector": query_vector,
                            "k": 10,
                            "num_candidates": 100
                        }
                    }
                ]
            }
        }
    }

    return es.search(index=INDEX_NAME, body=body)


In [28]:
# Manual test of the hybrid search
res = hybrid_search(
    query="Jeans for women",
    max_price=300
)

# Print one summary line per hit. Single quotes are used inside the
# double-quoted f-strings because reusing the same quote character inside an
# f-string is a SyntaxError before Python 3.12. The labels are also corrected
# ("Relevance", "Price").
for hit in res["hits"]["hits"]:
    source = hit["_source"]
    print(
        f"title: {source['title']}, "
        f"Relevance Score: {hit['_score']}, "
        f"Selling Price: {source['selling_price']}, "
        f"MRP: {source['mrp']}"
    )


title: Slim Fit Mid-Wash Jeans, Relevency Score: 4.8021936, Selling Prize: 0.0, MRP: 0.0
title: Heartloom Two-Tone Rayon Plain / Solid Calf Length Front Slit Kurti for Women Set Of 4, Iconic- musterd, Relevency Score: 3.5719771, Selling Prize: 290.0, MRP: 449.0
title: Rich Skin Blended Georgette Floral Print Printed Saree (SRIDEVI) Set Of 4, Relevency Score: 3.4113917, Selling Prize: 192.0, MRP: 499.0
title: Goo On Cotton Round Neck Printed T-Shirt for Men, Relevency Score: 2.8270812, Selling Prize: 165.0, MRP: 999.0
title: Mark 484, Relevency Score: 2.7547934, Selling Prize: 223.0, MRP: 349.0
title: Mega Steps 163, Relevency Score: 2.7071767, Selling Prize: 215.0, MRP: 499.0
title: Zulements 100% Bio Washed Cotton Round Neck Message / Quote / Text Print T-Shirt for Men, Relevency Score: 2.3897266, Selling Prize: 0.0, MRP: 0.0
title: Dark Thread Heavy Laffer Twill Casual Wear Big Checks Shirt for Men Set Of 12, 6205, Relevency Score: 2.3272262, Selling Prize: 0.0, MRP: 0.0
title: HRG 1



In [13]:
# Display the stored document (`_source`) of the first hit in `res`.
res['hits']['hits'][0]["_source"]

{'product_id': '000e8541-cb5f-41b1-89ce-f8ac5944ecf3',
 'title': 'Twill Checks Casual Shirts',
 'product_details': '{"Fit":"Regular Fit","With Box":"No","Product Name":"Casual Shirts","Sleeve":"Full-length"}',
 'brand': 'NEVERDOWN',
 'category': 'Shirts',
 'colour': 'None',
 'size': None,
 'competitor': 'Ajio_b',
 'selling_price': 341.0,
 'mrp': 920.0,
 'star_rating': 0.0,
 'image_url': '~^https://assets.ajio.com/b2b/medias/sys_master/root/h2a/h9b/62005776973854/f5286ee8-1c0d-450b-8f8f-e61f9775960c-1080Wx1353H.jpg~^https://assets.ajio.com/b2b/medias/sys_master/root/h24/hd8/62005776580638/f5286ee8-1c0d-450b-8f8f-e61f9775960c-777Wx973H.jpg~^https://assets.ajio.com/b2b/medias/sys_master/root/h24/h57/62005775859742/f5286ee8-1c0d-450b-8f8f-e61f9775960c-239Wx300H.jpg~^https://assets.ajio.com/b2b/medias/sys_master/root/h9e/hde/62005779726366/1a7101bc-2418-42ce-b54c-1dc5f112b108-1080Wx1353H.jpg~^https://assets.ajio.com/b2b/medias/sys_master/root/h3c/h59/62005777825822/1a7101bc-2418-42ce-b54c-1