In [1]:
from elasticsearch import Elasticsearch

In [3]:
# Properly specify the scheme (http) in the URL
es = Elasticsearch(["http://localhost:9200"])
es.ping()

True

## Prepare the Data

In [5]:
import pandas as pd

In [12]:
df=pd.read_csv('myntra_products_catalog.csv').loc[:499]
df.head()

Unnamed: 0,ProductID,ProductName,ProductBrand,Gender,Price (INR),NumImages,Description,PrimaryColor
0,10017413,DKNY Unisex Black & Grey Printed Medium Trolle...,DKNY,Unisex,11745,7,"Black and grey printed medium trolley bag, sec...",Black
1,10016283,EthnoVogue Women Beige & Grey Made to Measure ...,EthnoVogue,Women,5810,7,Beige & Grey made to measure kurta with churid...,Beige
2,10009781,SPYKAR Women Pink Alexa Super Skinny Fit High-...,SPYKAR,Women,899,7,Pink coloured wash 5-pocket high-rise cropped ...,Pink
3,10015921,Raymond Men Blue Self-Design Single-Breasted B...,Raymond,Men,5599,5,Blue self-design bandhgala suitBlue self-desig...,Blue
4,10017833,Parx Men Brown & Off-White Slim Fit Printed Ca...,Parx,Men,759,5,"Brown and off-white printed casual shirt, has ...",White


In [15]:
df.isna().value_counts()

ProductID  ProductName  ProductBrand  Gender  Price (INR)  NumImages  Description  PrimaryColor
False      False        False         False   False        False      False        False           500
Name: count, dtype: int64

In [14]:
df.fillna('None',inplace=True)

## Convert the relevant field to vector using BERT Model

In [17]:
from sentence_transformers import SentenceTransformer

# Load https://huggingface.co/sentence-transformers/all-mpnet-base-v2
model = SentenceTransformer("all-mpnet-base-v2")


  from .autonotebook import tqdm as notebook_tqdm
Error while downloading from https://cdn-lfs.hf.co/sentence-transformers/all-mpnet-base-v2/78c0197b6159d92658e319bc1d72e4c73a9a03dd03815e70e555c5ef05615658?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model.safetensors%3B+filename%3D%22model.safetensors%22%3B&Expires=1742427695&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc0MjQyNzY5NX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby9zZW50ZW5jZS10cmFuc2Zvcm1lcnMvYWxsLW1wbmV0LWJhc2UtdjIvNzhjMDE5N2I2MTU5ZDkyNjU4ZTMxOWJjMWQ3MmU0YzczYTlhMDNkZDAzODE1ZTcwZTU1NWM1ZWYwNTYxNTY1OD9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=qmdh1iyie-1ZEGujHRcAV4F4Z73xDS6kbbpS997-XLI9CsFgHbtMHdNj6R-dmMME6OexYzlDoxXjA01ghrgKEMkKjrxs12Hjq9Gs30CwfkDnPeG2sH1IwRFhn43EDkqIOAe4dzXytRMNURlCHJtU334D7gC3AdCJ5sjY-86itixGG7lZaLIgFA1pIbT8a-xLUUxuVvoZ8E05pT8lDn3qDXrAmGBKiigsX8mh-ADj3-dUDGuji3gxhye%7Esf4vQs7gclqIdXHKViRm0l8ZU%7E%7EkVx8VeM-DBEw%7EFqVPA0lXLST1

In [18]:
df['DescriptionVector'] = df['Description'].apply(lambda x: model.encode(x))

In [19]:
df.head()

Unnamed: 0,ProductID,ProductName,ProductBrand,Gender,Price (INR),NumImages,Description,PrimaryColor,DescriptionVector
0,10017413,DKNY Unisex Black & Grey Printed Medium Trolle...,DKNY,Unisex,11745,7,"Black and grey printed medium trolley bag, sec...",Black,"[0.027645748, -0.0026341858, -0.0035884143, 0...."
1,10016283,EthnoVogue Women Beige & Grey Made to Measure ...,EthnoVogue,Women,5810,7,Beige & Grey made to measure kurta with churid...,Beige,"[-0.024660708, -0.028755415, -0.020332461, 0.0..."
2,10009781,SPYKAR Women Pink Alexa Super Skinny Fit High-...,SPYKAR,Women,899,7,Pink coloured wash 5-pocket high-rise cropped ...,Pink,"[-0.046943296, 0.08182791, 0.048335195, -0.000..."
3,10015921,Raymond Men Blue Self-Design Single-Breasted B...,Raymond,Men,5599,5,Blue self-design bandhgala suitBlue self-desig...,Blue,"[-0.015098773, -0.010285493, 0.009487324, -0.0..."
4,10017833,Parx Men Brown & Off-White Slim Fit Printed Ca...,Parx,Men,759,5,"Brown and off-white printed casual shirt, has ...",White,"[-0.017746517, 0.0062095528, 0.02181398, 0.026..."


## Create New Index in Elastic Search

In [42]:
es.ping()

True

In [43]:
indexMapping = {
    "properties": {
        "ProductID": {
            "type": "long"
        },
        "ProductName": {
            "type": "text"
        },
        "ProductBrand": {
            "type": "text"
        },
        "Gender": {
            "type": "text"
        },
        "Price (INR)": {
            "type": "long"
        },
        "NumImages": {
            "type": "long"
        },
        "Description": {
            "type": "text"
        },
        "PrimaryColor": {
            "type": "text"
        },
        "DescriptionVector": {
            "type": "dense_vector",
            "dims": 768,
            "index": True,
            "similarity": "cosine"  # Lowercase l2_norm
        }
    }
}

In [46]:

es.indices.create(index='all_products', mappings=indexMapping)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'all_products'})

## Ingest the data into index

In [47]:
record_list = df.to_dict("records")

In [48]:
record_list[0]

{'ProductID': 10017413,
 'ProductName': 'DKNY Unisex Black & Grey Printed Medium Trolley Bag',
 'ProductBrand': 'DKNY',
 'Gender': 'Unisex',
 'Price (INR)': 11745,
 'NumImages': 7,
 'Description': 'Black and grey printed medium trolley bag, secured with a TSA lockOne handle on the top and one on the side, has a trolley with a retractable handle on the top and four corner mounted inline skate wheelsOne main zip compartment, zip lining, two compression straps with click clasps, one zip compartment on the flap with three zip pocketsWarranty: 5 yearsWarranty provided by Brand Owner / Manufacturer',
 'PrimaryColor': ' Black',
 'DescriptionVector': array([ 2.76457481e-02, -2.63418583e-03, -3.58841429e-03,  5.13588004e-02,
         3.09660304e-02,  1.40506821e-02,  7.27051590e-03,  3.13871652e-02,
        -6.23787642e-02, -3.82879982e-03,  3.15214023e-02,  7.55473524e-02,
         2.12641433e-03,  4.64894399e-02,  5.07449098e-02, -1.71941835e-02,
         1.22892270e-02, -1.95682421e-02, -9.6

In [49]:
for record in record_list:
    try:
        es.index(index='all_products', document=record, id=record['ProductID'])
    except Exception as e:
        print(e)

In [50]:
# Check if Elasticsearch is responding
es.ping()

True

In [51]:
es.count(index="all_products")

ObjectApiResponse({'count': 500, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}})

In [45]:
try:
    result = es.indices.delete(index="all_products")
    print("Index deleted successfully:", result)
except Exception as e:
    print("Error deleting index:", e)

Index deleted successfully: {'acknowledged': True}


In [55]:
input = "Exotic"
vector_of_input = model.encode(input)
query={
    "field": "DescriptionVector",
    "query_vector": vector_of_input,
    "k": 3,
    "num_candidates": 500
}
res=es.knn_search(index='all_products',knn=query,source=["ProductID","ProductName","ProductBrand",
"Price (INR)","Description"])

res["hits"]["hits"]


  res=es.knn_search(index='all_products',knn=query,source=["ProductID","ProductName","ProductBrand",
  res=es.knn_search(index='all_products',knn=query,source=["ProductID","ProductName","ProductBrand",


[{'_index': 'all_products',
  '_id': '10001265',
  '_score': 0.6777725,
  '_source': {'ProductID': 10001265,
   'ProductName': 'Michael Kors Women Sexy Amber Eau de Parfum 100ml',
   'ProductBrand': 'Michael Kors',
   'Price (INR)': 7920,
   'Description': 'Michael Kors Sexy Amber Eau de ParfumFragrance Notes:Top notes - amberHeart notes - sandalwoodBase - white flowers'}},
 {'_index': 'all_products',
  '_id': '1000718',
  '_score': 0.66698647,
  '_source': {'ProductID': 1000718,
   'ProductName': 'ahilya Imperial Filigree Collection Gold-Plated Sterling Silver Drop Earrings',
   'ProductBrand': 'ahilya',
   'Price (INR)': 6120,
   'Description': 'A pair of gold-plated sterling silver drop earrings, has a stylised dangler with cut-out detail, a faceted beaded dropSecured with a fish-hook ear wire'}},
 {'_index': 'all_products',
  '_id': '1000717',
  '_score': 0.66692495,
  '_source': {'ProductID': 1000717,
   'ProductName': 'ahilya Maroon Gold-Plated Sterling Silver Hoop Earrings',
   