In [2]:
import pandas as pd

df=pd.read_csv("myntra_products_catalog.csv").loc[:499]

In [3]:
df.to_csv("small_dataset.csv",index=False)

In [4]:
df.head()

Unnamed: 0,ProductID,ProductName,ProductBrand,Gender,Price (INR),NumImages,Description,PrimaryColor
0,10017413,DKNY Unisex Black & Grey Printed Medium Trolle...,DKNY,Unisex,11745,7,"Black and grey printed medium trolley bag, sec...",Black
1,10016283,EthnoVogue Women Beige & Grey Made to Measure ...,EthnoVogue,Women,5810,7,Beige & Grey made to measure kurta with churid...,Beige
2,10009781,SPYKAR Women Pink Alexa Super Skinny Fit High-...,SPYKAR,Women,899,7,Pink coloured wash 5-pocket high-rise cropped ...,Pink
3,10015921,Raymond Men Blue Self-Design Single-Breasted B...,Raymond,Men,5599,5,Blue self-design bandhgala suitBlue self-desig...,Blue
4,10017833,Parx Men Brown & Off-White Slim Fit Printed Ca...,Parx,Men,759,5,"Brown and off-white printed casual shirt, has ...",White


In [5]:
df.columns

Index(['ProductID', 'ProductName', 'ProductBrand', 'Gender', 'Price (INR)',
       'NumImages', 'Description', 'PrimaryColor'],
      dtype='object')

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   ProductID     500 non-null    int64 
 1   ProductName   500 non-null    object
 2   ProductBrand  500 non-null    object
 3   Gender        500 non-null    object
 4   Price (INR)   500 non-null    int64 
 5   NumImages     500 non-null    int64 
 6   Description   500 non-null    object
 7   PrimaryColor  468 non-null    object
dtypes: int64(3), object(5)
memory usage: 31.4+ KB


In [7]:
df.isna().value_counts()

ProductID  ProductName  ProductBrand  Gender  Price (INR)  NumImages  Description  PrimaryColor
False      False        False         False   False        False      False        False           468
                                                                                   True             32
Name: count, dtype: int64

In [8]:
df.dropna(inplace=True)

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 468 entries, 0 to 499
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   ProductID     468 non-null    int64 
 1   ProductName   468 non-null    object
 2   ProductBrand  468 non-null    object
 3   Gender        468 non-null    object
 4   Price (INR)   468 non-null    int64 
 5   NumImages     468 non-null    int64 
 6   Description   468 non-null    object
 7   PrimaryColor  468 non-null    object
dtypes: int64(3), object(5)
memory usage: 32.9+ KB


In [10]:
df["Context"] = df["ProductName"] +" :\n\n" +df["Description"]

In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 468 entries, 0 to 499
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   ProductID     468 non-null    int64 
 1   ProductName   468 non-null    object
 2   ProductBrand  468 non-null    object
 3   Gender        468 non-null    object
 4   Price (INR)   468 non-null    int64 
 5   NumImages     468 non-null    int64 
 6   Description   468 non-null    object
 7   PrimaryColor  468 non-null    object
 8   Context       468 non-null    object
dtypes: int64(3), object(6)
memory usage: 36.6+ KB


In [12]:
from langchain.schema import Document

def df_to_langchain_documents(df):
    documents = []
    
    for index, row in df.iterrows():
        page_content = row["Context"]
        
        metadata = row.drop(labels=["Context"]).to_dict()
        
        documents.append(Document(page_content=page_content, metadata=metadata))
    
    return documents


In [13]:
elastic_content=df_to_langchain_documents(df)

In [14]:
elastic_content[0].metadata

{'ProductID': 10017413,
 'ProductName': 'DKNY Unisex Black & Grey Printed Medium Trolley Bag',
 'ProductBrand': 'DKNY',
 'Gender': 'Unisex',
 'Price (INR)': 11745,
 'NumImages': 7,
 'Description': 'Black and grey printed medium trolley bag, secured with a TSA lockOne handle on the top and one on the side, has a trolley with a retractable handle on the top and four corner mounted inline skate wheelsOne main zip compartment, zip lining, two compression straps with click clasps, one zip compartment on the flap with three zip pocketsWarranty: 5 yearsWarranty provided by Brand Owner / Manufacturer',
 'PrimaryColor': ' Black'}

In [15]:
elastic_content[0].page_content

'DKNY Unisex Black & Grey Printed Medium Trolley Bag :\n\nBlack and grey printed medium trolley bag, secured with a TSA lockOne handle on the top and one on the side, has a trolley with a retractable handle on the top and four corner mounted inline skate wheelsOne main zip compartment, zip lining, two compression straps with click clasps, one zip compartment on the flap with three zip pocketsWarranty: 5 yearsWarranty provided by Brand Owner / Manufacturer'

In [16]:
import os
from elasticsearch import Elasticsearch
client=Elasticsearch(hosts=os.environ["ELASTIC_CLOUD_ID"],api_key=os.environ["ELASTIC_API_KEY"], timeout=60)

  client=Elasticsearch(hosts=os.environ["ELASTIC_CLOUD_ID"],api_key=os.environ["ELASTIC_API_KEY"], timeout=60)


In [17]:
from dotenv import load_dotenv

load_dotenv()
from langchain_elasticsearch import ElasticsearchStore
from langchain_openai import OpenAIEmbeddings


embeddings = OpenAIEmbeddings()


# vectorstore = ElasticsearchStore.from_documents(
#     elastic_content,
#     embeddings,
#     index_name="myntra_products_catalog-small[468]",
#     es_connection=client,)

In [18]:
index_name="myntra_products_catalog-small[468]"

In [19]:
vectorstore=ElasticsearchStore(index_name=index_name,embedding=embeddings,es_connection=client)

In [20]:
from typing import Any, Dict, Iterable
from langchain_elasticsearch import ElasticsearchRetriever

def vector_query(search_query: str) -> Dict:
    vector = embeddings.embed_query(search_query)  # same embeddings as for indexing
    return {
        "knn": {
            "field": "vector",
            "query_vector": vector,
            "k": 5,
            "num_candidates": 10,
        }
    }


vector_retriever = ElasticsearchRetriever.from_es_params(
    index_name=index_name,
    body_func=vector_query,
    content_field="text",
    url=os.environ["ELASTIC_CLOUD_ID"],api_key=os.environ["ELASTIC_API_KEY"],
)

vector_retriever.invoke("Show me all DKNY products available in black, priced above ₹10,000")

[Document(metadata={'_index': 'myntra_products_catalog-small[468]', '_id': '40d4aec2-dc15-408c-b0c6-a7fa87be3a87', '_score': 0.8885498, '_ignored': ['text.keyword'], '_source': {'metadata': {'ProductID': 10007835, 'ProductName': 'her by invictus Women Red Cushioned Loafers', 'ProductBrand': 'her by invictus', 'Gender': 'Women', 'Price (INR)': 1299, 'NumImages': 6, 'Description': 'A pair of round closed toe red loafers with comfort lining and foam cushioned footbed for your work-from-home needs. It has regular styling, and closed backSynthetic upperFoam Cushioned footbedTextured and patterned outsole', 'PrimaryColor': ' Red'}, 'vector': [-0.013812124729156494, -0.01635398529469967, -0.014793901704251766, -0.008109748363494873, 0.009145321324467659, 0.008096299134194851, -0.03303074836730957, -0.03598953038454056, -0.0062436992302536964, -0.016138801351189613, 0.02848498523235321, 0.00578979542478919, 0.004976130556315184, -0.0362854078412056, -0.016085006296634674, 0.02209671027958393, 

In [21]:
def bm25_query(search_query: str) -> Dict:
    return {
        "query": {
            "match": {
                "text": search_query,
            },
        },
    }


bm25_retriever = ElasticsearchRetriever.from_es_params(
    index_name=index_name,
    body_func=bm25_query,
    content_field="text",
    url=os.environ["ELASTIC_CLOUD_ID"],api_key=os.environ["ELASTIC_API_KEY"],
)

bm25_retriever.invoke("Show me Unisex products")

[Document(metadata={'_index': 'myntra_products_catalog-small[468]', '_id': 'e49e6c50-f660-4012-a60b-20ccd9a4aa2d', '_score': 5.1503816, '_ignored': ['metadata.Description.keyword', 'text.keyword'], '_source': {'metadata': {'ProductID': 1001221, 'ProductName': 'Rocia Women Black & Silver-Toned Wedges', 'ProductBrand': 'Rocia', 'Gender': 'Women', 'Price (INR)': 1512, 'NumImages': 5, 'Description': 'A pair of black and silver-toned wedgesSynthetic thong-style upper embellished with beads and stonesCushioned footbedTextured and patterned outsoleWarranty: 3 months against manufacturing defects only (not valid on products under discount or promotional offer)', 'PrimaryColor': ' Black'}, 'vector': [-0.011675831861793995, -0.01228861603885889, 0.007241387851536274, -0.03044150397181511, 0.003213818883523345, 0.01271690521389246, -0.011478160507977009, -0.02767409384250641, -0.0015764350537210703, -0.035923611372709274, 0.0024989047087728977, 0.024419093504548073, 0.0022765237372368574, -0.0213

In [22]:
def hybrid_query(search_query: str) -> Dict:
    vector = embeddings.embed_query(search_query)  # same embeddings as for indexing
    return {
        "query": {
            "match": {
                "text": search_query,
            },
        },
        "knn": {
            "field": "vector",
            "query_vector": vector,
            "k": 5,
            "num_candidates": 10,
        },
        "rank": {"rrf": {}},
    }


hybrid_retriever = ElasticsearchRetriever.from_es_params(
    index_name=index_name,
    body_func=hybrid_query,
    content_field="text",
    url=os.environ["ELASTIC_CLOUD_ID"],api_key=os.environ["ELASTIC_API_KEY"])

hybrid_retriever.invoke("Show me all DKNY products available in black, priced above ₹10,000")

[Document(metadata={'_index': 'myntra_products_catalog-small[468]', '_id': 'c244e91f-c55a-4e3e-a8bc-69b53a3b5974', '_score': 0.031544957, '_rank': 1, '_ignored': ['text.keyword'], '_source': {'metadata': {'ProductID': 10017429, 'ProductName': 'DKNY Unisex Black Solid Leather Backpack with Pouch', 'ProductBrand': 'DKNY', 'Gender': 'Unisex', 'Price (INR)': 13020, 'NumImages': 7, 'Description': 'Black backpackPadded haul loop3 and more main compartments with zip closurePadded backZip PocketPadded shoulder strap: PaddedWater-resistance: YesComes with a pouchWarranty: 5 yearsWarranty provided by brand/manufacturer', 'PrimaryColor': ' Black'}, 'vector': [0.015979686751961708, 0.010238640010356903, 0.007136623375117779, -0.04619293287396431, -0.011991378851234913, 0.007407801691442728, 0.00023872798192314804, -0.03521351516246796, -0.019498392939567566, -0.007745121140033007, 0.03688026964664459, 0.003111938014626503, -0.008862905204296112, 0.021522309631109238, -0.009742582216858864, -0.0009

In [23]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_template(
    """Answer the question based only on the context provided.

Context: {context}

Question: {question}"""
)

llm = ChatOpenAI(model="gpt-4o-mini")


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


chain = (
    {"context": hybrid_retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [24]:
chain.invoke("Show me all DKNY products available in black, priced above ₹10,000")

'Based on the context provided, the DKNY products available in black are:\n\n1. **DKNY Unisex Black Solid Leather Backpack with Pouch**\n   - Features: Black backpack, padded haul loop, multiple compartments with zip closure, padded back, zip pocket, padded shoulder strap, water-resistant, comes with a pouch, warranty: 5 years.\n\n2. **DKNY Unisex Black & Grey Printed Medium Trolley Bag**\n   - Features: Black and grey printed medium trolley bag, secured with a TSA lock, two handles, trolley with retractable handle, four corner mounted inline skate wheels, multiple compartments, warranty: 5 years.\n\n3. **DKNY Unisex Black Large Trolley Bag**\n   - Features: Black solid large trolley bag, secured with a TSA lock, two handles, trolley with retractable handle, four corner mounted inline skate wheels, multiple compartments, warranty: 5 years.\n\n4. **DKNY Unisex Black Medium Trolley Bag**\n   - Features: Black solid medium trolley bag, secured with a TSA lock, two handles, trolley with re