In [1]:
import pandas as pd
from nltk.tokenize import word_tokenize

In [96]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.documents import Document
from langchain.load import dumps, loads

from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI

from langchain_chroma.vectorstores import Chroma
from langchain.retrievers import BM25Retriever, EnsembleRetriever
import os




In [3]:
HF_TOKEN = "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"
gemini_api_key = "AIzaSyBdML4pNIAfajoCcMFyckHiOaZYvhct4WU"

In [4]:
ecom = pd.read_csv("sample_docs/sample-data.csv")
ecom.head()

Unnamed: 0,id,description
0,1,Active classic boxers - There's a reason why o...
1,2,Active sport boxer briefs - Skinning up Glory ...
2,3,Active sport briefs - These superbreathable no...
3,4,"Alpine guide pants - Skin in, climb ice, switc..."
4,5,"Alpine wind jkt - On high ridges, steep ice an..."


In [5]:
ecom['doc_length'] = ecom['description'].apply(lambda x: len(word_tokenize(x)))

In [6]:
ecom.head()

Unnamed: 0,id,description,doc_length
0,1,Active classic boxers - There's a reason why o...,242
1,2,Active sport boxer briefs - Skinning up Glory ...,264
2,3,Active sport briefs - These superbreathable no...,230
3,4,"Alpine guide pants - Skin in, climb ice, switc...",307
4,5,"Alpine wind jkt - On high ridges, steep ice an...",408


In [7]:
ecom['doc_length'].max()

639

In [8]:
embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=HF_TOKEN, model_name="BAAI/bge-base-en-v1.5"
)

In [15]:
len(embeddings.embed_query("How are you"))

768

In [19]:
# Getting all the SKU descriptions and taking only 200 of them for RAG 
documents = ecom['description'].tolist()
documents = documents[:200]

In [20]:
# Converting string into Langchain Document
documents = [Document(i) for i in documents]

In [21]:
# Vector store and retriever object for Dense Embedding (Semantic)
vstore = Chroma.from_documents(documents, embeddings)
vstore_retriever = vstore.as_retriever(search_kwargs={'k': 3})

In [87]:
# BM25 Retriever for Sparse Embeddings (Syntactic)
bm_retriever = BM25Retriever.from_documents(documents)
bm_retriever.k = 3

In [88]:
# Creating a Ensemble Retriever from Langchain object that will calculate RRF Scores
# Weights can be adjusted as per use case and accordingly the results will varry. 
ensemble_retriever = EnsembleRetriever(retrievers=[vstore_retriever,bm_retriever], weights=[0.7, 0.3])

In [23]:
os.environ['GOOGLE_API_KEY'] = gemini_api_key

In [28]:
# Initiate LLM
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.8,
    max_tokens=None,
    timeout=None, 
    max_retries=2
    
)

## MUlti Query

In [29]:
multi_query_template ="""
You are an helpful assistant that generates multiple alternate search query out of user's input query. These \
alternate queries will be used to make simantic search within a vector database using similariy metrics. Generate 5 \
alternate queries that can be formed to better understand user's input query given below

{user_query}

Strictly retrun only the alternate queries separated by new line 
"""
multi_query_prompt = ChatPromptTemplate.from_template(multi_query_template)

In [30]:
multi_query_chain = (
    multi_query_prompt
    | llm
    | StrOutputParser()
    | (lambda x: [i for i in x.split("\n") if x!=''])
)

In [31]:
multi_query_chain.invoke("Suggest me items to sell in my new outdoor adventure and expedition utilities shop.")

['Outdoor adventure equipment shop inventory ideas',
 'Outdoor expedition gear store product suggestions',
 'Best selling items for an outdoor adventure shop',
 'Essential equipment for outdoor expeditions',
 'Products to stock in an outdoor adventure and expedition store ',
 '']

In [79]:
def get_unique(documents):
    flattened_documents = [dumps(doc) for sublist in documents for doc in sublist]
    unique_documents = list(set(flattened_documents))
    return [loads(doc) for doc in unique_documents]

In [80]:
retrieval_chain = (
    multi_query_chain
    | vstore_retriever.map()
    | get_unique
)

In [86]:
retrieved_docs = retrieval_chain.invoke("Suggest me items to sell in my new outdoor adventure and expedition utilities shop.")

for chunk in retrieved_docs:
    print(">> ", chunk.page_content[:200], "\n")

>>  Watermaster waders - short - Blackberries, barbwire, basalt...the fact is, we ask a lot of our waders - most importantly, that they keep us dry through all sorts of abuse. The new version of our Water 

>>  Ulw hiking crew socks - The heat along the Boucher Trail can shut you down fast. These airy hikers, with their dynamic fabric blend of 75% chlorine-free merino wool/21% nylon/4% spandex, manage the mo 

>>  Sub divider - Just as tough and water resistant as the Great Divider, the Sub is a compact, soft-sided boat chest that protects, hauls and organizes vital gear. Reels, fly boxes, phone, flares or lunc 

>>  Great divider - From standing waves to blue-water wind chop, there's one universal truth: Boat decks get wet. Hence, the Great Divider. Our award-winning soft-sided gear chest efficiently organizes an 

>>  Rock guide shorts - Made for high, open expanses of granite, like Lumpy Ridge, Tuolumne and Vedauwoo, the lightweight Rock Guides combine 96% stretch-woven nylon and 4%

## Reciprocal Rank Fusion

In [90]:
template="""
<|system|>
You are an excellent AI Brand Reccommendor for a ecommerce website. As per the user query and \
the given context containg brands, you need to reccomend items as per their relevance in bullet point(s)

CONTEXT: {context}
</s>
<|user|>
{query}
</s>
<|assistant|>
"""
prompt = ChatPromptTemplate.from_template(template)


In [91]:
chain = (
    {"context": ensemble_retriever, "query": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [58]:
# Example - 1
print(chain.invoke("its summer time and children play outside a lot. I am looking for something to protect them from the heat"))

Here are some recommendations to protect children from the summer heat:

* **Baby sun bucket hat:** This hat provides 360-degree sun protection with a wide brim and is made from a durable, water-repellent fabric. 
* **Baby sunshade top:** Made from soft, stretchy polyester, this top is fast-wicking, quick-drying, and breathable, providing protection from the sun while keeping your child cool. 
* **Beach bucket:** This hat offers 360-degree sun protection with a durable, semi-rigid foam bill that floats. It's made from a soft, water-shedding fabric and has a moisture-wicking headband for added comfort.



In [95]:
# Example - 2
print(chain.invoke("Me and my friends have not been out in a while. This time around we a planning for a \
meet out in the jungle. It will be a great time to detox ourselves from the digital world and be out in the nature. \
I dont have any utilites and wears, hence need some suggestions"))

Based on your need for clothing and utilities for a jungle trip with friends, here are some recommendations from the provided context:

* **Island Hemp Skirt:** This skirt provides comfort and is made from a natural blend of hemp and organic cotton. Perfect for relaxed jungle exploration. 
* **Retro Grade Pants:** Made from a comfortable blend of organic cotton and spandex, these pants offer flexibility for hiking and exploring the jungle. 
* **S/s Island Hopper Shirt:** This lightweight shirt is perfect for keeping you cool and comfortable in the jungle heat. It features moisture-wicking properties and has oversized pockets for carrying essentials. 
* **Lw Travel Pack:** This lightweight backpack is ideal for carrying your supplies and can be easily packed away.  It features breathable shoulder straps and a waist belt for comfortable carrying. 
* **Cap 2 Zip Neck:** This zip neck provides warmth and breathability for cooler evenings or when you're in areas with higher altitudes. It's 

In [93]:
# Lokking at individual Retrievals for each Embedding type
bm_chunks =bm_retriever.get_relevant_documents("Me and my friends have not been out in a while. This time around we a planning for a \
meet out in the jungle. It will be a great time to detox ourselves from the digital world and be out in the nature. \
I dont have any utilites and wears, hence need some suggestions")
for chunk in bm_chunks:
    print(">> ", chunk.page_content[:200], "\n")

>>  Lw travel pack - In the lifelong search for a place to call home, we all do time on the road. This super-lightweight travel backpack holds everything you need for road trips, hikes or perusing the mar 

>>  Cap 2 zip neck - The Bugaboos have long approaches that make you sweaty and blustery ridges that flash-freeze your core. This zip-neck is your first step to comfort, providing maximum breathability, s 

>>  Surf brim - Whether you're paddling back out or waiting on the next set, our Surf Brim keeps your cranium cool and your eyes shaded. It is a quick-drying, secure hat with full-coverage protection. The 



In [94]:
vstore_chunks = vstore_retriever.get_relevant_documents("Me and my friends have not been out in a while. This time around we a planning for a \
meet out in the jungle. It will be a great time to detox ourselves from the digital world and be out in the nature. \
I dont have any utilites and wears, hence need some suggestions")

for chunk in vstore_chunks:
    print(">> ", chunk.page_content[:200], "\n")

>>  Island hemp skirt - The Island Hemp Skirt brings freestyle comfort to any creekside rock-skipping session. The skirt's summer-weight hemp/organic cotton (55%/45%) fabric blend has a soft, linen-like d 

>>  Retro grade pants - We used Sonnie Trotter's wish list as our guide for these climbing pants. Made from a friendly blend of organic cotton and spandex (98%/2%), they feel soft, light and cool, with ju 

>>  S/s island hopper shirt - Serene in the face of serious heat and always up for a cleansing dunk in the hotel sink, an Island Hopper is traveler's manna. Its ultralight blend of moisture-wicking 65% al 



## Reciprocal Rank Fusion + Multi Query

In [41]:
multi_query_fusion_chain = (
    multi_query_chain
    | ensemble_retriever.map()
    | get_unique
)

In [42]:
rerieved_docs = multi_query_fusion_chain.invoke("Suggest me items to sell in my new outdoor adventure and expedition utilities shop.")


In [43]:
len(rerieved_docs)

25

In [44]:
for chunk in rerieved_docs:
    print(chunk.page_content[:20], "\n")

Merino 2 t-shirt - S 

Rock guide shorts -  

Stormfront duffel 10 

Going big in b.c. po 

Flying fish t-shirt  

S/s island hopper sh 

Freewheeler - This s 

Live simply guitar t 

Watermaster waders - 

Down sweater - There 

Duck pants - long -  

Ultra shorts - Our e 

Sub divider - Just a 

Stormfront pack - Wa 

Great divider - From 

Lw endurance ankle s 

Alpine guide pants - 

Alpine guide pants - 

Three trees shirt -  

Merino 1 graphic t-s 

Lw travel pack - In  

Ulw hiking crew sock 

Live simply deer t-s 

M10 jkt - Made for t 

Lead an examined lif 



## Decomposition

In [122]:
template = """You are a helpful ai assistant who helps users to find relevant products from \
online clothing store by decomposing their query into 4 smaller-queries each of which addresses a \
specific clothing condition which then be used to query of database for retrieving relevant products. \
Carefully evaluate the query and come up with sub-queries that helps in recommending the best clothing items for the user.

Generate sub-queries related to: {question} \n
Stirctly respond with only the sub-questions separated by new line
"""
prompt_to_decompose = ChatPromptTemplate.from_template(template)

In [123]:
user_input = "I am having a baby for the first time and I \
dont know what cloths should I get for her. \
I live in which is both very cold in winters and sunny in summers and also planning to go to outdoor walks with my baby."

l_to_m_decomposition_chain = (
    prompt_to_decompose
    | llm
    | StrOutputParser()
    | (lambda x: x.strip().split("\n"))
)
sub_queries = l_to_m_decomposition_chain.invoke(user_input)
sub_queries

['What type of clothing is needed for a newborn baby?',
 'What type of clothing is suitable for both cold winters and sunny summers?',
 'What kind of clothing is suitable for outdoor walks?',
 'What are some essential baby clothing items for a newborn?']

In [138]:
template = """Here is the question you need to answer for reccomending unique clothing items from our store:

\n --- \n {question} \n --- \n

Here is any available background question + answer pairs:

\n --- \n {q_a_pairs} \n --- \n

Here is additional context relevant to the question: 

\n --- \n {context} \n --- \n

Use the above context and any background question + answer pairs to answer the question: \n {question}
"""

decomposition_prompt = ChatPromptTemplate.from_template(template)

In [132]:
from operator import itemgetter

In [139]:

def create_qa_pairs(question, answer):
    return f"Question: {question}\nAnswer: {answer}\n\n"
q_a_pairs = ""
for ques in sub_queries:
    rag_chain = (
        {'context': itemgetter("question") | vstore_retriever,
        "question": itemgetter("question"),
        "q_a_pairs": itemgetter("q_a_pairs")}
        | decomposition_prompt
        | llm
        | StrOutputParser()
    )
    answer = rag_chain.invoke({"question":ques, "q_a_pairs":q_a_pairs})
    q_a_pair = create_qa_pairs(ques, answer)
    q_a_pairs += "\n---\n" + q_a_pair

In [140]:
print(answer)

Based on the provided information, here are some essential baby clothing items for a newborn:

* **Soft, stretchy tops:** The "Baby sunshade top" is perfect for a newborn. Its lap shoulders and roll-down cuffs allow for easy dressing and accommodate growth spurts. The breathable fabric is ideal for sensitive skin. 

* **Warm and cozy layers:** The "Baby micro d-luxe cardigan" provides warmth and comfort with its soft, recycled fleece fabric. The wind flap and hood offer extra protection against the elements, making it suitable for various weather conditions. 

**Additional considerations:**

* **Onesies or Bodysuits:** These are essential for newborns as they provide full coverage and are easy to change. 
* **Sleep sacks:** These help keep babies warm and safe while sleeping, especially for newborns who are not yet able to regulate their body temperature.
* **Socks and mittens:** Newborns can lose heat through their hands and feet, so socks and mittens are important for keeping them wa

## Step-Back Evaluation

In [61]:
from langchain_core.prompts import FewShotChatMessagePromptTemplate
examples = [
    {
        "input": "it is great to have those ankle length socks for both office and casual waer",
        "output": "which socks are great?"
    },
    {
        "input": "The Rashguard prevents the skin from sun burns",
        "output": "how to protect our skin from sun burns"
    }
]

example_prompt = ChatPromptTemplate.from_messages(
    [
        ("user", "{input}"),
        ("ai", "{output}")
    ]
)

few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt = example_prompt,
    examples = examples
)

step_back_prompt = ChatPromptTemplate.from_messages(
    [
    ("system", "You are a helpul AI assistant working in ecommerce industry. Your task is to step back and paraphrase a questions \
                to a more generic alternative which is easier to answer and can loo at the bigger picture. Here are a few examples:"),
    few_shot_prompt,
    ("user", "{question}")
    ]
)

step_back_chain = (
    step_back_prompt
    | llm
    | StrOutputParser()
)


In [142]:
query = "these clothing items are great for sports and outdoor activity"
response = step_back_chain.invoke({"question": query})

In [70]:
print("Step Back Query---> ",response)

Step Back Query--->  what type of clothing is best for sports and outdoor activities? 



In [72]:
# retrieval from the step-back question
docs = vstore_retriever.get_relevant_documents(response)
for doc in docs:
    print(">> ", doc.page_content,"\n")

>>  All weather training top - Whether you're hitting your stride on Ojai's Pratt Trail or finding your chi in the yoga studio, sometimes a quick escape is all you need. This simplified and functional multisport training top stretches, breathes, and features wider straps for pack-wearing comfort and a full-coverage back for underlayer compatibility. The updated lighter fabric (88% polyester/12% spandex) is supple, durable and stretchy, the offset shoulder seams reduce chafe, and the asymmetrical side seams do the same under a waist belt or climbing harness. A revised, simple V-shaped neck helps keep you cool, while a reflective heat-transfer logo keeps you visible.<br><br><b>Details:</b><ul> <li>"Supple, durable, stretchy knit fabric that's highly breathable"</li> <li>Offset shoulder seams to reduce chafe</li> <li>Wide straps and v-neck front with full coverage back for compatibility with undergarments</li> <li>Asymmetrical side seams work well beneath harnesses and packs</li> <li>Refl

In [141]:
from langchain_core.runnables import RunnableLambda

In [148]:
step_back_response_template="""\
You are an expert reccomendor for Sports and Advanteure clothing store. Utilizing the Below \
contexts, Answer the Original question.

{original_context}
{step_back_context}

Original Qiestion: {question}
Answer:"""

sb_response_prompt = ChatPromptTemplate.from_template(step_back_response_template)

sb_response_chain=(
    {"original_context": RunnableLambda(lambda x: x['question']) | vstore_retriever,
     "step_back_context": step_back_chain | vstore_retriever, 
     "question": lambda x: x['question']
    }
    | sb_response_prompt
    | llm
    | StrOutputParser()
)

answer = sb_response_chain.invoke({"question":"these clothing items are great for sports and outdoor activity"})
print(answer)

You're absolutely right! The clothing items described are designed specifically for sports and outdoor activities, offering features like:

* **Breathability and Moisture-Wicking:** Fabrics like Capilene 2 and the blend of polyester and spandex in the Active Sport briefs and boxer briefs are designed to wick away sweat and keep you dry and comfortable during intense workouts or outdoor adventures.
* **Durability and Stretch:** The fabrics are durable enough to withstand the wear and tear of active use and have enough stretch to allow for a full range of motion.
* **Chafe Prevention:**  Features like flat-sewn seams, offset shoulder seams, and self-binding necklines are designed to reduce chafing and friction, ensuring comfort during extended periods of activity. 
* **Layering Compatibility:** The Cap 2 t-shirt and the All Weather Training Top are designed to be worn under other layers for warmth or as standalone pieces for moderate temperatures. 
* **Seamless Construction:**  The Activ

## HyDE

In [76]:
template = """
Generate a product description that best suit the below product not more that 200 words.
Return only the description 

product : {product}
product description : 
"""
hyde_prompt = ChatPromptTemplate.from_template(template)
hyde_chain = (hyde_prompt|llm|StrOutputParser())

question = "unisex hoodies"
response = hyde_chain.invoke({"product": question})
print(f"Synthetic product description for {question}:\n {response}")

Synthetic product description for unisex hoodies:
 Stay cozy and stylish with our unisex hoodies! Made from premium fleece, these hoodies offer a soft and comfortable feel, perfect for layering on chilly days or just lounging around. Featuring a classic kangaroo pocket and adjustable drawstring hood, they provide both practicality and a touch of cool. The relaxed fit ensures a comfortable wear for all body types. Choose from a variety of trendy colors and designs to express your unique style.  Our unisex hoodies are the perfect blend of comfort and fashion, making them an essential addition to any wardrobe. 



In [78]:
retrieval_chain = (hyde_chain|vstore_retriever)
relevant_items = retrieval_chain.invoke({"product": question})
for item in relevant_items:
    print(">> ", item.page_content, "\n")

>>  Mandeville hoody - A going-places hoody. The Mandeville's soft, waffle knit is a supple blend of 40% organic cotton, 35% Tencel and 20% all-recycled polyester with 5% spandex for stretch. The rib-knit trimmed hood (with drawcord) opens to a full zip; the extra-wide rib-knit cuffs have thumb loops for attitude; the hem, also in a rib knit, hits at the lower hip. With handwarmer pockets. Recyclable through the Common Threads Recycling Program.<br><br><b>Details:</b><ul> <li>Supersoft organic cotton/Tencel blend waffle-knit hoody with cotton twill drawcord in hood</li> <li>Rib knit trim on hood and hem</li> <li>Thumb loop in cuffs for warmth; front pockets</li> <li>Lower hip length</li></ul><br><br><b>Fabric: </b>6.3-oz 40% organic cotton/35% Tencel/20% all-recycled polyester/5% spandex. Recyclable through the Common Threads Recycling Program<br><br><b>Weight: </b>(323 g 11.2 oz)<br><br>Made in Thailand. 

>>  R1 hoody - Spotted round the globe from high alpine ridges to backcountry b