In [5]:
# !pip install llama-index

In [6]:
import warnings

# Ignore all warnings
warnings.filterwarnings("ignore")


In [7]:
import openai
import os

In [8]:
import pandas as pd
from llama_index.core import Document,VectorStoreIndex, ServiceContext, PromptHelper

from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings
# from llama_index.core.llms import OpenAI
from llama_index.llms.openai import OpenAI
from llama_index.core.node_parser import TokenTextSplitter

import ast



# Read the OpenAI secret from a local text file. The context manager closes the
# file handle deterministically instead of leaving it to the garbage collector.
with open("OpenAISeceretKey.txt", "r") as key_file:
    openai.api_key = key_file.read().strip()
# Also export the key so that code reading OPENAI_API_KEY from the environment can find it.
os.environ['OPENAI_API_KEY'] = openai.api_key

In [9]:
import pandas as pd
import re

In [10]:
# Read the CSV file and drop every row that has at least one missing (NaN) value.
# Note: dropna() does not remove empty strings, only missing values.

df = pd.read_csv('Data/FashionDataset/FashionDatasetv2.csv')
# .copy() makes df_cleaned an independent frame, so columns added to it in later
# cells are written unambiguously and do not trigger SettingWithCopyWarning.
df_cleaned = df.dropna(how='any').copy()
print('Data before cleaning', df.shape)
print('Data after cleaning', df_cleaned.shape)

Data before cleaning (14214, 11)
Data after cleaning (6530, 11)


### Rectifying JSON. The JSON contains details about the clothes

In [11]:
def correct_json_string(json_str):
    """
    Convert a single-quoted, dict-style string into a double-quoted JSON-style string.

    Rules applied to every single quote in ``json_str``:
    - a quote that sits between two word characters (e.g. the apostrophe in
      ``Men's``) is kept as an apostrophe;
    - a quote preceded by a backslash is left untouched;
    - every other single quote becomes a double quote.

    The substitution is done in one pass, so no temporary placeholder is needed.
    The earlier placeholder approach parked apostrophes as ``%%`` and restored
    them afterwards, which silently turned any literal ``%%`` already present in
    the text into an apostrophe.

    Parameters
    ----------
    json_str : str
        The raw attribute string.

    Returns
    -------
    str
        The string with quoting normalised as described above.
    """
    # Match a quote that is not backslash-escaped AND is not an in-word apostrophe,
    # i.e. it either lacks a word character before it or lacks one after it.
    return re.sub(r"(?<!\\)(?:(?<!\w)'|'(?!\w))", '"', json_str)

# Keep the repaired attribute strings in their own column, next to the raw ones.
df_cleaned['p_attribute_corrected_json'] = df_cleaned['p_attributes'].map(correct_json_string)

### Identifying if there are any common attributes about clothes

In [12]:
from collections import Counter
import json

# Tally how many products carry each attribute key.
key_counter = Counter(
    attribute_key
    for attribute_json in df_cleaned['p_attribute_corrected_json']
    for attribute_key in json.loads(attribute_json).keys()
)

# Tabulate the tallies, most frequent key first, and show the ten most common keys.
key_count_df = (
    pd.DataFrame(key_counter.items(), columns=['Key', 'Count'])
    .sort_values(by='Count', ascending=False)
)
key_count_df.head(10)

# for _, row in key_count_df.iterrows():
#     print(row['Key'],' ',row['Count'])

Unnamed: 0,Key,Count
33,Wash Care,6232
15,Occasion,6230
23,Sustainable,5519
2,Body or Garment Size,5384
44,Pattern,5088
39,Fabric,5019
18,Print or Pattern Type,4726
52,Closure,4200
43,Length,4153
14,Number of Pockets,4069


## Observation
- No attribute is common to all the clothes, hence we have to utilize all the attributes.

The attributes below are identified for specific usage:
- image_id 
- color
- brand
- ratingCount
- price
- products
- avg_rating
- pattern
- PrintorPatternType
- fabric

### Function to create documents directly from the dataframe created from the fashion data CSV file

In [13]:
def read_csv_to_documents(df_cleaned):
    """
    Build one ``Document`` per row of the cleaned product dataframe.

    The row's ``description`` becomes the document text. The metadata carries the
    product id (under ``image_id``), colour, brand, rating count, price, product
    type and average rating, plus the ``Pattern``, ``Print or Pattern Type`` and
    ``Fabric`` values parsed from ``p_attribute_corrected_json`` ('NA' when a key
    is absent) and the attribute JSON string itself.

    Returns a list of documents in dataframe row order.
    """
    def _row_to_document(record):
        # Parse the attribute JSON once per row; .get() supplies 'NA' for missing keys.
        raw_attributes = record['p_attribute_corrected_json']
        parsed_attributes = json.loads(raw_attributes)
        pattern_value = parsed_attributes.get("Pattern", 'NA')
        print_type_value = parsed_attributes.get("Print or Pattern Type", 'NA')
        fabric_value = parsed_attributes.get("Fabric", 'NA')
        return Document(
            text=record['description'],
            metadata={
                'image_id': record['p_id'],
                'color': record['colour'],
                'brand': record['brand'],
                'ratingCount': record['ratingCount'],
                'price': record['price'],
                'products': record['products'],
                'avg_rating': record['avg_rating'],
                'pattern': pattern_value,
                'PrintorPatternType': print_type_value,
                'fabric': fabric_value,
                'attributes': raw_attributes,
            },
        )

    return [_row_to_document(record) for _, record in df_cleaned.iterrows()]

In [14]:
# Create one Document per row of the cleaned dataframe
documents=read_csv_to_documents(df_cleaned)
# documents[0]

In [15]:
# Define the LLM (gpt-3.5-turbo, temperature 0, max_tokens=256).
# NOTE(review): `llm` is not assigned to Settings here; in the visible cells it is only passed to the ReAct agent.
llm = OpenAI(model='gpt-3.5-turbo', temperature=0, max_tokens=256)

# Initialize an OpenAIEmbedding model and register it as the global default embedding model
embed_model = OpenAIEmbedding()
Settings.embed_model = embed_model


# Create a VectorStoreIndex from the list of documents, using the embedding model above.
# NOTE(review): presumably this embeds every document on each run — consider persisting the index; confirm.
index = VectorStoreIndex.from_documents(documents,embed_model=embed_model)



## Defining Agent as Query Tools

In [16]:
from llama_index.core.agent import ReActAgent
from llama_index.core.tools import QueryEngineTool, ToolMetadata

# Query engine over the vector index, configured with similarity_top_k=20
fashionEngine = index.as_query_engine(similarity_top_k=20)

# Wrap the query engine as a tool that the agent can call
query_engine_tools = [
    QueryEngineTool(
        query_engine=fashionEngine,
        metadata=ToolMetadata(
            name="fashionEngine_Top20",
            # Adjacent string literals are concatenated as-is, so the first part
            # ends with a space to keep the two sentences apart ("store. Use").
            description="Searches information in the vectorstore with respect to user query. This agent always searches in vector store. "
            "Use a detailed plain text question as input to the tool.",
        )
    ),
]

# Initialize the ReAct agent with the tool list and the LLM defined earlier
agent = ReActAgent.from_tools(query_engine_tools, llm=llm, verbose=False)

### This creates an HTML dataframe which displays the data along with pictures
It contains the columns below:
- Score
- Brand
- ImageId
- Image
- Price
- Rating
- Product Description

In [17]:
from IPython.display import Image,display, HTML

def extractHTMLDataFrame(agentResponse):
    """
    Build a search-results dataframe from the source nodes behind an agent response.

    Every tool output in ``agentResponse.sources`` is inspected and each source
    node of its ``raw_output`` becomes one row. Tool outputs whose ``raw_output``
    exposes no ``source_nodes`` attribute (or an empty one) are skipped rather
    than raising ``AttributeError``.

    Parameters
    ----------
    agentResponse : object
        Must expose ``sources``, an iterable of tool outputs with a ``raw_output``
        attribute.

    Returns
    -------
    pandas.DataFrame
        Columns ``Score``, ``Brand``, ``ImageId``, ``Image``, ``Price``, ``Rating``
        and ``Product Description``, sorted by ``Score`` in descending order.
        ``Image`` holds an ``<img>`` tag pointing at ``Data/images/<image_id>.jpg``.
        The frame is empty (same columns) when no source node was found.
    """
    columns = {
        'Score': [],
        'Brand': [],
        'ImageId': [],
        'Image': [],
        'Price': [],
        'Rating': [],
        'Product Description': [],
    }

    for tool_output in agentResponse.sources:
        # raw_output is not guaranteed to carry source nodes; treat "missing" as "none".
        source_nodes = getattr(tool_output.raw_output, 'source_nodes', None) or []
        for scored_node in source_nodes:
            metadata = scored_node.node.metadata
            image_path = 'Data/images/' + str(metadata['image_id']) + '.jpg'
            columns['Score'].append(scored_node.score)
            columns['Brand'].append(metadata['brand'])
            columns['ImageId'].append(metadata['image_id'])
            columns['Image'].append(f'<img src="{image_path}" width="350" height="350">')
            columns['Price'].append(metadata['price'])
            columns['Rating'].append(metadata['avg_rating'])
            columns['Product Description'].append(scored_node.node.text)

    searchResults = pd.DataFrame(columns)
    return searchResults.sort_values(by='Score', ascending=False)


### Search input verification function to identify whether the question asked is related to clothing

In [18]:
from llama_index.core.llms import ChatMessage
def isClothingQuestion(userinput):
    """
    Ask the chat model whether the user's search text is related to clothing.

    Returns the string "yes" when the lower-cased model reply contains "yes",
    otherwise the string "no".
    """
    system_prompt = "You are a helpful assistant with a high understanding of language. Identify if a question is related to clothing. Respond only with 'yes' or 'no'."
    user_prompt = f"Identify if question: '{userinput}' is related to clothing search and return answer only as yes or no."
    conversation = [
        ChatMessage(role="system", content=system_prompt),
        ChatMessage(role="user", content=user_prompt),
    ]
    # The reply object is stringified, so the check is a plain substring match.
    reply_text = str(OpenAI().chat(conversation)).strip().lower()
    return "yes" if "yes" in reply_text else "no"
    

In [19]:
def indexSearchWithoutAgent(userinput):
    """
    Search the vector index directly, without going through the agent tools.

    A retriever configured with ``similarity_top_k=20`` is queried with
    ``userinput`` and every retrieved node becomes one row of the result.

    The frame is built once, after all nodes have been collected. Returning from
    inside the loop would cut the result down to the first node and yield ``None``
    when nothing is retrieved.

    Parameters
    ----------
    userinput : str
        The search text.

    Returns
    -------
    pandas.DataFrame
        Columns ``Score``, ``Brand``, ``ImageId``, ``Image``, ``Price``, ``Rating``
        and ``Product Description``, in the order the retriever returned the
        nodes. The frame is empty (same columns) when nothing was retrieved.
    """
    retriever = index.as_retriever(similarity_top_k=20,embed_model=embed_model)
    nodes = retriever.retrieve(userinput)

    rows = []
    for scored_node in nodes:
        metadata = scored_node.node.metadata
        image_path = 'Data/images/' + str(metadata['image_id']) + '.jpg'
        rows.append({
            'Score': scored_node.score,
            'Brand': metadata['brand'],
            'ImageId': metadata['image_id'],
            'Image': f'<img src="{image_path}" width="350" height="350">',
            'Price': metadata['price'],
            'Rating': metadata['avg_rating'],
            'Product Description': scored_node.node.text,
        })

    # Explicit column list keeps the schema stable even when `rows` is empty.
    result_columns = ['Score', 'Brand', 'ImageId', 'Image', 'Price', 'Rating', 'Product Description']
    return pd.DataFrame(rows, columns=result_columns)
        

### This function initiates Search conversation with user

In [20]:
def initiateSearch(userinput):
    """
    Run an interactive clothing search for ``userinput``.

    Flow:
    1. While the input is not recognised as a clothing question, report it and
       prompt for a new query (a loop, so repeated wrong inputs do not grow the
       call stack).
    2. Ask the agent and tabulate the source nodes behind its answer.
    3. If the agent produced no rows, fall back to a direct vector-index search
       and display THOSE results (not the empty agent frame).
    4. Otherwise display the agent results and let the user refine the search
       until they answer "no" or a refinement yields no rows.

    Results are rendered with ``display(HTML(...))``; nothing is returned.
    """
    # Keep prompting until the input is about clothing.
    while isClothingQuestion(userinput) != "yes":
        print('Question: "',userinput,'" is not about clothing.')
        userinput=input(" Please enter fashion clothing queries:\n")

    agentResponse=agent.chat(userinput)
    searchResultsDf=extractHTMLDataFrame(agentResponse)

    # If the agent is not able to search, a direct vector index search is initiated.
    if searchResultsDf.empty:
        searchResults=indexSearchWithoutAgent(userinput)
        print('No Exact match of clothes found')
        # Guard against a fallback that found nothing (None or an empty frame).
        if searchResults is None or searchResults.empty:
            print("No results found.")
            return
        searchResults = searchResults.sort_values(by='Score', ascending=False)
        display(HTML(searchResults.to_html(escape=False)))
        return

    display(HTML(searchResultsDf.to_html(escape=False)))

    # Feedback loop: the user is asked whether to refine the search.
    while True:
        user_feedback = input(" Do you want to refine search ? \n Type yes or no : ").strip().lower()
        if user_feedback not in ("yes", "no"):
            print('Please enter only yes or no \n')
            continue

        if user_feedback == "no":
            print('Thanks, Enjoy clothing!')
            break

        additional_input = input("Please provide more details to refine the search: \n")
        refined_input = f" Additional details: {additional_input}"
        agent_response = agent.chat(refined_input)
        searchResultsDf = extractHTMLDataFrame(agent_response)
        if searchResultsDf.empty:
            print("No results found.")
            break
        display(HTML(searchResultsDf.to_html(escape=False)))

### Top 20 Tshirts

In [21]:
# Sample clothing query; initiateSearch prompts interactively for refinement
userinput="flower printed shirt"
initiateSearch(userinput)

Unnamed: 0,Score,Brand,ImageId,Image,Price,Rating,Product Description
0,0.861991,URBANIC,15841446,,1490,4.0,"Pink and beige regular shirt style top Floral print V-neck, long, regular sleeves Woven Button closure Polyester Machine-washThe model (height 5'8) is wearing a size S"
1,0.856116,URBANIC,15841450,,1490,4.0,"Beige and yellow regular shirt style topFloral printV-neck, long, regular sleevesWovenButton closurePolyester Machine-washThe model (height 5'8) is wearing a size S"
2,0.852285,20Dresses,12290254,,1495,4.443114,"Pink, white and green floral print woven crop top with ruffles and smocked detail at the back, has a square neck, and long sleevesThe model (height 5'8"") is wearing a size SMaterial: Viscose Rayon  Machine Wash"
3,0.850292,Berrylush,16944668,,1799,3.0,"Look cool and stylish when you slip into this gorgeous and well-designed top. This top features a lovely floral print and V-neck that amplifies your look. Pretty pink shadeBeautiful floral printV-neckLong puff sleevesPolyester, machine washTrend Alert Symbols of freshness and beauty, romantic florals in vibrant hues and prints have been a mainstay in fashion for decades. Right from delicate all-over patterns to abstract and oversized motifs, romantic florals lend any garment a feminine touch.Polyester, Machine WashThe model (height 5'8) is wearing a size S"
4,0.846269,Flambeur,13271248,,1699,3.846154,"Pink and Green printed woven regular top, has a mandarin collar, and three-quarter sleevesMaterial: Polyester  Hand WashThe model (height 5'8"") is wearing a size XXL"
5,0.846183,Mast & Harbour,17820020,,1499,4.678571,"Pink regular shirt style topFloral printMandarin collarLong, puff sleevesTie-ups detail WovenButton closure100% Polyester Machine wash Size worn by the model: S Chest: 30"" Height: 5'9"""
6,0.845444,H&M,18254984,,1499,3.777778,"Short blouse in a patterned weave with a V-neck and a drawstring with narrow ties down the front to create a draped, gathered effect. Square neckline and smocking at the back, and short puff sleeves with narrow, covered elastication. Unlined.100% Viscose machine wash"
7,0.844818,FLAWLESS,18553788,,899,4.0,"White regular high-low top Floral print V-neck, three-quarter, roll-up sleeves Woven cotton Button closure The model (height 5'8) is wearing a size Scotton blend machine wash"
8,0.843268,FableStreet,14084886,,1895,4.0,"Pink and blue regular topFloral printedRound neck, three-quarter sleeves, regular sleevesDetailWovenButton closureStretchable and anti-wrinkleThe model (height 5'8) is wearing a size SPolyester Moss Lycra Hand wash"
9,0.842134,Marie Claire,2213582,,1299,4.294431,"Off-White and pink printed woven regular top, has a shoulder straps, sleevelessThe model (height 5'8"") is wearing a size SPolyester Hand-wash"


 Do you want to refine search ? 
 Type yes or no :  no


Thanks, Enjoy clothing!


### Trying to enter a query about something other than clothing

In [23]:
# Non-clothing query: initiateSearch reports it and asks for a new query
userinput="I want to buy medicines"
initiateSearch(userinput)

Question: " I want to buy medicines " is not about clothing.


 Please enter fashion clothing queries:
 blue jeans pants


Unnamed: 0,Score,Brand,ImageId,Image,Price,Rating,Product Description
0,0.852865,People,16728370,,1299,4.333333,"Light shade, light fade blue jeansSlim fit, mid-riseClean lookNon-stretchableHas whiskers and chevrons effect5 pocketsLength: cropped100% Cotton Machine washFit: Slim Fit Non-Stretchable The model (height 5'8) is wearing a size 28"
1,0.852728,ether,11307820,,1499,3.892857,"Blue dark wash 4-pocket mid-rise jeans, clean look, no fade, has a button and zip closure, and waistband with belt loopsMaterial: 100% cotton Machine WashFlared The model (height 5'8"") is wearing a size 28"
2,0.85196,ether,16211994,,1899,4.02439,"Medium shade, light fade blue jeansSkinny fit, mid-riseLow distressStretchable5 pocketLength: regular82% Cotton, 17% Polyester, 1% Spandex Machine washFit: Skinny Fit Size worn by the model: 28 Waist: 24"" Hips: 35"" Height: 5'9"""
3,0.851839,People,15911856,,1299,4.0,"Medium shade, heavy fade blue jeans Tapered fit, high-rise Clean look Non stretchable 5 pocket Length: cropped 100% POLYESTER machine wash  Fit: Tapered Fit Non Stretchable The model (height 5'8) is wearing a size 28"
4,0.85148,People,16557246,,1299,5.0,"Dark shade, no fade blue jeans Skinny fit, mid-rise Clean look Non stretchable 4 pocket Length: regular 98% Cotton , 2% Lycra Machine wash  Fit: Skinny Fit Non Stretchable The model (height 5'8) is wearing a size 28"
5,0.850942,People,15317036,,1399,4.416667,"Light shade, light fade blue jeansSlim fit, mid-riseClean lookStretchable5 pocketsLength: croppedFit: Slim Fit The model (height 5'8) is wearing a size 2895% Cotton 5% Lycra Machine wash"
6,0.85081,plusS,17519052,,2599,4.277778,"Pull off an effortlessly chic look by donning this smart and stylish pair of jeans. Designed with an appealing jogger fit and a mid-rise waist, this pair of jeans is a must-have. Stunning blue colourJogger fitMid-rise waistRegular lengthCotton, machine washVolume denimTrend Alert Volume denims are trendy and versatile fits, like mom jeans, flared jeans, and other high-rise oversized fits that are apt for youngsters. They boast many experiential patterns and prints, like linear abstract, Tie & Dye, bi-colour blockdowns, ombre gradient, contemporary craft (denim), and floral and distinct cellular patterns.Pure Cotton Machine Wash  Fit: Jogger Stretchable The model (height 5'8) is wearing a size 28"
7,0.850413,People,16728362,,999,3.8,"Light shade, no fade blue jeansSkinny fit, mid-riseClean LookNon stretchable5 pocketsLength: cropped100% Cotton Machine-wash  Fit: Skinny Fit Non Stretchable The model (height 5'8) is wearing a size 28"
8,0.850296,People,13499400,,999,3.92,"Blue medium wash jeans, clean look, light fade, and has a zip fly closureMaterial: CottonSlim Fit The model (height 5'8"") is wearing a size 28"
9,0.850231,People,15317032,,1399,3.555556,"Light shade, heavy fade blue jeans Slim fit, mid-rise Clean look Non stretchable Na pocket Length: cropped Cotton Machine wash  Fit: Slim Fit Non Stretchable The model (height 5'8) is wearing a size 28"


 Do you want to refine search ? 
 Type yes or no :  no


Thanks, Enjoy clothing!
