In [2]:
import pandas as pd
import numpy as np

## Fetch products

In [4]:
# Load the tab-separated product catalogue and preview its first rows.
product_df = pd.read_csv("dataset/product.csv", sep='\t')
product_df.head()

Unnamed: 0,product_id,product_name,product_class,category hierarchy,product_description,product_features,rating_count,average_rating,review_count
0,0,solid wood platform bed,Beds,Furniture / Bedroom Furniture / Beds & Headboa...,"good , deep sleep can be quite difficult to ha...",overallwidth-sidetoside:64.7|dsprimaryproducts...,15.0,4.5,15.0
1,1,all-clad 7 qt . slow cooker,Slow Cookers,Kitchen & Tabletop / Small Kitchen Appliances ...,"create delicious slow-cooked meals , from tend...",capacityquarts:7|producttype : slow cooker|pro...,100.0,2.0,98.0
2,2,all-clad electrics 6.5 qt . slow cooker,Slow Cookers,Kitchen & Tabletop / Small Kitchen Appliances ...,prepare home-cooked meals on any schedule with...,features : keep warm setting|capacityquarts:6....,208.0,3.0,181.0
3,3,all-clad all professional tools pizza cutter,"Slicers, Peelers And Graters",Browse By Brand / All-Clad,this original stainless tool was designed to c...,overallwidth-sidetoside:3.5|warrantylength : l...,69.0,4.5,42.0
4,4,baldwin prestige alcott passage knob with roun...,Door Knobs,Home Improvement / Doors & Door Hardware / Doo...,the hardware has a rich heritage of delivering...,compatibledoorthickness:1.375 '' |countryofori...,70.0,5.0,42.0


In [5]:
## Assemble the product text: use the product description as the text, unless there is no description, in which case fall back to the product name.

In [6]:
# Take the description as the product text; rows whose description is
# missing (NaN) fall back to the product name.
product_df['product_text'] = product_df['product_description'].fillna(
    product_df['product_name']
)

In [7]:
# Spot-check a row whose product_description is NaN: its product_text
# should equal its product_name.
product_df.iloc[42990]

product_id                                                         42990
product_name                       emmeline 5 piece breakfast dining set
product_class                                          Dining Table Sets
category hierarchy     Furniture / Kitchen & Dining Furniture / Dinin...
product_description                                                  NaN
product_features       basematerialdetails : steel| : gray wood|ofhar...
rating_count                                                      1314.0
average_rating                                                       4.5
review_count                                                       864.0
product_text                       emmeline 5 piece breakfast dining set
Name: 42990, dtype: object

In [8]:
from llama_index.core import Document

# One Document per product row, in frame order: the body is the product text,
# and the id and name ride along as metadata.
documents = [
    Document(
        text=body,
        metadata={'product_id': pid, 'product_name': name},
    )
    for body, pid, name in zip(
        product_df['product_text'],
        product_df['product_id'],
        product_df['product_name'],
    )
]

In [9]:
# Show the string form of the first document (the output shows its Doc ID
# and a truncated preview of the text).
print(documents[0])

Doc ID: b4bcc069-3be0-4e71-a3ce-b7d442934e1e
Text: good , deep sleep can be quite difficult to have in this busy
age . fortunately , there ’ s an antidote to such a problem : a nice ,
quality bed frame like the acacia kaylin . solidly constructed from
acacia wood , this bed frame will stand the test of time and is fit to
rest your shoulders on for years and years . its sleek , natural wood
grain...


In [10]:
from llama_index.core.schema import MetadataMode

In [11]:
# Show the first document's content with MetadataMode.ALL; in the output the
# metadata key/value lines precede the product text.
print(documents[0].get_content(metadata_mode=MetadataMode.ALL))

product_id: 0
product_name: solid wood platform bed

good , deep sleep can be quite difficult to have in this busy age . fortunately , there ’ s an antidote to such a problem : a nice , quality bed frame like the acacia kaylin . solidly constructed from acacia wood , this bed frame will stand the test of time and is fit to rest your shoulders on for years and years . its sleek , natural wood grain appearance provides a pleasant aesthetic to adorn any bedroom , acting both as a decorative piece as well as a place to give comfort after a hard day of work . our bed frame is designed to give ample under-bed space for easy cleaning and other usages , with a headboard attached to further express the craftiness . it can be used with other accessories such as a nightstand or bookcase headboard and is compatible with many types of mattresses including memory foam , spring , or hybrid ones . there ’ s nowhere better to relax than your own home , and with this bed frame that feeling of homeliness

In [12]:
# Show the raw metadata dict attached to the first document.
print(documents[0].metadata)

{'product_id': 0, 'product_name': 'solid wood platform bed'}


### Convert Product Info into Embeddings

### Assign Label Scores

In [16]:
# Load the manually labeled ground-truth labels (tab-separated) and display them.
label_df = pd.read_csv("dataset/label.csv", sep='\t')
label_df

Unnamed: 0,id,query_id,product_id,label
0,0,0,25434,Exact
1,1,0,12088,Irrelevant
2,2,0,42931,Exact
3,3,0,2636,Exact
4,4,0,42923,Exact
...,...,...,...,...
233443,234010,478,15439,Partial
233444,234011,478,451,Partial
233445,234012,478,30764,Irrelevant
233446,234013,478,16796,Partial


In [17]:
def score_func(x: str) -> "float | None":
    """Map a relevance label to its numeric score.

    Matching is case-insensitive: ``'exact'`` -> 1.0, ``'partial'`` -> 0.75,
    ``'irrelevant'`` -> 0.0.

    Args:
        x: Label text, e.g. ``'Exact'``.

    Returns:
        The score for a recognised label, or ``None`` when the label is not
        recognised or is not a string (for example the float NaN that pandas
        uses for a missing cell).
    """
    scores = {
        'exact': 1.0,
        'irrelevant': 0.0,
        'partial': 0.75
    }
    # A missing label reaches this function as a float NaN (or None), which
    # has no .lower(); treat it like any other unrecognised label instead of
    # raising AttributeError in the middle of a column-wide apply.
    if not isinstance(x, str):
        return None
    return scores.get(x.lower())

In [18]:
# Convert each textual label to its numeric relevance score.
label_df['label_score'] = label_df['label'].map(score_func)

In [19]:
# Distribution of the numeric scores; these counts should mirror the
# per-label counts if every label was mapped.
label_df['label_score'].value_counts()

label_score
0.75    146633
0.00     61201
1.00     25614
Name: count, dtype: int64

In [20]:
# Distribution of the raw textual labels, for comparison with the score counts.
label_df['label'].value_counts()

label
Partial       146633
Irrelevant     61201
Exact          25614
Name: count, dtype: int64