In [17]:
import pandas as pd
import numpy as np

## Fetch products

In [18]:
product_df = pd.read_csv("dataset/product.csv", sep='\t')
product_df.head()

Unnamed: 0,product_id,product_name,product_class,category hierarchy,product_description,product_features,rating_count,average_rating,review_count
0,0,solid wood platform bed,Beds,Furniture / Bedroom Furniture / Beds & Headboa...,"good , deep sleep can be quite difficult to ha...",overallwidth-sidetoside:64.7|dsprimaryproducts...,15.0,4.5,15.0
1,1,all-clad 7 qt . slow cooker,Slow Cookers,Kitchen & Tabletop / Small Kitchen Appliances ...,"create delicious slow-cooked meals , from tend...",capacityquarts:7|producttype : slow cooker|pro...,100.0,2.0,98.0
2,2,all-clad electrics 6.5 qt . slow cooker,Slow Cookers,Kitchen & Tabletop / Small Kitchen Appliances ...,prepare home-cooked meals on any schedule with...,features : keep warm setting|capacityquarts:6....,208.0,3.0,181.0
3,3,all-clad all professional tools pizza cutter,"Slicers, Peelers And Graters",Browse By Brand / All-Clad,this original stainless tool was designed to c...,overallwidth-sidetoside:3.5|warrantylength : l...,69.0,4.5,42.0
4,4,baldwin prestige alcott passage knob with roun...,Door Knobs,Home Improvement / Doors & Door Hardware / Doo...,the hardware has a rich heritage of delivering...,compatibledoorthickness:1.375 '' |countryofori...,70.0,5.0,42.0


In [19]:
## Assemble the product text: use the product description as the text, unless there is no description, in which case use the product name.

In [20]:
product_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42994 entries, 0 to 42993
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   product_id           42994 non-null  int64  
 1   product_name         42994 non-null  object 
 2   product_class        40142 non-null  object 
 3   category hierarchy   41438 non-null  object 
 4   product_description  36986 non-null  object 
 5   product_features     42994 non-null  object 
 6   rating_count         33542 non-null  float64
 7   average_rating       33542 non-null  float64
 8   review_count         33542 non-null  float64
dtypes: float64(3), int64(1), object(5)
memory usage: 3.0+ MB


In [21]:
# Use the description as the product text; where it is missing, fall back to
# the product name.
product_df['product_text'] = product_df['product_description'].fillna(
    product_df['product_name']
)

In [22]:
product_df.iloc[42990]

product_id                                                         42990
product_name                       emmeline 5 piece breakfast dining set
product_class                                          Dining Table Sets
category hierarchy     Furniture / Kitchen & Dining Furniture / Dinin...
product_description                                                  NaN
product_features       basematerialdetails : steel| : gray wood|ofhar...
rating_count                                                      1314.0
average_rating                                                       4.5
review_count                                                       864.0
product_text                       emmeline 5 piece breakfast dining set
Name: 42990, dtype: object

In [23]:
from llama_index.core import Document

# Build one Document per product: the product text is the body, and the id
# and name travel along as metadata.
# The three needed columns are iterated in lockstep instead of using
# DataFrame.iterrows(), which builds a full Series for every row (~43k rows
# here) and is much slower.
documents = [
    Document(
        text=product_text,
        metadata={
            'product_id': product_id,
            'product_name': product_name,
        },
    )
    for product_text, product_id, product_name in zip(
        product_df['product_text'],
        product_df['product_id'],
        product_df['product_name'],
    )
]

In [24]:
print(documents[0])

Doc ID: d959786e-4174-4a0d-8cc6-aeb00f5ae6ee
Text: good , deep sleep can be quite difficult to have in this busy
age . fortunately , there ’ s an antidote to such a problem : a nice ,
quality bed frame like the acacia kaylin . solidly constructed from
acacia wood , this bed frame will stand the test of time and is fit to
rest your shoulders on for years and years . its sleek , natural wood
grain...


In [25]:
from llama_index.core.schema import MetadataMode

In [26]:
print(documents[0].get_content(metadata_mode=MetadataMode.ALL))

product_id: 0
product_name: solid wood platform bed

good , deep sleep can be quite difficult to have in this busy age . fortunately , there ’ s an antidote to such a problem : a nice , quality bed frame like the acacia kaylin . solidly constructed from acacia wood , this bed frame will stand the test of time and is fit to rest your shoulders on for years and years . its sleek , natural wood grain appearance provides a pleasant aesthetic to adorn any bedroom , acting both as a decorative piece as well as a place to give comfort after a hard day of work . our bed frame is designed to give ample under-bed space for easy cleaning and other usages , with a headboard attached to further express the craftiness . it can be used with other accessories such as a nightstand or bookcase headboard and is compatible with many types of mattresses including memory foam , spring , or hybrid ones . there ’ s nowhere better to relax than your own home , and with this bed frame that feeling of homeliness

In [27]:
print(documents[0].metadata)

{'product_id': 0, 'product_name': 'solid wood platform bed'}


### Extract Nodes

In [28]:
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.node_parser import SentenceSplitter

In [13]:
# create the sentence window node parser w/ default settings
node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=3,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)

In [14]:
# Extract nodes from documents
nodes = node_parser.get_nodes_from_documents(documents)

In [15]:
print(f"Text: \n{nodes[0].text}")
print("------------------")
print(f"Window: \n{nodes[0].metadata['window']}")

Text: 
good , deep sleep can be quite difficult to have in this busy age . 
------------------
Window: 
good , deep sleep can be quite difficult to have in this busy age .  fortunately , there ’ s an antidote to such a problem : a nice , quality bed frame like the acacia kaylin .  solidly constructed from acacia wood , this bed frame will stand the test of time and is fit to rest your shoulders on for years and years .  its sleek , natural wood grain appearance provides a pleasant aesthetic to adorn any bedroom , acting both as a decorative piece as well as a place to give comfort after a hard day of work . 


In [16]:
len(nodes)

164163

### Convert Product Info into Embeddings

In [29]:
import os
import openai
from dotenv import load_dotenv, find_dotenv

_ = load_dotenv(find_dotenv()) # read local .env file
# openai.api_key = os.getenv('OPENAI_API_KEY')

### Creating a Weaviate Client

In [30]:
import weaviate

In [31]:
import logging
import sys

# basicConfig() already attaches a stdout StreamHandler to the root logger.
# Adding a second StreamHandler by hand made every log record print twice,
# so only basicConfig() is used.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

In [32]:
# Read every credential with os.environ[...] so that a missing variable fails
# immediately with a KeyError naming it, instead of handing None to the client.
client = weaviate.Client(
    url=os.environ["WEAVIATE_URL"],  # Weaviate Cloud cluster URL
    auth_client_secret=weaviate.auth.AuthApiKey(os.environ["WEAVIATE_API_KEY"]),  # Weaviate instance API key
    additional_headers={"X-Cohere-Api-Key": os.environ["COHERE_API_KEY"]},
)

            Please consider upgrading to the latest version. See https://weaviate.io/developers/weaviate/client-libraries/python for details.


In [33]:
client.is_ready()

True

### Next, you will build a `VectorStoreIndex` from the Weaviate client to store your data in and interact with.

In [34]:
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.weaviate import WeaviateVectorStore

In [35]:
# construct vector store
vector_store = WeaviateVectorStore(
    weaviate_client=client, index_name="LlamaIndex"
)

In [36]:
# Set up the storage for the embeddings
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [79]:
# If an index with the same index name already exists within Weaviate, delete it
# if client.schema.exists("LlamaIndex"):
#     client.schema.delete_class("LlamaIndex")

# index = VectorStoreIndex(
#     nodes,
#     storage_context = storage_context,
# )

In [11]:
import json

index_name="LlamaIndex"
response = client.schema.get(index_name)

# print(json.dumps(response, indent=2))

### Loading the index

In [37]:
vector_store = WeaviateVectorStore(
    weaviate_client=client, index_name="LlamaIndex"
)

index = VectorStoreIndex.from_vector_store(vector_store)

### Query Index

In [39]:
query_engine = index.as_query_engine(                                                                           
    vector_store_query_mode="hybrid", similarity_top_k=2
)
response = query_engine.query(                                                                                     
    "salon chair",
)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
{'e05f0fc2-15d3-40e1-a17f-d4ba60f6199b': {'window': 'offers a wide selection of professional salon products including styling chair , salon hairdryer , salon equipment , etc .  to showcase the uniqueness of each salon', 'original_text': 'to showcase the uniqueness of each salon', 'product_id': 7465, 'product_name': 'hair salon chair'}, 'ed7967fc-f7da-47c1-86c5-445027ee1fcd': {'window': 'office work chair beauty salon chair white', 'original_text': 'office work chair beauty salon chair white', 'product_id': 33690, 'product_name': 'office work chair beauty salon chair white'}}


In [43]:
print(response.metadata)

{'e05f0fc2-15d3-40e1-a17f-d4ba60f6199b': {'window': 'offers a wide selection of professional salon products including styling chair , salon hairdryer , salon equipment , etc .  to showcase the uniqueness of each salon', 'original_text': 'to showcase the uniqueness of each salon', 'product_id': 7465, 'product_name': 'hair salon chair'}, 'ed7967fc-f7da-47c1-86c5-445027ee1fcd': {'window': 'office work chair beauty salon chair white', 'original_text': 'office work chair beauty salon chair white', 'product_id': 33690, 'product_name': 'office work chair beauty salon chair white'}}


In [46]:
import json
print(json.dumps(response.metadata, indent=2))

{
  "e05f0fc2-15d3-40e1-a17f-d4ba60f6199b": {
    "window": "offers a wide selection of professional salon products including styling chair , salon hairdryer , salon equipment , etc .  to showcase the uniqueness of each salon",
    "original_text": "to showcase the uniqueness of each salon",
    "product_id": 7465,
    "product_name": "hair salon chair"
  },
  "ed7967fc-f7da-47c1-86c5-445027ee1fcd": {
    "window": "office work chair beauty salon chair white",
    "original_text": "office work chair beauty salon chair white",
    "product_id": 33690,
    "product_name": "office work chair beauty salon chair white"
  }
}


In [21]:
from IPython.display import Markdown, display

In [22]:
display(Markdown(f"<b>{response}</b>"))

<b>The products mentioned are suitable for outdoor use and are designed for bar settings.</b>

### Query Index with Hybrid Search

In [26]:
from llama_index.core.response.notebook_utils import display_response

In [45]:
query_engine = index.as_query_engine(
    vector_store_query_mode="hybrid", similarity_top_k=2
)
response = query_engine.query(
    "salon chair",
)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [46]:
display_response(response)

**`Final Response:`** The product_id 7465 is associated with the product_name "hair salon chair," which is designed to showcase the uniqueness of each salon.

In [65]:
# from llama_index.core.postprocessor import MetadataReplacementPostProcessor

# # The target key defaults to `window` to match the node_parser's default
# postproc = MetadataReplacementPostProcessor(
#     target_metadata_key="window"
# )

In [61]:
# Hybrid query returning the top 5 nodes for "salon chair".
# alpha=0.0 presumably weights the hybrid score fully toward keyword (BM25)
# matching rather than vector similarity — TODO confirm against the
# Weaviate / LlamaIndex documentation.
# NOTE(review): the earlier comment here mentioned setting logging to DEBUG,
# but this cell does not change any logging configuration.
query_engine = index.as_query_engine(
    vector_store_query_mode="hybrid", similarity_top_k=5, alpha=0.0
)
response = query_engine.query(
    "salon chair",
)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [62]:
display_response(response)

**`Final Response:`** The salon chair is designed to showcase the uniqueness of each salon and create a fashionable salon interior. It offers a wide selection of professional salon products including styling chair, salon hairdryer, salon equipment, etc. The salon chair is made with the highest quality parts to provide long-lasting performance and a comfortable experience for customers.

In [63]:
response.metadata

{'e05f0fc2-15d3-40e1-a17f-d4ba60f6199b': {'window': 'offers a wide selection of professional salon products including styling chair , salon hairdryer , salon equipment , etc .  to showcase the uniqueness of each salon',
  'original_text': 'to showcase the uniqueness of each salon',
  'product_id': 7465,
  'product_name': 'hair salon chair'},
 '81dead1c-a774-4be8-93bf-3ff79461f823': {'window': 'offers a wide selection of professional salon products including styling chair , salon hairdryer , salon equipment , etc .  to showcase the uniqueness of each salon',
  'original_text': 'offers a wide selection of professional salon products including styling chair , salon hairdryer , salon equipment , etc . ',
  'product_id': 7465,
  'product_name': 'hair salon chair'},
 'd3baf129-0497-47ad-a4ae-2bc95ea791d3': {'window': 'mercer41 beauty offers a wide selection professional beauty salon equipment including salon styling chair , salon hair dryer , barber pole light , etc .  to showcase the unique

In [56]:
# Load the manually labeled ground-truth labels.
label_df = pd.read_csv("dataset/label.csv", sep='\t')

# Keep only the labels for query 0 and product ids 7465 and 33690.
filtered_df = label_df[(label_df['query_id'] == 0) & (label_df['product_id'].isin([7465, 33690]))]
# NOTE(review): despite its name, result_dict is a DataFrame (a column subset), not a dict.
result_dict = filtered_df[['query_id', 'product_id', 'label']]
print(result_dict)

     query_id  product_id    label
80          0        7465    Exact
109         0       33690  Partial


{'query_id': [0, 0], 'product_id': [7465, 33690], 'label': ['Exact', 'Partial']}

In [55]:
label_df[(label_df['query_id'] == 0) & (label_df['product_id'] == 7465)]

Unnamed: 0,id,query_id,product_id,label
80,80,0,7465,Exact


In [64]:
label_df[(label_df['query_id'] == 0) & (label_df['product_id'] == 7468)]

Unnamed: 0,id,query_id,product_id,label
104,104,0,7468,Exact


In [65]:
label_df[(label_df['query_id'] == 0) & (label_df['product_id'] == 33691)]

Unnamed: 0,id,query_id,product_id,label
98,98,0,33691,Partial


### Query With MetadataReplacementPostProcessor

In [63]:
from llama_index.core.postprocessor import MetadataReplacementPostProcessor

In [64]:
query_engine = index.as_query_engine(
    vector_store_query_mode="hybrid", 
    similarity_top_k=2,
    # the target key defaults to `window` to match the node_parser's default
    node_postprocessors=[
        MetadataReplacementPostProcessor(target_metadata_key="window")
    ],
)

In [65]:
window_response = query_engine.query(
    "salon chair"
)
print(window_response)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
The product that falls under the category of a salon chair is the "hair salon chair."


In [66]:
import json
print(json.dumps(window_response.metadata, indent=2))

{
  "e05f0fc2-15d3-40e1-a17f-d4ba60f6199b": {
    "window": "offers a wide selection of professional salon products including styling chair , salon hairdryer , salon equipment , etc .  to showcase the uniqueness of each salon",
    "original_text": "to showcase the uniqueness of each salon",
    "product_id": 7465,
    "product_name": "hair salon chair"
  },
  "ed7967fc-f7da-47c1-86c5-445027ee1fcd": {
    "window": "office work chair beauty salon chair white",
    "original_text": "office work chair beauty salon chair white",
    "product_id": 33690,
    "product_name": "office work chair beauty salon chair white"
  }
}


In [67]:
window = window_response.source_nodes[0].node.metadata["window"]
sentence = window_response.source_nodes[0].node.metadata["original_text"]

print(f"Window: {window}")
print("------------------")
print(f"Original Sentence: {sentence}")

Window: offers a wide selection of professional salon products including styling chair , salon hairdryer , salon equipment , etc .  to showcase the uniqueness of each salon
------------------
Original Sentence: to showcase the uniqueness of each salon


In [68]:
for source_node in window_response.source_nodes:
    print(source_node.node.metadata["original_text"])
    print("--------")

to showcase the uniqueness of each salon
--------
office work chair beauty salon chair white
--------


### Custom Retrievers

In [69]:
from llama_index.core import Settings

# Re-parse the documents with the node parser configured on Settings.
# NOTE(review): this rebinds `nodes`, which earlier in the notebook held the
# SentenceWindowNodeParser output; cells run after this one see these nodes instead.
nodes = Settings.node_parser.get_nodes_from_documents(documents)

In [70]:
from llama_index.core import StorageContext

# initialize storage context (by default it's in-memory)
storage_context = StorageContext.from_defaults()
storage_context.docstore.add_documents(nodes)

In [89]:
from llama_index.core import SimpleKeywordTableIndex, VectorStoreIndex

# vector_index = VectorStoreIndex(nodes, storage_context=storage_context)
# keyword_index = SimpleKeywordTableIndex(nodes, storage_context=storage_context)

### LLM Reranker

In [72]:
import nest_asyncio

nest_asyncio.apply()

In [74]:
import logging
import sys

# Logging is configured a second time here. force=True replaces the root
# logger's existing handlers instead of stacking yet another stdout handler,
# which made each log record print one extra time.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

In [80]:
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core import QueryBundle
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.postprocessor import LLMRerank
from llama_index.llms.openai import OpenAI

def get_retrieved_nodes(
    query_str, vector_top_k=10, reranker_top_n=2, with_reranker=False
):
    """Retrieve nodes for ``query_str`` from the notebook-level ``index``.

    Args:
        query_str: Query text; it is wrapped in a ``QueryBundle``.
        vector_top_k: ``similarity_top_k`` passed to ``VectorIndexRetriever``.
        reranker_top_n: ``top_n`` passed to ``LLMRerank``; only used when
            ``with_reranker`` is true.
        with_reranker: When true, the retrieved nodes are run through
            ``LLMRerank`` (``choice_batch_size=5``) before being returned.

    Returns:
        The retriever's result, or the reranker's output when reranking is on.
    """
    bundle = QueryBundle(query_str)
    candidates = VectorIndexRetriever(
        index=index,
        similarity_top_k=vector_top_k,
    ).retrieve(bundle)

    # Without reranking, the raw retrieval result is the answer.
    if not with_reranker:
        return candidates

    llm_reranker = LLMRerank(choice_batch_size=5, top_n=reranker_top_n)
    return llm_reranker.postprocess_nodes(candidates, bundle)

In [81]:
new_nodes = get_retrieved_nodes(
     "salon chair",
    vector_top_k=2,
    with_reranker=False,
)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [92]:
for node in new_nodes:
    print(node)

Node ID: e05f0fc2-15d3-40e1-a17f-d4ba60f6199b
Text: to showcase the uniqueness of each salon
Score:  0.904

Node ID: 2c2fa1b0-e694-4eb6-a734-99f06ecb18b2
Text: it features a curved backrest design , an adjustable lift seat ,
and a five-star base with casters for easy mobility .
Score:  0.900



In [106]:
new_nodes = get_retrieved_nodes(
    "salon chair",
    reranker_top_n=2,
    with_reranker=True,
)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [107]:
for node in new_nodes:
    print(node)

Node ID: e6cae8c0-df0a-4cea-8ecf-73792d09b11b
Text: barber chair salon chair styling chair salon chairs hair styling
chair haircut chair salon chair haircut chair salon barber chair heavy
duty barber chair shampoo chair hydraulic salon chair salon stool
chair beauty hair chair shampoo chair adjustable swivel barber chairs
shampoo chair with leg rest salon stool for hair stylist stool salon
haircu...
Score:  10.000

Node ID: e05f0fc2-15d3-40e1-a17f-d4ba60f6199b
Text: to showcase the uniqueness of each salon
Score:  8.000



In [102]:
query_engine = index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[
        LLMRerank(
            choice_batch_size=5,
            top_n=2,
        )
    ],
    response_mode="tree_summarize",
)
response = query_engine.query(
    "salon chair",
)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST h

In [103]:
print(json.dumps(response.metadata, indent=2))

{
  "e6cae8c0-df0a-4cea-8ecf-73792d09b11b": {
    "window": "double-reinforced saddle sticking prevents ripping and tearing .  small cell high density foam for extra comfort of your client .  easy to assemble and clean .  barber chair salon chair styling chair salon chairs hair styling chair haircut chair salon chair haircut chair salon barber chair heavy duty barber chair shampoo chair hydraulic salon chair salon stool chair beauty hair chair shampoo chair adjustable swivel barber chairs shampoo chair with leg rest salon stool for hair stylist stool salon haircut chair home hydraulic lift styling chair heavy duty salon chair .",
    "original_text": "barber chair salon chair styling chair salon chairs hair styling chair haircut chair salon chair haircut chair salon barber chair heavy duty barber chair shampoo chair hydraulic salon chair salon stool chair beauty hair chair shampoo chair adjustable swivel barber chairs shampoo chair with leg rest salon stool for hair stylist stool salon

In [105]:
from llama_index.core.response.pprint_utils import pprint_response

pprint_response(response, show_source=True)

Final Response: The products mentioned include a massage chair, barber
chairs, styling chairs, salon chairs, hair styling chairs, haircut
chairs, heavy duty barber chairs, shampoo chairs, hydraulic salon
chairs, salon stool chairs, beauty hair chairs, adjustable swivel
barber chairs, shampoo chairs with leg rest, salon stools for hair
stylists, hydraulic lift styling chairs, and heavy duty salon chairs.
______________________________________________________________________
Source Node 1/2
Node ID: e6cae8c0-df0a-4cea-8ecf-73792d09b11b
Similarity: 10.0
Text: barber chair salon chair styling chair salon chairs hair styling
chair haircut chair salon chair haircut chair salon barber chair heavy
duty barber chair shampoo chair hydraulic salon chair salon stool
chair beauty hair chair shampoo chair adjustable swivel barber chairs
shampoo chair with leg rest salon stool for hair stylist stool salon
haircu...
______________________________________________________________________
Source Node 2/2

In [104]:
label_df[(label_df['query_id'] == 0) & (label_df['product_id'] == 15612)]

Unnamed: 0,id,query_id,product_id,label
101,101,0,15612,Exact


In [118]:
from llama_index.postprocessor.cohere_rerank import CohereRerank

In [124]:
api_key = os.environ["COHERE_API_KEY"]
cohere_rerank = CohereRerank(api_key=api_key, top_n=2)

In [125]:
query_engine = index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[cohere_rerank],
)
response = query_engine.query(
    "salon chair",
)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.cohere.ai/v1/rerank "HTTP/1.1 200 OK"
HTTP Request: POST https://api.cohere.ai/v1/rerank "HTTP/1.1 200 OK"
HTTP Request: POST https://api.cohere.ai/v1/rerank "HTTP/1.1 200 OK"
HTTP Request: POST https://api.cohere.ai/v1/rerank "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [126]:
print(json.dumps(response.metadata, indent=2))

{
  "814c82a2-d82b-4214-be7b-dbdc8113fcf4": {
    "window": "salon chairs are a wonderful avenue for hairstylists and barbers to increase their revenue - have a comfortable chair for customers to enjoy during their visit .",
    "original_text": "salon chairs are a wonderful avenue for hairstylists and barbers to increase their revenue - have a comfortable chair for customers to enjoy during their visit .",
    "product_id": 25431,
    "product_name": "barberpub salon massage chair"
  },
  "e05f0fc2-15d3-40e1-a17f-d4ba60f6199b": {
    "window": "offers a wide selection of professional salon products including styling chair , salon hairdryer , salon equipment , etc .  to showcase the uniqueness of each salon",
    "original_text": "to showcase the uniqueness of each salon",
    "product_id": 7465,
    "product_name": "hair salon chair"
  }
}


In [129]:
label_df[(label_df['query_id'] == 0) & (label_df['product_id'] == 25431)]

Unnamed: 0,id,query_id,product_id,label
29,29,0,25431,Exact


## Advanced techniques for Generation

In [109]:
import time
from llama_index.core import VectorStoreIndex
from llama_index.core.postprocessor import SentenceEmbeddingOptimizer

In [110]:
# Baseline run: default query engine with no node postprocessor.
print("Without optimization")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock
# is adjusted.
start_time = time.perf_counter()
query_engine = index.as_query_engine()
response = query_engine.query("what did users want for salon chair?")
end_time = time.perf_counter()
print(f"Total time elapsed: {end_time - start_time}")
print(f"Answer: {response}")

Without optimization
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Total time elapsed: 1.756270170211792
Answer: Users wanted a comfortable chair for customers to enjoy during their visit.


In [111]:
print(json.dumps(response.metadata, indent=2))

{
  "e05f0fc2-15d3-40e1-a17f-d4ba60f6199b": {
    "window": "offers a wide selection of professional salon products including styling chair , salon hairdryer , salon equipment , etc .  to showcase the uniqueness of each salon",
    "original_text": "to showcase the uniqueness of each salon",
    "product_id": 7465,
    "product_name": "hair salon chair"
  },
  "814c82a2-d82b-4214-be7b-dbdc8113fcf4": {
    "window": "salon chairs are a wonderful avenue for hairstylists and barbers to increase their revenue - have a comfortable chair for customers to enjoy during their visit .",
    "original_text": "salon chairs are a wonderful avenue for hairstylists and barbers to increase their revenue - have a comfortable chair for customers to enjoy during their visit .",
    "product_id": 25431,
    "product_name": "barberpub salon massage chair"
  }
}


In [112]:
label_df[(label_df['query_id'] == 0) & (label_df['product_id'] == 25431)]

Unnamed: 0,id,query_id,product_id,label
29,29,0,25431,Exact


In [114]:
# Same query, with SentenceEmbeddingOptimizer(percentile_cutoff=0.7) applied
# as a node postprocessor.
print("With optimization")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock
# is adjusted.
start_time = time.perf_counter()
query_engine = index.as_query_engine(
    node_postprocessors=[SentenceEmbeddingOptimizer(percentile_cutoff=0.7)]
)
response = query_engine.query("what did users want for salon chair?")
end_time = time.perf_counter()
print(f"Total time elapsed: {end_time - start_time}")
print(f"Answer: {response}")

With optimization
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/

In [115]:
print(json.dumps(response.metadata, indent=2))

{
  "e05f0fc2-15d3-40e1-a17f-d4ba60f6199b": {
    "window": "offers a wide selection of professional salon products including styling chair , salon hairdryer , salon equipment , etc .  to showcase the uniqueness of each salon",
    "original_text": "to showcase the uniqueness of each salon",
    "product_id": 7465,
    "product_name": "hair salon chair"
  },
  "814c82a2-d82b-4214-be7b-dbdc8113fcf4": {
    "window": "salon chairs are a wonderful avenue for hairstylists and barbers to increase their revenue - have a comfortable chair for customers to enjoy during their visit .",
    "original_text": "salon chairs are a wonderful avenue for hairstylists and barbers to increase their revenue - have a comfortable chair for customers to enjoy during their visit .",
    "product_id": 25431,
    "product_name": "barberpub salon massage chair"
  }
}


In [116]:
# Same query, with SentenceEmbeddingOptimizer(percentile_cutoff=0.1) applied
# as a node postprocessor.
print("Alternate optimization cutoff")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock
# is adjusted.
start_time = time.perf_counter()
query_engine = index.as_query_engine(
    node_postprocessors=[SentenceEmbeddingOptimizer(percentile_cutoff=0.1)]
)
response = query_engine.query("what did users want for salon chair?")
end_time = time.perf_counter()
print(f"Total time elapsed: {end_time - start_time}")
print(f"Answer: {response}")

Alternate optimization cutoff
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.

In [117]:
print(json.dumps(response.metadata, indent=2))

{
  "e05f0fc2-15d3-40e1-a17f-d4ba60f6199b": {
    "window": "offers a wide selection of professional salon products including styling chair , salon hairdryer , salon equipment , etc .  to showcase the uniqueness of each salon",
    "original_text": "to showcase the uniqueness of each salon",
    "product_id": 7465,
    "product_name": "hair salon chair"
  },
  "814c82a2-d82b-4214-be7b-dbdc8113fcf4": {
    "window": "salon chairs are a wonderful avenue for hairstylists and barbers to increase their revenue - have a comfortable chair for customers to enjoy during their visit .",
    "original_text": "salon chairs are a wonderful avenue for hairstylists and barbers to increase their revenue - have a comfortable chair for customers to enjoy during their visit .",
    "product_id": 25431,
    "product_name": "barberpub salon massage chair"
  }
}


### Evaluation

In [None]:
query_df = pd.read_csv("dataset/query.csv", sep='\t')
n = 10
query = query_df.head(n)

In [None]:
def process_row(row, engine=None):
    """Run one query through a query engine and flatten the returned metadata.

    Args:
        row: Mapping-like row providing ``'query_id'`` and ``'query'``.
        engine: Object exposing ``query(text)``. When omitted, the notebook-level
            ``query_engine`` is used, so ``DataFrame.apply(process_row, axis=1)``
            keeps working unchanged.

    Returns:
        A list with one dict per entry in the response metadata, holding
        ``query_id``, ``query``, ``product_id``, ``product_name`` and
        ``product_text`` (taken from the entry's ``original_text``). The list
        is empty when the response carries no metadata.
    """
    if engine is None:
        # Looked up at call time, so the most recently defined engine is used.
        engine = query_engine
    query_id = row['query_id']
    item = row['query']
    raw_response = engine.query(item)
    # The response metadata may be None (for example when nothing was
    # retrieved); treat that as "no results" rather than failing on
    # None.values().
    node_metadata = raw_response.metadata or {}
    return [
        {
            'query_id': query_id,
            'query': item,
            'product_id': v['product_id'],
            'product_name': v['product_name'],
            'product_text': v['original_text'],
        }
        for v in node_metadata.values()
    ]

In [None]:
# Flatten the per-query result lists into one list of records.
# Series.explode() turns an empty list into NaN, and a NaN mixed into the
# records makes pd.DataFrame(...) fail or produce a malformed frame; the
# nested comprehension simply contributes nothing for a query with no results.
results = [
    record
    for _, query_row in query.iterrows()
    for record in process_row(query_row)
]
df = pd.DataFrame(results)