# Using searchlite for semantic search workflow

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("../data/interim/racquets_trimmed.csv", index_col = 0)

racquet_brand = [row.split(" ")[0] for row in df["racquet_name"]]

df.insert(1, "racquet_brand", racquet_brand)

df

Unnamed: 0,racquet_img,racquet_brand,racquet_name,racquet_rating,racquet_price,racquet_desc,racquet_swingweight,racquet_composition,racquet_power,racquet_stroke_style,...,racquet_length_in,racquet_balance_in,racquet_strung_weight_oz,racquet_balance_HH_HL,racquet_stiffness,racquet_avg_beam_width,racquet_mains,racquet_crosses,racquet_tension_lower,racquet_tension_upper
0,https://img.tennis-warehouse.com/watermark/rs....,Babolat,Babolat Pure Drive 2025,4.8,289.00,The Pure Drive is popular for a reason. Boast...,317.0,Graphite,Low-Medium,Medium-Full,...,27.0,12.99,11.2,4.0,69.0,24.000000,16.0,19.0,46.0,55.0
1,https://img.tennis-warehouse.com/watermark/rs....,Babolat,Babolat Pure Drive 98 2025,4.5,299.00,Originally launched in 2019 under the VS moni...,326.0,Graphite,Low-Medium,Medium-Full,...,27.0,13.18,11.4,3.0,69.0,21.666666,16.0,20.0,46.0,55.0
2,https://img.tennis-warehouse.com/watermark/rs....,Babolat,Babolat Pure Drive 98 2-Pack 2025,5.0,579.00,This product is for 2 Pure Drive 98 racquets....,323.0,Graphite,Low-Medium,Medium-Full,...,27.0,13.18,11.4,3.0,69.0,21.666666,16.0,20.0,46.0,55.0
3,https://img.tennis-warehouse.com/watermark/rs....,Babolat,Babolat Pure Drive Plus 2025,5.0,289.00,Babolat adds another chapter to one of the ga...,325.0,Graphite,Low-Medium,Medium-Full,...,27.5,13.00,11.2,6.0,69.0,24.000000,16.0,19.0,46.0,55.0
4,https://img.tennis-warehouse.com/watermark/rs....,Babolat,Babolat Pure Drive Team 2025,5.0,269.00,The Pure Drive Team 2025 is defined by its us...,308.0,Graphite,Low-Medium,Medium-Full,...,27.0,12.85,10.6,5.0,69.0,24.000000,16.0,19.0,44.0,53.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
392,https://img.tennis-warehouse.com/watermark/rs....,Solinco,Solinco Blackout 300 XTD,4.8,229.99,"With the Blackout 300 XTD, Solinco takes the ...",328.0,40T Carbon/Graphite,Low-Medium,Medium-Full,...,27.5,12.80,11.3,8.0,70.0,24.166666,16.0,19.0,50.0,60.0
393,https://img.tennis-warehouse.com/watermark/rs....,Solinco,Solinco Blackout 300 XTD+,5.0,229.99,"With the Blackout 300 XTD+, Solinco gives adv...",333.0,40T Carbon/Graphite,Low-Medium,Medium-Full,...,28.0,12.80,11.3,10.0,66.0,24.166666,16.0,19.0,50.0,60.0
394,https://img.tennis-warehouse.com/watermark/rs....,Lacoste,Lacoste L23,4.5,199.00,Introducing the Lascoste L23! Following on th...,318.0,Graphite,Low-Medium,Medium-Full,...,27.0,12.90,11.1,5.0,69.0,23.666666,16.0,19.0,51.0,55.0
395,https://img.tennis-warehouse.com/watermark/rs....,Lacoste,Lacoste L23L,5.0,199.00,Lacoste makes impressive updates to the L23L ...,310.0,Graphite,Low-Medium,Medium-Full,...,27.0,13.40,10.2,1.0,,23.666666,16.0,19.0,51.0,55.0


In [3]:
texts = df["racquet_desc"]
metadata = df[["racquet_brand", "racquet_name", "racquet_rating", "racquet_price"]].to_dict(orient = "records")

In [4]:
texts[0:3]

0     The Pure Drive is popular for a reason. Boast...
1     Originally launched in 2019 under the VS moni...
2     This product is for 2 Pure Drive 98 racquets....
Name: racquet_desc, dtype: object

In [5]:
metadata[0:3]

[{'racquet_brand': 'Babolat',
  'racquet_name': 'Babolat Pure Drive 2025',
  'racquet_rating': 4.8,
  'racquet_price': 289.0},
 {'racquet_brand': 'Babolat',
  'racquet_name': 'Babolat Pure Drive 98 2025',
  'racquet_rating': 4.5,
  'racquet_price': 299.0},
 {'racquet_brand': 'Babolat',
  'racquet_name': 'Babolat Pure Drive 98 2-Pack 2025',
  'racquet_rating': 5.0,
  'racquet_price': 579.0}]

In [6]:
from searchlite.document import Document

In [7]:
doc = Document(texts = texts, metadata = metadata)

In [8]:
doc

Document instance with 324 texts. Metadata contains the following fields: racquet_brand, racquet_name, racquet_rating, racquet_price. Embeddings: Not Ready.
Embedder:TFIDFEmbedder object implemented using scikit-learn.
 Embedder fitted: True

In [9]:
doc.embed()

In [10]:
res = doc.query(query_text = "beginner-friendly racquet with easy power")

In [11]:
doc.display_results(output_list_dicts = res, style = "tabulate")

+-----------------+----------------------+------------------+-----------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [12]:
from searchlite.embedders.sentence_transformer import SentenceTransformerEmbedder

In [13]:
embedder = SentenceTransformerEmbedder(model_name = "all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
embedder

This embedder is a SentenceTransformer instance in a wrapper.
Sentence Transformer __repr__: SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)

In [15]:
doc_st = Document(texts = texts, metadata = metadata, embedder = embedder)

In [16]:
doc_st

Document instance with 324 texts. Metadata contains the following fields: racquet_brand, racquet_name, racquet_rating, racquet_price. Embeddings: Not Ready.
Embedder:This embedder is a SentenceTransformer instance in a wrapper.
Sentence Transformer __repr__: SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)

In [17]:
doc_st.embed()

In [18]:
res = doc.query(query_text = "beginner-friendly racquet with easy power")

In [19]:
doc.display_results(output_list_dicts = res, style = "tabulate")

+-----------------+----------------------+------------------+-----------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------