### Import Packages

In [1]:
import pandas as pd
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


### Read Product CSV File

In [2]:
product_path = "../datasets/products.csv"

In [3]:
product_df = pd.read_csv(product_path)

In [4]:
product_df.head(1)

Unnamed: 0.1,Unnamed: 0,id,Product Name,Category,Description,Selling Price,Image,Dimensions,ItemWeight,ShippingWeight,ASIN,ItemModelNumber,RecommendedAge
0,0,8486ef0f-6ebe-456b-b9c7-ce473963d1f1,crafttastic empower poster craft kit design...,"('Toys & Games', 'Arts & Crafts', 'Craft Kits'...",perfect gift for ages 8 and above make this fe...,14.47,https://images-na.ssl-images-amazon.com/images...,3x10x15,15.2,15.2,B01D52Q1UC,CT1688,8-15years


### Make Data Frame For Vector Database 

In [5]:
vector_df = pd.DataFrame()

In [6]:
vector_df["category"] = product_df["Category"].apply(lambda x: " ".join(eval(x)))

In [7]:
vector_df.head(3)

Unnamed: 0,category
0,Toys & Games Arts & Crafts Craft Kits Paper Craft
1,Toys & Games Games & Accessories Board Games
2,Toys & Games Hobbies Remote & App Controlled V...


In [8]:
vector_df["text"] = product_df["Product Name"] + " " + product_df["Description"]

In [9]:
vector_df.head(3)

Unnamed: 0,category,text
0,Toys & Games Arts & Crafts Craft Kits Paper Craft,crafttastic empower poster craft kit design...
1,Toys & Games Games & Accessories Board Games,melissa doug dottodot letter coloring pad 3 ...
2,Toys & Games Hobbies Remote & App Controlled V...,rpm rear shock tower for the nitro slash nitro...


In [10]:
vector_df["combined"] = vector_df["text"] + " " + vector_df["category"]

In [11]:
vector_df.head()

Unnamed: 0,category,text,combined
0,Toys & Games Arts & Crafts Craft Kits Paper Craft,crafttastic empower poster craft kit design...,crafttastic empower poster craft kit design...
1,Toys & Games Games & Accessories Board Games,melissa doug dottodot letter coloring pad 3 ...,melissa doug dottodot letter coloring pad 3 ...
2,Toys & Games Hobbies Remote & App Controlled V...,rpm rear shock tower for the nitro slash nitro...,rpm rear shock tower for the nitro slash nitro...
3,Toys & Games Play Vehicles Toy Vehicles,disney pixar cars mini racers crank crash der...,disney pixar cars mini racers crank crash der...
4,Toys & Games Puzzles Brain Teasers Assembly & ...,areaware cubebot small great condition,areaware cubebot small great condition Toys & ...


In [12]:
vector_df.to_numpy()[0][2]

'crafttastic  empower poster  craft kit  design a oneofakind inspirational poster perfect gift for ages 8 and above make this feelgood poster by choosing the empowering words that describe why youre wonderful and truly unique Toys & Games Arts & Crafts Craft Kits Paper Craft'

### Make Embedding Model

In [13]:
model_name = "all-MiniLM-L6-v2"

In [14]:
model = SentenceTransformer(model_name)

In [15]:
test_encode = model.encode(
    ["Intent Search"],
    batch_size=256,
    show_progress_bar=True,
)

Batches: 100%|██████████| 1/1 [00:00<00:00, 44.89it/s]


In [16]:
test_encode

array([[ 5.27039357e-02, -1.87651347e-02, -1.23996204e-02,
        -6.82493523e-02, -1.60217378e-02,  6.08501323e-02,
         7.23666400e-02, -1.09136729e-02, -8.70088581e-03,
        -4.19286974e-02, -1.21289035e-02,  5.83972894e-02,
         2.52426174e-02,  3.68633308e-02, -1.48207117e-02,
        -6.92350976e-03,  3.93020138e-02, -6.47709565e-03,
        -4.87710126e-02, -1.81834754e-02,  9.50361863e-02,
         4.05716002e-02,  3.73550132e-02, -4.62344252e-02,
        -2.78359707e-02, -3.89942434e-03, -2.66977884e-02,
        -7.12052733e-02,  4.29163948e-02, -3.42337154e-02,
         7.72332102e-02,  9.65119302e-02,  1.56645626e-01,
         7.88405985e-02, -1.09409690e-02, -6.49769679e-02,
        -1.00952044e-01, -2.64214221e-02,  2.33069304e-02,
        -1.93701498e-02, -5.45176156e-02, -2.23043915e-02,
        -1.14275981e-02,  7.42796361e-02, -3.09175849e-02,
         4.47752187e-03, -4.82856892e-02, -4.90691960e-02,
         7.57272393e-02,  4.03110124e-03, -4.45104428e-0

In [17]:
test_encode.shape

(1, 384)

In [18]:
test_encode.dtype

dtype('float32')

In [19]:
vector_df["combined"] = vector_df["combined"].apply(
    lambda x: str(x) if isinstance(x, float) else x
)

### Embedding Vector Data Frame ( COMBINED )

In [20]:
vector_df['combined'].to_list()

['crafttastic  empower poster  craft kit  design a oneofakind inspirational poster perfect gift for ages 8 and above make this feelgood poster by choosing the empowering words that describe why youre wonderful and truly unique Toys & Games Arts & Crafts Craft Kits Paper Craft',
 'melissa  doug dottodot  letter coloring pad 3 pack abc farm 123 pets abc123 wild animals 3 jumbo connectthedots coloring pads abc farm 123 pets abc123 wild animals Toys & Games Games & Accessories Board Games',
 'rpm rear shock tower for the nitro slash nitro stampede nitro rustler and nitro sport black great condition Toys & Games Hobbies Remote & App Controlled Vehicles & Parts Remote & App Controlled Vehicle Parts',
 'disney pixar cars mini racers crank  crash derby playset disneypixar cars 3 new crazy 8 track Toys & Games Play Vehicles Toy Vehicles',
 'areaware cubebot small great condition Toys & Games Puzzles Brain Teasers Assembly & Disentanglement Puzzles',
 'melissa  doug giddyup  play baby activity t

In [21]:
embeddings = model.encode(
    vector_df["combined"].to_list(),
    batch_size=256,
    show_progress_bar=True,
)

Batches: 100%|██████████| 131/131 [07:47<00:00,  3.57s/it]


In [27]:
embeddings

array([[-1.02486178e-01,  1.24786116e-01,  6.47154227e-02, ...,
         3.89768220e-02,  8.45569652e-03,  1.96186099e-02],
       [ 8.54034163e-03, -5.95801473e-02,  6.35301992e-02, ...,
         6.39676228e-02, -4.35977103e-03, -7.40236836e-03],
       [-1.10650949e-01, -1.21332360e-02,  2.29156539e-02, ...,
         2.64788419e-02, -3.13764252e-02,  4.23190966e-02],
       ...,
       [-7.74671510e-02,  1.88781042e-02,  4.37666550e-02, ...,
         9.81596634e-02, -5.09832837e-02,  9.99077130e-03],
       [ 2.17688046e-02,  4.83368412e-02, -3.15467764e-06, ...,
        -1.38425427e-02,  1.66691616e-02,  2.55134776e-02],
       [-3.26752327e-02, -5.93887940e-02,  2.36065295e-02, ...,
        -4.66995165e-02,  6.16462110e-03,  8.31619501e-02]],
      shape=(33324, 384), dtype=float32)

### Save Embedding Into Vector Database ( FAISS )

In [28]:
dimension = 384

In [29]:
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

In [30]:
index

<faiss.swigfaiss_avx2.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x7547249838a0> >

In [31]:
faiss.write_index(index, "../database/products_vector.bin")