In [1]:
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct

import pandas as pd
import openai

  from .autonotebook import tqdm as notebook_tqdm


### Read the sampled dataset with Amazon inventory data

In [2]:
# Load the sampled Amazon Electronics item metadata. The file is JSON Lines
# (one JSON object per line), hence lines=True.
df_items = pd.read_json(
    "../../data/meta_Electronics_2022_2023_with_category_ratings_100_sample_1000.jsonl",
    lines=True,
)

In [3]:
df_items.head()

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
0,All Electronics,2 Pack-iPhone Earbuds Wired Lightning Headphon...,3.4,598,[],[],,[{'thumb': 'https://m.media-amazon.com/images/...,[],WASABI MANGO,"[Electronics, Headphones, Earbuds & Accessorie...",{'Product Dimensions': '23.62 x 19.69 x 27.56 ...,B0B1ZVC7GJ,,,
1,Computers,"Mini PC 16GB DDR4 256GB M.2 SSD,Quad-Core 2.7G...",4.3,450,[„ÄêMeet to Sufficient Memory Storage„ÄëThis Mini ...,[],,[{'thumb': 'https://m.media-amazon.com/images/...,[],OUVISLITE,"[Electronics, Computers & Accessories, Compute...","{'Screen Resolution': '3840 x 2160', 'Max Scre...",B0B1HNV2V9,,,
2,Computers,Samsers Foldable Bluetooth Keyboard with Touch...,4.4,308,[„ÄêFull-size Folding Wireless Keyboard„Äë Samsers...,[],48.99,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'Love this keyboard!', 'url': 'http...",Samsers,"[Electronics, Computers & Accessories, Compute...",{'Product Dimensions': '13.5 x 4.5 x 0.4 inche...,B0C2Q8BDTX,,,
3,Computers,"Rolling Laptop Bag Women with Wheels, Rolling ...",4.5,152,[MOBILE OFFICE: EMPSIGN rolling bag with lapto...,[rolling laptop bag],,[{'thumb': 'https://m.media-amazon.com/images/...,[{'title': 'Elegant Laptop Tote Bag for Women ...,Ytonet,"[Electronics, Computers & Accessories, Laptop ...","{'Brand': 'Ytonet', 'Item model number': '1332...",B092Z9CTNK,,,
4,All Electronics,"Wireless Mouse, 2.4G Silent Mouse with USB Rec...",4.6,677,[Plug & Play Super Easy to Use- Just plug and ...,[],10.99,[{'thumb': 'https://m.media-amazon.com/images/...,[],MagoFeliz,"[Electronics, Computers & Accessories, Compute...",{'Product Dimensions': '4.69 x 2.6 x 0.01 inch...,B0C77L1G7V,,,


In [4]:
list(df_items["features"].items())[0]

(0, [])

In [5]:
list(df_items["images"].items())[0]

(0,
 [{'thumb': 'https://m.media-amazon.com/images/I/31eziY1O3EL._AC_US40_.jpg',
   'large': 'https://m.media-amazon.com/images/I/31eziY1O3EL._AC_.jpg',
   'variant': 'MAIN',
   'hi_res': 'https://m.media-amazon.com/images/I/51tDCwwyEgL._AC_SL1417_.jpg'},
  {'thumb': 'https://m.media-amazon.com/images/I/51J0Iti28iL._AC_US40_.jpg',
   'large': 'https://m.media-amazon.com/images/I/51J0Iti28iL._AC_.jpg',
   'variant': 'PT01',
   'hi_res': 'https://m.media-amazon.com/images/I/71voGQrX7nL._AC_SL1276_.jpg'},
  {'thumb': 'https://m.media-amazon.com/images/I/419vSzdw3pL._AC_US40_.jpg',
   'large': 'https://m.media-amazon.com/images/I/419vSzdw3pL._AC_.jpg',
   'variant': 'PT02',
   'hi_res': 'https://m.media-amazon.com/images/I/61Iub5dt2SL._AC_SL1500_.jpg'},
  {'thumb': 'https://m.media-amazon.com/images/I/4111pWhpUIL._AC_US40_.jpg',
   'large': 'https://m.media-amazon.com/images/I/4111pWhpUIL._AC_.jpg',
   'variant': 'PT03',
   'hi_res': 'https://m.media-amazon.com/images/I/612nAIai-WL._AC_SL1

### Preprocess title and features

In [6]:
def preprocess_description(row):
    """Build the text that gets embedded for one item: title followed by features.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with a ``title`` value
            and a ``features`` list of strings.

    Returns:
        The title and the features joined by single spaces. If there are no
        features (empty, or not a list at all), only the title is returned,
        so the result carries no trailing space.
    """
    title = f"{row['title']}"
    features = row["features"]
    # A missing value (e.g. None or NaN) is not a list; treat it as "no features"
    # instead of letting str.join raise a TypeError.
    if not isinstance(features, (list, tuple)):
        features = []
    feature_text = " ".join(features)
    return f"{title} {feature_text}" if feature_text else title

In [7]:
def extract_first_large_image(row):
    """Return the ``large`` URL of the item's first image, or "" if unavailable.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) whose ``images`` value
            is a list of dicts, each of which may carry a ``large`` key.

    Returns:
        The ``large`` value of the first image, or an empty string when the
        item has no images or its first image has no ``large`` entry.
    """
    images = row["images"]
    # Items without any images would otherwise raise IndexError on ``[0]``.
    if not isinstance(images, (list, tuple)) or not images:
        return ""
    return images[0].get("large", "")

In [8]:
# Derive, row by row, the text to embed and a representative image URL.
# NOTE: `description` already exists in the loaded data (see the first
# df_items.head() output), so this assignment overwrites that column in place.
df_items["description"] = df_items.apply(preprocess_description, axis=1)
# `image` is a new column holding the first image's "large" URL.
df_items["image"] = df_items.apply(extract_first_large_image, axis=1)

In [9]:
df_items.head()

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author,image
0,All Electronics,2 Pack-iPhone Earbuds Wired Lightning Headphon...,3.4,598,[],2 Pack-iPhone Earbuds Wired Lightning Headphon...,,[{'thumb': 'https://m.media-amazon.com/images/...,[],WASABI MANGO,"[Electronics, Headphones, Earbuds & Accessorie...",{'Product Dimensions': '23.62 x 19.69 x 27.56 ...,B0B1ZVC7GJ,,,,https://m.media-amazon.com/images/I/31eziY1O3E...
1,Computers,"Mini PC 16GB DDR4 256GB M.2 SSD,Quad-Core 2.7G...",4.3,450,[„ÄêMeet to Sufficient Memory Storage„ÄëThis Mini ...,"Mini PC 16GB DDR4 256GB M.2 SSD,Quad-Core 2.7G...",,[{'thumb': 'https://m.media-amazon.com/images/...,[],OUVISLITE,"[Electronics, Computers & Accessories, Compute...","{'Screen Resolution': '3840 x 2160', 'Max Scre...",B0B1HNV2V9,,,,https://m.media-amazon.com/images/I/31d48aRFMM...
2,Computers,Samsers Foldable Bluetooth Keyboard with Touch...,4.4,308,[„ÄêFull-size Folding Wireless Keyboard„Äë Samsers...,Samsers Foldable Bluetooth Keyboard with Touch...,48.99,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'Love this keyboard!', 'url': 'http...",Samsers,"[Electronics, Computers & Accessories, Compute...",{'Product Dimensions': '13.5 x 4.5 x 0.4 inche...,B0C2Q8BDTX,,,,https://m.media-amazon.com/images/I/51bW+ihg8x...
3,Computers,"Rolling Laptop Bag Women with Wheels, Rolling ...",4.5,152,[MOBILE OFFICE: EMPSIGN rolling bag with lapto...,"Rolling Laptop Bag Women with Wheels, Rolling ...",,[{'thumb': 'https://m.media-amazon.com/images/...,[{'title': 'Elegant Laptop Tote Bag for Women ...,Ytonet,"[Electronics, Computers & Accessories, Laptop ...","{'Brand': 'Ytonet', 'Item model number': '1332...",B092Z9CTNK,,,,https://m.media-amazon.com/images/I/01RmK+J4pJ...
4,All Electronics,"Wireless Mouse, 2.4G Silent Mouse with USB Rec...",4.6,677,[Plug & Play Super Easy to Use- Just plug and ...,"Wireless Mouse, 2.4G Silent Mouse with USB Rec...",10.99,[{'thumb': 'https://m.media-amazon.com/images/...,[],MagoFeliz,"[Electronics, Computers & Accessories, Compute...",{'Product Dimensions': '4.69 x 2.6 x 0.01 inch...,B0C77L1G7V,,,,https://m.media-amazon.com/images/I/31cV2lTLOG...


In [10]:
list(df_items["description"].items())[0]

(0,
 '2 Pack-iPhone Earbuds Wired Lightning Headphone„ÄêApple MFi Certified„Äëin-Ear Headset Stereo Noise Canceling with Built-in Microphone & Volume Control Compatible with iPhone 13/12/11/SE/X/XR/8/7-All iOS ')

### Sample 50 items from the dataset

In [11]:
# Draw a fixed-size subset; the fixed random_state keeps the same 50 rows on every run.
df_sample = df_items.sample(n=50, random_state=42)

In [12]:
len(df_sample)

50

In [13]:
# Columns kept for each item; every record below becomes one dict with these keys.
payload_columns = [
    "description",
    "image",
    "rating_number",
    "price",
    "average_rating",
    "parent_asin",
]
data_to_embed = df_sample[payload_columns].to_dict(orient="records")

In [14]:
data_to_embed

[{'description': 'YKZ 3 in 1 Magnetic C to C Cable 4 Pack, Magnet Type C to Type C Charging Cable, [3.3FT+6.0FT] PD 60W Fast Charger Nylon Braided Cord for Type C/MicroUSB and i-Product „Äê3 in 1 USB C to USB C cable„Äë- This cable has 3 different magnet connectors, suitable for Type-c, Micro-USB devices and i-Product. (NOTE: This is the USB-C output cable, you may need a USB C wall charger to charge your device.) „ÄêPD 60W Fast Charge„Äë- YKZ type C to type C Cable supports Power Delivery fast charge up to 20V/3A (max), 15V/3A, 12V/3A, 9V/3A, 5V/3A at 60W with USB-C PD adapters such as PD 18W/30W/45W/60W. (Note: Charging speeds also depends on your device and charger. ) „ÄêMagnetic Charging Cable„Äë- Magnetic charging cable has a strong magnet that can attract the phone without falling off. You can easily connect with one hand while driving or doing other things. Besides, Magnetic tips can be used as a dust plug that reducing abrasion during inserting or plugging out the cable, greatl

### Define the embedding function

In [15]:
# Smoke-test the embeddings endpoint with a throwaway string so the length of
# the returned vector can be inspected.
response = openai.embeddings.create(model="text-embedding-3-small", input="Random text")

In [16]:
len(response.data[0].embedding)

1536

In [17]:
def get_embedding(text, model="text-embedding-3-small"):
    """Embed ``text`` with the OpenAI embeddings endpoint.

    Args:
        text: Input passed as ``input`` to ``openai.embeddings.create``.
        model: Name of the embedding model to use.

    Returns:
        The ``embedding`` of the first item in the response's ``data``.
    """
    return openai.embeddings.create(model=model, input=text).data[0].embedding

In [18]:
get_embedding("Hi")

[-0.0069594960659742355,
 -0.035274259746074677,
 0.0015957315918058157,
 0.06534460932016373,
 0.03293841332197189,
 -0.024201158434152603,
 -0.02610827423632145,
 0.04937804862856865,
 0.01623266376554966,
 -0.05168433114886284,
 -0.013357206247746944,
 -0.014599049463868141,
 -0.026019571349024773,
 -0.003257990349084139,
 0.024585537612438202,
 0.001171619864180684,
 -0.05345839262008667,
 0.015057348646223545,
 0.011487049050629139,
 0.03394371271133423,
 0.04934848099946976,
 0.020372141152620316,
 -0.01396334357559681,
 0.01887897402048111,
 0.017149262130260468,
 0.024156806990504265,
 0.01827283576130867,
 -0.0011956436792388558,
 0.01955902948975563,
 -0.03678221255540848,
 0.027675362303853035,
 -0.028207581490278244,
 0.027645794674754143,
 -0.01623266376554966,
 -0.011716199107468128,
 -0.01604047417640686,
 -0.01407422311604023,
 0.03758053854107857,
 0.01887897402048111,
 -0.037698812782764435,
 0.04343494400382042,
 -0.012411040253937244,
 0.020948711782693863,
 0.01355

### Create Qdrant collection

In [19]:
# Client for the Qdrant instance at localhost:6333; every collection call in
# this notebook goes through this object.
qdrant_client = QdrantClient(url="http://localhost:6333")

In [29]:
# Create the collection only if it is missing, so re-running this cell (or the
# whole notebook) does not fail because the collection already exists.
# size=1536 matches the length of the text-embedding-3-small vectors checked above.
if not qdrant_client.collection_exists("Amazon-items-collection-00"):
    qdrant_client.create_collection(
        collection_name="Amazon-items-collection-00",
        vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
    )

True

### Embed data

##### Test

In [30]:
# Build one throwaway point to check that embedding a string and wrapping it
# in a PointStruct works before processing the real data.
test_text = "Test text"
pointstruct = PointStruct(
    id=0,
    vector=get_embedding(test_text),
    payload={"text": test_text, "model": "text-embedding-3-small"},
)

In [31]:
pointstruct

PointStruct(id=0, vector=[-0.020057253539562225, 0.006970119196921587, 0.037700485438108444, -0.040323127061128616, -0.01916317082941532, -0.0343029722571373, 0.0005480913096107543, -0.02425944246351719, 0.03871377930045128, 0.0015953787369653583, 0.030667034909129143, 0.012792832218110561, -0.010863103903830051, 0.011287793517112732, 0.02639034017920494, 0.04303517937660217, -0.046820126473903656, -0.00690678833052516, -0.02174110896885395, 0.05108192190527916, 0.0070856050588190556, 0.013113211840391159, 0.01130269467830658, -0.032544609159231186, -0.00393396383151412, -0.039041608572006226, -0.036955416202545166, 0.004965884145349264, 0.05841339752078056, -0.07462609559297562, 0.031382299959659576, -0.044018667191267014, -0.00025891143013723195, -0.010974864475429058, 0.006005255039781332, 0.03814752772450447, 0.03030940145254135, 0.037640880793333054, 0.010274499654769897, -0.029355714097619057, -0.014916278421878815, -0.010386260226368904, 0.021279167383909225, 0.01814987696707248

##### Amazon data

In [32]:
# One point per sampled item: the id is the item's position in data_to_embed,
# the vector is the embedding of its description, and the whole record is
# stored as the payload.
pointstructs = [
    PointStruct(
        id=i,
        vector=get_embedding(data["description"]),
        payload=data,
    )
    for i, data in enumerate(data_to_embed)
]

In [33]:
pointstructs

[PointStruct(id=0, vector=[0.019009515643119812, -0.03079500049352646, -0.033127330243587494, -0.01179580669850111, -0.056595101952552795, -0.0018679273780435324, -0.013663733378052711, 0.020227279514074326, 0.018173592165112495, -0.028008589521050453, 0.021383123472332954, -0.0010307140182703733, -0.026481224223971367, -0.008581114001572132, 0.031187161803245544, 0.041920006275177, -0.06159000098705292, -0.016801025718450546, 0.009607957676053047, 0.02278664894402027, 0.004179616458714008, 0.04367441311478615, 0.03564542159438133, -0.011073403991758823, -0.04388081282377243, -0.02462361752986908, 0.023550333455204964, 0.0410737618803978, 0.012518209405243397, 0.016027022153139114, -0.02553177997469902, -0.01791558973491192, -0.027513228356838226, -0.03420061618089676, -0.040764160454273224, 0.013508932664990425, -0.017162228003144264, 0.025160258635878563, 0.028772274032235146, 0.0038080948870629072, 0.009721478447318077, -0.012961970642209053, 0.049990277737379074, -0.014747338369488

In [34]:
len(pointstructs)

50

### Write embedded data to Qdrant

In [35]:
# Send every point to the collection in a single upsert request; the returned
# UpdateResult is displayed as the cell output.
qdrant_client.upsert(
    collection_name="Amazon-items-collection-00",
    points=pointstructs,
    wait=True,
)

UpdateResult(operation_id=1, status=<UpdateStatus.COMPLETED: 'completed'>)

### Define a function for data retrieval

In [36]:
def retrieve_data(query, k=5, collection_name="Amazon-items-collection-00"):
    """Search a Qdrant collection for the points closest to a text query.

    Args:
        query: Query text; it is embedded with ``get_embedding`` before searching.
        k: Maximum number of points to request (passed as ``limit``).
        collection_name: Collection to search. Defaults to the collection
            this notebook creates, so existing calls keep working unchanged.

    Returns:
        The response of ``qdrant_client.query_points``; callers read the
        matches from its ``.points`` attribute.
    """
    query_embedding = get_embedding(query)
    return qdrant_client.query_points(
        collection_name=collection_name,
        query=query_embedding,
        limit=k,
    )

### Test retrieval

In [37]:
retrieve_data("What kind of charging cords do you offer?", k=10).points

[ScoredPoint(id=13, version=1, score=0.45503902, payload={'description': 'Surge Protector Power Strip Flat Plug, Swivel Extension Cord 6 Feet 3 Prong, 4 USB Ports(PD 24W), 8 Outlets, 2700 Joules, 15A/1875W, Circuit Breaker Switch, SGS Approved „Äê8-outlet Surge Protector 2700J„ÄëThe 3 level complementary Surge Protector Circuit which composed of TVS (transient voltage suppressor),MOV (metal oxide varistor), GDT (gas discharge tube), with minimum 2700 Joules energy absorbing capacity, could protect your devices much more quickly and reliably than other brand‚Äôs 1 level MOV Surge Protection Circuits. „ÄêMultiple protection„Äë2700J surge protector, USB over charging protection, grounded protection, overload current protection, short circuit protection, high temperature protection, combine to provide a superior safety. „ÄêUSB-A 24W FAST CHARGING„Äë The powerful 24-Watt charger port brings USB A Power Delivery up to 3x faster than original 5W chargers. Charges the USB device to 50% in arou