In [41]:
import pandas as pd
import openai

from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct, VectorParams, Distance

### Read sampled dataset - Amazon inventory data

In [20]:
df_items = pd.read_json('../../data/meta_Electronics_2022_2023_with_category_ratings_100_sample_1000.jsonl', lines=True)

In [21]:
# Display the first five rows of the dataframe to provide a quick overview of the data structure and some sample entries.
df_items.head()

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
0,Industrial & Scientific,"RAVODOI USB C Cable, [2Pack/3.3ft+6.6ft] USB T...",4.4,119,[„ÄêFast Charging Cord„ÄëThese USB C cables provid...,[],,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'Type-C Charger Cable ', 'url': 'ht...",RAVODOI,"[Electronics, Computers & Accessories, Compute...","{'Brand': 'RAVODOI', 'Connector Type': 'USB Ty...",B09R4Y2HKY,,,
1,All Electronics,"SNESH-2 Pack USB-C Female to USB Male Adapter,...",4.5,352,[üîπ(Light & compact) Easy to carry and light we...,[],4.99,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'USB Male & Female Adapter', 'url':...",SNESH,"[Electronics, Computers & Accessories, Compute...",{'Package Dimensions': '3.54 x 2.4 x 0.35 inch...,B09JV5FM2S,,,
2,All Electronics,USB C Docking Station Dual Monitor for MacBook...,3.9,1193,[„Äê18-in-1Docking Station„ÄëWith USB C Docking St...,[],,[{'thumb': 'https://m.media-amazon.com/images/...,[],ZMUIPNG,"[Electronics, Computers & Accessories, Laptop ...","{'Product Dimensions': '3.94""L x 1.18""W x 3.94...",B09SFN9NRX,,,
3,Camera & Photo,[2023 Upgraded] Telescopes for Adults Astronom...,4.1,219,[üéÅ„Äê2023 All New Experience„ÄëThe newly upgraded ...,[],169.99,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'Good picture quality', 'url': 'htt...",HUTACT,"[Electronics, Camera & Photo, Binoculars & Sco...","{'Product Dimensions': '32.5""D x 5.5""W x 9.7""H...",B09TP3SZ7C,,,
4,AMAZON FASHION,"Laptop Bag 15.6 Inch, Laptop Briefcase Messeng...",4.5,222,"[Leather,Mesh, Imported, Multi-pockets and Lar...",[],24.95,[{'thumb': 'https://m.media-amazon.com/images/...,[],KPIQIU,"[Electronics, Computers & Accessories, Laptop ...",{'Product Dimensions': '16 x 2 x 12 inches; 1....,B0B5H7T7XZ,,,


In [22]:
# This line takes the "features" column of the dataframe (which is likely a pandas Series),
# calls .items() to get iterable of (index, value) pairs, converts that to a list,
# and returns the first (index, value) pair. This is essentially showing the first entry
# in the "features" column along with its row index.
list(df_items["features"].items())[0]

(0,
 ['„ÄêFast Charging Cord„ÄëThese USB C cables provide up to a 3A charging current to greatly shorten the charging time, meets QC2.0 /3.0 fast charging protocol,Incredibly charge your phone from 0 to 80% in 50 minute. 480Mbps (40-60M/s) ultra fast data transmission, which leads to a faster data sync.(Note:Cables support fast charging,but require a USB-A QC3.0/QC2.0/AFC charger)',
  '„ÄêUniversal Compatibility„ÄëThe USB C Charger Cable is compatible with Samsung Galaxy S20 / S10 / S9 / S8+ / S8 / A02s / A03s,A12 A20 A21 A22 A23 A31 A32 A33 A41 A42 A50 A52 A52s 5G A71 A72 A73,M11 M21 M31 M51,M12 M22 M32 M52,iPad Pro 2018 / 2020,Sony Xperia XZ/X Compact/L1 / XZs / XA1 / X Premium, HTC 10 LG G5 G6,OnePlus 5T / 6T, Lumia 950 / 950XL,Huawei P9 P9 Plus P10 P10 Plus Honor Mate 9 Mate 9 pro Mate 10 pro Mate 10 lite and more',
  '„ÄêPremium Workmanship„ÄëUnique increased friction design allows you to easily unplug the cable from your charger,combine 250d bulletproof fiber core to build a cabl

### Preprocess title and features

In [23]:
def preprocess_description(row):
    """
    Preprocess the description of an item - join title and features.
    """
    return f"{row['title']} {' '.join(row['features'])}"



In [24]:
list(df_items["images"].items())[0]

(0,
 [{'thumb': 'https://m.media-amazon.com/images/I/51G07yWoOBL._SX38_SY50_CR,0,0,38,50_.jpg',
   'large': 'https://m.media-amazon.com/images/I/51G07yWoOBL.jpg',
   'variant': 'MAIN',
   'hi_res': 'https://m.media-amazon.com/images/I/611AVJMH+JL._SL1200_.jpg'},
  {'thumb': 'https://m.media-amazon.com/images/I/41c+40oKkKL._SX38_SY50_CR,0,0,38,50_.jpg',
   'large': 'https://m.media-amazon.com/images/I/41c+40oKkKL.jpg',
   'variant': 'PT01',
   'hi_res': 'https://m.media-amazon.com/images/I/61ihhPW7VCL._SL1200_.jpg'},
  {'thumb': 'https://m.media-amazon.com/images/I/51y1YnwiUZL._SX38_SY50_CR,0,0,38,50_.jpg',
   'large': 'https://m.media-amazon.com/images/I/51y1YnwiUZL.jpg',
   'variant': 'PT02',
   'hi_res': 'https://m.media-amazon.com/images/I/61UkcVETKcL._SL1200_.jpg'},
  {'thumb': 'https://m.media-amazon.com/images/I/41Nvr++q39L._SX38_SY50_CR,0,0,38,50_.jpg',
   'large': 'https://m.media-amazon.com/images/I/41Nvr++q39L.jpg',
   'variant': 'PT03',
   'hi_res': 'https://m.media-amazon.c

In [25]:
def extract_main_image(row):
    """
    Extract the main image from the images list.
    """
    return row["images"][0].get("large", "")


In [26]:
df_items["description"] = df_items.apply(preprocess_description, axis=1)
df_items["image"] = df_items.apply(extract_main_image, axis=1)

In [27]:
df_items.head()

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author,image
0,Industrial & Scientific,"RAVODOI USB C Cable, [2Pack/3.3ft+6.6ft] USB T...",4.4,119,[„ÄêFast Charging Cord„ÄëThese USB C cables provid...,"RAVODOI USB C Cable, [2Pack/3.3ft+6.6ft] USB T...",,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'Type-C Charger Cable ', 'url': 'ht...",RAVODOI,"[Electronics, Computers & Accessories, Compute...","{'Brand': 'RAVODOI', 'Connector Type': 'USB Ty...",B09R4Y2HKY,,,,https://m.media-amazon.com/images/I/51G07yWoOB...
1,All Electronics,"SNESH-2 Pack USB-C Female to USB Male Adapter,...",4.5,352,[üîπ(Light & compact) Easy to carry and light we...,"SNESH-2 Pack USB-C Female to USB Male Adapter,...",4.99,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'USB Male & Female Adapter', 'url':...",SNESH,"[Electronics, Computers & Accessories, Compute...",{'Package Dimensions': '3.54 x 2.4 x 0.35 inch...,B09JV5FM2S,,,,https://m.media-amazon.com/images/I/41bOA5-ogW...
2,All Electronics,USB C Docking Station Dual Monitor for MacBook...,3.9,1193,[„Äê18-in-1Docking Station„ÄëWith USB C Docking St...,USB C Docking Station Dual Monitor for MacBook...,,[{'thumb': 'https://m.media-amazon.com/images/...,[],ZMUIPNG,"[Electronics, Computers & Accessories, Laptop ...","{'Product Dimensions': '3.94""L x 1.18""W x 3.94...",B09SFN9NRX,,,,https://m.media-amazon.com/images/I/416IzmVKiC...
3,Camera & Photo,[2023 Upgraded] Telescopes for Adults Astronom...,4.1,219,[üéÅ„Äê2023 All New Experience„ÄëThe newly upgraded ...,[2023 Upgraded] Telescopes for Adults Astronom...,169.99,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'Good picture quality', 'url': 'htt...",HUTACT,"[Electronics, Camera & Photo, Binoculars & Sco...","{'Product Dimensions': '32.5""D x 5.5""W x 9.7""H...",B09TP3SZ7C,,,,https://m.media-amazon.com/images/I/41wO4J3TT0...
4,AMAZON FASHION,"Laptop Bag 15.6 Inch, Laptop Briefcase Messeng...",4.5,222,"[Leather,Mesh, Imported, Multi-pockets and Lar...","Laptop Bag 15.6 Inch, Laptop Briefcase Messeng...",24.95,[{'thumb': 'https://m.media-amazon.com/images/...,[],KPIQIU,"[Electronics, Computers & Accessories, Laptop ...",{'Product Dimensions': '16 x 2 x 12 inches; 1....,B0B5H7T7XZ,,,,https://m.media-amazon.com/images/I/41mwlYqT5p...


In [28]:
list(df_items["description"].items())[0]


(0,
 "RAVODOI USB C Cable, [2Pack/3.3ft+6.6ft] USB Type C Fast Charging Cord - Nylon Braided USB C Charger Cable for Galaxy A20/A50/S10/S9/S8+/S8, iPad Pro 2018, Sony XZ, HTC 10, OnePlus 5T, Huawei P9 etc. „ÄêFast Charging Cord„ÄëThese USB C cables provide up to a 3A charging current to greatly shorten the charging time, meets QC2.0 /3.0 fast charging protocol,Incredibly charge your phone from 0 to 80% in 50 minute. 480Mbps (40-60M/s) ultra fast data transmission, which leads to a faster data sync.(Note:Cables support fast charging,but require a USB-A QC3.0/QC2.0/AFC charger) „ÄêUniversal Compatibility„ÄëThe USB C Charger Cable is compatible with Samsung Galaxy S20 / S10 / S9 / S8+ / S8 / A02s / A03s,A12 A20 A21 A22 A23 A31 A32 A33 A41 A42 A50 A52 A52s 5G A71 A72 A73,M11 M21 M31 M51,M12 M22 M32 M52,iPad Pro 2018 / 2020,Sony Xperia XZ/X Compact/L1 / XZs / XA1 / X Premium, HTC 10 LG G5 G6,OnePlus 5T / 6T, Lumia 950 / 950XL,Huawei P9 P9 Plus P10 P10 Plus Honor Mate 9 Mate 9 pro Mate 10 pr

In [29]:
list(df_items["image"].items())[0]

(0, 'https://m.media-amazon.com/images/I/51G07yWoOBL.jpg')

## Sample 50 items from dataset

In [30]:
df_sample = df_items.sample(50, random_state=42)

In [31]:
df_sample.head()


Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author,image
521,All Electronics,"KEEPRO Pencil 2nd Generation for iPad, Magneti...",4.3,2131,[[Compatibility]- ONLY compatible with iPad mi...,"KEEPRO Pencil 2nd Generation for iPad, Magneti...",22.97,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'Works great', 'url': 'https://www....",KEEPRO,"[Electronics, Computers & Accessories, Tablet ...",{'Product Dimensions': '6.5 x 0.35 x 0.35 inch...,B0BF18F6R7,,,,https://m.media-amazon.com/images/I/21XciKpEmp...
737,Industrial & Scientific,HOSONGIN 1/4 TRS Stereo Jack to Dual 1/4 TS Mo...,4.6,114,[EASILY CONNECT DEVICES: HOSONGIN 1/4 inch TRS...,HOSONGIN 1/4 TRS Stereo Jack to Dual 1/4 TS Mo...,14.99,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'YPP-136 TRS to TS 1/4"" Stereo Brea...",HOSONGIN,"[Electronics, Home Audio, Home Audio Accessori...","{'Brand': 'HOSONGIN', 'Connector Type': 'Auxil...",B0B96LV4C5,,,,https://m.media-amazon.com/images/I/41J3p7fyJH...
740,Computers,"Tenda A33 AX3000 WiFi 6 Extender, WiFi Booster...",4.4,560,[Improved WiFi Coverage - With 2 * 5dbi dual-b...,"Tenda A33 AX3000 WiFi 6 Extender, WiFi Booster...",84.99,[{'thumb': 'https://m.media-amazon.com/images/...,[{'title': 'Tenda A33 - AX3000 WiFi 6 Extender...,Tenda,"[Electronics, Computers & Accessories, Network...",{'Package Dimensions': '7.64 x 4.65 x 3.58 inc...,B0BZ5R7CVP,,,,https://m.media-amazon.com/images/I/31dq3Sl6On...
660,All Electronics,"Bluetooth Transmitter Receiver, 5.0 Bluetooth ...",3.9,220,[„Äê2-In-1 Bluetooth Transceiver„Äë In the transmi...,"Bluetooth Transmitter Receiver, 5.0 Bluetooth ...",,[{'thumb': 'https://m.media-amazon.com/images/...,[{'title': 'Reference video for the use of lar...,Fossduck,"[Electronics, Home Audio, Wireless & Streaming...",{'Package Dimensions': '3.66 x 3.58 x 2.13 inc...,B09PFSVK44,,,,https://m.media-amazon.com/images/I/41ih5c0-Xk...
411,Industrial & Scientific,"iPhone Charger Cord Lightning Cables, Original...",4.5,4709,[Original Apple Cable - iPhone charging cable ...,"iPhone Charger Cord Lightning Cables, Original...",12.59,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'Original Apple Cable', 'url': 'htt...",Cablex,"[Electronics, Computers & Accessories, Compute...","{'Brand': 'HFHSCN', 'Connector Type': 'Lightni...",B0BYYLJRHT,,,,https://m.media-amazon.com/images/I/41lk7yDhf3...


In [32]:
data_to_embed = df_sample[["description", "image", "rating_number", "price", "average_rating", "parent_asin"]].to_dict(orient="records")

In [33]:
data_to_embed

[{'description': 'KEEPRO Pencil 2nd Generation for iPad, Magnetic Wireless Charge Tilt Sensitivity Palm Rejection Active Pen for Apple iPad Pro 11" 4/3/2/1, iPad Pro 12.9" 6/5/4/3, iPad Air 4/5, iPad Mini 6 [Compatibility]- ONLY compatible with iPad mini (6th generation), iPad Air (4th and 5th generation), iPad Pro 12.9-inch (3rd, 4th, 5th and 6th generation), iPad Pro 11-inch (1st, 2nd, 3rd and 4th generation), check and confirm your device before place the order (Note: If the pen doesn\'t charge, fully charge your iPad first then try charging the pen again) [Charging and Pairs Magnetically]- Charges wirelessly, attaches and pairs magnetically to the compatible iPad, this pen is a preferable alternative to the Apple Pencil 2nd Generation [Tilt Sensitivity & Pixel Precision]- Pixel-perfect precision and industry-leading low latency with tilt sensitivity making drawing, sketching, coloring, taking notes, and marking up PDFs, as easy and natural as a real pencil [Native Palm Rejection]- 

### using open AI text embedding small model - define embeddings function

In [38]:
response = openai.embeddings.create(
    model="text-embedding-3-small",
    input="random text"
)
len(response.data[0].embedding)



1536

In [39]:
def get_embeddings(text, model="text-embedding-3-small"):
    response = openai.embeddings.create(
        model=model,
        input=text
    )
    return response.data[0].embedding


### create qdrant collection

In [45]:
qdrant_client = QdrantClient(url="http://localhost:6333")


In [None]:
qdrant_client.create_collection(
    collection_name="amazon_items-collection-00",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)

qdrant_client.get_collection("amazon_items-collection-00")


CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=None, indexed_vectors_count=0, points_count=0, segments_count=5, config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=1536, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None, datatype=None, multivector_config=None), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=10000, flush_interval_sec=5, max_optimization_threads=None), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0), quan

### embed data

#### testing point struct before using with actual database

In [47]:
pointstruct = PointStruct(
    id=0,
    vector=get_embeddings("some description"),
    payload={
        "text": "some description",
        "model": "text-embedding-3-small"
    }
)



In [48]:
pointstruct

PointStruct(id=0, vector=[-0.02134859375655651, 0.017555031925439835, -0.05452263355255127, -0.011773121543228626, -0.020441627129912376, -0.04587157070636749, -0.0034360073041170835, 0.036069355905056, 0.020214885473251343, -0.008563855662941933, -0.003891670610755682, -0.04866223409771919, -0.019918376579880714, -0.046394817531108856, 0.0402553528547287, 0.022848576307296753, 0.003429466625675559, 0.037918172776699066, 0.002424390986561775, 0.06488297879695892, 0.04241811856627464, 0.003139499109238386, 0.0180172361433506, 0.06341788172721863, -0.015209128148853779, -0.03854607045650482, -0.02745317481458187, -0.0025464824866503477, 0.08078977465629578, -0.006078418809920549, 0.03383682295680046, -0.05717376619577408, 0.04615063592791557, -0.05919699743390083, -0.000171555089764297, -0.01817421056330204, 0.06460390985012054, -0.01581086590886116, 0.035301923751831055, 0.001763787935487926, -0.03634842112660408, -0.047371551394462585, 0.015278895385563374, 0.005877839867025614, 0.0263

#### actual amazon database - with pointstruct


In [49]:
pointstructs = []
for i, row in enumerate(data_to_embed):
    embedding = get_embeddings(row["description"])
    pointstructs.append(PointStruct(
        id=i,
        vector=embedding,
        payload=row
    ))

In [50]:
pointstructs

[PointStruct(id=0, vector=[0.010930042713880539, -0.015056160278618336, 0.01137808058410883, -0.011284304782748222, -0.0503886416554451, -0.0004936232580803335, -0.03859378397464752, 0.04171963036060333, 0.002547564683482051, -0.010346551425755024, -0.0291745662689209, 0.014462249353528023, -0.03096671774983406, 0.08677349239587784, 0.01424343977123499, 0.004819013178348541, -0.03596807271242142, 0.009127471596002579, -0.018807174637913704, 0.016889989376068115, 0.01028924435377121, 0.0174943208694458, 0.033800818026065826, 0.03059161640703678, 0.021776730194687843, 0.02110988274216652, -0.03313397243618965, -0.026548855006694794, -0.023027068004012108, 0.05222247168421745, -0.013222330249845982, -0.020338840782642365, -0.01941150613129139, -0.049179982393980026, -0.04113613814115524, -0.019265633076429367, -0.017202574759721756, 0.01579594425857067, -0.018254943192005157, 0.0005453950725495815, 0.024214889854192734, 0.04019838199019432, 0.00795527920126915, 0.0011038144584745169, -0.0

### publish this data to qdrant

In [51]:
qdrant_client.upsert(
    collection_name="amazon_items-collection-00",
    wait=True,
    points=pointstructs
)



UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

### Data retrival function

In [61]:
def retrieve_data(query, k=5):
    """
    Retrieve k most similar items to the query from Qdrant collection.
    """
    response = qdrant_client.query_points(
        collection_name="amazon_items-collection-00",
        query=get_embeddings(query),
        limit=k
    )
    return response

#### Test retrival


In [67]:
retrieve_data("what various tablets are available", k=10).points

[ScoredPoint(id=16, version=0, score=0.25013784, payload={'description': 'ESR for iPad Air 5th Generation Case 2022 / iPad Air 4th Case 2020, Rugged Protection, Detachable Magnetic Cover, Adjustable Portrait/Landscape Stand with Raised Screen View, 8 Stand Angles, Black Compatibility: only compatible with iPad Air 5th Generation 2022 and iPad Air 4th Generation 2020; not compatible with any other iPad models Detachable Magnetic Cover: keep your screen covered, fold it up into a stand for viewing/writing/portrait modes, or remove it for better ergonomics while gaming Rugged Protection: rugged shock-resistant polymer provides robust protection against scratches and drops Stable and Adjustable Stand: magnetically locks to 8 different stand positions that stay stable in bed, on the couch, or anywhere else Stand Your Way: raised screen view, portrait mode, and magnetic fridge mounting let you comfortably view and use your iPad in the office, kitchen, and beyond Complete Customer Support: de