In [1]:
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct

import pandas as pd
import openai

In [2]:
# Read the JSON Lines dataset produced by the previous notebook
# (lines=True: each line of the file is parsed as one JSON record)
df_items = pd.read_json(
    path_or_buf="../../data/meta_Electronics_2022_2023_with_category_ratings_100_sample_1000.jsonl",
    lines=True,
)

In [3]:
# Preview the first rows of the raw dataset
df_items.head(n=5)

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
0,Industrial & Scientific,"RAVODOI USB C Cable, [2Pack/3.3ft+6.6ft] USB T...",4.4,119,[„ÄêFast Charging Cord„ÄëThese USB C cables provid...,[],,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'Type-C Charger Cable ', 'url': 'ht...",RAVODOI,"[Electronics, Computers & Accessories, Compute...","{'Brand': 'RAVODOI', 'Connector Type': 'USB Ty...",B09R4Y2HKY,,,
1,All Electronics,"SNESH-2 Pack USB-C Female to USB Male Adapter,...",4.5,352,[üîπ(Light & compact) Easy to carry and light we...,[],4.99,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'USB Male & Female Adapter', 'url':...",SNESH,"[Electronics, Computers & Accessories, Compute...",{'Package Dimensions': '3.54 x 2.4 x 0.35 inch...,B09JV5FM2S,,,
2,All Electronics,USB C Docking Station Dual Monitor for MacBook...,3.9,1193,[„Äê18-in-1Docking Station„ÄëWith USB C Docking St...,[],,[{'thumb': 'https://m.media-amazon.com/images/...,[],ZMUIPNG,"[Electronics, Computers & Accessories, Laptop ...","{'Product Dimensions': '3.94""L x 1.18""W x 3.94...",B09SFN9NRX,,,
3,Camera & Photo,[2023 Upgraded] Telescopes for Adults Astronom...,4.1,219,[üéÅ„Äê2023 All New Experience„ÄëThe newly upgraded ...,[],169.99,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'Good picture quality', 'url': 'htt...",HUTACT,"[Electronics, Camera & Photo, Binoculars & Sco...","{'Product Dimensions': '32.5""D x 5.5""W x 9.7""H...",B09TP3SZ7C,,,
4,AMAZON FASHION,"Laptop Bag 15.6 Inch, Laptop Briefcase Messeng...",4.5,222,"[Leather,Mesh, Imported, Multi-pockets and Lar...",[],24.95,[{'thumb': 'https://m.media-amazon.com/images/...,[],KPIQIU,"[Electronics, Computers & Accessories, Laptop ...",{'Product Dimensions': '16 x 2 x 12 inches; 1....,B0B5H7T7XZ,,,


In [4]:
# Let's do some preprocessing to prepare this dataset for embedding into the vector database:
# we will overwrite the description field with the concatenation of title + features, and we
# will store the first available image URL in a new image field

def preprocess_description(row):
    """Build the text to embed for one item.

    Args:
        row: Mapping-like item exposing a 'title' value and an iterable of
            strings under 'features'.

    Returns:
        The title, a single space, then all features joined by single spaces.
    """
    title = row["title"]
    joined_features = " ".join(row["features"])
    return f"{title} {joined_features}"


def extract_first_large_image(row):
    """Return the 'large' URL of the item's first image, or "" if unavailable.

    Args:
        row: Mapping-like item whose 'images' value is expected to be a
            sequence of dicts (one per image).

    Returns:
        The value stored under 'large' in the first image dict. An empty
        string is returned when that key is absent, when the image sequence
        is empty, or when 'images' is not indexable (e.g. None / NaN).
    """
    try:
        first_image = row["images"][0]
    except (IndexError, KeyError, TypeError):
        # No first image to read: empty list, missing 'images' key, or a
        # non-sequence placeholder such as None / NaN.
        return ""
    return first_image.get("large", "")

# Rebuild the frame with `description` replaced by the title + features text
# and an `image` column holding the first large image URL of each item
df_items = df_items.assign(
    description=lambda frame: frame.apply(preprocess_description, axis=1),
    image=lambda frame: frame.apply(extract_first_large_image, axis=1),
)

In [5]:
# Preview the first rows after preprocessing (note the new `image` column)
df_items.head(n=5)

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author,image
0,Industrial & Scientific,"RAVODOI USB C Cable, [2Pack/3.3ft+6.6ft] USB T...",4.4,119,[„ÄêFast Charging Cord„ÄëThese USB C cables provid...,"RAVODOI USB C Cable, [2Pack/3.3ft+6.6ft] USB T...",,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'Type-C Charger Cable ', 'url': 'ht...",RAVODOI,"[Electronics, Computers & Accessories, Compute...","{'Brand': 'RAVODOI', 'Connector Type': 'USB Ty...",B09R4Y2HKY,,,,https://m.media-amazon.com/images/I/51G07yWoOB...
1,All Electronics,"SNESH-2 Pack USB-C Female to USB Male Adapter,...",4.5,352,[üîπ(Light & compact) Easy to carry and light we...,"SNESH-2 Pack USB-C Female to USB Male Adapter,...",4.99,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'USB Male & Female Adapter', 'url':...",SNESH,"[Electronics, Computers & Accessories, Compute...",{'Package Dimensions': '3.54 x 2.4 x 0.35 inch...,B09JV5FM2S,,,,https://m.media-amazon.com/images/I/41bOA5-ogW...
2,All Electronics,USB C Docking Station Dual Monitor for MacBook...,3.9,1193,[„Äê18-in-1Docking Station„ÄëWith USB C Docking St...,USB C Docking Station Dual Monitor for MacBook...,,[{'thumb': 'https://m.media-amazon.com/images/...,[],ZMUIPNG,"[Electronics, Computers & Accessories, Laptop ...","{'Product Dimensions': '3.94""L x 1.18""W x 3.94...",B09SFN9NRX,,,,https://m.media-amazon.com/images/I/416IzmVKiC...
3,Camera & Photo,[2023 Upgraded] Telescopes for Adults Astronom...,4.1,219,[üéÅ„Äê2023 All New Experience„ÄëThe newly upgraded ...,[2023 Upgraded] Telescopes for Adults Astronom...,169.99,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'Good picture quality', 'url': 'htt...",HUTACT,"[Electronics, Camera & Photo, Binoculars & Sco...","{'Product Dimensions': '32.5""D x 5.5""W x 9.7""H...",B09TP3SZ7C,,,,https://m.media-amazon.com/images/I/41wO4J3TT0...
4,AMAZON FASHION,"Laptop Bag 15.6 Inch, Laptop Briefcase Messeng...",4.5,222,"[Leather,Mesh, Imported, Multi-pockets and Lar...","Laptop Bag 15.6 Inch, Laptop Briefcase Messeng...",24.95,[{'thumb': 'https://m.media-amazon.com/images/...,[],KPIQIU,"[Electronics, Computers & Accessories, Laptop ...",{'Product Dimensions': '16 x 2 x 12 inches; 1....,B0B5H7T7XZ,,,,https://m.media-amazon.com/images/I/41mwlYqT5p...


In [None]:
# Show one example of a preprocessed description as an (index, text) pair.
# Only the first pair is pulled from the iterator, so the full list of
# (index, description) pairs is never materialized.
next(iter(df_items["description"].items()))

(0,
 "RAVODOI USB C Cable, [2Pack/3.3ft+6.6ft] USB Type C Fast Charging Cord - Nylon Braided USB C Charger Cable for Galaxy A20/A50/S10/S9/S8+/S8, iPad Pro 2018, Sony XZ, HTC 10, OnePlus 5T, Huawei P9 etc. „ÄêFast Charging Cord„ÄëThese USB C cables provide up to a 3A charging current to greatly shorten the charging time, meets QC2.0 /3.0 fast charging protocol,Incredibly charge your phone from 0 to 80% in 50 minute. 480Mbps (40-60M/s) ultra fast data transmission, which leads to a faster data sync.(Note:Cables support fast charging,but require a USB-A QC3.0/QC2.0/AFC charger) „ÄêUniversal Compatibility„ÄëThe USB C Charger Cable is compatible with Samsung Galaxy S20 / S10 / S9 / S8+ / S8 / A02s / A03s,A12 A20 A21 A22 A23 A31 A32 A33 A41 A42 A50 A52 A52s 5G A71 A72 A73,M11 M21 M31 M51,M12 M22 M32 M52,iPad Pro 2018 / 2020,Sony Xperia XZ/X Compact/L1 / XZs / XA1 / X Premium, HTC 10 LG G5 G6,OnePlus 5T / 6T, Lumia 950 / 950XL,Huawei P9 P9 Plus P10 P10 Plus Honor Mate 9 Mate 9 pro Mate 10 pr

In [7]:
# Work on a reproducible 50-item sample to keep development iterations fast
df_sample = df_items.sample(n=50, random_state=42)
# Keep only the fields that will be stored in the vector database,
# as a list with one dict per item
data_to_embed = df_sample.loc[
    :,
    ["description", "image", "rating_number", "price", "average_rating", "parent_asin"],
].to_dict("records")
# Show the records that are going to be embedded
data_to_embed

[{'description': 'KEEPRO Pencil 2nd Generation for iPad, Magnetic Wireless Charge Tilt Sensitivity Palm Rejection Active Pen for Apple iPad Pro 11" 4/3/2/1, iPad Pro 12.9" 6/5/4/3, iPad Air 4/5, iPad Mini 6 [Compatibility]- ONLY compatible with iPad mini (6th generation), iPad Air (4th and 5th generation), iPad Pro 12.9-inch (3rd, 4th, 5th and 6th generation), iPad Pro 11-inch (1st, 2nd, 3rd and 4th generation), check and confirm your device before place the order (Note: If the pen doesn\'t charge, fully charge your iPad first then try charging the pen again) [Charging and Pairs Magnetically]- Charges wirelessly, attaches and pairs magnetically to the compatible iPad, this pen is a preferable alternative to the Apple Pencil 2nd Generation [Tilt Sensitivity & Pixel Precision]- Pixel-perfect precision and industry-leading low latency with tilt sensitivity making drawing, sketching, coloring, taking notes, and marking up PDFs, as easy and natural as a real pencil [Native Palm Rejection]- 

In [8]:
# Name of the OpenAI embedding model used in this notebook
model = "text-embedding-3-small"


# Embedding helper shared by the indexing and retrieval steps
def get_embedding(text, model=model):
    """Return the embedding OpenAI produces for `text`.

    Args:
        text: Input forwarded to the OpenAI embeddings endpoint.
        model: Embedding model name. Defaults to the value the module-level
            `model` variable had when this function was defined.

    Returns:
        The `embedding` attribute of the first entry in the response data.
    """
    embedding_entries = openai.embeddings.create(model=model, input=text).data
    return embedding_entries[0].embedding

In [9]:
# Probe the embedding model once to find out how long its vectors are.
# This matters because a Qdrant collection must be created with the size
# of the vectors it is going to store
test_embedding = get_embedding(text="This is a test embedding")
len(test_embedding)  # Should be 1536 for text-embedding-3-small

1536

In [10]:
# Create the Qdrant client, pointing at a server on localhost port 6333.
# Make sure the Qdrant server is running locally via docker compose first:
# run `make run-docker-compose` in the root directory, then execute this cell
qdrant_client = QdrantClient(url="http://localhost:6333")

In [11]:
# Name of the Qdrant collection that will store the data_to_embed items
collection_name = "Amazon-items-collection-00"

In [12]:
# Create the Qdrant collection to store the Amazon items.
# The creation is guarded so this cell can be re-run against a server that
# already holds the collection (creating an existing collection fails).
if not qdrant_client.collection_exists(collection_name):
    qdrant_client.create_collection(
        collection_name=collection_name,
        # Size the vectors from the probe embedding (`test_embedding`) so the
        # collection always matches the embedding model in use
        # (1536 for text-embedding-3-small).
        vectors_config=VectorParams(
            size=len(test_embedding),
            distance=Distance.COSINE,
        ),
    )
# Display whether the collection is now available
qdrant_client.collection_exists(collection_name)

True

In [13]:
# Embed the dataset records (data_to_embed) and wrap them as Qdrant points:
# - id: position of the record in data_to_embed
# - vector: embedding of the record's description field
# - payload: all the record fields, stored as metadata in Qdrant
pointstructs = [
    PointStruct(
        id=point_id,
        vector=get_embedding(record["description"]),
        payload=record,
    )
    for point_id, record in enumerate(data_to_embed)
]

In [14]:
# Show the pointstructs to be uploaded
# (this renders every point, including its full embedding vector)
pointstructs

[PointStruct(id=0, vector=[0.010930042713880539, -0.015056160278618336, 0.01137808058410883, -0.011284304782748222, -0.0503886416554451, -0.0004936232580803335, -0.03859378397464752, 0.04171963036060333, 0.002547564683482051, -0.010346551425755024, -0.0291745662689209, 0.014462249353528023, -0.03096671774983406, 0.08677349239587784, 0.01424343977123499, 0.004819013178348541, -0.03596807271242142, 0.009127471596002579, -0.018807174637913704, 0.016889989376068115, 0.01028924435377121, 0.0174943208694458, 0.033800818026065826, 0.03059161640703678, 0.021776730194687843, 0.02110988274216652, -0.03313397243618965, -0.026548855006694794, -0.023027068004012108, 0.05222247168421745, -0.013222330249845982, -0.020338840782642365, -0.01941150613129139, -0.049179982393980026, -0.04113613814115524, -0.019265633076429367, -0.017202574759721756, 0.01579594425857067, -0.018254943192005157, 0.0005453950725495815, 0.024214889854192734, 0.04019838199019432, 0.00795527920126915, 0.0011038144584745169, -0.0

In [15]:
# Upload the prepared points into the Qdrant collection and show the result
qdrant_client.upsert(
    points=pointstructs,
    collection_name=collection_name,
    wait=True,
)

UpdateResult(operation_id=1, status=<UpdateStatus.COMPLETED: 'completed'>)

In [None]:
# Retrieval helper: fetch from Qdrant the top K items most similar to a query text
def retrieve_data(query, k=5):
    """Query the Qdrant collection for the items closest to `query`.

    Args:
        query: Query text; it is embedded with `get_embedding` before searching.
        k: Maximum number of results requested from Qdrant (default 5).

    Returns:
        The object returned by `qdrant_client.query_points`.
    """
    # The query goes through the same embedding function as the stored items
    query_vector = get_embedding(query)
    return qdrant_client.query_points(
        collection_name=collection_name,
        query=query_vector,
        limit=k,
    )

In [17]:
# Finally, try the retrieval function and show the 10 best-scoring points
retrieve_data(query="What kind of charging cords do you offer?", k=10).points

[ScoredPoint(id=4, version=1, score=0.4795945, payload={'description': 'iPhone Charger Cord Lightning Cables, Original 2022 Upgraded [3Pack 3ft] Apple MFi Certified USB A Charging Cable for iPhone 13 12 11 Mini Pro XR Xs Max X SE 8 7 6 Plus iPad iPod AirPods - Black Original Apple Cable - iPhone charging cable with a reinforced joint design that has passed 30,000+ times bending tests for extra protection and durability. Insulation and precisely layer-welded connectors, which make the cable more durable and sturdier than normal iPhone charger cables but also flexible and tangle-free. Instant Responses - No error message pops up. Real high-speed iPhone charging cable with stable and efficient current output, you can plug it into laptop computers and in-car charging adapter. Super Fast Charge - The iPhone Lightning cable build-in four-core high-quality copper wires and multiple safety protections overcharge/stable current/automatic switching/battery protection to promote maximum signal qu