In [None]:
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct

import openai
import pandas as pd

#### Initialize the Qdrant client and create the collection

In [None]:
# Connect to the Qdrant instance listening on the local default HTTP port.
qdrant_client = QdrantClient(url="http://localhost:6333")

# The collection name must match the one the upsert and retrieval cells
# target ("Amazon-items-collection-00"); creating a differently named
# collection here leaves those cells pointing at a collection that this
# notebook never created.
# The existence check keeps the cell re-runnable: create_collection raises
# if the collection is already present.
if not qdrant_client.collection_exists("Amazon-items-collection-00"):
    qdrant_client.create_collection(
        collection_name="Amazon-items-collection-00",
        # size must equal the embedding length produced by the embedding
        # model (text-embedding-3-small returns 1536-dimensional vectors by
        # default); cosine distance is used for similarity.
        vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
    )

#### Read the sampled dataset with Amazon inventory metadata

In [None]:
# Load the sampled item metadata; the file is JSON Lines (one JSON object per
# line), hence lines=True.
df_items = pd.read_json(
    path_or_buf="../data/meta_Electronics_2022_2023_with_category_ratings_100_sample_1000.jsonl",
    lines=True,
)

#### Concatenate title and features

In [None]:
def preprocess_data(row):
    """Build the text that gets embedded for one item.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with a 'title' entry
            and a 'features' entry that is an iterable of strings.

    Returns:
        The title, a single space, then all features joined by single spaces.
    """
    title = row["title"]
    joined_features = " ".join(row["features"])
    return f"{title} {joined_features}"

In [None]:
# Apply preprocess_data row by row and store the combined text in a new
# "preprocessed_data" column.
df_items["preprocessed_data"] = df_items.apply(
    preprocess_data,
    axis="columns",
)

#### Sample 50 items from the dataset

In [None]:
# Draw a fixed-size random subset of the items; the fixed seed makes the
# draw repeatable across runs.
SAMPLE_SIZE = 50
SAMPLE_SEED = 42
df_sample = df_items.sample(n=SAMPLE_SIZE, random_state=SAMPLE_SEED)

#### Define the embeddings function

In [None]:
def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding vector for a single piece of text.

    Args:
        text: The string to embed; it is sent as a one-element batch.
        model: Name of the OpenAI embedding model to use.

    Returns:
        The `embedding` of the first (and only requested) item in the
        response.
    """
    api_result = openai.embeddings.create(model=model, input=[text])
    first_item = api_result.data[0]
    return first_item.embedding

#### Embed data

In [None]:
# Texts to embed, in sample order.
data_to_embed = df_sample["preprocessed_data"].tolist()

# One point per text: the id is the text's position in the list, the vector
# is its embedding, and the original text is kept in the payload so search
# hits can be read back.
pointstructs = [
    PointStruct(
        id=position,
        vector=get_embedding(text),
        payload={"text": text},
    )
    for position, text in enumerate(data_to_embed)
]

#### Write embedded data to Qdrant

In [None]:
# Send all embedded points in a single request. wait=True asks Qdrant to
# respond only once the write has been applied, so the retrieval that
# follows sees the data.
qdrant_client.upsert(
    points=pointstructs,
    collection_name="Amazon-items-collection-00",
    wait=True,
)

#### Define a function for data retrieval

In [None]:
def retrieve_data(query, limit=10, collection_name="Amazon-items-collection-00"):
    """Embed a query and return the closest stored points from Qdrant.

    Args:
        query: Search text; it is embedded with `get_embedding` before the
            lookup.
        limit: Maximum number of points to return. Defaults to 10.
        collection_name: Qdrant collection to search. Defaults to the
            collection this notebook upserts into.

    Returns:
        The response of `qdrant_client.query_points`; the matched points are
        available on its `.points` attribute.
    """
    query_embedding = get_embedding(query)
    return qdrant_client.query_points(
        collection_name=collection_name,
        query=query_embedding,
        limit=limit,
    )

#### Test data retrieval

In [None]:
# Smoke-test retrieval: the cell output is the list of matched points.
# NOTE(review): "airphones" is probably a typo for "earphones"/"headphones";
# the query string is left unchanged here — confirm the intended wording.
retrieve_data("What airphones can I get?").points