In [1]:
from qdrant_client import QdrantClient
from qdrant_client.http import models
import json
import os
from openai import OpenAI
import logging
import json
import numpy as np
from transformers import CLIPProcessor, CLIPModel

In [3]:
with open ('electronics.json', 'r') as file:
    data = json.load(file)

In [4]:
for phone in data['products']['phones'][:3]:
    print(phone)

{'brand': 'Apple', 'model': 'iPhone 11 Pro Max', 'color': 'Gray', 'storage': '128 GB', 'condition': 'New', 'price': 1233.33, 'city': 'Warsaw'}
{'brand': 'Samsung', 'model': 'Galaxy S21', 'color': 'Blue', 'storage': '64 GB', 'condition': 'Used', 'price': 740.0, 'city': 'Krakow'}
{'brand': 'Xiaomi', 'model': 'Mi 11', 'color': 'Black', 'storage': '256 GB', 'condition': 'New', 'price': 999.99, 'city': 'Wroclaw'}


In [10]:
api_token = os.getenv("MY_API_KEY")

In [11]:
openai_client = OpenAI(api_key=api_token)

In [12]:
def generate_description(category, product):
    prompt = f"""
Generate a concise, engaging product description in two sentences, highlighting distinct features and unique usage details.
Include relevant condition insights, such as battery health for USED phones OR years of use for other eletronic devices. If a phone condition is NEW then do not inculde any information about battery health.
Example below:
Iphone 11 Pro with 128 GB storage, meticulously cared for in a silicone case and glass protector, showing only minor screen wear. Battery health is at 80%, reflecting typical use over four years, and it comes with a charging cable and screen protector in the box.
Let's get started, the item below:
Product Type= {category}\n
Brand= {product['brand']}\n
Model= {product['model']}\n
Color= {product.get('color', 'N/A')}\n
Storage= {product.get('storage', 'N/A')}\n
Type= {product.get('type', 'N/A')}\n
Size= {product.get('size', 'N/A')}\n
Resolution= {product.get('resolution', 'N/A')}\n
Condition= {product['condition']}\n
Price= {product['price']}\n
City= {product['city']}\n
Description: """.format(
        category=category,
        brand=product['brand'],
        model=product['model'],
        color=product.get('color', ''),
        storage=product.get('storage', ''),
        type=product.get('type', ''),
        size=product.get('size', ''),
        resolution=product.get('resolution', ''),
        condition=product['condition'],
        price=product['price'],
        city=product['city']
    ).strip()
    for attempt in range(3): 
        try:
            response = openai_client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": prompt}],
                max_tokens=100,
                temperature=0.2
            )
            description = response.choices[0].message.content.strip()
            return description
        except Exception as e:
            logging.error(f"Error generating description for {product['brand']} {product['model']}: {e}")
            return "Description not available."

In [16]:
def add_description(devices):
    for category, products in devices['products'].items():
        for product in products:
            product['description'] = generate_description(category, product)
        print(f'{category} are done')

In [17]:
add_description(data)

phones are done
tv are done
coffee_machines are done
tablets are done


In [22]:
data['products']['phones'][0]

{'brand': 'Apple',
 'model': 'iPhone 11 Pro Max',
 'color': 'Gray',
 'storage': '128 GB',
 'condition': 'New',
 'price': 1233.33,
 'city': 'Warsaw',
 'description': 'Introducing the brand-new iPhone 11 Pro Max in sleek Gray, featuring a generous 128 GB of storage for all your apps, photos, and videos. Experience stunning performance and cutting-edge technology, all packaged in a pristine device that’s ready to elevate your mobile experience in Warsaw.',
 'id': 1}

In [23]:
def add_id(devices):
    id = 1
    for category, product in devices['products'].items():
        for item in product:
            item['id'] = id
            id += 1

In [24]:
add_id(data)

In [25]:
data['products']['phones'][0]

{'brand': 'Apple',
 'model': 'iPhone 11 Pro Max',
 'color': 'Gray',
 'storage': '128 GB',
 'condition': 'New',
 'price': 1233.33,
 'city': 'Warsaw',
 'description': 'Introducing the brand-new iPhone 11 Pro Max in sleek Gray, featuring a generous 128 GB of storage for all your apps, photos, and videos. Experience stunning performance and cutting-edge technology, all packaged in a pristine device that’s ready to elevate your mobile experience in Warsaw.',
 'id': 1}

In [26]:
with open ('updated_info.json', 'w') as file:
    json.dump(data, file, indent = 4)

In [27]:
with open ('updated_info.json', 'r') as items:
    electronic_products = json.load(items)

In [28]:
electronic_products['products']['phones'][1]

{'brand': 'Samsung',
 'model': 'Galaxy S21',
 'color': 'Blue',
 'storage': '64 GB',
 'condition': 'Used',
 'price': 740.0,
 'city': 'Krakow',
 'description': 'Samsung Galaxy S21 in a stunning blue finish, featuring 64 GB of storage and a sleek design that fits comfortably in your hand. This gently used device shows minimal signs of wear, with a battery health of 85%, ensuring reliable performance for your daily needs, and it comes with a charging cable for your convenience.',
 'id': 2}

In [29]:
# Device per category
for category, products in electronic_products['products'].items():
    for product in products:
        print(product)
        break

{'brand': 'Apple', 'model': 'iPhone 11 Pro Max', 'color': 'Gray', 'storage': '128 GB', 'condition': 'New', 'price': 1233.33, 'city': 'Warsaw', 'description': 'Introducing the brand-new iPhone 11 Pro Max in sleek Gray, featuring a generous 128 GB of storage for all your apps, photos, and videos. Experience stunning performance and cutting-edge technology, all packaged in a pristine device that’s ready to elevate your mobile experience in Warsaw.', 'id': 1}
{'brand': 'Samsung', 'model': 'QN90A', 'size': '65 inch', 'resolution': '4K', 'condition': 'New', 'price': 1399.99, 'city': 'Warsaw', 'description': 'Experience stunning visuals with the brand-new Samsung QN90A 65-inch 4K TV, designed to elevate your home entertainment with vibrant colors and exceptional clarity. With its sleek design and advanced Quantum HDR technology, this TV is perfect for movie nights or gaming sessions, ensuring every detail comes to life in your living room.', 'id': 46}
{'brand': "De'Longhi", 'model': 'Magnific

In [34]:
import torch
from transformers import CLIPModel, CLIPProcessor
from qdrant_client import QdrantClient
from qdrant_client.http import models as qdrant_models

In [31]:
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")

In [32]:
client = QdrantClient(url='http://localhost:6333') 

In [35]:
collection_name = 'electronic_devices'
if not client.collection_exists(collection_name):
    client.create_collection(
        collection_name,
        vectors_config=qdrant_models.VectorParams(size=512, distance='Cosine')  # Adjust size based on CLIP
    )

In [36]:
points = []
for category, item in electronic_products['products'].items():
    for product in item:
        combined_text = f"{category}{product['brand']}{product['model']}{product.get('color', '')}{product.get('storage', '')}{product.get('size', '')}{product.get('resolution', '')}{product['condition']}{product['price']}{product['city']}{product['description']}"
        inputs = processor(text=combined_text, return_tensors="pt", padding=True, truncation=True)
        with torch.no_grad():
            embeddings = model.get_text_features(**inputs)
            point = {
                'id': product['id'],
                'vector': embeddings.squeeze().tolist(),
                'payload': product 
            }
            points.append(point)

In [37]:
client.upsert(
    collection_name, 
    points)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [40]:
query = 'Please find a NEW Samsung phone for me in blue color with a storage size of at least 120GB'

inputs = processor(text=query, return_tensors="pt", padding=True, truncation=True)
with torch.no_grad():
    query_embedding = model.get_text_features(**inputs)

# Convert to list format for Qdrant
query_embedding = query_embedding.squeeze().tolist()

In [41]:
collection_name = 'electronic_devices'

# Perform the search
results = client.search(
    collection_name=collection_name,
    query_vector=query_embedding,
    limit=3
)

# Print the results
for result in results:
    print(result.score, result.payload)

0.7632795 {'brand': 'Samsung', 'model': 'Galaxy A52', 'color': 'Awesome Blue', 'storage': '128 GB', 'condition': 'New', 'price': 499.99, 'city': 'Torun', 'description': 'Introducing the Samsung Galaxy A52 in a stunning Awesome Blue, featuring 128 GB of storage for all your apps, photos, and videos. This brand-new device combines sleek design with powerful performance, making it the perfect companion for your daily adventures, all for just $499.99 in Torun.', 'id': 12}
0.6951234 {'brand': 'Apple', 'model': 'iPhone 12 Mini', 'color': 'Blue', 'storage': '128 GB', 'condition': 'New', 'price': 949.99, 'city': 'Swidnica', 'description': 'Introducing the brand-new iPhone 12 Mini in a stunning blue finish, featuring 128 GB of storage for all your apps, photos, and videos. With its compact design and powerful performance, this phone is perfect for on-the-go users who value both style and functionality, all at an unbeatable price of $949.99.', 'id': 40}
0.6595078 {'brand': 'Samsung', 'model': 'G