In [None]:
# Install required libraries
!pip install torch torchvision diffusers transformers accelerate
!pip install faker pandas

import json
import os
import random
import uuid

import pandas as pd
import torch
from diffusers import StableDiffusionPipeline
from faker import Faker

# Shared Faker instance.
# NOTE(review): `fake` is not referenced by any cell visible in this notebook —
# confirm it is still needed before keeping the faker dependency.
fake = Faker()


In [None]:
# Output directory for the generated product images (no error if it already exists)
os.makedirs("synthetic_images", exist_ok=True)

# Stable Diffusion setup: use the GPU when CUDA is available, otherwise the CPU
# NOTE(review): verify this Hub repository id still resolves; the checkpoint may have moved.
model_id = "runwayml/stable-diffusion-v1-5"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision on CUDA, full precision on CPU
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype={"cuda": torch.float16, "cpu": torch.float32}[device],
)
pipe = pipe.to(device)

In [None]:
# Lookup tables used to assemble synthetic products.
# `categories`, `brands`, `materials` and `adjectives` all share the same six
# category keys; create_product_info() indexes each of them with one category.
# NOTE: the key "Jewelery" is compared literally in create_product_info(), so the
# spelling must stay identical in every table and in that comparison.

# Category -> product types that can be drawn for it
categories = {
    "Men's Clothing": ["T-shirt", "Jeans", "Hoodie", "Jacket", "Sneakers"],
    "Women's Clothing": ["Dress", "Blouse", "Skirt", "Handbag", "Heels"],
    "Jewelery": ["Necklace", "Bracelet", "Ring", "Earrings", "Watch"],
    "Electronics": ["Smartphone", "Laptop", "Headphones", "Camera", "Smartwatch"],
    "Home Appliances": ["Blender", "Microwave", "Vacuum Cleaner", "Air Fryer", "Coffee Maker"],
    "Sports & Outdoors": ["Yoga Mat", "Tent", "Bicycle", "Dumbbells", "Running Shoes"]
}

# Category -> brand names used in titles and descriptions
brands = {
    "Men's Clothing": ["Nike", "Adidas", "Puma", "Levi's", "H&M"],
    "Women's Clothing": ["Zara", "H&M", "Mango", "Uniqlo", "Forever21"],
    "Jewelery": ["Tiffany", "Cartier", "Swarovski", "Pandora", "Bvlgari"],
    "Electronics": ["Sony", "Samsung", "Apple", "LG", "Dell"],
    "Home Appliances": ["Philips", "Bosch", "LG", "Panasonic", "Samsung"],
    "Sports & Outdoors": ["Adidas", "Nike", "Decathlon", "Reebok", "Puma"]
}

# Category -> materials (lower-case; the jewelery title capitalizes the chosen one)
materials = {
    "Men's Clothing": ["cotton", "wool", "denim", "polyester", "leather"],
    "Women's Clothing": ["cotton", "silk", "denim", "linen", "lace"],
    "Jewelery": ["gold", "silver", "platinum", "diamond", "rose gold"],
    "Electronics": ["metallic", "plastic", "aluminum", "glass", "carbon fiber"],
    "Home Appliances": ["stainless steel", "plastic", "ceramic", "aluminum", "glass"],
    "Sports & Outdoors": ["rubber", "foam", "nylon", "aluminum", "polyester"]
}

# Category -> adjectives; one is drawn per product and opens its description
adjectives = {
    "Men's Clothing": [
        "Comfortable", "Stylish", "Casual", "Durable", "Soft", "Classic", "Trendy", "Lightweight",
        "Versatile", "Warm", "Breathable", "Modern", "Fitted", "Premium", "Relaxed", "Urban"
    ],
    "Women's Clothing": [
        "Elegant", "Chic", "Stylish", "Flattering", "Lightweight", "Feminine", "Trendy", "Comfortable",
        "Modern", "Flowy", "Sophisticated", "Casual", "Colorful", "Premium", "Soft", "Versatile"
    ],
    "Jewelery": [
        "Elegant", "Luxury", "Shiny", "Exquisite", "Timeless", "Stylish", "Premium", "Classic",
        "Chic", "Sophisticated", "Delicate", "Sparkling", "Refined", "Polished", "High-quality"
    ],
    "Electronics": [
        "High-performance", "Reliable", "Advanced", "Sleek", "Powerful", "Innovative", "Compact",
        "Cutting-edge", "Durable", "Portable", "Smart", "Efficient", "Premium", "Robust", "Professional"
    ],
    "Home Appliances": [
        "Durable", "Efficient", "Powerful", "Compact", "Reliable", "Modern", "User-friendly",
        "High-quality", "Versatile", "Innovative", "Sleek", "Practical", "Premium", "Energy-saving"
    ],
    "Sports & Outdoors": [
        "Durable", "Lightweight", "Reliable", "Comfortable", "Premium", "High-performance",
        "Versatile", "Portable", "Innovative", "Ergonomic", "Robust", "Flexible", "Sturdy", "Safe"
    ]
}

# Payment options; each product is assigned a random non-empty subset of these
payment_methods = ["cash", "card"]


In [None]:
# Function to generate AI image
def generate_synthetic_image(title, category, save_path, num_inference_steps=25, guidance_scale=7.5):
    """Generate a product photo with the global Stable Diffusion pipeline and save it.

    Args:
        title: Product title inserted into the prompt.
        category: Category name; it is lower-cased and appended to the prompt.
        save_path: File path the generated image is written to.
        num_inference_steps: Forwarded to the pipeline call (default 25).
        guidance_scale: Forwarded to the pipeline call (default 7.5).

    Returns:
        ``save_path``, unchanged, so the call can be used inline.
    """
    prompt = (
        f"high quality product photo of {title}, {category.lower()}, "
        "studio lighting, isolated on white background, e-commerce style, ultra realistic"
    )

    # Create the destination folder here so the function does not rely on an
    # earlier cell having done it. A bare file name has no parent to create.
    parent_dir = os.path.dirname(save_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # The pipeline returns a batch of images; only the first one is kept.
    image = pipe(
        prompt,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
    ).images[0]
    image.save(save_path)
    return save_path


In [None]:
# Build a title/description pair for one randomly drawn product of a category
def create_product_info(category):
    """Return a ``(title, description)`` tuple for a random product in ``category``.

    A product type, brand, material and adjective are each picked with
    ``random.choice`` from the module-level ``categories``, ``brands``,
    ``materials`` and ``adjectives`` tables and slotted into wording specific
    to the category. Any category that is not one of the clothing categories,
    "Jewelery", "Electronics" or "Home Appliances" gets the sports/outdoors
    wording.
    """
    # Keep the four draws in this order so a seeded run makes the same picks.
    item = random.choice(categories[category])
    maker = random.choice(brands[category])
    substance = random.choice(materials[category])
    descriptor = random.choice(adjectives[category])
    item_lower = item.lower()

    if category == "Men's Clothing" or category == "Women's Clothing":
        return (
            f"{maker} {item} in {substance}",
            f"{descriptor} {substance} {item_lower} designed for everyday wear and style.",
        )

    if category == "Jewelery":
        return (
            f"{substance.capitalize()} {item} by {maker}",
            f"{descriptor} {substance} {item_lower} crafted by {maker} for timeless elegance.",
        )

    if category == "Electronics":
        return (
            f"{maker} {item} with {substance} finish",
            f"{descriptor} {item_lower} from {maker}, featuring premium {substance} build quality.",
        )

    if category == "Home Appliances":
        return (
            f"{maker} {item} with {substance} design",
            f"{descriptor} {item_lower} by {maker} with {substance} construction for long-lasting use.",
        )

    # Sports & Outdoors: the material is drawn above but not used in this wording
    return (
        f"{maker} {item} for sports and outdoor activities",
        f"{descriptor} {item_lower} by {maker}, perfect for training, outdoor adventures, and fitness.",
    )


In [None]:
# Generate products
NUM_PRODUCTS = 200  # number of synthetic products (and images) to create

# The category names do not change while generating, so build the list once
# instead of on every iteration.
category_names = list(categories)

# Start from an empty list so re-running this cell does not append duplicates.
products = []

for i in range(NUM_PRODUCTS):
    product_number = i + 1  # 1-based index used in file names and progress output

    category = random.choice(category_names)
    title, description = create_product_info(category)

    # Category with spaces replaced by underscores and lower-cased, plus the product number
    image_filename = f"synthetic_images/{category.replace(' ', '_').lower()}_{product_number}.png"

    # Generate image for this product (one pipeline run per product)
    print(f"Generating image for product {product_number}: {title}...")
    generate_synthetic_image(title, category, image_filename)

    product = {
        "id": str(uuid.uuid4()),
        "title": title,
        "price": round(random.uniform(5, 500), 2),
        "description": description,
        "category": category,
        "image": image_filename,
        "rating": {
            "rate": round(random.uniform(1, 5), 1),
            "count": random.randint(0, 1000)
        },
        # A random non-empty subset of the payment methods, in random order
        "payment_methods": random.sample(payment_methods, k=random.randint(1, len(payment_methods))),
        "availability": random.choice([True, False])
    }
    products.append(product)


In [None]:
# Save to CSV
df = pd.DataFrame(products)
# NOTE(review): "rating" (dict) and "payment_methods" (list) are nested values, so
# to_csv writes them in their Python string form — confirm downstream readers expect that.
df.to_csv("synthetic_products.csv", index=False)
# Report the actual number of rows instead of a hard-coded count.
print(f"{len(df)} synthetic products with varied adjectives, accurate titles, descriptions, and AI images saved to 'synthetic_products.csv'")

In [None]:
# Save products to JSON (`json` is imported with the other modules in the first cell).
# The list of dicts is dumped as-is, so the nested "rating" object and the
# "payment_methods" array keep their structure.
# ensure_ascii=False with the utf-8 file encoding writes non-ASCII characters unescaped.
with open("synthetic_products.json", "w", encoding="utf-8") as f:
    json.dump(products, f, ensure_ascii=False, indent=4)

print("Products also saved to 'synthetic_products.json'")
