# ArbitrageAI Demo

This notebook demonstrates the ArbitrageAI pipeline:
1. Load product data
2. Generate embeddings using sentence-transformers
3. Upload to Qdrant vector database (performed by `main.py`; run it before the search step)
4. Perform semantic search

In [None]:
import sys

# Make the project modules under ../src importable. The path is relative, so it
# resolves against the kernel's working directory. The membership check keeps
# sys.path free of duplicate entries when this cell is re-run.
SRC_DIR = '../src'
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

import pandas as pd
from vectorize import get_embedding, get_embeddings
from qdrant_utils import get_client, create_collection, upload_batch

print('✅ Imports successful')

## 1. Load Data

In [None]:
# Load sample data (paths are relative to the kernel's working directory).
clothing_df = pd.read_parquet('../data/clothing.parquet')
watches_df = pd.read_csv('../data/watches.csv')

# Show the size plus a short preview instead of printing the whole frames,
# so the cell output stays readable regardless of how large the files are.
print('Clothing data:')
print(f'{clothing_df.shape[0]} rows x {clothing_df.shape[1]} columns')
print(clothing_df.head())
print('\nWatches data:')
print(f'{watches_df.shape[0]} rows x {watches_df.shape[1]} columns')
print(watches_df.head())

## 2. Generate Embeddings

In [None]:
# Generate embeddings for clothing items.
# Each item's text is "<title> <subtitle>". Missing values are replaced with an
# empty string first: otherwise a single missing title or subtitle would turn
# the whole concatenated text into NaN and a non-string would be passed on to
# get_embeddings. strip() removes the dangling space left by an empty part.
clothing_texts = (
    clothing_df['title'].fillna('') + ' ' + clothing_df['subtitle'].fillna('')
).str.strip().tolist()
clothing_embeddings = get_embeddings(clothing_texts)

print(f'Generated {len(clothing_embeddings)} embeddings')
# Only index the first embedding when there is one; an empty result would
# otherwise raise IndexError here.
if len(clothing_embeddings) > 0:
    print(f'Embedding dimension: {len(clothing_embeddings[0])}')

## 3. Search Using Vector Similarity

In [None]:
# Connect to Qdrant
client = get_client()

# Embed the free-text query so it can be compared against the stored vectors.
query = "Nike running shoes"
query_embedding = get_embedding(query)

# Only the search call itself is guarded. `except Exception` (rather than a
# bare `except:`) lets KeyboardInterrupt/SystemExit propagate, and the actual
# error is reported so that a connection problem is not mistaken for a
# missing collection.
try:
    results = client.search(
        collection_name='clothing',
        query_vector=query_embedding,
        limit=3
    )
except Exception as exc:
    print('Search failed. If the collection does not exist yet, '
          'run main.py first to upload data.')
    print(f'Details: {type(exc).__name__}: {exc}')
else:
    # Runs only when the search succeeded; errors while printing results are
    # no longer swallowed by the handler above.
    print(f'🔎 Search results for: "{query}"\n')
    for i, result in enumerate(results, 1):
        print(f'{i}. {result.payload}')

## 4. Summary

- ✅ Data loaded and embedded
- ✅ Connected to Qdrant
- ✅ Performed semantic search
- 🚀 Ready for production!