# Amazon Bandit Experiment
Tests contrastive (SimCSE) vs. reconstruction-based (BERT) text embeddings as item features for a Thompson Sampling contextual bandit on real Amazon product data.

In [None]:
import sys
sys.path.insert(0, '..')

import numpy as np
import torch
from tqdm import tqdm
import matplotlib.pyplot as plt

from src.embeddings import get_extractor
from src.models import LinearContextualBandit, SimpleNeuralBandit
from src.datasets import AmazonDataset

## 1. Load Amazon Data

In [None]:
# Load Amazon product data for the bandit experiment. This run overrides the
# loader's defaults (described by the original author as 3 categories x 3333
# items, ~10K total) with a single category of 3000 items; per that same
# author note, the loader streams and stops at n_items_per_category.
dataset = AmazonDataset(
    cache_dir='../data/amazon',
    n_items_per_category=3000,
    categories=['Electronics'],  # single category to start with
)
print(f"Loaded {len(dataset)} items")

# Per-item text, used as the encoder input in the embedding section
texts = dataset.get_item_texts()
print(f"Sample: {texts[0][:200]}...")

## 2. Compute Embeddings

In [None]:
# Contrastive encoder (SimCSE) -- the notebook's hypothesis is that this one
# works well as a bandit feature space.
# `texts` is the per-item text sequence built in the data-loading cell.
simcse = get_extractor('simcse')
simcse_embs = simcse.encode(texts)
print(f"SimCSE embeddings: {simcse_embs.shape}")

# Reconstruction-based encoder (BERT) -- expected to struggle by comparison.
# NOTE(review): later cells row-index these results by item index and read
# shape[1] as the embedding dimension, so both are presumably (n_items, dim)
# arrays -- confirm against src.embeddings.
bert = get_extractor('bert')
bert_embs = bert.encode(texts)
print(f"BERT embeddings: {bert_embs.shape}")

## 3. Compute Effective Dimension

In [None]:
from src.analysis.eigenvalues import compute_eigenvalues, compute_effective_dimension

# Eigenvalues and effective dimension, computed one model at a time
bert_eigs = compute_eigenvalues(bert_embs)
bert_deff = compute_effective_dimension(bert_eigs)

simcse_eigs = compute_eigenvalues(simcse_embs)
simcse_deff = compute_effective_dimension(simcse_eigs)

# Report both values (BERT first, then SimCSE); they are reused in the
# regret plot legend
print(f"BERT d_eff: {bert_deff:.1f}")
print(f"SimCSE d_eff: {simcse_deff:.1f}")

## 4. Run Bandit Experiment

In [None]:
def run_bandit_experiment(embeddings, dataset, n_rounds=5000, n_candidates=500, seed=42):
    """
    Run a Thompson Sampling linear contextual bandit with the given embeddings.

    Key design choices for valid regret measurement:
    - User preference is a random unit-norm direction in embedding space,
      redrawn every round (tests embedding geometry)
    - Rewards come from ``dataset.compute_reward_prob`` / ``dataset.sample_reward``,
      which receive the item, the user preference and the embeddings
      (intended to make reward depend on embedding similarity)
    - Regret computed using EXPECTED rewards (not stochastic samples)
    - Oracle best = max expected reward over the round's candidates

    Args:
        embeddings: Item embeddings, row-indexed by item index; ``shape[1]``
            is used as the bandit's feature dimension.
        dataset: Object supporting ``len()``, ``compute_reward_prob(item,
            user_pref, embeddings)`` and ``sample_reward(item, user_pref,
            embeddings)``.
        n_rounds: Number of bandit rounds to simulate.
        n_candidates: Items sampled (without replacement) per round. Clamped
            to ``len(dataset)`` so a small dataset does not make the sampler
            raise.
        seed: Seed for NumPy's *global* RNG. Note this reseeds global state
            as a side effect.

    Returns:
        Tuple ``(rewards, regrets)`` of NumPy arrays with one entry per
        round: the sampled reward and the instantaneous expected regret.
    """
    np.random.seed(seed)

    embedding_dim = embeddings.shape[1]
    n_items = len(dataset)
    # Sampling without replacement cannot draw more items than exist.
    n_candidates = min(n_candidates, n_items)

    bandit = LinearContextualBandit(embedding_dim=embedding_dim, algorithm='ts')

    rewards = []
    regrets = []

    for _ in tqdm(range(n_rounds)):
        # Sample random user preference (unit vector in embedding space)
        user_pref = np.random.randn(embedding_dim)
        user_pref = user_pref / np.linalg.norm(user_pref)

        # Draw this round's candidate items
        candidates = np.random.choice(n_items, n_candidates, replace=False)
        candidate_embs = embeddings[candidates]

        # Select action using bandit (index into the candidate list)
        action_idx = bandit.select_action(candidate_embs)
        selected_item = candidates[action_idx]

        # Compute EXPECTED rewards for all candidates (for regret)
        expected_rewards = np.array([
            dataset.compute_reward_prob(c, user_pref, embeddings)
            for c in candidates
        ])

        # Instantaneous regret: oracle-best expected reward minus the
        # expected reward of the chosen candidate
        instant_regret = np.max(expected_rewards) - expected_rewards[action_idx]

        # Sample actual (stochastic) reward -- this is what the bandit sees
        reward = dataset.sample_reward(selected_item, user_pref, embeddings)

        # Update bandit with the chosen item's features and observed reward
        bandit.update(candidate_embs[action_idx], reward)

        rewards.append(reward)
        regrets.append(instant_regret)

    return np.array(rewards), np.array(regrets)

In [None]:
# Run experiments
# Both runs use run_bandit_experiment's defaults (5000 rounds, 500 candidates,
# seed=42). The function reseeds NumPy's global RNG on entry, so each run
# starts from the same seed.
print("Running BERT bandit...")
bert_rewards, bert_regrets = run_bandit_experiment(bert_embs, dataset)

print("Running SimCSE bandit...")
simcse_rewards, simcse_regrets = run_bandit_experiment(simcse_embs, dataset)

## 5. Plot Results

In [None]:
from pathlib import Path

# Two side-by-side panels: cumulative regret (left), rolling reward (right)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Cumulative regret
ax1 = axes[0]
ax1.plot(np.cumsum(bert_regrets), label=f'BERT (d_eff={bert_deff:.0f})', color='red')
ax1.plot(np.cumsum(simcse_regrets), label=f'SimCSE (d_eff={simcse_deff:.0f})', color='blue')
ax1.set_xlabel('Round')
ax1.set_ylabel('Cumulative Regret')
ax1.set_title('Cumulative Regret: Contrastive vs Reconstruction')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Rolling average reward (uniform moving-average kernel of `window` rounds)
ax2 = axes[1]
window = 200
kernel = np.ones(window) / window
bert_rolling = np.convolve(bert_rewards, kernel, mode='valid')
simcse_rolling = np.convolve(simcse_rewards, kernel, mode='valid')
ax2.plot(bert_rolling, label='BERT', color='red', alpha=0.7)
ax2.plot(simcse_rolling, label='SimCSE', color='blue', alpha=0.7)
ax2.set_xlabel('Round')
# Label is derived from `window` so it cannot drift from the actual kernel size
ax2.set_ylabel(f'Average Reward ({window}-round window)')
ax2.set_title('Learning Curves')
ax2.legend()
ax2.grid(True, alpha=0.3)

plt.tight_layout()
# savefig does not create missing directories; make sure the target exists
plot_path = Path('../results/plots/amazon_bandit_results.png')
plot_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(plot_path, dpi=150)
plt.show()

# Summary numbers: total regret per model and the relative reduction
bert_total = np.sum(bert_regrets)
simcse_total = np.sum(simcse_regrets)

print("\nFinal cumulative regret:")
print(f"  BERT: {bert_total:.1f}")
print(f"  SimCSE: {simcse_total:.1f}")
if bert_total > 0:
    print(f"  Reduction: {100*(bert_total - simcse_total)/bert_total:.1f}%")
else:
    # Relative reduction is undefined when the baseline regret is zero
    print("  Reduction: n/a (BERT cumulative regret is zero)")