# Amazon Real User-Item Interactions Experiment

Tests BERT vs SimCSE on real user purchase data from Amazon Reviews 2023.
Uses a linear kernel bandit in which each user's embedding is computed from that user's purchase history.

In [None]:
import sys
sys.path.insert(0, '..')

import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

from src.datasets import AmazonReviewsDataset
from src.embeddings import get_extractor
from src.models import LinearKernelBandit, UserEmbeddingManager
from src.analysis.eigenvalues import compute_effective_dimension, compute_eigenvalue_spectrum

## 1. Load Dataset
Downloads Amazon Reviews 2023 (All_Beauty category) from HuggingFace.

In [None]:
# Build the dataset object for the All_Beauty category.
# NOTE(review): the keyword names suggest that users with fewer than 5
# interactions are dropped and that data is cached under ../data/amazon_reviews
# -- confirm against AmazonReviewsDataset.
dataset = AmazonReviewsDataset(
    cache_dir='../data/amazon_reviews',
    min_interactions_per_user=5,
    category='All_Beauty',
)

# Print the size of each collection the dataset exposes, one line per collection.
for summary_label, attr_name in (
    ("Users", "user_histories"),
    ("Items", "items_dict"),
    ("Test interactions", "test_interactions"),
):
    print(f"{summary_label}: {len(getattr(dataset, attr_name))}")

## 2. Compute Item Embeddings

In [None]:
# Pull the ASIN -> text lookup and the ASIN list from the dataset, then order
# the texts to match item_asins so that texts[i] belongs to item_asins[i].
item_texts = dataset.get_item_texts()
item_asins = dataset.get_item_asins()
texts = [item_texts[item_id] for item_id in item_asins]

n_texts = len(texts)
print(f"Computing embeddings for {n_texts} items...")

# Show at most the first 200 characters of the first item's text.
sample_preview = texts[0][:200]
print(f"Sample text: {sample_preview}...")

In [None]:
# Encode all item texts with each model, then index the rows by ASIN.
# Row `pos` of an embedding matrix is paired with item_asins[pos], which relies
# on `texts` having been built in item_asins order.

# --- BERT ---
print("Computing BERT embeddings...")
bert_enc = get_extractor('bert')
bert_embs = bert_enc.encode(texts)
bert_item_embs = {
    item_id: bert_embs[pos] for pos, item_id in enumerate(item_asins)
}
print(f"BERT: {bert_embs.shape}")

# --- SimCSE ---
print("\nComputing SimCSE embeddings...")
simcse_enc = get_extractor('simcse')
simcse_embs = simcse_enc.encode(texts)
simcse_item_embs = {
    item_id: simcse_embs[pos] for pos, item_id in enumerate(item_asins)
}
print(f"SimCSE: {simcse_embs.shape}")

## 3. Compute Effective Dimensions

In [None]:
# Eigenvalue spectrum of each embedding matrix; the second element returned by
# compute_eigenvalue_spectrum is not needed here and is discarded.
bert_eigs, _ = compute_eigenvalue_spectrum(bert_embs)
simcse_eigs, _ = compute_eigenvalue_spectrum(simcse_embs)

# Effective dimension derived from each spectrum (BERT first, then SimCSE).
bert_deff, simcse_deff = (
    compute_effective_dimension(spectrum)
    for spectrum in (bert_eigs, simcse_eigs)
)

for model_label, deff_value in (("BERT", bert_deff), ("SimCSE", simcse_deff)):
    print(f"{model_label} d_eff: {deff_value:.1f}")
print("\nTheory: Higher d_eff → better exploration → lower regret")

## 4. Compute User Embeddings from Purchase History

In [None]:
# One embedding manager per item-embedding space.
bert_user_mgr = UserEmbeddingManager(bert_item_embs)
simcse_user_mgr = UserEmbeddingManager(simcse_item_embs)

# Build a user_id -> embedding mapping from every training history, once per
# embedding space. tqdm only adds a progress bar around the iteration.
print("Computing BERT user embeddings...")
bert_user_embs = {}
for uid, purchases in tqdm(dataset.train_histories.items()):
    bert_user_embs[uid] = bert_user_mgr.get_user_embedding(purchases, user_id=uid)

print("Computing SimCSE user embeddings...")
simcse_user_embs = {}
for uid, purchases in tqdm(dataset.train_histories.items()):
    simcse_user_embs[uid] = simcse_user_mgr.get_user_embedding(purchases, user_id=uid)

n_embedded_users = len(bert_user_embs)
print(f"\nComputed embeddings for {n_embedded_users} users")

## 5. Run Bandit Experiments

In [None]:
def run_bandit_experiment(
    dataset,
    item_embs: dict,
    user_embs: dict,
    n_rounds: int = 3000,
    n_candidates: int = 100,
    seed: int = 42
):
    """
    Run a linear kernel bandit over the dataset's test interactions.

    Each round replays one test interaction ``(user_id, true_item, true_reward)``:

    1. Build a candidate set holding the true item plus randomly drawn
       distractor items (shuffled so the true item has no fixed position).
    2. Let the bandit select one candidate for the user.
    3. Reward is ``true_reward`` if the true item was selected, otherwise 0;
       regret is 0.0 on a hit and ``true_reward`` on a miss.
    4. Update the bandit with the selected item and the observed reward.

    Interactions whose user is missing from ``user_embs`` or whose item is
    missing from ``item_embs`` are skipped, so the returned arrays can be
    shorter than ``n_rounds``.

    Args:
        dataset: Object exposing ``test_interactions``, an indexable sequence
            of ``(user_id, true_item, true_reward)`` triples.
        item_embs: Mapping from item ASIN to its embedding vector.
        user_embs: Mapping from user id to its embedding vector.
        n_rounds: Maximum number of test interactions to replay; clamped to
            ``len(dataset.test_interactions)``.
        n_candidates: Size of the candidate set per round, including the true
            item; clamped to the number of available items.
        seed: Seed for NumPy's global RNG, which drives candidate sampling
            here. NOTE(review): presumably also drives the bandit's own
            sampling -- confirm against LinearKernelBandit.

    Returns:
        dict with keys ``rewards`` and ``regrets`` (per-round arrays),
        ``cumulative_regret`` (running sum of regrets), ``final_regret``
        (total regret), ``avg_reward`` (mean reward) and ``hit_rate``
        (fraction of rounds with reward > 0). ``avg_reward`` and ``hit_rate``
        are NaN when no round was recorded.

    Raises:
        ValueError: If ``n_candidates`` is smaller than 1.
    """
    if n_candidates < 1:
        # The true item always occupies one slot, so at least one is required.
        raise ValueError(f"n_candidates must be >= 1, got {n_candidates}")

    np.random.seed(seed)

    item_asins = list(item_embs.keys())
    item_array = np.array([item_embs[asin] for asin in item_asins])
    asin_to_idx = {asin: i for i, asin in enumerate(item_asins)}

    n_items = len(item_asins)
    # Number of non-true items per round; loop-invariant, so computed once.
    n_distractors = min(n_candidates - 1, n_items - 1)

    embedding_dim = item_array.shape[1]
    bandit = LinearKernelBandit(embedding_dim, algorithm='ts')

    rewards = []
    regrets = []

    test_data = dataset.test_interactions
    n_rounds = min(n_rounds, len(test_data))

    for t in tqdm(range(n_rounds), desc="Bandit rounds"):
        # n_rounds never exceeds len(test_data), so t is always a valid index.
        user_id, true_item, true_reward = test_data[t]

        if user_id not in user_embs:
            continue
        if true_item not in asin_to_idx:
            continue

        user_emb = user_embs[user_id]
        true_idx = asin_to_idx[true_item]

        candidate_indices = [true_idx]
        if n_distractors > 0:
            # Draw from the n_items - 1 non-true slots without materialising
            # them: sample 0..n_items-2, then shift every draw at or above
            # true_idx up by one so true_idx itself can never be chosen.
            distractors = np.random.choice(
                n_items - 1, n_distractors, replace=False
            )
            distractors[distractors >= true_idx] += 1
            candidate_indices.extend(distractors.tolist())
        np.random.shuffle(candidate_indices)

        candidate_embs = item_array[candidate_indices]

        action_idx = bandit.select_arm(user_emb, candidate_embs)
        selected_idx = candidate_indices[action_idx]
        selected_asin = item_asins[selected_idx]

        if selected_asin == true_item:
            reward = true_reward
            regret = 0.0
        else:
            reward = 0
            regret = true_reward

        bandit.update(user_emb, item_array[selected_idx], reward)

        rewards.append(reward)
        regrets.append(regret)

    rewards_arr = np.array(rewards)
    regrets_arr = np.array(regrets)

    if rewards_arr.size:
        avg_reward = np.mean(rewards_arr)
        hit_rate = np.mean(rewards_arr > 0)
    else:
        # Every round was skipped: the means are undefined. Report NaN
        # explicitly instead of triggering NumPy's empty-slice RuntimeWarning.
        avg_reward = float('nan')
        hit_rate = float('nan')

    return {
        'rewards': rewards_arr,
        'regrets': regrets_arr,
        'cumulative_regret': np.cumsum(regrets),
        'final_regret': np.sum(regrets),
        'avg_reward': avg_reward,
        'hit_rate': hit_rate
    }

In [None]:
# Replay the same test interactions once per embedding space. Both runs rely
# on run_bandit_experiment's default seed and candidate-set size.
print("Running BERT bandit...")
bert_results = run_bandit_experiment(
    dataset,
    item_embs=bert_item_embs,
    user_embs=bert_user_embs,
    n_rounds=3000,
)

print("\nRunning SimCSE bandit...")
simcse_results = run_bandit_experiment(
    dataset,
    item_embs=simcse_item_embs,
    user_embs=simcse_user_embs,
    n_rounds=3000,
)

## 6. Plot Results

In [None]:
import os


def _rolling_hit_rate(rewards, window):
    """Moving average of the hit indicator (reward > 0) over `window` rounds."""
    hits = (np.asarray(rewards) > 0).astype(float)
    if hits.size == 0:
        # np.convolve rejects empty input; there is simply nothing to plot.
        return hits
    # With mode='valid' a kernel longer than the signal yields a meaningless
    # curve, so shrink the window when fewer rounds were recorded.
    width = min(window, hits.size)
    return np.convolve(hits, np.ones(width) / width, mode='valid')


fig, axes = plt.subplots(1, 3, figsize=(16, 5))

# Cumulative Regret
ax1 = axes[0]
ax1.plot(bert_results['cumulative_regret'], label=f'BERT (d_eff={bert_deff:.0f})', color='red')
ax1.plot(simcse_results['cumulative_regret'], label=f'SimCSE (d_eff={simcse_deff:.0f})', color='blue')
ax1.set_xlabel('Round')
ax1.set_ylabel('Cumulative Regret')
ax1.set_title('Cumulative Regret (Real User Data)')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Rolling Hit Rate
# Average the hit indicator rather than the raw reward so this panel matches
# its axis label and the 'hit_rate' summary (fraction of rounds with reward > 0).
ax2 = axes[1]
window = 100
bert_rolling = _rolling_hit_rate(bert_results['rewards'], window)
simcse_rolling = _rolling_hit_rate(simcse_results['rewards'], window)
ax2.plot(bert_rolling, label='BERT', color='red', alpha=0.7)
ax2.plot(simcse_rolling, label='SimCSE', color='blue', alpha=0.7)
ax2.set_xlabel('Round')
ax2.set_ylabel(f'Hit Rate ({window}-round window)')
ax2.set_title('Learning Curves')
ax2.legend()
ax2.grid(True, alpha=0.3)

# Eigenvalue Spectra
# Each spectrum is normalised by its total so the two models are comparable.
ax3 = axes[2]
ax3.semilogy(bert_eigs[:100] / bert_eigs.sum(), label=f'BERT (d_eff={bert_deff:.0f})', color='red')
ax3.semilogy(simcse_eigs[:100] / simcse_eigs.sum(), label=f'SimCSE (d_eff={simcse_deff:.0f})', color='blue')
ax3.set_xlabel('Eigenvalue Index')
ax3.set_ylabel('Normalized Eigenvalue')
ax3.set_title('Eigenvalue Spectrum (Top 100)')
ax3.legend()
ax3.grid(True, alpha=0.3)

plt.tight_layout()
# savefig does not create missing directories, so make sure the target exists.
plot_path = '../results/plots/amazon_real_users_results.png'
os.makedirs(os.path.dirname(plot_path), exist_ok=True)
plt.savefig(plot_path, dpi=150)
plt.show()

print(f"\n=== Final Results ===")
print(f"BERT:   Regret = {bert_results['final_regret']:.1f}, Hit Rate = {bert_results['hit_rate']*100:.1f}%")
print(f"SimCSE: Regret = {simcse_results['final_regret']:.1f}, Hit Rate = {simcse_results['hit_rate']*100:.1f}%")

# Relative regret reduction of SimCSE over BERT, in percent. The denominator
# is floored at 1 to avoid dividing by zero when BERT's regret is 0.
improvement = 100 * (bert_results['final_regret'] - simcse_results['final_regret']) / max(bert_results['final_regret'], 1)
print(f"\nRegret Reduction: {improvement:.1f}%")

## 7. Theory Validation

The core hypothesis:
- **High d_eff** (contrastive) → **uniform embedding space** → **better coverage** → **lower regret**
- **Low d_eff** (reconstruction) → **anisotropic space** → **poor coverage** → **higher regret**

In [None]:
# Summarise the two quantities the hypothesis links: effective dimension and
# final cumulative regret. `improvement` is the regret-reduction percentage
# computed in the results cell.
print("=== Theory Validation ===")

print("\n1. Effective Dimension (d_eff):")
print(f"   BERT:   {bert_deff:.1f}")
print(f"   SimCSE: {simcse_deff:.1f}")
print(f"   Ratio:  {simcse_deff/bert_deff:.2f}x")

print("\n2. Final Cumulative Regret:")
print(f"   BERT:   {bert_results['final_regret']:.1f}")
print(f"   SimCSE: {simcse_results['final_regret']:.1f}")
print(f"   Improvement: {improvement:.1f}%")

print("\n3. Interpretation:")
# The theory counts as supported only when SimCSE has both the higher d_eff
# and the lower final regret.
theory_supported = (
    simcse_deff > bert_deff
    and simcse_results['final_regret'] < bert_results['final_regret']
)
verdict = (
    "   ✓ Results SUPPORT the theory: higher d_eff correlates with lower regret"
    if theory_supported
    else "   Results need more investigation"
)
print(verdict)