In [1]:
import numpy as np
import pandas as pd
from typing import List, Dict
import random
from datetime import datetime

In [2]:
class ProductBandit:
    def __init__(self, product_data: pd.DataFrame, epsilon: float = 0.1, decay_rate: float = 0.001):
        self.product_data = product_data
        self.initial_epsilon = epsilon
        self.current_epsilon = epsilon
        self.decay_rate = decay_rate
        
        # Initialize estimates using available product data
        self.reward_estimates = {pid: 0.0 for pid in product_data['product_id']}
        self.selection_counts = {pid: 0 for pid in product_data['product_id']}
        self.total_rewards = {pid: 0.0 for pid in product_data['product_id']}

    def select_product(self) -> str:
        if random.random() < self.current_epsilon:
            # Exploration: randomly select a product
            return self.product_data.sample(1)['product_id'].iloc[0]
        else:
            # Exploitation: select product with highest estimated reward
            return max(self.reward_estimates.items(), key=lambda x: x[1])[0]

    def update_reward(self, product_id: str, reward: float):
        self.selection_counts[product_id] += 1
        self.total_rewards[product_id] += reward
        
        n = self.selection_counts[product_id]
        current_estimate = self.reward_estimates[product_id]
        self.reward_estimates[product_id] = ((n - 1) * current_estimate + reward) / n

    def get_product_stats(self) -> pd.DataFrame:
        stats = []
        for pid in self.product_data['product_id']:
            stats.append({
                'product_id': pid,
                'estimated_reward': self.reward_estimates[pid],
                'times_selected': self.selection_counts[pid],
                'total_reward': self.total_rewards[pid],
                'average_reward': self.total_rewards[pid] / max(1, self.selection_counts[pid])
            })
        return pd.DataFrame(stats)


In [3]:
product_data = pd.DataFrame({
    'product_id': [f'P{i:03d}' for i in range(10)],
    'price': np.random.uniform(10, 1000, 10),
    'category': np.random.choice(['Electronics', 'Clothing', 'Books', 'Home'], 10),
    'avg_rating': np.random.uniform(3, 5, 10)
})

# Create sample interactions
interaction_data = pd.DataFrame({
    'user_id': [f'U{i:03d}' for i in range(100)],
    'product_id': [f'P{np.random.randint(0, 10):03d}' for _ in range(100)],
    'interaction_type': np.random.choice(['view', 'purchase', 'cart'], 100)
})

In [4]:
recommender = ProductBandit(product_data)

# Train with historical data
for _, interaction in interaction_data.iterrows():
    reward = 1.0 if interaction['interaction_type'] == 'purchase' else 0.5 if interaction['interaction_type'] == 'cart' else 0.1
    recommender.update_reward(interaction['product_id'], reward)

In [5]:
print("Top 5 Product Recommendations:")
for i in range(5):
    recommended_product = recommender.select_product()
    product_info = product_data[product_data['product_id'] == recommended_product].iloc[0]
    print(f"\nRecommendation {i+1}:")
    print(f"Product ID: {product_info['product_id']}")
    print(f"Category: {product_info['category']}")
    print(f"Price: ${product_info['price']:.2f}")
    print(f"Average Rating: {product_info['avg_rating']:.2f}")

Top 5 Product Recommendations:

Recommendation 1:
Product ID: P006
Category: Home
Price: $686.64
Average Rating: 3.60

Recommendation 2:
Product ID: P006
Category: Home
Price: $686.64
Average Rating: 3.60

Recommendation 3:
Product ID: P006
Category: Home
Price: $686.64
Average Rating: 3.60

Recommendation 4:
Product ID: P006
Category: Home
Price: $686.64
Average Rating: 3.60

Recommendation 5:
Product ID: P006
Category: Home
Price: $686.64
Average Rating: 3.60


In [6]:
stats = recommender.get_product_stats()
print("\nProduct Performance Statistics:")
print(stats.sort_values('estimated_reward', ascending=False))


Product Performance Statistics:
  product_id  estimated_reward  times_selected  total_reward  average_reward
6       P006          0.733333               9           6.6        0.733333
8       P008          0.709091              11           7.8        0.709091
0       P000          0.641667              12           7.7        0.641667
9       P009          0.635714              14           8.9        0.635714
3       P003          0.581250              16           9.3        0.581250
4       P004          0.475000               8           3.8        0.475000
7       P007          0.457143               7           3.2        0.457143
2       P002          0.454545              11           5.0        0.454545
5       P005          0.450000               6           2.7        0.450000
1       P001          0.400000               6           2.4        0.400000
