In [3]:
import random
import math
class User:
    """A user together with their per-content reward history.

    ``preferences`` maps a content id to a ``(total_reward, impressions)``
    tuple: the sum of all rewards recorded for that content and the number
    of times a reward was recorded.
    """

    def __init__(self, user_id):
        self.preferences = {}
        self.user_id = user_id

    def update_preference(self, content_id, reward):
        """Add ``reward`` to the content's running total and count one impression."""
        # Content without history starts from a zero total and zero impressions.
        running_total, shown = self.preferences.get(content_id, (0, 0))
        self.preferences[content_id] = (running_total + reward, shown + 1)

    def get_preference(self, content_id):
        """Return ``(total_reward, impressions)`` for the content, or ``(0, 0)`` if unseen."""
        try:
            return self.preferences[content_id]
        except KeyError:
            return (0, 0)

class Content:
    """A recommendable item: an identifier plus the category it belongs to."""

    def __init__(self, content_id, category):
        # Plain attribute container; instances keep default identity-based
        # equality and hashing.
        self.content_id, self.category = content_id, category
class UCB1Bandit:
    """Pick content for a user with an upper-confidence-bound (UCB1-style) rule.

    Every candidate is scored as its mean recorded reward plus an exploration
    bonus ``exploration_constant * sqrt(ln(N) / n)``, where ``n`` is the
    candidate's impression count and ``N`` is the sum of impression counts
    over the candidates offered in the same call.
    """

    def __init__(self, exploration_constant=1.0):
        # Multiplier applied to the exploration bonus; 0 disables exploration
        # among already-shown content.
        self.exploration_constant = exploration_constant

    def choose_content(self, user, content_options):
        """Return the candidate with the highest score for ``user``.

        Args:
            user: Object whose ``get_preference(content_id)`` returns a
                ``(total_reward, impressions)`` tuple.
            content_options: Iterable of objects exposing ``content_id``.
                It is consumed exactly once, so generators are accepted.

        Returns:
            The highest-scoring candidate. Candidates with zero impressions
            score infinity, so they are chosen before any already-shown one;
            ties are resolved in favour of the earliest candidate.

        Raises:
            ValueError: If ``content_options`` yields no candidates.
        """
        # One statistics lookup per candidate, always through the user's
        # accessor, so the total and the per-item numbers cannot disagree.
        stats = []
        for content in content_options:
            reward, impressions = user.get_preference(content.content_id)
            stats.append((content, reward, impressions))

        if not stats:
            raise ValueError("content_options must contain at least one item")

        total_impressions = sum(impressions for _, _, impressions in stats)

        scored = []
        for content, reward, impressions in stats:
            if impressions == 0:
                # Never shown: force it to be tried before anything else.
                score = float('inf')
            else:
                # impressions >= 1 implies total_impressions >= 1, so the
                # logarithm is defined and non-negative.
                score = (reward / impressions) + self.exploration_constant * \
                    math.sqrt(math.log(total_impressions) / impressions)
            scored.append((score, content))

        # max() keeps the first maximal element, giving earliest-wins ties
        # without requiring candidates to be hashable or comparable.
        return max(scored, key=lambda pair: pair[0])[1]
# Simulation setup: two users with no history and a three-item catalogue.
users = {user_id: User(user_id) for user_id in (1, 2)}

contents = {
    content_id: Content(content_id, category)
    for content_id, category in (("A", "News"), ("B", "Sports"), ("C", "Tech"))
}

bandit = UCB1Bandit()

# The catalogue is not modified during the run, so the candidate list is
# built a single time and reused for every recommendation.
available_contents = list(contents.values())

# Ten recommendation rounds per user; each reward is a uniform draw from
# [0, 1] that is fed back into that user's history.
for user_id, user in users.items():
    print(f"Simulating recommendations for User {user_id}")
    for _ in range(10):
        chosen_content = bandit.choose_content(user, available_contents)
        reward = random.uniform(0, 1)
        print(f"User {user_id} recommended: {chosen_content.content_id} (Reward: {reward:.2f})")

        user.update_preference(chosen_content.content_id, reward)

# Summary of what each user accumulated per content item.
for user_id, user in users.items():
    print(f"\nUser {user_id} Preferences:")
    for content_id, (total_reward, impressions) in user.preferences.items():
        print(f"Content {content_id}: Total Reward = {total_reward:.2f}, Impressions = {impressions}")


Simulating recommendations for User 1
User 1 recommended: A (Reward: 0.98)
User 1 recommended: B (Reward: 0.08)
User 1 recommended: C (Reward: 0.72)
User 1 recommended: A (Reward: 0.50)
User 1 recommended: C (Reward: 0.61)
User 1 recommended: A (Reward: 0.93)
User 1 recommended: C (Reward: 0.72)
User 1 recommended: A (Reward: 0.67)
User 1 recommended: B (Reward: 0.57)
User 1 recommended: C (Reward: 0.54)
Simulating recommendations for User 2
User 2 recommended: A (Reward: 0.40)
User 2 recommended: B (Reward: 0.86)
User 2 recommended: C (Reward: 0.20)
User 2 recommended: B (Reward: 0.19)
User 2 recommended: A (Reward: 0.78)
User 2 recommended: A (Reward: 0.30)
User 2 recommended: C (Reward: 0.63)
User 2 recommended: B (Reward: 0.34)
User 2 recommended: C (Reward: 0.46)
User 2 recommended: A (Reward: 0.87)

User 1 Preferences:
Content A: Total Reward = 3.07, Impressions = 4
Content B: Total Reward = 0.65, Impressions = 2
Content C: Total Reward = 2.59, Impressions = 4

User 2 Preferences: