In [1]:
import numpy as np
import random



In [2]:
articles = [100, 120, 80, 60, 50, 40, 30, 20, 10, 5]
aligned_articles = [0, 1, 2]  # Indices of aligned articles


In [3]:
# Number of arms (articles)
k = len(articles)


In [4]:
# Number of rounds (time steps)
num_rounds = 1000

In [5]:
# Initialize variables for tracking views and selections
views = np.zeros(k)
selections = np.zeros(k)
total_views = 0


In [6]:
# Epsilon-Greedy parameters
epsilon = 0.1
# UCB parameters
ucb_c = 2
# Thompson Sampling parameters
alpha = np.ones(k)
beta = np.ones(k)

In [7]:
def epsilon_greedy(epsilon):
    if random.random() < epsilon:
        # Exploration: choose a random article
        arm = random.randint(0, k - 1)
    else:
        # Exploitation: choose the best-performing article
        arm = np.argmax(views / (selections + 1e-5))
    return arm

In [8]:
def ucb(ucb_c, t):
    ucb_values = views / (selections + 1e-5) + ucb_c * np.sqrt(np.log(t + 1) / (selections + 1e-5))
    return np.argmax(ucb_values)

In [9]:
def thompson_sampling():
    samples = [np.random.beta(alpha[i], beta[i]) for i in range(k)]
    return np.argmax(samples)

In [10]:
for t in range(num_rounds):
    # Choose an arm using one of the strategies
    # arm = epsilon_greedy(epsilon)
    # arm = ucb(ucb_c, t)
    arm = thompson_sampling()
    
    # Simulate the reward (views)
    reward = np.random.normal(articles[arm], 10)
    
    # Update the total views and selections
    total_views += reward
    views[arm] += reward
    selections[arm] += 1
    
    # Update Thompson Sampling parameters
    if arm in aligned_articles:
        alpha[arm] += reward / 100
        beta[arm] += (100 - reward) / 100
    else:
        alpha[arm] += reward / 100
        beta[arm] += (100 - reward) / 100
    beta = np.maximum(beta, 1e-5)

# Print the results
print("Total Views:", total_views)
print("Views per Article:", views)
print("Selections per Article:", selections)

Total Views: 119144.06030146807
Views per Article: [1.76517434e+02 1.18376296e+05 2.27009808e+02 1.27562100e+02
 1.31743185e+02 6.16552374e+01 0.00000000e+00 1.17753885e+01
 3.15007611e+01 0.00000000e+00]
Selections per Article: [  2. 987.   3.   2.   2.   1.   0.   1.   2.   0.]
