In [12]:

import numpy as np

# --- Experiment configuration -------------------------------------------
number_of_arms = 10          # how many arms the bandit has
number_of_rounds = 1000      # how many pulls are simulated
epsilon = 0.1                # probability of pulling a uniformly random arm
prob_change_interval = 100   # the true probabilities are perturbed every this many rounds
sliding_window_size = 50     # max number of recent rewards kept per arm for the estimate
random_seed = 42             # fixed seed so that Restart & Run All reproduces the same run

# Seed NumPy's global (legacy) RNG before the first random draw; every
# np.random.* call in this cell draws from that global generator.
np.random.seed(random_seed)

# Initialize true probabilities
true_probabilities = np.random.rand(number_of_arms)
print(f"True Probabilities: {true_probabilities}")

# Initialize data structures
estimated_greedy_rewards = np.zeros(number_of_arms)   # per-arm sliding-window mean reward
greedy_counts = np.zeros(number_of_arms)              # per-arm number of pulls
total_greedy_reward = 0                               # running sum of realised rewards
greedy_rewards = np.zeros(number_of_rounds)           # cumulative realised reward after each round
cumulative_greedy_regret = np.zeros(number_of_rounds)

optimal_rewards = np.zeros(number_of_rounds)          # best true probability in each round
optimal_reward = 0                                    # running sum of optimal_rewards

# One independent list of recent rewards per arm (trimmed to sliding_window_size in the loop).
reward_history = {arm: [] for arm in range(number_of_arms)}

def update_probabilities(probs, step_size=0.05):
    """Return a randomly perturbed copy of ``probs``, clipped to ``[0, 1]``.

    Independent standard-normal noise scaled by ``step_size`` is added to
    every entry. The noise is sized from ``probs`` itself rather than from a
    module-level constant, so the function works for any number of arms.

    Parameters
    ----------
    probs : array_like
        Current per-arm probabilities.
    step_size : float, optional
        Scale applied to the standard-normal noise (default 0.05).

    Returns
    -------
    numpy.ndarray
        New array with the same shape as ``probs``; the input is not modified.
    """
    probs = np.asarray(probs, dtype=float)
    # Draws from NumPy's global RNG, exactly like np.random.randn(len(probs)).
    noise = np.random.standard_normal(probs.shape)
    return np.clip(probs + step_size * noise, 0, 1)

# Epsilon-greedy simulation with a sliding-window reward estimate.
# The loop variable is named `round_index` (not `round`) so the built-in
# round() is not rebound in the notebook namespace.
for round_index in range(number_of_rounds):
    # Perturb the true probabilities every `prob_change_interval` rounds
    # (this also fires on round 0).
    if round_index % prob_change_interval == 0:
        true_probabilities = update_probabilities(true_probabilities)
        print(f"Round {round_index}: Updated True Probabilities: {true_probabilities}")

    # Explore with probability epsilon, otherwise exploit the best estimate.
    # np.argmax returns the lowest index on ties.
    if np.random.rand() < epsilon:
        greedy_arm = np.random.randint(number_of_arms)
    else:
        greedy_arm = np.argmax(estimated_greedy_rewards)

    # Bernoulli reward: True with probability true_probabilities[greedy_arm].
    greedy_reward = np.random.rand() < true_probabilities[greedy_arm]

    # Keep only the most recent `sliding_window_size` rewards for this arm
    # and use their mean as the arm's current estimate.
    arm_window = reward_history[greedy_arm]
    arm_window.append(greedy_reward)
    if len(arm_window) > sliding_window_size:
        arm_window.pop(0)
    estimated_greedy_rewards[greedy_arm] = np.mean(arm_window)

    greedy_counts[greedy_arm] += 1
    total_greedy_reward += greedy_reward
    # Note: this stores the cumulative reward so far, not the per-round reward.
    greedy_rewards[round_index] = total_greedy_reward

    # Regret here is (sum of best true probability per round) minus
    # (sum of realised rewards), so individual entries can be negative.
    optimal_rewards[round_index] = np.max(true_probabilities)
    optimal_reward += optimal_rewards[round_index]
    cumulative_greedy_regret[round_index] = optimal_reward - total_greedy_reward

print(f"Greedy Rewards: {greedy_rewards}")
print(f"Cumulative Greedy Regret: {cumulative_greedy_regret}")
print(f"Estimated Greedy Rewards: {estimated_greedy_rewards}")
print(f"Greedy Counts: {greedy_counts}")


True Probabilities: [0.41544786 0.59114598 0.77984055 0.08477861 0.00496531 0.8219451
 0.04434593 0.37650419 0.56280541 0.69202764]
Round 0: Updated True Probabilities: [0.39668228 0.54360855 0.76409313 0.06563445 0.         0.81262991
 0.06992892 0.32106071 0.6308334  0.56698372]
Round 100: Updated True Probabilities: [0.38098784 0.5614961  0.7337476  0.08215471 0.         0.84081387
 0.1085415  0.28931792 0.57500237 0.58834969]
Round 200: Updated True Probabilities: [0.38552832 0.62446678 0.76636705 0.11731212 0.         0.88856594
 0.00992014 0.26519466 0.60605834 0.58135524]
Round 300: Updated True Probabilities: [0.3447983  0.7021173  0.77002902 0.0908565  0.         0.92055019
 0.         0.28137558 0.5056439  0.60708245]
Round 400: Updated True Probabilities: [0.3749982  0.70203394 0.80797458 0.17057857 0.         0.88640514
 0.03800986 0.335409   0.51223858 0.63513034]
Round 500: Updated True Probabilities: [0.40441031 0.69660841 0.79200274 0.18579403 0.         0.9671682
 0.03