In [5]:
import numpy as np
import matplotlib.pyplot as plt

number_of_arms = 10
number_of_rounds = 1000
epsilon_value = 0.1

true_probabilities = np.random.rand(number_of_arms)
print(f"True probabilities of each arm: {true_probabilities}")

True probabilities of each arm: [0.26331658 0.31259234 0.46678743 0.46317223 0.30187385 0.08824049
 0.76810588 0.80704436 0.46556597 0.41239922]


In [6]:
estimated_rewards = np.zeros(number_of_arms)  # Estimated rewards for each arm
counts = np.zeros(number_of_arms)  # Number of times each arm is pulled
rewards = np.zeros(number_of_rounds)  # Rewards obtained at each round
total_rewards = 0  # Total rewards obtained
optimal_reward = 0  # Total reward if best arm was pulled every time
optimal_rewards = np.zeros(number_of_rounds)  # Optimal rewards at each round
best_arm = np.argmax(true_probabilities)  # Best arm based on true probabilities
cumulative_regret = np.zeros(number_of_rounds)  # Cumulative regret

In [7]:
# Epsilon-Greedy Algorithm
for i in range(number_of_rounds):
    if np.random.rand() < epsilon_value:
        arm = np.random.randint(number_of_arms)
    else:
        arm = np.argmax(estimated_rewards)


    reward = np.random.rand() < true_probabilities[arm]      # Pull the chosen arm and observe the reward

    counts[arm] += 1
    estimated_rewards[arm] += (reward - estimated_rewards[arm]) / counts[arm]

    total_rewards += reward
    rewards[i] = reward

    optimal_rewards[i] = true_probabilities[best_arm]
    optimal_reward += optimal_rewards[i]
    cumulative_regret[i] = optimal_reward - total_rewards

print(f"Total rewards obtained: {total_rewards}")
print(f"Total optimal rewards: {optimal_reward}")
print(f"Cumulative regret at each round: {cumulative_regret}")

Total rewards obtained: 730
Total optimal rewards: 807.0443597023602
Cumulative regret at each round: [-0.19295564  0.61408872  0.42113308  1.22817744  2.0352218   1.84226616
  1.64931052  2.45635488  3.26339924  4.0704436   4.87748796  5.68453232
  5.49157668  5.29862104  5.1056654   5.91270976  6.71975411  6.52679847
  6.33384283  6.14088719  6.94793155  7.75497591  7.56202027  8.36906463
  8.17610899  8.98315335  8.79019771  9.59724207  9.40428643  9.21133079
 10.01837515 10.82541951 10.63246387 10.43950823 11.24655259 12.05359695
 11.86064131 11.66768567 12.47473003 13.28177439 13.08881875 12.89586311
 13.70290747 13.50995183 14.31699619 15.12404055 14.93108491 15.73812927
 15.54517363 16.35221799 17.15926234 17.9663067  17.77335106 18.58039542
 18.38743978 18.19448414 19.0015285  19.80857286 20.61561722 21.42266158
 22.22970594 23.0367503  22.84379466 22.65083902 23.45788338 23.26492774
 23.0719721  22.87901646 23.68606082 24.49310518 25.30014954 25.1071939
 25.91423826 25.7212826