## Multi-armed Bandit Problem: Epsilon-Greedy

### Create a class with methods to select an arm and update its value estimate

In [None]:
import numpy as np
class EpsilonGreedy:
    """Epsilon-greedy agent for a multi-armed bandit.

    With probability ``epsilon`` the agent explores by picking an arm uniformly
    at random; otherwise it exploits the arm whose estimated value is highest.
    Each arm's estimated value is the running mean of the rewards passed to
    ``update`` for that arm.

    Attributes:
        n_arms: Number of arms.
        epsilon: Exploration probability.
        counts: NumPy array of length ``n_arms``; number of updates per arm.
        values: NumPy array of length ``n_arms``; running mean reward per arm.
    """

    def __init__(self, n_arms, epsilon):
        """Create an agent with all counts and value estimates at zero.

        Args:
            n_arms: Number of arms; must be at least 1.
            epsilon: Exploration probability; must lie in [0, 1].

        Raises:
            ValueError: If ``n_arms`` is less than 1 or ``epsilon`` is outside
                [0, 1] (NaN is rejected as well).
        """
        if n_arms < 1:
            raise ValueError(f"n_arms must be at least 1, got {n_arms!r}")
        # Written as `not (lo <= x <= hi)` so that NaN is rejected too.
        if not 0.0 <= epsilon <= 1.0:
            raise ValueError(f"epsilon must be in [0, 1], got {epsilon!r}")
        self.n_arms = n_arms
        self.epsilon = epsilon
        self.counts = np.zeros(n_arms)
        self.values = np.zeros(n_arms)

    def select_arm(self):
        """Pick the next arm to pull.

        Returns:
            int: Index of the chosen arm, in ``range(n_arms)``.
        """
        # np.random.rand() draws from [0, 1), so epsilon == 0 never explores
        # and epsilon == 1 always explores.
        if np.random.rand() < self.epsilon:
            return int(np.random.randint(0, self.n_arms))
        # argmax returns the first index of the maximum, so ties (including
        # the initial all-zero estimates) resolve to the lowest arm index.
        return int(np.argmax(self.values))

    def update(self, chosen_arm, reward):
        """Fold one observed reward into the running mean of ``chosen_arm``.

        Args:
            chosen_arm: Index of the arm that was pulled, in ``range(n_arms)``.
            reward: Reward observed for that pull.

        Raises:
            IndexError: If ``chosen_arm`` is outside ``range(n_arms)``.
                Negative indices are rejected so they cannot wrap around and
                silently update a different arm.
        """
        if not 0 <= chosen_arm < self.n_arms:
            raise IndexError(
                f"chosen_arm must be in range({self.n_arms}), got {chosen_arm!r}"
            )
        self.counts[chosen_arm] += 1
        n = self.counts[chosen_arm]
        value = self.values[chosen_arm]
        # Weighted average of the old mean (weight (n-1)/n) and the new reward
        # (weight 1/n) gives the mean over all n rewards seen for this arm.
        self.values[chosen_arm] = ((n - 1) / n) * value + (1 / n) * reward

### Run the simulation and calculate the total reward

In [None]:
# Experiment configuration.
seed = 42  # fixed so that Restart & Run All reproduces the same total reward
n_arms = 4
epsilon = 0.1
n_trials = 1000

# Seed the legacy global NumPy RNG: both the reward table below and the
# agent's arm selection draw from np.random.
np.random.seed(seed)

# Pre-draw the reward every arm would pay at every trial.
# NOTE: all arms are sampled from the same standard normal distribution, so
# no arm has a higher expected reward than the others in this setup.
rewards = np.random.randn(n_arms, n_trials)

agent = EpsilonGreedy(n_arms, epsilon)
total_reward = 0.0

for t in range(n_trials):
    arm = agent.select_arm()
    reward = rewards[arm, t]  # only the chosen arm's reward is observed
    agent.update(arm, reward)
    total_reward += reward

print("Total Reward:", total_reward)

Total Reward: 81.66027232228254
