# Epsilon-Greedy Strategy

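In this notebook, we will implement the Epsilon-Greedy strategy for the Multi-Armed Bandit problem. On each trial the agent explores with probability $\epsilon$ by pulling an arm chosen uniformly at random; otherwise it exploits by pulling the arm with the highest average reward observed so far.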

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output

## Implementation of Epsilon-Greedy Strategy

In [None]:
class EpsilonGreedyBandit:
    """Epsilon-greedy agent for a multi-armed bandit.

    On every call to ``select_arm`` the agent explores (uniformly random arm)
    with probability ``epsilon`` and otherwise exploits the arm with the
    highest sample-mean reward observed so far. Arms that have never been
    pulled are treated as having an estimated value of 0.

    Attributes:
        num_arms: Number of arms available.
        epsilon: Exploration probability in [0, 1].
        arm_counts: Float array, number of times each arm has been updated.
        arm_rewards: Float array, SUM of rewards received from each arm
            (not the mean).
        total_reward: Sum of all rewards passed to ``update``.
    """

    def __init__(self, num_arms, epsilon=0.1):
        """Create an agent with no observations.

        Args:
            num_arms: Number of arms; must be at least 1.
            epsilon: Probability of exploring on each selection; must lie
                in [0, 1].

        Raises:
            ValueError: If ``num_arms`` < 1 or ``epsilon`` is outside [0, 1]
                (NaN included).
        """
        if num_arms < 1:
            raise ValueError(f"num_arms must be at least 1, got {num_arms}")
        # Written as a negated chained comparison so that NaN is rejected too.
        if not 0 <= epsilon <= 1:
            raise ValueError(f"epsilon must be in [0, 1], got {epsilon}")
        self.num_arms = num_arms
        self.epsilon = epsilon
        self.arm_counts = np.zeros(num_arms)
        self.arm_rewards = np.zeros(num_arms)
        self.total_reward = 0

    def _estimated_means(self):
        """Return the per-arm sample mean reward; unpulled arms map to 0."""
        # Masked division: only arms with at least one pull are divided, the
        # rest keep the 0 written into `out`. This avoids both a divide-by-zero
        # and the count-dependent skew of adding a small constant to the
        # denominator.
        return np.divide(
            self.arm_rewards,
            self.arm_counts,
            out=np.zeros(self.num_arms),
            where=self.arm_counts > 0,
        )

    def select_arm(self):
        """Pick the next arm to pull.

        Draws from NumPy's global random state: one uniform draw decides
        between exploring and exploiting, and a second draw picks the arm
        only when exploring.

        Returns:
            int: Index of the chosen arm in ``[0, num_arms)``. When
            exploiting, ties are resolved in favour of the lowest index.
        """
        if np.random.random() < self.epsilon:
            return np.random.randint(0, self.num_arms)
        # int(...) so both branches hand back a plain Python int.
        return int(np.argmax(self._estimated_means()))

    def update(self, arm, reward):
        """Record the reward observed after pulling ``arm``.

        Args:
            arm: Index of the arm that was pulled, in ``[0, num_arms)``.
            reward: Numeric reward received.

        Raises:
            IndexError: If ``arm`` is outside ``[0, num_arms)``. Negative
                indices are rejected explicitly because NumPy would otherwise
                wrap them around and silently credit a different arm.
        """
        if not 0 <= arm < self.num_arms:
            raise IndexError(
                f"arm index {arm} is out of range for {self.num_arms} arms"
            )
        self.arm_counts[arm] += 1
        self.arm_rewards[arm] += reward
        self.total_reward += reward

## Experiment

In [None]:
# Seed NumPy's global generator first so a fresh Restart & Run All replays
# exactly the same sequence of draws.
np.random.seed(42)

num_trials = 1000
rewards = []  # reward observed on each trial, in order

# Mean payout of each of the 5 arms, drawn once from a normal(0.5, 1).
true_rewards = np.random.normal(loc=0.5, scale=1, size=5)
bandit = EpsilonGreedyBandit(num_arms=5, epsilon=0.1)

for step in range(num_trials):
    chosen_arm = bandit.select_arm()
    # The pulled arm pays out its mean plus unit-variance Gaussian noise.
    payout = np.random.normal(loc=true_rewards[chosen_arm], scale=1)
    bandit.update(chosen_arm, payout)
    rewards.append(payout)

## Results

In [None]:
# Line chart of the reward obtained on every trial.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(rewards)
ax.set_xlabel('Trial')
ax.set_ylabel('Reward')
ax.set_title('Epsilon-Greedy Bandit Performance')
plt.show()

# Summary figures accumulated by the agent over the whole run.
print(f"Total Reward: {bandit.total_reward}")
print(f"Average Reward per Trial: {bandit.total_reward / num_trials}")