In [1]:
import numpy as np
import matplotlib.pyplot as plt
from agent import Agent
from bandit import Bandits_one
from bandit import Bandits_two
import algorithms

In [2]:
# Experiment configuration shared by the run_exp calls in this notebook.
# Number of steps played in each run; also the length of the returned curves.
num_episodes = 1000
# Number of independent runs whose results are averaged together.
num_runs = 20
# Exploration rate: passed as 'epsilon' to epsilon_greedy and as
# 'init_epsilon' to decaying_epsilon_greedy.
epsilon = 0.1

def run_exp(algorithm, args, num_episodes, num_runs):
    """Run a bandit algorithm over several independent runs and average the results.

    Every run builds a fresh ``Bandits_two`` environment and a fresh ``Agent``,
    then plays ``num_episodes`` steps, tracking the running totals of reward
    and regret.

    Args:
        algorithm: Action-selection strategy handed to ``Agent``.
        args: Dict of settings for the algorithm. A copy is made for each run,
            so the caller's dict is never modified.
        num_episodes: Number of steps played in each run.
        num_runs: Number of independent runs to average over (must be >= 1).

    Returns:
        Tuple ``(average_cumulative_rewards, average_cumulative_regrets)``.
        Both are NumPy arrays of length ``num_episodes``; entry ``t`` is the
        running total up to and including step ``t``, averaged over the runs.

    Raises:
        ValueError: If ``num_runs`` is less than 1 (the average would be 0/0).
    """
    if num_runs < 1:
        raise ValueError("num_runs must be at least 1")

    cumulative_rewards = np.zeros(num_episodes)
    cumulative_regrets = np.zeros(num_episodes)

    for _ in range(num_runs):
        # Swap in Bandits_one() here to run the experiment on the other environment.
        bandit_env = Bandits_two()
        optimal_action = bandit_env.get_optimal_action()

        # Work on a per-run copy so the caller's dictionary is left untouched.
        current_args = args.copy()

        agent = Agent(k=bandit_env.k, algorithm=algorithm, args=current_args)

        total_reward = 0
        total_regret = 0
        for episode in range(num_episodes):
            # Expose the current step through the args dict for decaying
            # epsilon-greedy (presumably the agent keeps a reference to this
            # dict -- verify against Agent).
            current_args['episode'] = episode

            action = agent.select_action()
            # Only the reward is used; the other step outputs are ignored.
            _, reward, _, _, _ = bandit_env.step(action)
            agent.update(action, reward)

            total_reward += reward
            cumulative_rewards[episode] += total_reward

            # Regret of this step: best arm's mean minus the reward received.
            # Accumulate it so the stored series is a running total, matching
            # how rewards are tracked above.
            total_regret += bandit_env.means[optimal_action] - reward
            cumulative_regrets[episode] += total_regret

    average_cumulative_rewards = cumulative_rewards / num_runs
    average_cumulative_regrets = cumulative_regrets / num_runs

    return average_cumulative_rewards, average_cumulative_regrets


In [2]:
# Run each strategy under the same number of episodes and runs.
c1_rewards, c1_regrets = run_exp(
    algorithms.epsilon_greedy,
    {'epsilon': epsilon},
    num_episodes,
    num_runs,
)
c2_rewards, c2_regrets = run_exp(
    algorithms.decaying_epsilon_greedy,
    {'episodes': num_episodes, 'init_epsilon': epsilon},
    num_episodes,
    num_runs,
)
c3_rewards, c3_regrets = run_exp(algorithms.ucb, {'c': 2}, num_episodes, num_runs)

# Plotting
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(c1_rewards, label='ε-greedy')
ax.plot(c2_rewards, label='Decaying ε-greedy')
ax.plot(c3_rewards, label='ucb values')
ax.set_xlabel('Episodes')
# The curves drawn above are the reward series, so the axis is labelled as rewards.
ax.set_ylabel('Cumulative Rewards')
ax.set_title('Comparison of Bandit Strategies')
ax.legend()
plt.show()

NameError: name 'run_exp' is not defined