# Visualizing Rewards

## Motivation

I've been playing around with relatively simple strategies like the $\epsilon$-greedy algorithm just to see how the agent does in general. I wanted a way to compare the different models other than looking at the final score.

# Class Definition

Here we define a class that makes it easy to plot multiple simulations side by side for comparison.

In [None]:
%%writefile SimulationExplorer.py

import matplotlib.pyplot as plt

class SimViz:
    '''Collect named simulations and plot their per-step rewards.

    Attributes:
        sims: dict mapping a simulation name to the environment object that
            was registered under that name.
        envs: dict mapping the same names to whatever ``env.toJSON()``
            returned for that environment (``None`` if the call failed).
    '''

    def __init__(self, sims=None):
        '''Create the explorer, optionally pre-loaded with simulations.

        Args:
            sims: Optional mapping of name -> environment. Each environment
                is expected to provide a ``toJSON()`` method.
        '''
        # Shallow copy so later add_sim/add_env calls never touch the caller's dict
        self.sims = dict(sims) if sims else {}
        # Snapshot the JSON form of every simulation once, up front
        self.envs = {
            name: self.get_env_json(env) for name, env in self.sims.items()
        }

    def get_env_json(self, env):
        '''Return ``env.toJSON()``, or ``None`` if that call fails.

        This is deliberately best-effort: a failure is reported on stdout and
        ``None`` is returned instead of raising, so one bad environment does
        not prevent the others from being registered.
        '''
        try:
            return env.toJSON()
        except Exception:
            # ``Exception`` (not a bare except) so Ctrl-C and interpreter
            # shutdown are never swallowed here.
            print('ERROR: Invalid environment')
            return None

    def _auto_name(self, existing):
        '''Build a default name that is not yet used by ``sims`` or ``envs``.

        The base name is ``<count + 1>_<last character of every existing key>``;
        a numeric suffix is appended while the candidate is already taken.
        '''
        # str(...)[-1:] tolerates empty and non-string keys
        tail = ''.join(str(key)[-1:] for key in existing)
        base = f'{len(existing) + 1}_{tail}'
        candidate = base
        bump = 1
        while candidate in self.sims or candidate in self.envs:
            candidate = f'{base}_{bump}'
            bump += 1
        return candidate

    def add_env(self, env, name=None):
        '''Register ``env`` under ``name`` in both ``sims`` and ``envs``.

        Args:
            env: Environment object; its ``toJSON()`` result is stored too.
            name: Key to store it under. When omitted a unique name is
                generated. An explicit name that already exists is
                overwritten.
        '''
        if name is None:
            name = self._auto_name(self.envs)
        # Add to both sims and envs
        self.sims[name] = env
        self.envs[name] = self.get_env_json(env)

    def add_sim(self, env, name=None):
        '''Register a simulation; same contract as :meth:`add_env`.

        Args:
            env: Environment object to register.
            name: Key to store it under. When omitted a unique name is
                generated. An explicit name that already exists is
                overwritten.
        '''
        if name is None:
            name = self._auto_name(self.sims)
        # add_env stores the entry in both dictionaries
        self.add_env(env, name)

    def get_rewards(self, name, agent=0):
        '''Return the ``'reward'`` value recorded at every step.

        Args:
            name: Name the simulation was registered under.
            agent: Index into each step's list of entries. Defaults to 0,
                the first entry.

        Returns:
            A list with one reward per step, in step order.

        Raises:
            KeyError: If no simulation is registered under ``name``.
            ValueError: If the stored JSON for ``name`` is missing or has no
                ``'steps'`` entry.
        '''
        if name not in self.envs:
            raise KeyError(f'Unknown simulation: {name!r}')
        env_json = self.envs[name]
        if env_json is None:
            raise ValueError(f'Simulation {name!r} has no usable environment data')
        agent_steps = env_json.get('steps')
        if agent_steps is None:
            raise ValueError(f'Simulation {name!r} has no recorded steps')
        return [step[agent].get('reward') for step in agent_steps]

    def plot_total_reward(self, names=None, *args, **kwargs):
        '''Plot the per-step rewards of one or more simulations on one axes.

        Args:
            names: A single name, a list of names, or ``None`` to plot every
                registered simulation.
            *args: Extra positional arguments forwarded to ``ax.plot``.
            **kwargs: Extra keyword arguments forwarded to ``ax.plot``
                (for example ``linestyle='dashed'``).

        Returns:
            The ``(fig, ax)`` pair created for the plot.
        '''
        fig, ax = plt.subplots(figsize=(12, 8))
        ax.set_title('Total Rewards Over Steps')
        ax.set_xlabel('Steps')
        # NOTE(review): the label assumes each step's reward is already a
        # running total -- confirm against the environment.
        ax.set_ylabel('Rewards (cumulative)')
        if names is None:
            names = list(self.envs.keys())
        elif not isinstance(names, list):
            names = [names]

        # One line per simulation, labelled with its name for the legend
        for name in names:
            rewards = self.get_rewards(name)
            ax.plot(rewards, *args, label=name, **kwargs)

        ax.legend(loc='upper left', ncol=2)

        return fig, ax
   
        
            

# Example Usage

## Simulate Agents

### Example agent

In [None]:
%%writefile always0_agent.py

# Always pick the first bandit/machine (index 0)
def always0_agent(observation, configuration):
    '''Pick machine #0 on every call.

    Both arguments are accepted but never read.
    '''
    return 0


In [None]:
%%writefile random_agent.py

import numpy as np
np.random.seed(27)

# Randomly pick different bandits/machines
def random_agent(observation, configuration):
    '''Pick one machine at random out of ``configuration.banditCount``.

    ``observation`` is accepted but never read.
    '''
    machine_ids = np.arange(configuration.banditCount)
    picked = np.random.choice(machine_ids)
    # Hand back a built-in int rather than a NumPy integer
    return int(picked)


### Run simulation

In [None]:
!pip install kaggle-environments --upgrade

In [None]:
from kaggle_environments import make

In [None]:
# Run the always-0 agent against a second copy of itself in a "mab" environment,
# then render the finished run in the notebook.
always0_v_always0 = make("mab", debug=True)

always0_v_always0.run(["always0_agent.py", "always0_agent.py"])
always0_v_always0.render(mode="ipython", width=800, height=300)

In [None]:
# Run the random agent against a second copy of itself in a "mab" environment,
# then render the finished run in the notebook.
random_v_random_env = make("mab", debug=True)

random_v_random_env.run(["random_agent.py", "random_agent.py"])
random_v_random_env.render(mode="ipython", width=800, height=300)

In [None]:
# Random agent listed first, always-0 agent second; SimViz.get_rewards reads
# the first entry of each step by default.
random_v_always0 = make("mab", debug=True)

random_v_always0.run(["random_agent.py", "always0_agent.py"])
random_v_always0.render(mode="ipython", width=800, height=300)

In [None]:
# Same pairing with the order swapped: always-0 agent first, random agent second.
always0_v_random = make("mab", debug=True)

always0_v_random.run(["always0_agent.py", "random_agent.py"])
always0_v_random.render(mode="ipython", width=800, height=300)

## Visualize Agents

In [None]:
import SimulationExplorer as Explorer

In [None]:
# Name each finished simulation; SimViz uses these keys as the legend labels.
sims = {
    'always_0-v-always_0': always0_v_always0,
    'random-v-always_0': random_v_always0,
    'random-v-random': random_v_random_env,
    'always_0-v-random': always0_v_random,
}

In [None]:
# Load all four simulations into one explorer object
test = Explorer.SimViz(sims)

In [None]:
# No names given, so every stored simulation is plotted; linestyle is forwarded to ax.plot
f ,ax = test.plot_total_reward(linestyle='dashed')

In [None]:
# Plot only the two simulations named in the list
f_rand, ax_rand = test.plot_total_reward(['always_0-v-always_0','always_0-v-random'])

### Check that the simulations' final rewards match the plots

In [None]:
# Print the 'rewards' entry of each simulation's JSON next to its name,
# for comparison with the last point of each plotted curve.
for n,env in sims.items():
    print(n,env.toJSON().get('rewards'))