# Abstract Bandit

In [1]:
# Import modules.
from abc import ABC, abstractmethod
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
class Bandit(ABC):
    """Abstract base class for a multi-armed bandit simulation.

    Rewards of each arm follow a normal distribution (or are returned exactly
    when ``deterministic`` is set). Concrete subclasses provide the policy and
    the bookkeeping by implementing ``choose_action``, ``update_model`` and
    ``update_sim_data``.
    """

    def __init__(self, means, sigmas, arms=10, iters=10000, deterministic=False, stationary=True):
        """Initializes the abstract bandit object and model attributes.

        Args:
            means: Per-arm means of the normal distribution for rewards.
            sigmas: Per-arm standard deviations of the normal distribution
                for rewards. Only read when ``deterministic`` is false.
            arms: Number of arms.
            iters: Number of iterations of the episode.
            deterministic: If true, rewards are deterministic.
            stationary: If True, means don't change. This base class only
                stores the flag; it never reads it.
        """
        # Model variables initialization.
        # Means of the normal distribution for rewards. A private float copy
        # is taken so that in-place updates of ``self.means`` never leak into
        # the caller's array (or into other bandits built from it).
        self.means = np.array(means, dtype=float)
        # Standard deviations of the normal distribution for rewards.
        # Stored as given: it is not used at all in deterministic mode.
        self.sigmas = sigmas
        # Number of arms.
        self.arms = arms
        # Counters for actions taken.
        self.Ns = np.zeros(arms)
        # Number of iterations of the episode.
        self.iters = iters
        # If True, rewards are deterministic.
        self.deterministic = deterministic
        # If True, means don't change.
        self.stationary = stationary

        # Simulation data initialization.
        # Array to store the trend of the average reward.
        self.avg_rewards = np.zeros(iters + 1)
        # Matrix to store the trend of the means; row 0 holds the initial
        # means, the remaining rows start at zero.
        self.qs = np.zeros((iters + 1, arms))
        self.qs[0, :] = self.means
        # Counter for optimal actions taken.
        self.opt_actions = 0

    @abstractmethod
    def choose_action(self):
        """Selects the arm to pull; ``run`` passes the result to ``pull_arm``."""
        pass

    def pull_arm(self, arm):
        """Draws the reward of ``arm``.

        Returns the arm's mean when the bandit is deterministic, otherwise a
        sample from a normal distribution with the arm's mean and sigma.
        """
        if self.deterministic:
            return self.means[arm]
        return np.random.normal(self.means[arm], self.sigmas[arm])

    @abstractmethod
    def update_model(self, arm, reward, iteration):
        """Updates the model after ``arm`` yielded ``reward`` at ``iteration``."""
        pass

    @abstractmethod
    def update_sim_data(self, arm, reward, iteration):
        """Updates the simulation data after ``arm`` yielded ``reward``."""
        pass

    def run(self):
        """Runs one episode of ``self.iters`` iterations.

        Each iteration chooses an action, pulls the arm, then updates the
        model and the simulation data with the 0-based iteration index.
        """
        for i in range(self.iters):
            arm = self.choose_action()
            reward = self.pull_arm(arm)
            self.update_model(arm, reward, i)
            self.update_sim_data(arm, reward, i)

    def get_avg_rewards(self):
        """Returns a copy of the trend of the average reward."""
        return np.copy(self.avg_rewards)

    def get_real_means(self):
        """Returns a copy of the matrix of real action values."""
        return np.copy(self.qs)

    def get_actions(self):
        """Returns a copy of the action counters."""
        return np.copy(self.Ns)

    def get_opt_actions(self):
        """Returns ``opt_actions / iters`` as a float (a fraction, not x100).

        Returns 0.0 for a zero-iteration episode instead of dividing by zero.
        """
        if self.iters == 0:
            return 0.0
        return float(self.opt_actions) / float(self.iters)