In [None]:
import sys
%matplotlib qt5
import pylab as plb
from matplotlib import pyplot as plt
import numpy as np
import mountaincar

In [None]:
class DummyAgent():
    """A not so good agent for the mountain-car task.

    The agent never looks at the state of the car: at every step it applies
    a force drawn uniformly from {-1, 0, 1}.  It is a template on top of
    which a learning agent can be built (see ``learn``).

    Parameters
    ----------
    mountain_car -- environment to act in; when None, a fresh
                    ``mountaincar.MountainCar()`` is created
    parameter1 -- placeholder parameter; it is stored on the instance but
                  not read by any method of this class
    """

    def __init__(self, mountain_car = None, parameter1 = 3.0):
        
        if mountain_car is None:
            self.mountain_car = mountaincar.MountainCar()
        else:
            self.mountain_car = mountain_car

        self.parameter1 = parameter1

    def visualize_trial(self, n_steps = 200):
        """Do a trial without learning, with display.

        The trial stops early as soon as the environment reports a positive
        reward (``mountain_car.R > 0``).  Progress is written to stdout as a
        single status line that is overwritten in place.

        Parameters
        ----------
        n_steps -- number of steps to simulate for
        """
        
        # prepare for the visualization
        plb.ion()
        mv = mountaincar.MountainCarViewer(self.mountain_car)
        # NOTE(review): the meaning of the two create_figure arguments is
        # defined in the mountaincar module -- confirm there
        mv.create_figure(n_steps, n_steps)
        plb.draw()
        plb.pause(1e-3)
            
        # make sure the mountain-car is reset
        self.mountain_car.reset()

        for n in range(n_steps):
            # end='' keeps the cursor on the same line so that the leading
            # '\r' of the next print overwrites this one (the Python 2 style
            # trailing comma has no such effect in Python 3)
            print('\rt =', self.mountain_car.t, end='')
            sys.stdout.flush()
            
            # choose a random action in {-1, 0, 1}
            self.mountain_car.apply_force(np.random.randint(3) - 1)
            # simulate the timestep
            # (presumably 100 sub-steps of length 0.01 -- confirm in mountaincar)
            self.mountain_car.simulate_timesteps(100, 0.01)

            # update the visualization
            mv.update_figure()
            plb.draw()
            plb.pause(1e-3)
            
            # check for rewards
            if self.mountain_car.R > 0.0:
                print("\rreward obtained at t = ", self.mountain_car.t)
                break
        else:
            # no reward within n_steps: terminate the in-place status line
            print()

    def learn(self):
        """Learning procedure -- not implemented yet (currently a no-op)."""
        # This is your job!
        pass

In [None]:
# Demo: build an agent with its default environment and run one
# random-action trial with live visualization.
if __name__ == "__main__":
    d = DummyAgent()
    d.visualize_trial()
    # presumably keeps the figure window open after the trial -- confirm
    # with the qt5 backend in use
    plb.show()

In [None]:
# centres of the neuron grid along x: 20 evenly spaced points on [-150, 30]
x_values = np.linspace(-150, 30, 20)

# centres of the neuron grid along v: 20 evenly spaced points on [-15, 15]
v_values = np.linspace(-15, 15, 20)

# distance between neighbouring centres on each axis
delta_x = x_values[1] - x_values[0]
delta_v = v_values[1] - v_values[0]

# per-centre widths: every centre uses the grid spacing of its own axis
sigma_x = np.full(len(x_values), delta_x)
sigma_v = np.full(len(v_values), delta_v)

# number of actions
n_actions = 3

# one neuron per (x, v) grid node
n_neurons = x_values.size * v_values.size

# weight matrix: one row per action, one column per neuron,
# initialised with standard-normal samples
w = np.random.randn(n_actions, n_neurons)

In [None]:
def r(x, v):
    ''' Activations of the whole neuron grid for the state s = (x, v).

    Reads the module-level grids x_values / v_values and widths
    sigma_x / sigma_v.  Returns a 2-D array with one row per entry of
    x_values and one column per entry of v_values.
    '''
    # squared distance to every centre, divided by the squared width
    scaled_dx = (x_values - x) ** 2 / sigma_x ** 2
    scaled_dv = (v_values - v) ** 2 / sigma_v ** 2
    # column (x) + row (v) broadcasts to the full grid
    exponent = scaled_dx.reshape(-1, 1) + scaled_dv.reshape(1, -1)
    return np.exp(-exponent)

In [None]:
def get_Q(x, v, w):
    ''' Q-values at the state s = (x, v) under the weights w.

    The activation grid r(x, v) is flattened into a column, multiplied
    by w, and the product is returned as a flat 1-D array.
    '''
    # activation grid as a single column vector
    activations = np.reshape(r(x, v), (-1, 1))
    q_column = w @ activations
    # drop the trailing length-1 axis
    return q_column.reshape(-1)

In [None]:
def get_action_probas(Q, tau):
    ''' get action probabilities as a vector

    Computes the softmax of Q / tau:  p_a = exp(Q_a / tau) / sum_b exp(Q_b / tau).

    Parameters
    ----------
    Q -- Q-values, one per action (array or sequence of numbers)
    tau -- temperature by which the Q-values are divided; it is not
           validated here, so the caller should pass a positive value

    Returns
    -------
    array of the same length as Q whose entries sum to 1
    (an empty array when Q is empty)
    '''
    scaled = np.asarray(Q, dtype=float) / tau
    if scaled.size == 0:
        # nothing to normalise; np.max would raise on an empty array
        return scaled

    # Subtracting the maximum leaves the softmax unchanged (the common
    # factor cancels in the ratio) but keeps exp() from overflowing to inf,
    # which would otherwise turn the probabilities into nan.
    vector = np.exp(scaled - np.max(scaled))
    return vector / np.sum(vector)

In [None]:
def get_action(x, v, w, tau):
    ''' Sample action for s = (x, v) and weights w with parameter tau

    The Q-values from get_Q(x, v, w) are turned into probabilities by
    get_action_probas(Q, tau), and one action index is drawn from them.

    Returns
    -------
    the sampled index minus one; with three actions this is -1, 0 or 1
    '''
    Q = get_Q(x, v, w)
    action_probas = get_action_probas(Q, tau)
    # The number of actions is taken from the probability vector itself, so
    # it always agrees with the weights actually passed in.
    action = np.random.choice(len(action_probas), p = action_probas)
    # shift the 0-based index so that three actions map to -1 / 0 / +1
    return action - 1

In [None]:
# Quick check: sample one action at the state (x, v) = (10, 10) with the
# current weights w and tau = 1; the cell displays the returned value.
get_action(10, 10, w, 1)