## Mountain Car Environment

In [58]:
import math
class MountainCarEnvironment:
    """Implementation of Sutton & Barto (1998) Mountain Car Problem environment.

    The environment wraps an agent (``car``) that exposes a mutable
    two-element ``state`` sequence ``[velocity, position]`` and a
    ``doAction()`` method returning the chosen action.
    """
    # Valid (min, max) ranges for the two state components.
    velocity_boundaries = (-0.07, 0.07)
    position_boundaries = (-1.2, 0.6)

    def __init__(self, car):
        """Create the environment and put the agent in the initial state.

        Input: agent for the MountainCarEnvironment
        Output: MountainCarEnvironment object
        """
        self.car = car
        self.reset()

    def nextState(self):
        """Compute the next state from the agent's current state and action.

        The agent's state is read but not modified; the caller decides
        whether to commit the returned state.

        Output: (new velocity, new position), both kept within
        ``velocity_boundaries`` and ``position_boundaries``.
        """
        # Get current state (velocity, position) and the action chosen by the agent
        velocity = self.car.state[0]
        position = self.car.state[1]
        action = self.car.doAction()

        min_velocity, max_velocity = self.velocity_boundaries
        min_position, max_position = self.position_boundaries

        # Velocity update: engine thrust plus the gravity term, then clamp
        velocity += action * 0.001 + math.cos(3 * position) * -0.0025
        velocity = min(max(velocity, min_velocity), max_velocity)

        # Position update; keep the car inside the track
        position += velocity
        if position <= min_position:
            # Hitting the left wall stops the car
            position = min_position
            velocity = 0.0
        elif position > max_position:
            position = max_position

        return (velocity, position)

    def reset(self):
        """Reset the agent to the initial state (velocity 0.0, position -0.5)."""
        # Mutate in place so other references to the state list stay valid
        self.car.state[0] = 0.0
        self.car.state[1] = -0.5

    def calculateReward(self):
        """Give the reward for the state the agent is currently in.

        Output: terminal state = 0, non-terminal state = -1
        """
        # Get current position of the agent
        position = self.car.state[1]

        # The goal is the right edge of the track
        if position >= self.position_boundaries[1]:
            return 0
        return -1
        

## Agent (Car)

In [None]:
class Agent:
    """Implementation of agent (car) that will be used in the Mountain Car Environment.

    ``state`` is a mutable list ``[velocity, position]``.
    """
    initial_velocity = 0.0
    initial_position = -0.5

    def __init__(self, algorithm):
        """Create an agent in its initial state.

        Input: algorithm (class of algorithm that is used as the base
        method for our agent)
        """
        # Class attributes are not visible as bare names inside a method,
        # so they have to be looked up through the instance.
        self.state = [self.initial_velocity, self.initial_position]
        self.algorithm = algorithm

    def doAction(self):
        """Let the car choose an action.

        Output: -1 (left), 0 (neutral), 1 (right)
        """
        # TODO: epsilon greedy method for choosing action. For now, just always choose right
        return 1

## kNN Algorithm

In [None]:
class KNN:
    """Using the kNN TD algorithm approach."""

    def __init__(self, k, size):
        """Create the kNN store.

        Input: the number of k-neighbours, size of the stored Q value
        """
        self.k = k
        # Keep the requested size instead of silently dropping it.
        # NOTE(review): presumably the capacity/dimension of q_storage -
        # confirm once the storage layout is implemented.
        self.size = size
        self.q_storage = []
    
    