# Explicit Policy for Mountain Car

In [33]:
import gym
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plt

import time

env = gym.make("MountainCar-v0")

backwards, forwards = 0, 2

# run one episode 
def run_episode(weight_vector, state):
    for _ in np.arange(100):
        # If the car is moving away from the flag, accelerate backwards
        # Otherwise, accelerate forwards
        # This will allow the mountain car to gain enough momentum to reach the flag!
        action = backwards if np.matmul(weight_vector, state) > 0 else forwards
        state, reward, done, _ = env.step(action)
        if done:
            break
    return state[0]


# returns the number of guesses performed till success and the successful weight vector
def guess_till_success(state):
    counter = 0
    position = 0
    weight_vector = None
    while position < 0.5:
        counter += 1
        weight_vector = np.random.rand(2) * 2.0 - 1.0
        position = run_episode(weight_vector, state)
    
    print "Reached flag with final car position at", position

    return counter, weight_vector


[33mWARN: gym.spaces.Box autodetected dtype as <type 'numpy.float32'>. Please provide explicit dtype.[0m


## Test Runs

In [34]:
def run():
    state = env.reset()
    counter, weight_vector = guess_till_success(state)
    print("Number of guesses:", counter)
    print("Weight vector:", weight_vector)

In [35]:
run()

Reached flag with final car position at 0.501850245261335
('Number of guesses:', 44689)
('Weight vector:', array([-0.78635608,  0.73615851]))


In [36]:
run()

Reached flag with final car position at 0.5122463278204781
('Number of guesses:', 20428)
('Weight vector:', array([ 0.30940503, -0.28989041]))


In [37]:
run()

Reached flag with final car position at 0.504073067438042
('Number of guesses:', 3139)
('Weight vector:', array([0.9377231 , 0.68650374]))


In [38]:
run()

Reached flag with final car position at 0.5021445414307124
('Number of guesses:', 22374)
('Weight vector:', array([ 0.43257542, -0.54964087]))
