# Q-Learner for lunar lander

- Q-Learning algorithm written in PyTorch, applied here to the lunar lander (`LunarLander-v2`) environment

- The batch version is used here: the Q function is updated after each episode of simulation

In [1]:
# imports
import os 
import sys
sys.path.append('../../')

# import custom reinforcement library
import reinforcement_library as reinlib

# alias the fitted deep Q-learning module (PyTorch) and the reward-history
# plotter from the custom library; neither alias is tied to one environment --
# the gym environment is chosen later through `gymname`
learner = reinlib.deep_Q_Learning.qfitted_deepQlearning_pytorch
plotter = reinlib.deep_Q_Learning.history_plotter

# load in autoreload so any changes made to backend files mirrored in notebook
# without the need to restart kernel
%load_ext autoreload
%autoreload 2

Using TensorFlow backend.


- The Q-Learner source code can be displayed from the backend file by running the command

```learner.QLearner??```

in a code cell

## Q Learning setup interface

In [13]:
# experiment bookkeeping: working directory, gym environment, experiment name.
# savename keys the saved outputs (the plotting cell reads
# 'reward_logs/' + savename + '.txt'), so it names the lunar lander environment
# actually being trained rather than reusing a cartpole experiment name.
dirname = os.getcwd()
gymname = 'LunarLander-v2'
savename = 'qfitted_lunarlander_experiment_1'

# Q-learning process settings
num_episodes = 1000   # total number of episodes requested from train()
explore_decay = 1     # presumably a multiplicative decay; the training log shows explore val constant at 0.01
explore_val = 0.01    # exploration value reported as "explore val" in the training log
exit_level = 200      # presumably the average-reward level that ends training early -- TODO confirm in learner.QLearner
exit_window = 10      # presumably the number of episodes averaged for the exit test -- TODO confirm

# memory settings
# NOTE(review): presumably Q is refit every `episode_update` episodes using the
# last `memory_length` episodes -- confirm against learner.QLearner
episode_update = 1
memory_length = 1

# build the learner instance; one keyword per line so each setting is easy to audit
demo = learner.QLearner(
    gymname,
    dirname,
    savename,
    num_episodes=num_episodes,
    explore_decay=explore_decay,
    explore_val=explore_val,
    memory_length=memory_length,
    episode_update=episode_update,
    exit_level=exit_level,
    exit_window=exit_window,
)

# Q function settings: two hidden layers of 300 units with relu activation
layer_sizes = [300, 300]
alpha = 10 ** (-2)    # presumably the optimizer step size / learning rate -- TODO confirm
activation = 'relu'
demo.initialize_Q(layer_sizes=layer_sizes, alpha=alpha, activation=activation)

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


In [14]:
# run training on the learner configured above; in the saved run this printed
# one progress line per episode and was stopped by hand (KeyboardInterrupt)
# partway through episode 11, so the recorded output is incomplete
demo.train()

episode 1 of 1000 complete,  explore val = 0.01, episode reward = -539.5, ave reward = -53.9
episode 2 of 1000 complete,  explore val = 0.01, episode reward = -1026.1, ave reward = -156.6
episode 3 of 1000 complete,  explore val = 0.01, episode reward = -187.9, ave reward = -175.3
episode 4 of 1000 complete,  explore val = 0.01, episode reward = -576.4, ave reward = -233.0
episode 5 of 1000 complete,  explore val = 0.01, episode reward = -170.9, ave reward = -250.1
episode 6 of 1000 complete,  explore val = 0.01, episode reward = -797.0, ave reward = -329.8
episode 7 of 1000 complete,  explore val = 0.01, episode reward = -961.4, ave reward = -425.9
episode 8 of 1000 complete,  explore val = 0.01, episode reward = -359.8, ave reward = -461.9
episode 9 of 1000 complete,  explore val = 0.01, episode reward = -452.9, ave reward = -507.2
episode 10 of 1000 complete,  explore val = 0.01, episode reward = -178.6, ave reward = -525.0
episode 11 of 1000 complete,  explore val = 0.01, episode r

KeyboardInterrupt: 

## Plot total episode reward history

In [None]:
# build the relative path of this experiment's reward log (keyed on savename)
# and hand it to the history plotter; window_length presumably sets the
# averaging window used for the plotted curve -- TODO confirm in history_plotter
reward_logname = 'reward_logs/{}.txt'.format(savename)
plotter.plot_reward_history(reward_logname, window_length=100)