In [None]:
import gym
import tensorflow as tf
import itertools
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import importlib

import IPython

%autosave 240

In [None]:
import sys
# Put the parent directory on the module search path (resolved relative to the
# kernel's working directory); presumably that is where embodiedMG lives.
sys.path.append('./..')
import embodiedMG as emg
# Re-execute the module so that edits made to it since the first import are
# picked up without restarting the kernel.
importlib.reload(emg)

## Cartpole Benchmark Setup

In [None]:
# Clear the default TF graph first so that re-running this cell builds the
# agent into an empty graph.
tf.reset_default_graph()
env = gym.make('CartPole-v0')

# Constructor settings for the agent.
#   space_size is (state size, action size).
#   alpha_p / alpha_v: presumably the policy / value learning rates — TODO confirm
#   in embodiedMG.
# The constructor also accepts sensorium=<SensoriumNetworkTemplate> and
# latentDim=<zdim>; leaving out alpha_p / alpha_v falls back to the class defaults.
agent_settings = dict(
    name="cartpole-emb",
    env_=env,
    space_size=(4, 1),
    alpha_p=5.e-2,
    alpha_v=1.e-1,
)
cprf = emg.EmbodiedAgentRFBaselined(**agent_settings)
print(cprf, cprf.s_size, cprf.a_size)

In [None]:
# Checkpoint saver that retains only the most recent checkpoint.
# NOTE(review): the training cell rebinds `saver` (max_to_keep=2) before any use,
# so this instance looks redundant — confirm before removing.
saver = tf.train.Saver(max_to_keep=1)
# An InteractiveSession installs itself as the default session when constructed.
sess = tf.InteractiveSession() 
# presumably initializes the agent's TF variables inside `sess` — TODO confirm in embodiedMG
cprf.init_graph(sess)

# Number of evaluation episodes used by both the baseline and the test loops.
num_episodes = 100
# Value passed as num_epochs to cprf.work in the training cell.
n_epochs = 50001

In [None]:
## Verify step + play set up
# Smoke test: one manual environment step followed by one call to play().
state = cprf.env.reset()
# Query the agent once and reuse the result, so the action that is printed is
# exactly the action that is applied. Calling act() separately for the print and
# for the step could yield two different actions if the policy samples
# (presumably it does — TODO confirm in embodiedMG).
action = cprf.act(state, sess)
print(state, action)
cprf.env.step(action)
cprf.play(sess)
# Displayed as the cell output: size of the buffer left behind by play().
len(cprf.episode_buffer)

## Baseline

In [None]:
# Reference measurement: call play() num_episodes times with the untrained agent
# and record cprf.last_total_return after each call.
# (len(cprf.episode_buffer) would be the alternative per-episode metric.)
print('Baselining untrained pnet...')
uplen0 = []
for episode_idx in range(num_episodes):
    cprf.play(sess)
    uplen0 += [cprf.last_total_return]
    if episode_idx % 20:
        continue  # refresh the progress line only on every 20th episode
    print("\rEpisode {}/{}".format(episode_idx, num_episodes), end="")
base_perf = np.mean(uplen0)
print("\nCartpole stays up for an average of {} steps".format(base_perf))

## Train

In [None]:
# Train pnet on cartpole episodes
print('Training...')
# Fresh saver retaining the two most recent checkpoints; it replaces the saver
# bound when the session was created.
saver = tf.train.Saver(max_to_keep=2)
# presumably runs the agent's training loop for n_epochs epochs and checkpoints
# through `saver` — TODO confirm in embodiedMG
cprf.work(sess, saver, num_epochs = n_epochs)

## Test

In [None]:
# Test pnet!
# Same measurement as the baseline cell, taken after training: call play()
# num_episodes times and record cprf.last_total_return after each call.
# (len(cprf.episode_buffer) would be the alternative per-episode metric.)
print('Testing...')
uplen = []
for episode_idx in range(num_episodes):
    cprf.play(sess)
    uplen += [cprf.last_total_return]
    if episode_idx % 20:
        continue  # refresh the progress line only on every 20th episode
    print("\rEpisode {}/{}".format(episode_idx, num_episodes), end="")
trained_perf = np.mean(uplen)
print("\nCartpole stays up for an average of {} steps compared to baseline {} steps".format(trained_perf, base_perf) )

## Evaluate

In [None]:
# Box plots of the per-episode values collected before (top) and after (bottom)
# training, stacked on a shared x axis so the two distributions line up.
fig, axs = plt.subplots(2, 1, sharex=True)
# Pass the samples as `x=` explicitly. Given positionally, seaborn 0.11 emits a
# FutureWarning and seaborn >= 0.12 binds the list to `data` instead, which
# changes the plot and defeats the shared x axis.
sns.boxplot(x=uplen0, ax=axs[0])
axs[0].set_title('Baseline Episode Lengths')
sns.boxplot(x=uplen, ax=axs[1])
axs[1].set_title('Trained Episode Lengths')
# The plotted values are the cprf.last_total_return readings from each play() call.
axs[1].set_xlabel('Total return per episode')

In [None]:
# Release the TensorFlow session's resources. Cells above that use `sess` need a
# new session (re-run the session cell) before they can be run again.
sess.close()