In [None]:
import gym
import tensorflow as tf
import itertools
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import importlib
import IPython

%autosave 240

In [None]:
import sys
sys.path.append('./..')

import minority_agent
from minority_env import MinorityGame1vN_env

import embodiedMG as emg

In [None]:
# Re-import the project-local modules so that edits made to them on disk are
# picked up without restarting the kernel.
importlib.reload(minority_agent) 
importlib.reload(emg)

## Minority Game Benchmark Setup

In [None]:
# Number of evaluation episodes played by the pre-test and post-test loops.
num_episodes = 100
# NOTE(review): the training cell reassigns n_epochs (to 501) before passing it
# to embrf.work, so this value is not what training currently runs with.
n_epochs = 50001

# Reload the project-local modules again so on-disk edits are picked up.
importlib.reload(minority_agent) 
importlib.reload(emg)

In [None]:
# Clear TensorFlow's default graph so re-running this cell starts from an
# empty graph.
tf.reset_default_graph()
# NOTE(review): the meaning of the positional arguments (33, 4, 4, 0.5) is
# defined in minority_env and is not visible here — confirm against that module.
menv = MinorityGame1vN_env(33, 4, 4, 0.5)
# Agent under test, bound to the environment above. alpha_p / alpha_v are
# presumably the policy and value learning rates — TODO confirm in embodiedMG.
embrf = emg.EmbodiedAgentRFBaselined(
    name="mingame-RFB", 
    env_=menv,
    alpha_p=5.e-2, alpha_v=1.e-1
)

# Show the environment's reported state-space and action-space sizes.
print(menv.state_space_size, menv.action_space_size)

In [None]:
# Override the agent's episode-length cap for this run (see the author's
# caveat at the end of the line).
embrf.max_episode_length = 30 #101  # dangerous... may incentivize finite n behavior
print(embrf, embrf.s_size, embrf.a_size)

# Open an interactive TF session and hand it to the agent's init_graph.
# The session stays open until the explicit sess.close() cell near the end.
sess = tf.InteractiveSession()
embrf.init_graph(sess) # note tboard log dir

In [None]:
## Verify step + play set up
# Reset the environment and check that the agent can choose an action for the
# initial state.
state = embrf.env.reset()
print(state, embrf.act(state, sess))

# Take one environment step with a freshly chosen action, run play() once, and
# display the agent's last_total_return (presumably updated by play — confirm).
embrf.env.step(embrf.act(state, sess))
embrf.play(sess)
embrf.last_total_return

### Pre-test Agent

In [None]:
# Pre-test: play `num_episodes` episodes with the untrained agent and record
# the total return the agent reports after each one.
print('Baselining untrained pnet...')
rwd_mg0 = []
# Print progress roughly five times per run. The max() guard keeps the modulus
# non-zero: int(num_episodes/5) is 0 for 1 <= num_episodes < 5, which would
# raise ZeroDivisionError on the first iteration.
progress_every = max(1, num_episodes // 5)
for k in range(num_episodes):
    embrf.play(sess)
    rwd_mg0.append(embrf.last_total_return)
    if k % progress_every == 0:
        print("\rEpisode {}/{}".format(k, num_episodes), end="")
# Mean return over all baseline episodes; reused by the later comparison cells.
base_perf_mg = np.mean(rwd_mg0)
print("\nAgent wins an average of {} pct".format(100.0*base_perf_mg))

### Train Agent with Algorithm on Experience Tuples

In [None]:
# Train pnet on mingame episodes
print('Training...')
# Overrides the n_epochs value assigned in the benchmark-setup cell.
n_epochs = 501
# Checkpoint saver that retains at most one recent checkpoint.
saver = tf.train.Saver(max_to_keep=1)
# Run the agent's training routine with this session and saver for n_epochs.
embrf.work(sess, saver, num_epochs = n_epochs)

### Post-test Agent

In [None]:
# Test pnet!
# Post-test: play `num_episodes` episodes with the trained agent and record
# the total return the agent reports after each one.
print('Testing...')
rwd_mg = []
# Print progress roughly five times per run. The max() guard keeps the modulus
# non-zero: int(num_episodes/5) is 0 for 1 <= num_episodes < 5, which would
# raise ZeroDivisionError on the first iteration.
progress_every = max(1, num_episodes // 5)
for k in range(num_episodes):
    embrf.play(sess)
    rwd_mg.append(embrf.last_total_return)
    if k % progress_every == 0:
        print("\rEpisode {}/{}".format(k, num_episodes), end="")
# Mean return after training, reported next to the pre-training baseline.
trained_perf_mg = np.mean(rwd_mg)
print("\nAgent wins an average of {} pct compared to baseline of {} pct".format(
    100*trained_perf_mg, 100*base_perf_mg) )

In [None]:
# Display the raw per-episode returns collected during the post-training test.
rwd_mg

In [None]:
# Compare the distribution of per-episode returns before (top) and after
# (bottom) training on a shared x axis.
fig, axs = plt.subplots(2, 1, sharex=True)
# Pass the samples as `x=` explicitly. A bare positional argument is read as
# `x` by older seaborn but as `data` by newer releases, which would draw the
# boxes vertically and leave the shared x axis without a common value scale.
sns.boxplot(x=rwd_mg0, ax=axs[0])
axs[0].set_title('Baseline Mean Success Percentage')
sns.boxplot(x=rwd_mg, ax=axs[1])
axs[1].set_title('Trained Mean Success Percentage')

# Repeat the headline comparison beside the figure.
print("\nAgent wins an average of {} pct \ncompared to baseline of {} pct".format(
    100*np.mean(rwd_mg), 100*base_perf_mg) )

In [None]:
# Close the TensorFlow session opened earlier; cells that use `sess` must not
# be re-run after this without creating a new session first.
sess.close()

## Debug

In [None]:
# embrf.play(sess)

# len(embrf.episode_buffer)