# Imports and Resets

In [1]:
import glob, itertools, importlib, sys
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter

# ML libs
import tensorflow as tf
print("Tensorflow version:", tf.__version__)

# Render figures inline and set notebook-wide plotting defaults.
%matplotlib inline
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (18,7)

# NOTE(review): log_path is not referenced by any cell visible in this notebook;
# the agent reports its own Tensorboard directory when its graph is initialised.
log_path = './log/mingame'
# Put ./embodied_arch on the import search path (the `embodied` module is loaded from there).
sys.path.append('./embodied_arch')
import minoritygame.minority_env as MGE
# NOTE(review): duplicate of the tensorflow import above; harmless but redundant.
import tensorflow as tf
import embodied as emg
# Reload the project modules so that edits made on disk are picked up without
# restarting the kernel. The last reload is also the cell's displayed output.
importlib.reload(MGE)
importlib.reload(emg)

Tensorflow version: 1.13.1

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.



<module 'embodied' from './embodied_arch/embodied.py'>

## Set up Game

In [2]:
# Clear the default graph before building anything new in it.
tf.reset_default_graph()

# Creates the MGEnvironment.
menv = MGE.MinorityGame1vN_env(
    nagents=301,
    m=2,
    s=2,
    mrl=3,
    p=0.5,
)

# Creates the learner. alpha scales the losses (open question from the
# original author: is it equivalent to a learning rate?).
embrf = emg.EmbodiedAgentRF(
    name="mgRF",
    env_=menv,
    alpha=1e-6,
)


Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Colocations handled automatically by placer.


In [3]:
# Schedule settings (meanings as recalled by the original author -- unconfirmed).
n_epochs = 200     # number of times to update
num_episodes = 5   # number of runs per update step?

# Cap on episode length; this should arguably be an environment parameter
# rather than an attribute set on the agent.
embrf.max_episode_length = 500

# Open a session and let the agent initialise its graph in it.
sess = tf.InteractiveSession()
embrf.init_graph(sess)  # note tboard log dir

## Verify step + play set up
state = embrf.env.reset()
print(state, embrf.act(state, sess))

# act() is called a second time here; its result is what gets sent to the
# environment, and the step's return value is the cell's displayed output.
embrf.env.step(embrf.act(state, sess))

Tensorboard logs in:  ./log/train_mgRF
[0 0 1] 0


(array([0, 1, 0]), 1.0, False, {})

In [4]:
# ### Pre-test Agent
# Play num_episodes episodes before any training and record, for each one,
# the total return divided by the maximum episode length.
print('Baselining untrained pnet...', flush=True)
rwd_mg0 = []
for k in range(num_episodes):
    embrf.play(sess)
    episode_rate = float(embrf.last_total_return) / embrf.max_episode_length
    rwd_mg0.append(episode_rate)
    # "\r" rewrites the same output line, so only the latest counter is shown.
    print("\rEpisode {}/{}".format(k, num_episodes), end="")

# Baseline: mean of the per-episode rates, reported as a percentage.
base_perf_mg = np.mean(rwd_mg0)
print("\nAgent wins an average of {} pct".format(100.0*base_perf_mg), flush=True)

Baselining untrained pnet...
Episode 4/5
Agent wins an average of 50.879999999999995 pct


In [5]:
# Train several independent replicates and collect what embrf.work() returns
# for each of them in `seqs`.
n_runs = 2
seqs = []

# Build the Saver once: constructing it inside the loop adds another set of
# save/restore ops to the default graph on every iteration.
saver = tf.train.Saver(max_to_keep=1)

for i in range(n_runs):
    if i > 0:
        # Reset parameters *between* replicates only. The previous version
        # re-initialised after every run, including the last one, which threw
        # away the trained weights before the post-test cell could evaluate
        # them. tf.global_variables_initializer() covers every global
        # variable, optimizer state included, so the separate (deprecated
        # tf.all_variables()-based) Adam re-initialisation was redundant.
        sess.run(tf.global_variables_initializer())
    print('Training...')
    totals = embrf.work(sess, saver, num_epochs = n_epochs)
    seqs.append(totals)

Training...
Starting agent mgRF
Epoch no.: 0/200
Step 0: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [248.0, -0.13768339, 0.10811748] )
Saved Model
Epoch no.: 1/200
Step 1: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [267.0, -0.16800764, 0.15076193] )
Instructions for updating:
Use standard file APIs to delete files with this prefix.
Saved Model
Epoch no.: 2/200
Step 2: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [257.0, -0.20959897, 0.20545614] )
Saved Model
Epoch no.: 3/200
Step 3: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [255.0, -0.28850323, 0.26401198] )
Saved Model
Epoch no.: 4/200
Step 4: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [258.0, -0.36249083, 0.34490785] )
Saved Model
Epoch no.: 5/200
Step 5: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [254.0, -0.45077544, 0.44904876] )
Saved Model
Epoch no.: 6/200
Step 6: S

Epoch no.: 55/200
Step 55: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [293.0, -0.17090298, 0.13623525] )
Saved Model
Epoch no.: 56/200
Step 56: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [314.0, -0.18846661, 0.16519874] )
Saved Model
Epoch no.: 57/200
Step 57: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [306.0, -0.21597584, 0.21696237] )
Saved Model
Epoch no.: 58/200
Step 58: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [331.0, -0.26951623, 0.25989085] )
Saved Model
Epoch no.: 59/200
Step 59: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [338.0, -0.2687553, 0.323227] )
Saved Model
Epoch no.: 60/200
Step 60: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [345.0, -0.37466055, 0.34336042] )
Saved Model
Epoch no.: 61/200
Step 61: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [332.0, -0.38673162, 0.39267537] )


Epoch no.: 112/200
Step 112: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [369.0, -0.044290945, 0.056864996] )
Saved Model
Epoch no.: 113/200
Step 113: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [369.0, -0.044420283, 0.053131875] )
Saved Model
Epoch no.: 114/200
Step 114: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [372.0, -0.023845326, 0.04951277] )
Saved Model
Epoch no.: 115/200
Step 115: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [373.0, -0.019732486, 0.045238923] )
Saved Model
Epoch no.: 116/200
Step 116: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [372.0, -0.027456777, 0.04050873] )
Saved Model
Epoch no.: 117/200
Step 117: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [372.0, -0.022211308, 0.03606484] )
Saved Model
Epoch no.: 118/200
Step 118: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [370.0, -

Epoch no.: 167/200
Step 167: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [373.0, -0.01141322, 0.006672895] )
Saved Model
Epoch no.: 168/200
Step 168: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [374.0, -0.0010235856, 0.0068325396] )
Saved Model
Epoch no.: 169/200
Step 169: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [373.0, -0.011316837, 0.0068663643] )
Saved Model
Epoch no.: 170/200
Step 170: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [374.0, -0.0010662241, 0.007030309] )
Saved Model
Epoch no.: 171/200
Step 171: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [374.0, -0.0010737772, 0.007056859] )
Saved Model
Epoch no.: 172/200
Step 172: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [374.0, -0.0010577745, 0.0069631077] )
Saved Model
Epoch no.: 173/200
Step 173: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( 

Epoch no.: 21/200
Step 21: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [250.0, -0.0006507085, 0.004429336] )
Saved Model
Epoch no.: 22/200
Step 22: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [250.0, -0.0005719005, 0.0039643757] )
Saved Model
Epoch no.: 23/200
Step 23: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [250.0, -0.0005015116, 0.0035410356] )
Saved Model
Epoch no.: 24/200
Step 24: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [250.0, -0.0004391898, 0.0031590308] )
Saved Model
Epoch no.: 25/200
Step 25: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [250.0, -0.0003844955, 0.0028168086] )
Saved Model
Epoch no.: 26/200
Step 26: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [250.0, -0.00033719733, 0.002515125] )
Saved Model
Epoch no.: 27/200
Step 27: Stats(['Perf/Recent Reward', 'Losses/Policy LL', 'Losses/Entropy']): ( [250.0, -

KeyboardInterrupt: 

In [None]:
# ### Post-test Agent
# Play num_episodes episodes with the current weights and compare the mean
# per-episode rate (total return / max episode length) against the baseline.
print('Testing...', flush=True)
rwd_mg = []

# Report progress about five times per evaluation. The interval is clamped to
# at least 1 because int(num_episodes/5) is 0 whenever num_episodes < 5, and
# `k % 0` raises ZeroDivisionError.
report_every = max(1, num_episodes // 5)

for k in range(num_episodes):
    embrf.play(sess)
    rwd_mg.append(float(embrf.last_total_return)/embrf.max_episode_length)
    if k % report_every == 0:
        print("\rEpisode {}/{}".format(k, num_episodes),end="")

trained_perf_mg = np.mean(rwd_mg)
print("\nAgent wins an average of {} pct \ncompared to baseline of {} pct".format(
    100*trained_perf_mg, 100*base_perf_mg), flush=True )

In [None]:
# Debug: display the current value of the first trainable variable in the graph.
sess.run(tf.trainable_variables()[0])

In [None]:
# WARNING: re-runs the initializer of every global variable in this session,
# overwriting any trained weights. Run only when a full reset is intended.
sess.run(tf.global_variables_initializer())

In [None]:
# Debug: display the first trainable variable again, e.g. to compare it with
# the value shown before the re-initialisation.
sess.run(tf.trainable_variables()[0])

In [None]:
# Debug: display the agent's `states` attribute.
# NOTE(review): presumably the states recorded during the most recent play -- confirm in embodied.py.
embrf.states