## k-vs-(N-k) Flu ABM Env
- k-vs-(N-k) experiment
- Kicking the tires on a multiplayer instance of the Flu ABM with RL learners
- Basic independent RL (indepRL)

In [None]:
import itertools, importlib, sys, warnings, os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# ML libs
# TF_CPP_MIN_LOG_LEVEL only reliably silences TensorFlow's native (C++) log
# output when it is already in the environment at the time the package is
# imported, so it is exported *before* `import tensorflow`.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # '3' to block all including error msgs
import tensorflow as tf
print("Tensorflow version:", tf.__version__)

log_path = './log/flu'
#tensorboard --logdir=flugame_worker_1:'./log/train_rf_flugame_worker'

## Suppress noisy, verbose TF / Python warning messages
warnings.filterwarnings("ignore")
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [None]:
sys.path.append('./embodied_arch')

import embodied_indep as emi 
import flumodel_python.flu_env as Fenv
from embodied_misc import ActionPolicyNetwork, SensoriumNetworkTemplate, ValueNetwork

## Env Setup

In [None]:
# exos = [1,2,3,10]  # (np.random.sample(9223) < 0.3)
# Draw one uniform [0, 1) sample per index and keep the indices whose draw
# falls below 0.004; `exos` ends up as a plain list of those integer indices.
# NOTE(review): 9223 is presumably the population size of the flu model and
# these indices the agents handed to the learners -- confirm against flu_env.
exo_draws = np.random.sample(9223) < 0.004
exos = [idx for idx, picked in enumerate(exo_draws) if picked]
print(len(exos))

# Pick up on-disk edits to the project modules without restarting the kernel.
importlib.reload(Fenv);
importlib.reload(emi);

In [None]:
# Clear the default TF graph before (re)building the environment. The
# `tf.compat.v1` spelling matches the logging call used earlier in this
# notebook and, unlike the bare `tf.reset_default_graph`, also exists on TF 2.x.
tf.compat.v1.reset_default_graph()

# Multi-agent flu environment driven by the indices sampled into `exos`.
flu_menv = Fenv.Flu_env(
    exo_idx=exos,
    model_path="./flumodel_python/"
)

In [None]:
# Sanity-check the environment: number of actors, then the state/action space sizes.
print(flu_menv.actor_count)
state_dim, action_dim = flu_menv.state_space_size, flu_menv.action_space_size
print(state_dim, action_dim)

### MARL Setup Demo

In [None]:
def actor(s):
    """Return an ActionPolicyNetwork over `s` with hSeq=(8,) and gamma_reg=1e-1."""
    return ActionPolicyNetwork(s, hSeq=(8,), gamma_reg=1e-1)


def value(s):
    """Return a ValueNetwork over `s` with hSeq=(8,) and gamma_reg=1."""
    return ValueNetwork(s, hSeq=(8,), gamma_reg=1.)


def sensor(st, out_dim):
    """Return a SensoriumNetworkTemplate over `st` (hSeq=(16,8,8), gamma_reg=5) with `out_dim` outputs."""
    return SensoriumNetworkTemplate(st, hSeq=(16,8,8), out_dim=out_dim, gamma_reg=5.)


# Alternative run settings kept for reference:
# num_episodes, n_epochs, max_len = (100, 1501, 25)
# num_episodes, max_len, n_epochs, evry = (100, 35, 1501, 300)
# num_episodes, max_len, n_epochs, evry = (10, 15, 400, 100)
num_episodes = 100  # episodes played per evaluation pass
max_len = 35        # passed to the agent as max_episode_length
n_epochs = 501      # passed to the agent's work() as num_epochs
evry = 100          # passed to the agent as _every_

In [None]:
# Collect the constructor arguments first so the settings read as one table.
agent_settings = dict(
    name="flu_mRFB",
    env_=flu_menv,
    alpha_p=150,
    alpha_v=50.,
    latentDim=4,
    max_episode_length=max_len,
    _every_=evry,
    actorNN=actor,
    valueNN=value,
    sensorium=sensor,
)
flumrf = emi.EmbodiedAgent_IRFB(**agent_settings)

# Show the agent's action size next to the environment's action-space size.
(flumrf.a_size, flumrf.env.action_space_size)

In [None]:
# Session-based (TF1-style) API reached through `tf.compat.v1`, matching the
# logging call used earlier in this notebook; these aliases also exist on
# TF 2.x, where the bare `tf.InteractiveSession` / `tf.train.Saver` do not.
sess = tf.compat.v1.InteractiveSession()
flumrf.init_graph(sess) # note tboard log dir
# Keep only the most recent checkpoint on disk.
saver = tf.compat.v1.train.Saver(max_to_keep=1)

## Baseline

In [None]:
# Play `num_episodes` episodes with the untrained policy, recording each
# episode's total returns and accumulating the average covariance of actions.
print('Baselining untrained pnet...')
rwds0 = []
acts_cov = np.zeros([flumrf.actor_count,flumrf.actor_count])
for k in range(num_episodes):
    flumrf.play(sess, terminal_reward=0.);
    rwds0.append(flumrf.last_total_returns)
    # NOTE(review): assumes the transposed buffer is (actors, timesteps) so
    # that np.cov treats each actor as one variable -- confirm.
    actions = np.array(flumrf.episode_buffer['actions']).T
    acts_cov = acts_cov + (np.cov(actions)/num_episodes)
    # k is 0-based; report the 1-based count so the last line reads N/N.
    print("\rEpisode {}/{}".format(k + 1, num_episodes),end="")

# Compute average rewards: returns divided by episode length, as a percentage.
base_frac = np.array(rwds0)/float(flumrf.max_episode_length)
base_perf = 100.*np.mean(base_frac)
# NOTE(review): axis 0 indexes episodes, so this is presumably one value per agent.
base_per_agent = 100.*np.mean(base_frac, axis=0)

print("\nAgent is flu-free for an average of {}pct of seasons".format(
    1.*base_perf))

In [None]:
# Turn the accumulated action covariance into a correlation matrix:
#   corr[j, k] = cov[j, k] / sqrt(cov[j, j] * cov[k, k])
# The variances are taken from the untouched `acts_cov`. Normalising a copy in
# place while reading that copy's own diagonal would pick up already-rescaled
# diagonal entries (1.0) and leave most elements divided by the wrong value.
# Entries involving a zero-variance agent come out as NaN/inf.
act_vars = np.diag(acts_cov)
acts_corr = acts_cov / np.sqrt(np.outer(act_vars, act_vars))

print("Agent Action Correlations:")
sns.heatmap(acts_corr, center=0)

## Train Agent Population

In [None]:
# Train the agents for `n_epochs` epochs, passing the Saver to work().
print('Training...')
hist = flumrf.work(sess, saver=saver, num_epochs=n_epochs)
# Display what work() returned.
hist

## Test

In [None]:
# Test pnet! Replay `num_episodes` episodes after training, recording total
# returns and the average action covariance for comparison with the baseline.
# NOTE(review): the baseline pass calls play(..., terminal_reward=0.) while
# this pass relies on play()'s default -- confirm the two are comparable.
print('Testing...')
rwds = []
acts_cov_trained = np.zeros([flumrf.actor_count,flumrf.actor_count])
for k in range(num_episodes):
    flumrf.play(sess)
    rwds.append(flumrf.last_total_returns)
    actions = np.array(flumrf.episode_buffer['actions']).T
    acts_cov_trained = acts_cov_trained + (np.cov(actions)/num_episodes)
    # k is 0-based; report the 1-based count so the last line reads N/N.
    print("\rEpisode {}/{}".format(k + 1, num_episodes),end="")

# Returns divided by episode length, expressed as a percentage.
trained_frac = np.array(rwds)/float(flumrf.max_episode_length)
trained_perf = 100.*np.mean(trained_frac)
trained_per_agent = 100.*np.mean(trained_frac, axis=0)

print("\nAgent is flu-free for an average of {} pct compared to baseline of {} pct".format(
    1.*trained_perf, 1.*base_perf) )

In [None]:
# Correlation of trained-agent actions:
#   corr[j, k] = cov[j, k] / sqrt(cov[j, j] * cov[k, k])
trained_vars = np.diag(acts_cov_trained)
acts_corr_trained = acts_cov_trained / np.sqrt(np.outer(trained_vars, trained_vars))

# Mask the upper triangle (diagonal included) so each pair is drawn once.
mask = np.triu(np.ones_like(acts_corr_trained))
with sns.axes_style("darkgrid"):
    plt.rcParams['figure.figsize'] = (15, 12)
    ax = sns.heatmap(acts_corr_trained, mask=mask, vmax=0.125, center=0)
    ax.set_ylabel("Agent Index")
    ax.set_xlabel("Agent Index")
    ax.set_title("Action Correlations")

## Evaluate

In [None]:
# Build one wide table of per-episode reward percentages (one column per
# position in the returns vector) for both waves, tagged by a 'Wave' column.
episode_len = float(flumrf.max_episode_length)
rwds0_df = pd.DataFrame(100.*(np.array(rwds0)/episode_len))
rwds_df = pd.DataFrame(100.*(np.array(rwds)/episode_len))

rwds0_df['Wave'] = "Baseline"
rwds_df['Wave'] = "Trained"

resDF = pd.concat([rwds0_df, rwds_df])
# Prefix the integer column labels with "Agent"; leave the 'Wave' label alone.
# Compare by value (!=): an identity test against a string literal only works
# through interning and raises a SyntaxWarning on Python 3.8+.
resDF.columns = ["Agent"+str(tc) if tc != "Wave" else tc for tc in resDF.columns]
# resDF['id'] = resDF.index
print(resDF.shape)
# resDF.head()


In [None]:
# Reshape from wide (one "Agent*" column each) to long form with columns
# Wave / Agent / Immune_pct.
agent_cols = [col for col in resDF.columns if "Agent" in col]
resDF = resDF.melt(id_vars=['Wave'], value_vars=agent_cols).rename(
    columns={"variable": "Agent", "value": "Immune_pct"}
)
print(resDF.shape)

# Mean and standard deviation of Immune_pct for every (Agent, Wave) pair.
res_tabs = resDF.groupby(['Agent','Wave']).agg(['mean','std'])

# resDF.head()

In [None]:
# Figure size is set before sns.set(); the order of these two calls is kept as-is.
plt.rcParams['figure.figsize'] = (9, 35)
sns.set(font_scale=1.25)

# One split violin per agent: Baseline on one side, Trained on the other.
violin_opts = dict(
    x="Immune_pct", y="Agent", hue="Wave",
    split=True, inner="box", cut=0,
)
fig = sns.violinplot(data=resDF, **violin_opts);
fig.set_title('Average Episode Rewards: Baseline vs Trained Agents.');
fig.legend(loc='upper left');

In [None]:
# Per-agent mean Immune_pct for each wave. The value column is selected
# explicitly: aggregating the whole frame would also try to average the string
# 'Wave' column, which raises TypeError on pandas >= 2 and was only silently
# dropped by older releases.
base_meanDF = (
    resDF[resDF.Wave=="Baseline"]
    .groupby(['Agent'])[['Immune_pct']]
    .aggregate(['mean'])
)
base_meanDF.sort_index(inplace=True)

trained_meanDF = (
    resDF[resDF.Wave=="Trained"]
    .groupby(['Agent'])[['Immune_pct']]
    .aggregate(['mean'])
)
trained_meanDF.sort_index(inplace=True)

# Index-aligned difference: positive values mean the trained wave scored higher.
mean_diffDF = (trained_meanDF - base_meanDF)
mean_diffDF.columns = ['Mean_Immune_Pct_Change']
# mean_diffDF.head()

In [None]:
plt.rcParams['figure.figsize'] = (9, 19)
sns.set_color_codes("dark")

# Two stacked panels sharing the x axis: a short one on top for the aggregate
# distribution, a taller one below for the per-agent bars.
fig, axs = plt.subplots(
    2, 1, sharex=True,
    gridspec_kw={'height_ratios': [1,4]},
)
agg_ax, agent_ax = axs

cmp = sns.violinplot(data=mean_diffDF, x='Mean_Immune_Pct_Change',
                     inner='quartile', cut=0, ax=agg_ax)
agg_ax.set_ylabel('Agent Aggregate');
agg_ax.set_title(
    'Distribution of Changes in Flu Immunity Rates:\nIn Aggregate & Per-Agent.'
);

sns.barplot(data=mean_diffDF, x="Mean_Immune_Pct_Change", y=mean_diffDF.index,
            color="r", label="Success Rate", ax=agent_ax);
# Remove the gap between the two panels.
plt.subplots_adjust(wspace=0, hspace=0)
agent_ax.set_xlabel('Avg. Change in Immunity Rates');