### Rewards

### Testing model trained using DDPG + HER with Parameter Tuning

In [1]:
%matplotlib inline
from tensorboard.backend.event_processing import event_accumulator
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
import warnings
warnings.filterwarnings("ignore")
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

def getReward(log_dir, tag='Reward', mean_window=100, sd_window=200):
    """Load a scalar series from a TensorBoard log and add rolling statistics.

    Parameters
    ----------
    log_dir : str
        Path handed to ``EventAccumulator`` (an event file or a directory
        containing event files).
    tag : str, optional
        Name of the scalar summary to read. Defaults to ``'Reward'``.
    mean_window : int, optional
        Window length of the rolling mean. Defaults to 100.
    sd_window : int, optional
        Window length of the rolling standard deviation. Defaults to 200.

    Returns
    -------
    pandas.DataFrame
        One row per logged scalar event with the columns ``Reward`` (raw
        value), ``MeanReward`` (rolling mean) and ``SD`` (rolling standard
        deviation). The column names do not depend on ``tag``. Both rolling
        columns use ``min_periods=2``, so the first row is NaN.

    Raises
    ------
    KeyError
        If ``tag`` is not present in the loaded log.
    """
    # By default the accumulator keeps only a capped, reservoir-sampled subset
    # of scalar events per tag. A size guidance of 0 means "keep everything",
    # so the rolling windows below really span consecutive logged points.
    ea = event_accumulator.EventAccumulator(
        log_dir,
        size_guidance={event_accumulator.SCALARS: 0},
    )
    ea.Reload()
    rewards = [event.value for event in ea.scalars.Items(tag)]
    df = pd.DataFrame(rewards, columns=['Reward'])
    df['MeanReward'] = df['Reward'].rolling(window=mean_window, min_periods=2).mean()
    df['SD'] = df['Reward'].rolling(window=sd_window, min_periods=2).std()
    return df

  from ._conv import register_converters as _register_converters


In [None]:
# Load the names of the logged sessions. The directory is listed once and the
# entries are split by prefix; sorting keeps the sessions in name order.
log_dir = './FetchReach-v1_DDPG_HER_PER'
session_names = sorted(os.listdir(log_dir))
train_paths = [p for p in session_names if p.startswith('train_')]
test_paths = [p for p in session_names if p.startswith('test_')]

# The cell needs the first two training sessions and the first test session;
# fail with an explicit message instead of a bare IndexError further down.
if len(train_paths) < 2 or not test_paths:
    raise IndexError(
        'expected at least two train_* and one test_* session in %r, '
        'found %d train and %d test' % (log_dir, len(train_paths), len(test_paths))
    )

# look at first + second training sessions, first test session
df_train1 = getReward(os.path.join(log_dir, train_paths[0]))
df_train2 = getReward(os.path.join(log_dir, train_paths[1]))
df_train = pd.concat((df_train1, df_train2), ignore_index=True)
df_test = getReward(os.path.join(log_dir, test_paths[0]))

# Recompute the rolling statistics on the concatenated training data so the
# windows run across the boundary between the two sessions instead of
# restarting at the beginning of the second one.
df_train['MeanReward'] = df_train['Reward'].rolling(window=100, min_periods=2).mean()
df_train['SD'] = df_train['Reward'].rolling(window=200, min_periods=2).std()

# plot reward
# NOTE: sns.tsplot is deprecated in newer seaborn releases; it is kept here
# because the rest of this notebook targets the older seaborn API.
sns.set_style('darkgrid')
sns.set_context("talk")
f, axs = plt.subplots(1, 2, figsize=(15, 5))
sns.tsplot(df_train['MeanReward'], ax=axs[0])
sns.tsplot(df_test['MeanReward'], ax=axs[1])
axs[0].set_title('Mean Rewards -- Training')
axs[1].set_title('Mean Rewards -- Testing')

# plt.ylim() only changes the *current* axes (the last subplot created), which
# left the training panel on an automatic scale. Set the limits on both panels
# so the two curves are directly comparable.
for ax in axs:
    ax.set_ylim(-50, 0)
plt.show()

In [None]:
# Collect the training reward curves of every (hidden size, batch size) run of
# the hyper-parameter sweep into one long DataFrame.
l_ = []
for hidden_size in [128, 256, 512]:
    for batch_size in [64, 256]:
        log_dir = ('./FetchReach-v1_DDPG_HER_batch_' + str(batch_size)
                   + '_hidden_' + str(hidden_size) + '/tensorboard_train')
        try:
            run_df = getReward(log_dir)
        except Exception as exc:
            # Best effort: a combination whose log cannot be loaded is skipped,
            # but it is reported instead of being silently swallowed.
            # (warnings are globally silenced in this notebook, hence print.)
            print('Skipping %s: %r' % (log_dir, exc))
            continue
        run_df['HiddenSize'] = hidden_size
        run_df['BatchSize'] = batch_size
        l_.append(run_df)

if not l_:
    # pd.concat([]) would raise an opaque "No objects to concatenate".
    raise ValueError('no tensorboard training logs could be loaded for any '
                     'hidden-size / batch-size combination')
df = pd.concat(l_, ignore_index=True)

# Plot one facet grid per statistic: rows are batch sizes, columns are hidden
# sizes. The rolling-mean column produced by getReward is named 'MeanReward'
# (there is no 'Mean' column).
for stat_column in ('MeanReward', 'SD'):
    sns.set(rc={'figure.figsize':(12, 12)})
    g = sns.FacetGrid(df, col="HiddenSize", row="BatchSize", size=5)
    g = g.map(sns.tsplot, stat_column)
    plt.show()