### CartPole Environment

In [None]:
from dqn_ner_cart import ner_cart
from dqn_cer_cart import cer_cart
from dqn_her_cart import her_cart
from dqn_per_cart import per_cart
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import sem

#### Hyper-parameters

In [None]:
# Experiment scale: `runs` is the number of times each trainer is invoked,
# `num_episodes` is forwarded to every trainer as `n_episodes_`.
num_episodes = 3000
runs = 10
# Experience-replay settings, forwarded to every trainer as `buf_size_`
# and `batch_size_` respectively.
buf_size = 50_000
batch_size = 32

In [None]:
def mean_and_error(data):
    """Return the column-wise mean and error-bar half-width of *data*.

    Both statistics are taken along axis 0, so each row of *data* is treated
    as one sample and each column as one measurement position.

    parameters:
        data - array-like whose first axis indexes the samples
    return:
        (mean, half_width) - the mean along axis 0, and the standard error
        of the mean along axis 0 scaled by 1.96
    """
    # 1.96 is the two-sided 95% z-value of the normal distribution.
    half_width = 1.96 * sem(data, axis=0)
    center = np.mean(data, axis=0)
    return center, half_width

In [None]:
def plot_and_save(means, errors, er_method, y_axis_title, save_name, y_lim=(0, 1),
                  episode_step=100):
    '''
        Plot a mean curve with error bars, save the figure and display it.

        parameters:
            means - mean array, one value per plotted point
            errors - error array (same length as means) used as the symmetric
                     error-bar half-width of each point
            er_method - ER method name, used as the prefix of the plot title
            y_axis_title - metric type, used as the y-axis label
            save_name - file name (path) the figure is saved to
            y_lim - (low, high) limits of the y axis; configurable because the
                    plotted metric can be a success rate or an average score
            episode_step - number of episodes between two consecutive points
                           of `means`; defaults to 100, the spacing this
                           function previously hard-coded. Must be non-zero,
                           otherwise `range` raises ValueError.
        return:
            None
    '''

    # Point i is placed at episode i * episode_step on the x axis.
    episodes = range(0, len(means) * episode_step, episode_step)
    # Markers with error bars first, then a line connecting the means.
    plt.errorbar(episodes, means, yerr=errors, fmt='o')
    plt.plot(episodes, means)
    plt.ylim(y_lim)
    plt.title(er_method + ' with 95% confidence interval')
    plt.xlabel('Episodes')
    plt.ylabel(y_axis_title)
    # Write the file before show() so the saved image is the figure drawn above.
    plt.savefig(save_name)
    plt.show()

#### Normal Experience Replay

In [None]:
# NER: call the trainer `runs` times and collect the second value it returns.
ner_settings = dict(
    n_episodes_=num_episodes,
    batch_size_=batch_size,
    buf_size_=buf_size,
)
scores_ner = []
for run in range(runs):
    # The first returned value is not used in this notebook.
    _, score_rate_ner = ner_cart(**ner_settings)
    scores_ner.append(score_rate_ner)
# Aggregate across runs (axis 0) into a mean curve and error-bar half-widths.
ner_means, ner_errors = mean_and_error(scores_ner)

#### Combined Experience Replay

In [None]:
# CER: call the trainer `runs` times and collect the second value it returns.
cer_settings = dict(
    n_episodes_=num_episodes,
    batch_size_=batch_size,
    buf_size_=buf_size,
)
scores_cer = []
for run in range(runs):
    # The first returned value is not used in this notebook.
    _, score_rate_cer = cer_cart(**cer_settings)
    scores_cer.append(score_rate_cer)
# Aggregate across runs (axis 0) into a mean curve and error-bar half-widths.
cer_means, cer_errors = mean_and_error(scores_cer)

#### Hindsight Experience Replay

In [None]:
# HER: call the trainer `runs` times and collect the second value it returns.
her_settings = dict(
    n_episodes_=num_episodes,
    batch_size_=batch_size,
    buf_size_=buf_size,
)
scores_her = []
for run in range(runs):
    # The first returned value is not used in this notebook.
    _, score_rate_her = her_cart(**her_settings)
    scores_her.append(score_rate_her)
# Aggregate across runs (axis 0) into a mean curve and error-bar half-widths.
her_means, her_errors = mean_and_error(scores_her)

#### Plots
# Consider changing the 'save_name' when running with different hyperparameters

In [None]:
# Plot the NER mean curve with its error bars and save it to disk.
# NOTE(review): y_lim is left at its (0, 1) default; confirm that range
# matches the scale of the values plotted as 'Total reward'.
plot_and_save(
    means=ner_means,
    errors=ner_errors,
    er_method='NER',
    y_axis_title='Total reward',
    save_name='ner_cart_buf1_batch_1.png',
)

In [None]:
# Plot the CER mean curve with its error bars and save it to disk.
# NOTE(review): y_lim is left at its (0, 1) default; confirm that range
# matches the scale of the values plotted as 'Total reward'.
plot_and_save(
    means=cer_means,
    errors=cer_errors,
    er_method='CER',
    y_axis_title='Total reward',
    save_name='cer_cart_buf1_batch_1.png',
)

In [None]:
# Plot the HER mean curve with its error bars and save it to disk.
# NOTE(review): y_lim is left at its (0, 1) default; confirm that range
# matches the scale of the values plotted as 'Total reward'.
plot_and_save(
    means=her_means,
    errors=her_errors,
    er_method='HER',
    y_axis_title='Total reward',
    save_name='her_cart_buf1_batch_1.png',
)