In [1]:
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
from bokeh.plotting import figure, output_notebook, show
output_notebook()

In [3]:
import glob

In [4]:
import numpy as np

In [5]:
from scipy.interpolate import interp1d

# `scipy.interpolate.spline` was deprecated in SciPy 0.19 and removed in
# SciPy 1.3, so an unconditional import raises ImportError on current installs
# and stops the notebook at this cell. Keep using the original when it exists
# and otherwise fall back to a shim with the same call signature.
try:
    from scipy.interpolate import spline
except ImportError:
    from scipy.interpolate import make_interp_spline

    def spline(xk, yk, xnew, order=3, kind="smoothest", conds=None):
        """Evaluate a degree-``order`` interpolating B-spline through (xk, yk) at xnew.

        Stand-in for the removed ``scipy.interpolate.spline``. ``kind`` and
        ``conds`` are accepted only for signature compatibility and are
        ignored; for ``order > 1`` the boundary conditions are those of
        ``make_interp_spline`` and may differ slightly from the old helper.
        ``xk`` must be strictly increasing.
        """
        return make_interp_spline(xk, yk, k=order)(xnew)

In [6]:
from tqdm import tqdm

In [7]:
# Glob patterns selecting the per-run log files of every DQN run in the
# Maze 5 batch-size experiment (one match per run directory).
DQN_Rewards_Path = "/home/tabz/Dropbox/RL/Log_Files/March 2017/Maze_5_Batch_Sizes_Logs/*DQN*/logs/Episode_Rewards.txt"
DQN_Lengths_Path = "/home/tabz/Dropbox/RL/Log_Files/March 2017/Maze_5_Batch_Sizes_Logs/*DQN*/logs/Episode_Lengths.txt"

# Per-run accumulators filled by the loading cells below:
# rewards, episode lengths, and cumulative episode lengths.
DQN_Rewards, DQN_Lengths, DQN_Lengths_C = [], [], []

In [8]:
# Load the per-episode rewards of every run matched by DQN_Rewards_Path.
# The list is rebuilt from scratch so that re-running this cell does not
# append every run a second time.
DQN_Rewards = []
for filename in glob.glob(DQN_Rewards_Path):
    # Each run starts with a leading 0 entry, mirroring the leading 0 that the
    # length-loading cell prepends before taking the cumulative sum.
    run_logs = [0]
    with open(filename, "r") as f:
        for line in f:
            try:
                ep_r = float(line)
            except ValueError:
                # Skip lines that are not a number; unlike a bare `except:`,
                # unrelated errors (e.g. KeyboardInterrupt) still propagate.
                continue
            run_logs.append(ep_r)
    DQN_Rewards.append(run_logs)

In [9]:
# Load the per-episode lengths of every run matched by DQN_Lengths_Path and
# derive the cumulative step count at the end of each episode.
# Both lists are rebuilt from scratch so that re-running this cell does not
# append every run a second time.
DQN_Lengths = []
DQN_Lengths_C = []
for filename in glob.glob(DQN_Lengths_Path):
    # Leading 0 so that the cumulative sum starts at step 0.
    run_logs = [0]
    with open(filename, "r") as f:
        for line in f:
            try:
                ep_r = float(line)
            except ValueError:
                # Skip lines that are not a number; unlike a bare `except:`,
                # unrelated errors (e.g. KeyboardInterrupt) still propagate.
                continue
            run_logs.append(ep_r)
    DQN_Lengths.append(run_logs)
    run_logs_c = np.cumsum(run_logs).astype(np.int32)
    DQN_Lengths_C.append(run_logs_c)

# Static overview: one curve per run, reward against cumulative steps.
# Runs are paired by position, stopping at the shorter of the two lists.
for run_index in range(min(len(DQN_Lengths_C), len(DQN_Rewards))):
    plt.plot(DQN_Lengths_C[run_index], DQN_Rewards[run_index])
plt.show()

# Interactive (bokeh) version of the per-run overview, with the reward axis
# fixed to [-1, 3].
p = figure(width=500, height=500, y_range=(-1, 3))
for run_steps, run_rewards in zip(DQN_Lengths_C, DQN_Rewards):
    p.line(run_steps, run_rewards)
show(p)

def plot_averages(names, t_maxs, colors, legends=None, directory="Logs/", step=10, order=3):
    """Plot the mean episode reward of each experiment with a +/- 1 std band (matplotlib).

    For every ``name`` all runs whose log directory matches
    ``directory + "*" + name + "*/logs/"`` are loaded. A run contributes the
    points (cumulative episode length, episode reward) read from its
    ``Episode_Lengths.txt`` and ``Episode_Rewards.txt`` files, preceded by a
    (0, 0) point. Each run is resampled onto ``range(0, t_max, step)`` and the
    runs are then averaged point-wise.

    Parameters
    ----------
    names : sequence of str
        Glob fragments selecting the run directories of each experiment.
    t_maxs : sequence of int
        Exclusive upper bound of the resampling grid, one per experiment.
    colors : sequence
        Matplotlib colour, one per experiment.
    legends : sequence of str, optional
        Legend label per experiment. No legend is drawn when ``None``.
    directory : str
        Prefix prepended verbatim to the glob pattern (include the trailing
        path separator).
    step : int
        Spacing of the resampling grid.
    order : int
        Degree of the interpolating spline. The default of 3 matches the
        cubic default of the ``scipy.interpolate.spline`` helper this
        function used before that helper was removed from SciPy.

    Notes
    -----
    * The lengths file is looked up next to each rewards file, so a run's
      rewards can never be paired with another run's lengths.
    * If the two files of a run hold a different number of values, both are
      truncated to the shorter one.
    * Grid points beyond a run's recorded steps take that run's first/last
      reward instead of being extrapolated.
    * Runs with too few points for the requested ``order``, and experiments
      without any usable run, are skipped with a printed message.
    """
    rewards_file = "Episode_Rewards.txt"
    lengths_file = "Episode_Lengths.txt"

    def read_values(path):
        """Return [0] followed by every line of ``path`` that parses as a float."""
        values = [0]
        with open(path, "r") as f:
            for line in f:
                try:
                    values.append(float(line))
                except ValueError:
                    # Non-numeric lines are skipped on purpose; anything
                    # else (e.g. KeyboardInterrupt) propagates.
                    continue
        return values

    plt.figure(figsize=(10, 10))
    plt.ylim(-1, 3)
    handles = []
    labels = []
    for index, (name, color, t_max) in enumerate(tqdm(zip(names, colors, t_maxs))):
        grid = np.arange(0, t_max, step)
        resampled = []
        for rewards_path in glob.glob(directory + "*" + name + "*/logs/" + rewards_file):
            # Sibling file in the same logs/ directory.
            lengths_path = rewards_path[:-len(rewards_file)] + lengths_file
            try:
                lengths = read_values(lengths_path)
            except (IOError, OSError):
                print("plot_averages: skipping {} (cannot read {})".format(rewards_path, lengths_path))
                continue
            rewards = read_values(rewards_path)
            count = min(len(rewards), len(lengths))
            if count <= order:
                print("plot_averages: skipping {} (only {} points)".format(rewards_path, count))
                continue
            rewards = rewards[:count]
            times = np.cumsum(lengths[:count]).astype(np.int64)
            resample = interp1d(
                times,
                rewards,
                kind=order,
                bounds_error=False,
                fill_value=(rewards[0], rewards[-1]),
            )
            resampled.append(resample(grid))
        if not resampled:
            print("plot_averages: no usable runs for {!r} under {!r}".format(name, directory))
            continue
        means = np.mean(resampled, axis=0)
        stds = np.std(resampled, axis=0)
        plt.fill_between(grid, means - stds, means + stds, color=color, alpha=0.2, edgecolor="white")
        mean_line, = plt.plot(grid, means, color=color)
        if legends is not None and index < len(legends):
            handles.append(mean_line)
            labels.append(legends[index])

    plt.title("Episode_Reward")
    plt.xlabel("T")
    plt.ylabel("Reward")
    if legends is not None:
        # Explicit handles keep each label on its mean line, independent of
        # how matplotlib orders auto-detected artists (the shaded bands are
        # artists too) and of experiments that were skipped.
        plt.legend(handles, labels, bbox_to_anchor=(1.05, 1), loc=2)
    plt.show()

In [10]:
from bokeh.models import Legend
from bokeh.models.formatters import BasicTickFormatter
from bokeh.models import HoverTool, TapTool

In [32]:
def plot_averages_b(names, t_maxs, colors, legends, directories="Logs/", step=10, title=""):
    """Plot the mean episode reward of each experiment with a +/- 1 std band (bokeh).

    For every ``name`` all runs whose log directory matches
    ``directory + "*" + name + "*/logs/"`` are loaded. A run contributes the
    points (cumulative episode length, episode reward) read from its
    ``Episode_Lengths.txt`` and ``Episode_Rewards.txt`` files, preceded by a
    (0, 0) point. Each run is linearly resampled onto ``range(0, t_max, step)``
    and the runs are then averaged point-wise. The figure is shown with
    ``show`` and nothing is returned.

    Parameters
    ----------
    names : sequence of str
        Glob fragments selecting the run directories of each experiment.
    t_maxs : sequence of int
        Exclusive upper bound of the resampling grid, one per experiment.
    colors : sequence
        Bokeh colour, one per experiment.
    legends : sequence of str
        Legend label, one per experiment.
    directories : str or sequence of str
        Prefix(es) prepended verbatim to the glob pattern (include the
        trailing path separator). A single string is used for every
        experiment.
    step : int
        Spacing of the resampling grid.
    title : str
        Figure title.

    Notes
    -----
    * Linear interpolation is done with ``np.interp``; it replaces the
      order-1 call to ``scipy.interpolate.spline``, which no longer exists in
      SciPy >= 1.3. Grid points beyond a run's recorded steps take that run's
      first/last reward.
    * The lengths file is looked up next to each rewards file, so a run's
      rewards can never be paired with another run's lengths.
    * If the two files of a run hold a different number of values, both are
      truncated to the shorter one.
    * Runs without any logged episode, and experiments without any usable
      run, are skipped with a printed message; skipped experiments get no
      legend entry.
    """
    rewards_file = "Episode_Rewards.txt"
    lengths_file = "Episode_Lengths.txt"

    def read_values(path):
        """Return [0] followed by every line of ``path`` that parses as a float."""
        values = [0]
        with open(path, "r") as f:
            for line in f:
                try:
                    values.append(float(line))
                except ValueError:
                    # Non-numeric lines are skipped on purpose; anything
                    # else (e.g. KeyboardInterrupt) propagates.
                    continue
        return values

    p = figure(width=1200, height=800, y_range=(-1.2, 3.2), title=title)
    p.toolbar_location = "above"
    p.yaxis.axis_label = "Episode Reward"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)

    if isinstance(directories, str):
        directories = [directories for _ in names]

    legend_items = []
    for name, color, t_max, legend, directory in zip(names, colors, t_maxs, legends, directories):
        grid = np.arange(0, t_max, step)
        resampled = []
        for rewards_path in glob.glob(directory + "*" + name + "*/logs/" + rewards_file):
            # Sibling file in the same logs/ directory.
            lengths_path = rewards_path[:-len(rewards_file)] + lengths_file
            try:
                lengths = read_values(lengths_path)
            except (IOError, OSError):
                print("plot_averages_b: skipping {} (cannot read {})".format(rewards_path, lengths_path))
                continue
            rewards = read_values(rewards_path)
            count = min(len(rewards), len(lengths))
            if count < 2:
                print("plot_averages_b: skipping {} (no logged episodes)".format(rewards_path))
                continue
            times = np.cumsum(lengths[:count]).astype(np.int64)
            resampled.append(np.interp(grid, times, rewards[:count]))
        if not resampled:
            print("plot_averages_b: no usable runs for {!r} under {!r}".format(name, directory))
            continue
        means = np.mean(resampled, axis=0)
        stds = np.std(resampled, axis=0)

        # Shaded +/- 1 std band: lower edge left-to-right, then upper edge
        # right-to-left, forming one closed polygon.
        band_xs = np.concatenate([grid, grid[::-1]])
        band_ys = np.concatenate([means - stds, (means + stds)[::-1]])
        p.patch(band_xs, band_ys, color=color, alpha=0.1)

        mean_line = p.line(grid, means, color=color, line_width=2)
        # Pair the label with its line here so skipped experiments cannot
        # shift later labels onto the wrong curve.
        legend_items.append((legend, [mean_line]))

    p.add_layout(Legend(items=legend_items, location=(0, -30)), "right")
    show(p)

In [33]:
# Experiments to compare: the DQN baseline first, then the count-based
# variants (with / without conv features, each at two epsilon settings).
# `names` are glob fragments, `legends` the matching labels, and `t_maxs`
# the matching resampling horizons.
names = ["DQN"]
legends = ["DQN"]
t_maxs = [1000000]

for conv in [True, False]:
    names.extend([
        "Count*_Conv_{}*_Eps_1".format(conv),
        "Count*_Conv_{}*_Eps_0.1".format(conv),
    ])
    legends.extend([
        "Count_Conv_{}".format(conv),
        "Count_Conv_{}_Epsilon".format(conv),
    ])
    t_maxs.extend([1000000, 1000000])

# Disabled batch-size sweep variant, kept for reference:
# names = names + ["*Batch_{}_{}".format(b, name) for b in [64, 128, 256] for name in names]
# legends = legends + ["{}_Batch_{}".format(leg, b) for b in [64, 128, 256] for leg in legends]
# t_maxs = t_maxs + [t_max for b in [64, 128, 256] for t_max in t_maxs]

# Root of the synced log folder and the experiment directory read below.
dropbox_direc = "/home/tabz/Dropbox/RL/Log_Files/March 2017"
# directories =  ["{}/{}".format(dropbox_direc, "Maze_5_Logs/")] + 3 * ["{}/{}".format(dropbox_direc, "Maze_5_Batch_Sizes_Logs/")]
directories = dropbox_direc + "/Maze_5_Logs/"

print(names)
print(t_maxs)
# Disabled colour choices, kept for reference:
# colours = ["red", "#3366ff", "#66ff33", "yellow"]
# colours = rainbow(np.linspace(0, 1, len(names)))

['DQN', 'Count*_Conv_True*_Eps_1', 'Count*_Conv_True*_Eps_0.1', 'Count*_Conv_False*_Eps_1', 'Count*_Conv_False*_Eps_0.1']
[1000000, 1000000, 1000000, 1000000, 1000000]


In [34]:
from bokeh.palettes import Set1_9 as rainbow_colours

In [35]:
from bokeh.palettes import magma, inferno, Purples, Oranges, Greens, Reds, Set1

In [36]:
# One colour per entry of `names`.
# NOTE(review): the variable is called `magma_colours` but it is filled from
# `inferno`, not `magma` — confirm which palette is intended. It is not used
# by any later cell visible here.
magma_colours = inferno(len(names))

In [37]:
# Take the first `c_nums` colours from each of four palettes (purple, orange,
# green, red — in that order) and concatenate them into one colour sequence.
# Presumably `[9]` selects the nine-colour variant of each palette — TODO confirm.
c_nums = 1
triple_colours = Purples[9][:c_nums] + Oranges[9][:c_nums] + Greens[9][:c_nums] + Reds[9][:c_nums]

In [38]:
# Display the colour sequence built in the previous cell.
triple_colours

['#3f007d', '#7f2704', '#00441b', '#67000d']

In [41]:
# Draw the "Maze 5" comparison for the configured experiments, resampling
# every 10000 steps. `Set1[5]` supplies the colours (presumably five, one per
# entry of `names` — TODO confirm).
plot_averages_b(names, t_maxs, Set1[5], legends, directories, step=10000, title="Maze 5")

In [None]:
# Linearly resample every run onto the integer steps 0 .. 499999.
# interp1d is left at its default bounds handling, so a run whose recorded
# steps do not cover the whole grid raises ValueError.
sample_steps = np.arange(500000)
DQN_Smoothed = [
    interp1d(run_times, run_rewards, kind="linear")(sample_steps)
    for run_times, run_rewards in zip(DQN_Lengths_C, DQN_Rewards)
]

In [None]:
# Point-wise mean across runs (axis 0 indexes the runs).
DQN_Means = np.asarray(DQN_Smoothed).mean(axis=0)

In [None]:
# Point-wise standard deviation across runs (axis 0 indexes the runs).
DQN_Stds = np.asarray(DQN_Smoothed).std(axis=0)

In [None]:
# Shade mean +/- one standard deviation, then draw the mean curve on top.
band_steps = list(range(500000))
band_lower = DQN_Means - DQN_Stds
band_upper = DQN_Means + DQN_Stds
plt.fill_between(band_steps, band_lower, band_upper, color="red", alpha=0.3, edgecolor="white")

plt.plot(band_steps, DQN_Means, color="red")

In [None]:
# NOTE(review): `DQN_Repeat` is not defined in any cell visible in this
# notebook, so this scratch cell raises NameError on a fresh kernel — define
# it or remove the cell.
len(DQN_Repeat)

In [None]:
# NOTE(review): `DQN` is not defined in any cell visible in this notebook, so
# this scratch cell raises NameError on a fresh kernel — define it or remove
# the cell.
plt.plot(DQN)

In [None]:
# Rebinds `directories` (set earlier to the Maze_5_Logs path string) to a
# one-element list. plot_averages_b zips a list-valued `directories` with
# `names`, so calling it again after this cell would draw only one experiment.
directories = ["Logs/"]

In [None]:
# Scratch check of stepped iteration: prints the even numbers 0, 2, 4, 6, 8.
for i in range(10)[::2]:
    print(i)