In [1]:
import numpy as np
import glob
from scipy.interpolate import interp1d
from scipy.interpolate import spline
from tqdm import tqdm
from bokeh.plotting import figure, output_notebook, show
output_notebook()

In [2]:
from bokeh.models import Legend
from bokeh.models.formatters import BasicTickFormatter
from bokeh.models import HoverTool, TapTool
from bokeh.palettes import magma, inferno, Set1, Set3, Paired, Dark2

In [88]:
def plot_averages_b(names, t_maxs, legends, directories="Logs/", step=10, title="", inter_quartile=True, step_reward=False):
#     plt.figure(figsize=(10,10))
    p = figure(width=1200, height=800, y_range=(-1, 101), title=title)
    p.toolbar_location = "above"
    p.yaxis.axis_label = "% Agents Reaching the Goal"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)
#     p = figure(width=800, height=500)
    lines = []
    alpha_lines = []
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) > 10:
        colors = Paired[len(names)]
    else:
        colors = Set1[len(names)]
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in zip(names, colors, t_maxs, legends, directories):
#         print(directory)
        DQN_Rewards_Path = directory + "*" + name + "*/logs/Episode_Rewards.txt"
        DQN_Lengths_Path = directory + "*" + name + "*/logs/Episode_Lengths.txt"
        DQN_Rewards = []
        DQN_Lengths = []
        DQN_Lengths_C = []
        for filename in glob.glob(DQN_Rewards_Path):
            run_logs = [0]
            with open(filename, "r") as f:
#                 print(f)
                for line in f:
                    try:
                        ep_r = float(line) * 100.0
                    except:
                        print("Nothing here for:", filename)
                        continue
                    run_logs.append(ep_r)
            DQN_Rewards.append(run_logs)
        for filename in glob.glob(DQN_Lengths_Path):
            run_logs = [0]
        #     print(filename)
            with open(filename, "r") as f:
                for line in f:
                    try:
                        ep_r = float(line)
                    except:
                        continue
                    run_logs.append(ep_r)
            DQN_Lengths.append(run_logs)
            run_logs_c = np.cumsum(run_logs).astype(np.int32)
            DQN_Lengths_C.append(run_logs_c)
        DQN_Smoothed = []
        for times, rewards in zip(DQN_Lengths_C, DQN_Rewards):
#             linear = interp1d(times, rewards, kind="linear")
#             linear_rewards = linear([i for i in range(500000)])
#             DQN_Smoothed.append(linear_rewards)
            if not step_reward:
                spline_rewards = spline(times, rewards, [i for i in range(0, t_max, step)], order=1)
#                 spline_rewards = rewards
                DQN_Smoothed.append(spline_rewards)
            else:
                max_reward = []
                c_index = 0
                for i in range(0, t_max, step):
                    c_time = times[c_index]
                    c_reward = rewards[c_index]
                    max_reward.append(c_reward)
                    if c_time < i:
                        c_index += 1
                DQN_Smoothed.append(max_reward)

        DQN_Means = np.mean(DQN_Smoothed, axis=0)
#         DQN_Stds = np.std(DQN_Smoothed, axis=0)
#         DQN_Means = np.percentile(DQN_Smoothed, 50, axis=0)
        lq = np.percentile(DQN_Smoothed, 25, axis=0)
        uq = np.percentile(DQN_Smoothed, 75, axis=0)
        
        if inter_quartile:
            xs = [i for i in range(0, t_max, step)]
            xs = xs + list(reversed(xs))
    #         ys = np.concatenate([DQN_Means - DQN_Stds, np.flip(DQN_Means + DQN_Stds, axis=0)])
            ys = np.concatenate([lq, np.flip(uq, axis=0)])
            lls = p.patch(xs, ys, color=color, alpha=0.1)
        
#         if min_max:
#     #         plt.fill_between([i for i in range(0, t_max, step)], DQN_Means - DQN_Stds, DQN_Means + DQN_Stds, color=color, alpha=0.2, edgecolor="white")
#             # Error bars
#             xs = [i for i in range(0, t_max, step)]
#             xs = xs + list(reversed(xs))
#     #         ys = np.concatenate([DQN_Means - DQN_Stds, np.flip(DQN_Means + DQN_Stds, axis=0)])
#             ys = np.concatenate([np.min(DQN_Smoothed, axis=0), np.flip(np.max(DQN_Smoothed, axis=0), axis=0)])
#             lls = p.patch(xs, ys, color=color, alpha=0.1)


    #         for x, y, yerr in zip(range(0, t_max, step), DQN_Means, DQN_Stds):
    #             lls = p.line([x, x], [y - yerr, y + yerr], color=color, alpha=0.1, hover_alpha=0.8)
    #             alpha_lines.append(lls)

#         print(DQN_Rewards)
    #         plt.plot([i for i in range(0, t_max, step)], DQN_Means, color=color)
        l = p.line([i for i in range(0, t_max, step)], DQN_Means, color=color, line_width=3, alpha=0.8)
        lines.append(l)
    #         alpha_lines.append(lls)

#     plt.title("Episode_Reward")
#     plt.xlabel("T")
#     plt.ylabel("Reward")
#     if legends is not None:
#         plt.legend(legends)
#     plt.show()
#     p.line([0,1], [-1,3])
    new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")
#     p.add_tools(HoverTool(renderers=alpha_lines))
#     p.add_tools(TapTool(renderers=alpha_lines))
    show(p)

In [4]:
def plot_states(names, t_maxs, legends, directories="Logs/", step=10, title=""):
#     plt.figure(figsize=(10,10))
    p = figure(width=1200, height=800, title=title)
    p.toolbar_location = "above"
    p.yaxis.axis_label = "States visited"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)
#     p = figure(width=800, height=500)
    lines = []
    alpha_lines = []
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) > 10:
        colors = Paired[len(names)]
    else:
        colors = Set1[len(names)]
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in zip(names, colors, t_maxs, legends, directories):
#         print(directory)
        States_Path = directory + "*" + name + "*/logs/Player_Positions.txt"
        States = []
        for filename in glob.glob(States_Path):
            run_logs = []
            with open(filename, "r") as f:
#                 print(f)
                for line in f:
                    try:
                        ep_r = str(line)
#                         print(ep_r)
                    except:
                        print("Nothing here for:", filename)
                        continue
                    run_logs.append(ep_r)
            States.append(run_logs)
        States_Visited = []
        for run in States:
            Visited = []
            States_Sets = set()
#             print(len(run))
            for ii, s in enumerate(run):
                if ii >= t_max - 1:
                    break
#                 print(s)
                States_Sets.add(s)
                if ii % step == 0:
                    Visited.append(len(States_Sets))
#             print(len(States_Sets))
#             print(len(Visited))
            States_Visited.append(Visited)
#         print(np.array(States_Visited).shape)
        Means = np.mean(States_Visited, axis=0)
#         Stds = np.std(States_Visited, axis=0)
        Means = np.percentile(States_Visited, 50, axis=0)
        lq = np.percentile(States_Visited, 25, axis=0)
        uq = np.percentile(States_Visited, 75, axis=0)
#         print(Means.shape)
        
#         plt.fill_between([i for i in range(0, t_max, step)], DQN_Means - DQN_Stds, DQN_Means + DQN_Stds, color=color, alpha=0.2, edgecolor="white")
        # Error bars
        xs = [i for i in range(0, t_max, step)]
        xs = xs + list(reversed(xs))
#         ys = np.concatenate([Means - Stds, np.flip(Means + Stds, axis=0)])
        ys = np.concatenate([np.min(States_Visited, axis=0), np.flip(np.max(States_Visited, axis=0), axis=0)])
    
        ys = np.concatenate([lq, np.flip(uq, axis=0)])
        
        lls = p.patch(xs, ys, color=color, alpha=0.1)

        
#         for x, y, yerr in zip(range(0, t_max, step), DQN_Means, DQN_Stds):
#             lls = p.line([x, x], [y - yerr, y + yerr], color=color, alpha=0.1, hover_alpha=0.8)
#             alpha_lines.append(lls)
            

#         plt.plot([i for i in range(0, t_max, step)], DQN_Means, color=color)
        l = p.line([i for i in range(0, t_max, step)], Means, color=color, line_width=2)
        lines.append(l)
#         alpha_lines.append(lls)

#     plt.title("Episode_Reward")
#     plt.xlabel("T")
#     plt.ylabel("Reward")
#     if legends is not None:
#         plt.legend(legends)
#     plt.show()
#     p.line([0,1], [-1,3])
    new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")
#     p.add_tools(HoverTool(renderers=alpha_lines))
#     p.add_tools(TapTool(renderers=alpha_lines))
    show(p)

In [5]:
def plot_percentages(names, t_maxs, legends, directories="Logs/", step=10, title="", min_max=True, eps_average=3):
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) > 10:
        colors = Paired[len(names)]
    else:
        colors = Set1[len(names)]
    p = figure(width=1200, height=800, y_range=(-0.01, 1.01), title=title)
    p.toolbar_location = "above"
    p.yaxis.axis_label = "% Successful Agents"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)

    lines = []
    alpha_lines = []
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in tqdm(zip(names, colors, t_maxs, legends, directories)):

        Eval_Rewards_Path = directory + "*" + name + "*/logs/Eval_Rewards*.txt"
        Eval_Rewards = []

        for filename in glob.glob(Eval_Rewards_Path):
            run_logs = [0]
            with open(filename, "r") as f:
                for line in f:
                    try:
                        ep_r = float(line)
                    except:
                        print("Nothing here for:", filename)
                        continue
                    run_logs.append(ep_r)
            Eval_Rewards.append(run_logs)
            
#         print(Eval_Rewards)
        
        Win_Percentages = []
        for i in range(100):
            rr = 0
            for reward in Eval_Rewards:
                rr += reward[i]
            Win_Percentages.append(rr / len(Eval_Rewards))

#         Win_Percentages = []
#         for times, rewards in zip(DQN_Lengths_C, DQN_Rewards):
#             # Linear interpolation
#             spline_rewards = spline(times, rewards, [i for i in range(0, t_max, step)], order=1)
#             DQN_Smoothed.append(spline_rewards)
            
            
#         DQN_Means = np.mean(DQN_Smoothed, axis=0)
        
#         Win_Percentages = []
# #         print(len(DQN_Smoothed), len(DQN_Smoothed[0]))
#         np_smoothed = np.array(DQN_Rewards)
#         print(np_smoothed)
# #         print(np_smoothed.shape)
#         for tt in range(len(DQN_Means)):
# #             print(DQN_Smoothed)
#             rewards = np_smoothed[:,tt]
#             percentage = len([1 for r in rewards if r > 0])/(len(rewards) * 1.0)
#             Win_Percentages.append(percentage)

#         print(len(DQN_Means), len(Win_Percentages))
#         DQN_Stds = np.std(DQN_Smoothed, axis=0)

#         xs = [i for i in range(0, t_max, step)]
#         xs = xs + list(reversed(xs))
#         # Show error bars between min and max runs
#         if min_max:
#             ys = np.concatenate([np.min(DQN_Smoothed, axis=0), np.flip(np.max(DQN_Smoothed, axis=0), axis=0)])
#         # +- 1 Std
#         else:
#             ys = np.concatenate([DQN_Means - DQN_Stds, np.flip(DQN_Means + DQN_Stds, axis=0)])
#         lls = p.patch(xs, ys, color=color, alpha=0.1)
        l = p.line([i + 1 for i in range(100)], Win_Percentages, color=color, line_width=2)
#         l = p.line([i for i in range(0, t_max, step)], DQN_Means, color=color, line_width=2)
        lines.append(l)
    #         alpha_lines.append(lls)

#     plt.title("Episode_Reward")
#     plt.xlabel("T")
#     plt.ylabel("Reward")
#     if legends is not None:
#         plt.legend(legends)
#     plt.show()
#     p.line([0,1], [-1,3])
    new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")
#     p.add_tools(HoverTool(renderers=alpha_lines))
#     p.add_tools(TapTool(renderers=alpha_lines))
    show(p)

In [35]:
# Maze 12 Uniform Baselines
names = []
legends = []
t_maxs = []
          
# Prioritized replay stuff
for xp in [50, 100, 200, 300, 400]:
    names += ["_XpSize_{}k_".format(xp)]
    legends += ["{}k XpSize".format(xp)]
    t_maxs += [1200001]
    
# direc = "/home/tabz/tmp/logfiles/MedMaze12_Uniform_400_500/"
direc = "/home/scratch/tabhid/Log_Files/MedMaze12_Uniform/"
env = "Med Maze 12 Uniform Baselines"
directories = direc
    
print(names)
print(t_maxs)
print(directories)
print("\n", "{} Lines".format(len(names)))

['_XpSize_50k_', '_XpSize_100k_', '_XpSize_200k_', '_XpSize_300k_', '_XpSize_400k_']
[1200001, 1200001, 1200001, 1200001, 1200001]
/home/scratch/tabhid/Log_Files/MedMaze12_Uniform/

 5 Lines


In [99]:
# Doom Stuff
names = []
legends = []
t_maxs = []
          
names += ["_XpSize_500k_*_CountEps_0.99_"]
legends += ["0.99 Decay"]
t_maxs += [2500001]

names += ["_XpSize_500k_*_CountEps_0.999_"]
legends += ["0.999 Decay"]
t_maxs += [2500001]
          
# names += ["_XpSize_500k_Prioritized"]
# legends += ["Prioritized"]
# t_maxs += [5000001]

# direc = "/home/tabz/tmp/logfiles/Doom_Uniform_1mil/"
direc = "/home/scratch/tabhid/Log_Files/Doom_Gamma_Test/"
env = "Doom"
directories = direc
    
print(names)
print(t_maxs)
print(directories)
print("\n", "{} Lines".format(len(names)))

['_XpSize_500k_*_CountEps_0.99_', '_XpSize_500k_*_CountEps_0.999_']
[2500001, 2500001]
/home/scratch/tabhid/Log_Files/Doom_Gamma_Test/

 2 Lines


In [100]:
plot_percentages(names, t_maxs, legends, directories, step=1000, title=env)

2it [00:00, 135.66it/s]


In [101]:
plot_averages_b(names, t_maxs, legends, directories=directories, step=10000, title=env, inter_quartile=False)

In [102]:
plot_states(names, t_maxs, legends, directories=directories, step=5000, title=env)

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


IndexError: cannot do a non-empty take from an empty axes.