In [1]:
import numpy as np
import glob
import pandas as pd
from scipy.interpolate import interp1d
from scipy.interpolate import spline
from tqdm import tqdm
from bokeh.plotting import figure, output_notebook, show
output_notebook()

In [136]:
from bokeh.models import Legend
from bokeh.models.formatters import BasicTickFormatter
from bokeh.models import HoverTool, TapTool
from bokeh.palettes import magma, inferno, Set1, Set3, Paired, Dark2, Category20
from bokeh.palettes import Reds, Greens, Blues

In [169]:
def plot_averages_b(names, t_maxs, legends, directories="Logs/", step=10, title="", stds=False, spans=1000, start_offset=10):
#     plt.figure(figsize=(10,10))
    p = figure(width=1200, height=800, x_range=(0, t_maxs[0]), title=title)
    p.toolbar_location = "above"
    p.yaxis.axis_label = "Episode Training Reward"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)
               
    # Styling
    p.xaxis.axis_label_text_font_style = "normal"
    p.yaxis.axis_label_text_font_style = "normal"
               
    p.xaxis.axis_label_text_font_size = "14pt"
    p.yaxis.axis_label_text_font_size = "14pt"
               
#     p = figure(width=800, height=500)
    lines = []
    alpha_lines = []
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) > 10:
        colors = Category20[len(names)]
    else:
        colors = Set1[len(names)]
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in zip(names, colors, t_maxs, legends, directories):
#         print(directory)
        DQN_Rewards_Path = directory + "*" + name + "*/logs/Episode_Rewards.txt"
        DQN_Lengths_Path = directory + "*" + name + "*/logs/Episode_Lengths.txt"
        DQN_Rewards = []
        DQN_Lengths = []
        DQN_Lengths_C = []
        for filename in glob.glob(DQN_Rewards_Path):
            run_logs = [0]
            with open(filename, "r") as f:
#                 print(f)
                for line in f:
                    try:
                        ep_r = float(line)
                    except:
                        print("Nothing here for:", filename)
                        continue
                    run_logs.append(ep_r)
            DQN_Rewards.append(run_logs)
        for filename in glob.glob(DQN_Lengths_Path):
            run_logs = [0]
        #     print(filename)
            with open(filename, "r") as f:
                for line in f:
                    try:
                        ep_r = float(line)
                    except:
                        continue
                    run_logs.append(ep_r)
            DQN_Lengths.append(run_logs)
            run_logs_c = np.cumsum(run_logs).astype(np.int32)
            DQN_Lengths_C.append(run_logs_c)
        DQN_Smoothed = []
        for times, rewards in zip(DQN_Lengths_C, DQN_Rewards):
#             linear = interp1d(times, rewards, kind="linear")
#             linear_rewards = linear([i for i in range(500000)])
#             DQN_Smoothed.append(linear_rewards)


            spline_rewards = spline(times, rewards, [i for i in range(step*start_offset, t_max, step)], order=1)
            DQN_Smoothed.append(spline_rewards)


        
        DQN_Means = np.mean(DQN_Smoothed, axis=0)
        DQN_Means = pd.Series(DQN_Means).ewm(span=spans).mean()
#         print(DQN_Means)
#         DQN_Means = ewm_smoothed.mean()
        DQN_Stds = np.std(DQN_Smoothed, axis=0)
        DQN_Stds = pd.Series(DQN_Stds).ewm(span=spans).mean()
#         DQN_Means = np.percentile(DQN_Smoothed, 50, axis=0)
#         lq = np.percentile(DQN_Smoothed, 25, axis=0)
#         uq = np.percentile(DQN_Smoothed, 75, axis=0)
        
#         if inter_quartile:
#             xs = [i for i in range(0, t_max, step)]
#             xs = xs + list(reversed(xs))
#     #         ys = np.concatenate([DQN_Means - DQN_Stds, np.flip(DQN_Means + DQN_Stds, axis=0)])
#             ys = np.concatenate([lq, np.flip(uq, axis=0)])
#             lls = p.patch(xs, ys, color=color, alpha=0.1)
        
        if stds:
    #         plt.fill_between([i for i in range(0, t_max, step)], DQN_Means - DQN_Stds, DQN_Means + DQN_Stds, color=color, alpha=0.2, edgecolor="white")
            # Error bars
            xs = [i for i in range(step*start_offset, t_max, step)]
            xs = xs + list(reversed(xs))
            ys = np.concatenate([DQN_Means - DQN_Stds, np.flip(DQN_Means + DQN_Stds, axis=0)])
#             ys = np.concatenate([np.min(DQN_Smoothed, axis=0), np.flip(np.max(DQN_Smoothed, axis=0), axis=0)])
            lls = p.patch(xs, ys, color=color, alpha=0.1)

#         if stds:
#             for x, y, yerr in zip(range(0, t_max, step), DQN_Means, DQN_Stds):
#                 lls = p.line([x, x], [y - yerr, y + yerr], color=color, alpha=0.1, hover_alpha=0.8)
#                 alpha_lines.append(lls)

#         print(DQN_Rewards)
    #         plt.plot([i for i in range(0, t_max, step)], DQN_Means, color=color)
        l = p.line([i for i in range(step*start_offset, t_max, step)], DQN_Means, color=color, line_width=3, alpha=0.8)
        lines.append(l)
    #         alpha_lines.append(lls)

#     plt.title("Episode_Reward")
#     plt.xlabel("T")
#     plt.ylabel("Reward")
#     if legends is not None:
#         plt.legend(legends)
#     plt.show()
#     p.line([0,1], [-1,3])
    new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")
#     p.add_tools(HoverTool(renderers=alpha_lines))
#     p.add_tools(TapTool(renderers=alpha_lines))
    show(p)

In [156]:
def plot_states(names, t_maxs, legends, directories="Logs/", step=10, title="", stds=True, color_offset=0):
#     plt.figure(figsize=(10,10))
    p = figure(width=1200, height=800, title=title)
    p.toolbar_location = "above"
    p.yaxis.axis_label = "States visited"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)
#     p = figure(width=800, height=500)
    lines = []
    alpha_lines = []
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) > 10:
        colors = Category20[len(names)]
    else:
        colors = Set1[len(names) + color_offset][color_offset:]
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in tqdm(zip(names, colors, t_maxs, legends, directories)):
#         print(directory)
        States_Path = directory + "*" + name + "*/logs/Player_Positions.txt"
        States = []
        for filename in glob.glob(States_Path):
            run_logs = []
            with open(filename, "r") as f:
#                 print(f)
                for line in f:
                    try:
                        ep_r_str = str(line)
                        ss = ep_r_str.split()
                        x = int(ss[0])
                        y = int(ss[1])
                        ep_r = (x,y)
#                         print(ep_r)
                    except:
                        print("Nothing here for:", filename)
                        continue
                    run_logs.append(ep_r)
            States.append(run_logs)
        States_Visited = []
        xs = []
        for run in States:
            Visited = []
            States_Sets = set()
            xs = []
#             print(len(run))
            for ii, s in enumerate(run):
                if ii > t_max:
                    break
#                 print(s)
                States_Sets.add(s)
                if ii % step == 0 or ii == len(run) - 1:
                    Visited.append(len(States_Sets))
                    xs.append(ii)
#             print(len(States_Sets))
#             print(len(Visited))
            States_Visited.append(Visited)
#         print(np.array(States_Visited).shape)
        Means = np.mean(States_Visited, axis=0)
        Stds = np.std(States_Visited, axis=0)
#         Means = np.percentile(States_Visited, 50, axis=0)
#         lq = np.percentile(States_Visited, 25, axis=0)
#         uq = np.percentile(States_Visited, 75, axis=0)
#         print(Means.shape)
        
#         plt.fill_between([i for i in range(0, t_max, step)], DQN_Means - DQN_Stds, DQN_Means + DQN_Stds, color=color, alpha=0.2, edgecolor="white")
        # Error bars
#         xs = [i for i in range(0, t_max, step)]
        xs = xs + list(reversed(xs))
#         ys = np.concatenate([np.min(States_Visited, axis=0), np.flip(np.max(States_Visited, axis=0), axis=0)])
    
        if stds:
            ys = np.concatenate([Means - Stds, np.flip(Means + Stds, axis=0)])
#         else:
#             ys = np.concatenate([lq, np.flip(uq, axis=0)])
#         print(len(xs), len(ys))
        lls = p.patch(xs, ys, color=color, alpha=0.1)

        
#         for x, y, yerr in zip(range(0, t_max, step), DQN_Means, DQN_Stds):
#             lls = p.line([x, x], [y - yerr, y + yerr], color=color, alpha=0.1, hover_alpha=0.8)
#             alpha_lines.append(lls)
            

#         plt.plot([i for i in range(0, t_max, step)], DQN_Means, color=color)
        l = p.line([i * step for i in range(len(Means))], Means, color=color, line_width=2)
        lines.append(l)
#         alpha_lines.append(lls)

#     plt.title("Episode_Reward")
#     plt.xlabel("T")
#     plt.ylabel("Reward")
#     if legends is not None:
#         plt.legend(legends)
#     plt.show()
#     p.line([0,1], [-1,3])
    new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")
#     p.add_tools(HoverTool(renderers=alpha_lines))
#     p.add_tools(TapTool(renderers=alpha_lines))
    show(p)

In [176]:
def plot_percentages(names, t_maxs, legends, directories="Logs/", step=10, title="", min_max=True, eps_average=3):
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) > 10:
        colors = Category20[len(names)]
    else:
        colors = Set1[len(names)]
    p = figure(width=1200, height=800, title=title)
    p.toolbar_location = "above"
    p.yaxis.axis_label = "Episode Eval Reward"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)

    lines = []
    alpha_lines = []
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in tqdm(zip(names, colors, t_maxs, legends, directories)):

        Eval_Rewards_Path = directory + "*" + name + "*/logs/Eval_Rewards*.txt"
        Eval_Rewards = []

        for filename in glob.glob(Eval_Rewards_Path):
            run_logs = []
            with open(filename, "r") as f:
                for line in f:
                    try:
                        ep_r = float(line)
                    except:
                        print("Nothing here for:", filename)
                        continue
                    run_logs.append(ep_r)
            Eval_Rewards.append(run_logs)
            
#         print(Eval_Rewards)
        
        Win_Percentages = []
        for i in range(100):
            rr = 0
            for reward in Eval_Rewards:
                rr += reward[i]
            
            Win_Percentages.append(rr / len(Eval_Rewards))

#         print(Win_Percentages)
#         Win_Percentages = []
#         for times, rewards in zip(DQN_Lengths_C, DQN_Rewards):
#             # Linear interpolation
#             spline_rewards = spline(times, rewards, [i for i in range(0, t_max, step)], order=1)
#             DQN_Smoothed.append(spline_rewards)
            
            
#         DQN_Means = np.mean(DQN_Smoothed, axis=0)
        
#         Win_Percentages = []
# #         print(len(DQN_Smoothed), len(DQN_Smoothed[0]))
#         np_smoothed = np.array(DQN_Rewards)
#         print(np_smoothed)
# #         print(np_smoothed.shape)
#         for tt in range(len(DQN_Means)):
# #             print(DQN_Smoothed)
#             rewards = np_smoothed[:,tt]
#             percentage = len([1 for r in rewards if r > 0])/(len(rewards) * 1.0)
#             Win_Percentages.append(percentage)

#         print(len(DQN_Means), len(Win_Percentages))
#         DQN_Stds = np.std(DQN_Smoothed, axis=0)

#         xs = [i for i in range(0, t_max, step)]
#         xs = xs + list(reversed(xs))
#         # Show error bars between min and max runs
#         if min_max:
#             ys = np.concatenate([np.min(DQN_Smoothed, axis=0), np.flip(np.max(DQN_Smoothed, axis=0), axis=0)])
#         # +- 1 Std
#         else:
#             ys = np.concatenate([DQN_Means - DQN_Stds, np.flip(DQN_Means + DQN_Stds, axis=0)])
#         lls = p.patch(xs, ys, color=color, alpha=0.1)
        l = p.line([i + 1 for i in range(100)], Win_Percentages, color=color, line_width=2)
#         l = p.line([i for i in range(0, t_max, step)], DQN_Means, color=color, line_width=2)
        lines.append(l)
    #         alpha_lines.append(lls)

#     plt.title("Episode_Reward")
#     plt.xlabel("T")
#     plt.ylabel("Reward")
#     if legends is not None:
#         plt.legend(legends)
#     plt.show()
#     p.line([0,1], [-1,3])
    new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")
#     p.add_tools(HoverTool(renderers=alpha_lines))
#     p.add_tools(TapTool(renderers=alpha_lines))
    show(p)

In [140]:
loaded_frontier_actions = {}

In [7]:
def load_frontier_stuff(names, directories):
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, directory in tqdm(zip(names, directories)):

        Action_Counts_Path = directory + "*" + name + "*/logs/Action_Counts.txt"
        
        Current_State_Counts = []
        Available_Action_Counts = []
        Chosen_Action_Counts = []

        for filename in tqdm(glob.glob(Action_Counts_Path)):
        
            currents = []
            availables = []
            chosen = []
        
            with open(filename, "r") as f:
                for line in f:
                    if line != "":
                        nums = line.split()
                        if nums == []:
                            continue
                        nums = [int(v) for v in nums]
                        currents.append(nums[0])
                        availables.append(nums[1:-1])
                        chosen.append(nums[-1])
            Current_State_Counts.append(currents)
            Available_Action_Counts.append(availables)
            Chosen_Action_Counts.append(chosen)
        print(directory)
        loaded_frontier_actions["{}_{}_{}".format(directory, name, "Current")] = Current_State_Counts
        loaded_frontier_actions["{}_{}_{}".format(directory, name, "Available")] = Available_Action_Counts
        loaded_frontier_actions["{}_{}_{}".format(directory, name, "Chosen")] = Chosen_Action_Counts

In [8]:
def action_selections(names, t_maxs, legends, directories="Logs/", step=10, title="", Frontier=10, Unknown=2, stds=True, known_states=False, unknown_available=False, start=1000):
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) > 10:
        colors = Paired[len(names)]
    else:
        colors = Set1[len(names)]
#     colors = [i for i in range(len(names))]
#     colors_n = len(colors)
#     colors_to_use = [Reds[9][:colors_n], Blues[9][:colors_n], Greens[9][:colors_n]]
    title += " Frontier_{} Unknown_{}".format(Frontier, Unknown)
    p = figure(width=1200, height=800, title=title, y_range=(0,1))
    p.toolbar_location = "above"
    p.yaxis.axis_label = "% Frontier Actions"
    if unknown_available:
        p.yaxis.axis_label = "% Frontier Actions if Unknown state available"
    if known_states:
        unknown_available = True
    if known_states:
        p.yaxis.axis_label = "% Actions if Unknown state available"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)

    lines = []
    alpha_lines = []
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in tqdm(zip(names, colors, t_maxs, legends, directories)):

        Action_Counts_Path = directory + "*" + name + "*/logs/Action_Counts.txt"
        
        Current_State_Counts = []
        Available_Action_Counts = []
        Chosen_Action_Counts = []
        
        Current_State_Counts = loaded_frontier_actions["{}_{}_{}".format(directory, name, "Current")]
        Available_Action_Counts = loaded_frontier_actions["{}_{}_{}".format(directory, name, "Available")]
        Chosen_Action_Counts = loaded_frontier_actions["{}_{}_{}".format(directory, name, "Chosen")]

#         for filename in tqdm(glob.glob(Action_Counts_Path)):
        
#             currents = []
#             availables = []
#             chosen = []
        
#             with open(filename, "r") as f:
#                 for line in f:
#                     if line != "":
#                         nums = line.split()
#                         if nums == []:
#                             continue
#                         nums = [int(v) for v in nums]
#                         currents.append(nums[0])
#                         availables.append(nums[1:-1])
#                         chosen.append(nums[-1])
#             Current_State_Counts.append(currents)
#             Available_Action_Counts.append(availables)
#             Chosen_Action_Counts.append(chosen)
            
        
        times_picked_unknown = []
        times_picked_frontier = []
        times_picked_known = []
        for cs, aacs, cacs in tqdm(zip(Current_State_Counts, Available_Action_Counts, Chosen_Action_Counts)):
            us = [0]
            fs = [0]
            ks = [0]
            total_frontier_actions = [1]
            for index, state_count, next_counts, chosen_count in zip([i for i in range(len(cs))], cs, aacs, cacs):
                list_to_add = [0,0,0]
                unknown_true = False
                if unknown_available:
                    if (not known_states and state_count <= Frontier) or known_states:
                        # At the frontier
                        if any(k < Unknown for k in next_counts):
                            #There is an unknown state
                            unknown_true = True
                if (not unknown_available and not known_states and state_count <= Frontier) or (known_states and state_count > Frontier) or unknown_true:
                    list_to_add = [0,0,1]
                    if index != 0:
                        total_frontier_actions.append(total_frontier_actions[-1] + 1)
                    if chosen_count <= Frontier and chosen_count >= Unknown:
                        list_to_add = [0,1,0]
                    elif chosen_count < Unknown:
                        list_to_add = [1,0,0]
                else:
                    total_frontier_actions.append(total_frontier_actions[-1])
                us.append(us[-1] + list_to_add[0])
                fs.append(fs[-1] + list_to_add[1])
                ks.append(ks[-1] + list_to_add[2])
            
            frontier_actions = np.array(total_frontier_actions)[:t_max]
                
            times_picked_unknown.append(np.array(us)[:t_max]/frontier_actions)
            times_picked_frontier.append(np.array(fs)[:t_max]/frontier_actions)
            times_picked_known.append(np.array(ks)[:t_max]/frontier_actions)
        
#         line_colors = [colors_to_use[0][color], colors_to_use[1][color], colors_to_use[2][color]]
        line_styles = ["solid", "dashed", "dotted"]
        for g, cc in zip([times_picked_unknown, times_picked_frontier, times_picked_known], line_styles):
            Means = np.mean(g, axis=0)[start:][::step]
            Stds = np.std(g, axis=0)[start:][::step]
#             print(len([i for i in range(0, t_max, step)]), len(means[::step]))
            l = p.line([i for i in range(start, t_max, step)], Means, color=color, line_width=2, line_dash=cc)
            lines.append(l)
            
            if stds:
                xs = [i for i in range(start, t_max, step)]
                xs = xs + list(reversed(xs))

                ys = np.concatenate([Means - Stds, np.flip(Means + Stds, axis=0)])

                lls = p.patch(xs, ys, color=color, alpha=0.1)
                        
    legend_names = ["Unknown", "Frontier", "Known"]
    zipped_legend_names = [name + " " + type_state for name in legends for type_state in legend_names]
    new_leg = Legend(items=[(name, [line]) for name, line in zip(zipped_legend_names, lines)], location=(0,-30))
#     new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")

    show(p)

In [9]:
def frontier_times(names, t_maxs, legends, directories="Logs/", step=10, title="", Frontier=10, Unknown=2, stds=True):
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) > 10:
        colors = Paired[len(names)]
    else:
        colors = Set1[len(names)]
#     colors = [i for i in range(len(names))]
#     colors_n = len(colors)
#     colors_to_use = [Reds[9][:colors_n], Blues[9][:colors_n], Greens[9][:colors_n]]
    p = figure(width=1200, height=800, title=title, y_range=(0,1))
    p.toolbar_location = "above"
    p.yaxis.axis_label = "% Frontier States"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)

    lines = []
    alpha_lines = []
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in tqdm(zip(names, colors, t_maxs, legends, directories)):

        Action_Counts_Path = directory + "*" + name + "*/logs/Action_Counts.txt"
        
        Current_State_Counts = []
        Available_Action_Counts = []
        Chosen_Action_Counts = []
        
        Current_State_Counts = loaded_frontier_actions["{}_{}_{}".format(directory, name, "Current")]
        Available_Action_Counts = loaded_frontier_actions["{}_{}_{}".format(directory, name, "Available")]
        Chosen_Action_Counts = loaded_frontier_actions["{}_{}_{}".format(directory, name, "Chosen")]

#         for filename in tqdm(glob.glob(Action_Counts_Path)):
        
#             currents = []
#             availables = []
#             chosen = []
        
#             with open(filename, "r") as f:
#                 for line in f:
#                     if line != "":
#                         nums = line.split()
#                         if nums == []:
#                             continue
#                         nums = [int(v) for v in nums]
#                         currents.append(nums[0])
#                         availables.append(nums[1:-1])
#                         chosen.append(nums[-1])
#             Current_State_Counts.append(currents)
#             Available_Action_Counts.append(availables)
#             Chosen_Action_Counts.append(chosen)
            
        
        times_picked_unknown = []
        times_picked_frontier = []
        times_picked_known = []
        for cs, aacs, cacs in tqdm(zip(Current_State_Counts, Available_Action_Counts, Chosen_Action_Counts)):
            us = [0]
            fs = [0]
            ks = [0]
            total_states = [1]
            for index, state_count, next_counts, chosen_count in zip([i for i in range(len(cs))], cs, aacs, cacs):
                if state_count >= Unknown and state_count <= Frontier:
                    list_to_add = [0,1,0]
                elif state_count < Unknown:
                    list_to_add = [1, 0, 0]
                else:
                    list_to_add = [0, 0, 1]
                if index == 0:
                    total_states.append(1)
                else:
                    total_states.append(total_states[-1] + 1)
                us.append(us[-1] + list_to_add[0])
                fs.append(fs[-1] + list_to_add[1])
                ks.append(ks[-1] + list_to_add[2])
            
            frontier_actions = np.array(total_states)[1:t_max]
                
            times_picked_unknown.append(np.array(us)[1:t_max]/frontier_actions)
            times_picked_frontier.append(np.array(fs)[1:t_max]/frontier_actions)
            times_picked_known.append(np.array(ks)[1:t_max]/frontier_actions)
        
#         line_colors = [colors_to_use[0][color], colors_to_use[1][color], colors_to_use[2][color]]
        line_styles = ["solid", "dashed", "dotted"]
        for g, cc in zip([times_picked_unknown, times_picked_frontier, times_picked_known], line_styles):
            Means = np.mean(g, axis=0)[::step]
            Stds = np.std(g, axis=0)[::step]
#             print(len([i for i in range(0, t_max, step)]), len(means[::step]))
            l = p.line([i for i in range(1, t_max, step)], Means, color=color, line_width=2, line_dash=cc)
            lines.append(l)
            
            if stds:
                xs = [i for i in range(1, t_max, step)]
                xs = xs + list(reversed(xs))

                ys = np.concatenate([Means - Stds, np.flip(Means + Stds, axis=0)])

                lls = p.patch(xs, ys, color=color, alpha=0.1)
                        
    legend_names = ["Frontier", "Unknown", "Known"]
    zipped_legend_names = [name + " " + type_state for name in legends for type_state in legend_names]
    new_leg = Legend(items=[(name, [line]) for name, line in zip(zipped_legend_names, lines)], location=(0,-30))
#     new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")

    show(p)

In [330]:
#--- ThinMazeNeg 8 ---
ThinMazeNeg8_Runs = {}
ThinMazeNeg8_uid = 0

# Xp Sizes -- [0: 15]
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg8/Xp_Sizes/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg8_uid)
for xp in [10, 25, 50 , 100]:
    for beta in [0.01, 0.001]:
        for step in [1, 10]:
            folder_name = "Thin_Maze_8_Neg_{}_stp_*_Xp_{}k_*_Beta_{}_".format(step, xp, beta)
            legend_name = "Uniform {} XP {} Beta {} Step".format(xp, beta, step)
            t_max = 500000
            direc_name = direc
            ThinMazeNeg8_Runs[ThinMazeNeg8_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg8_uid += 1
            
# Hyperparams -- [16: 31]
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg8/Hyperparams/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg8_uid)
for beta in [0.0001, 0.001, 0.01, 0.1]:
    for step in [1, 10, 100, 250]:
        folder_name = "Thin_Maze_8_Neg_{}_stp_*_Beta_{}_".format(step, beta)
        legend_name = "Uniform {} XP {} Beta {} Step".format(100, beta, step)
        t_max = 500000
        direc_name = direc
        ThinMazeNeg8_Runs[ThinMazeNeg8_uid] = [folder_name, legend_name, t_max, direc_name]
        ThinMazeNeg8_uid += 1
        
# Bandit -- [32: 33]
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg8/Bandit/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg8_uid)
for step in [10, 100]:
    folder_name = "Thin_Maze_8_Neg_{}_stp_*_Xp_100k_Bandit".format(step)
    legend_name = "0.1 Bandit Uniform {} XP {} Beta {} Step".format(100, 0.001, step)
    t_max = 400000
    direc_name = direc
    ThinMazeNeg8_Runs[ThinMazeNeg8_uid] = [folder_name, legend_name, t_max, direc_name]
    ThinMazeNeg8_uid += 1
    
# Recomputing Pseudocounts Prioritised -- [34: 46]
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg8/Recomputing_and_Prioritised/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg8_uid)
for alpha in [0.3, 0.5, 0.7]:
    for step in [1, 10]:
        for stale in [10, 100]:
            folder_name = "Thin_Maze_8_Neg_{}_stp_*_Prioritized_{}_Alpha_*_Stle_{}k_".format(step, alpha, stale)
            legend_name = "{} Alpha Prioritised {} XP {}k Stale {} Beta {} Step".format(alpha, xp, stale, 0.001, step)
            t_max = 500000
            direc_name = direc
            ThinMazeNeg8_Runs[ThinMazeNeg8_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg8_uid += 1
            
print("")

Xp_Sizes  --  0
Hyperparams  --  16
Bandit  --  32
Recomputing_and_Prioritised  --  34


In [None]:
# TODO: Make t_max automatic

In [326]:
names = []
legends = []
t_maxs = []
directories = []

env = "Thin Maze 8 Neg"

items_to_graph = ThinMazeNeg8_Runs.keys()

for key, val in [(k, ThinMazeNeg8_Runs[k]) for k in items_to_graph]:
    names += [val[0]]
    legends += [val[1] + " _ " + str(key)]
    t_maxs += [val[2]]
    directories += [val[3]]

print("\n", "{} Runs".format(len(names)))


 32 Runs


In [314]:
# Maze
names = []
legends = []
t_maxs = []

for beta in [0.1]:
    for stale in [1, 100]:
        names += ["k_CEps*_Stle_{}k_Beta_{}_".format(stale, beta)]
        legends += ["Uniform {}k Stale {} Beta".format(stale, beta)]
        t_maxs += [600000]
        
for alpha in [0.3, 0.5, 0.7]:
    for beta in [0.1]:
        for stale in [1, 100]:
            names += ["Prioritized_{}_Alpha_*_Stle_{}k_Beta_{}_".format(alpha, stale, beta)]
            legends += ["{} Alpha {}k Stale {} Beta".format(alpha, stale, beta)]
            t_maxs += [600000]
    
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg10/Recomputing_and_Prioritized/"
env = "Thin Maze 10 Neg"
directories = direc

print(names)
print(t_maxs)
print(directories)
print("\n", "{} Lines".format(len(names)))

['k_CEps*_Stle_1k_Beta_0.1_', 'k_CEps*_Stle_100k_Beta_0.1_', 'Prioritized_0.3_Alpha_*_Stle_1k_Beta_0.1_', 'Prioritized_0.3_Alpha_*_Stle_100k_Beta_0.1_', 'Prioritized_0.5_Alpha_*_Stle_1k_Beta_0.1_', 'Prioritized_0.5_Alpha_*_Stle_100k_Beta_0.1_', 'Prioritized_0.7_Alpha_*_Stle_1k_Beta_0.1_', 'Prioritized_0.7_Alpha_*_Stle_100k_Beta_0.1_']
[600000, 600000, 600000, 600000, 600000, 600000, 600000, 600000]
/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg10/Recomputing_and_Prioritized/

 8 Lines


In [243]:
if isinstance(directories, str):
    dd = directories
    directories2 = [dd for _ in names]
else:
    directories2 = directories
unloaded = False
for name, directory in zip(names, directories2):
    if not "{}_{}_{}".format(directory, name, "Current") in loaded_frontier_actions:
        unloaded=True
        print("{} frontier actions not loaded!".format(name))
#     loaded_frontier_actions.keys()
if not unloaded:
    print("All are already loaded")

_1_stp*Xp_100k_*_Beta_0.01_ frontier actions not loaded!
_1_stp*Xp_100k_*_Beta_0.001_ frontier actions not loaded!
_10_stp*Xp_100k_*_Beta_0.01_ frontier actions not loaded!
_10_stp*Xp_100k_*_Beta_0.001_ frontier actions not loaded!


In [244]:
load_frontier_stuff(names, directories)

0it [00:00, ?it/s]
  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:01<00:04,  1.44s/it][A
 50%|█████     | 2/4 [00:02<00:02,  1.43s/it][A
 75%|███████▌  | 3/4 [00:04<00:01,  1.43s/it][A
100%|██████████| 4/4 [00:05<00:00,  1.41s/it][A
1it [00:05,  5.64s/it]
  0%|          | 0/4 [00:00<?, ?it/s][A

/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg8/Xp_Sizes/



 25%|██▌       | 1/4 [00:01<00:04,  1.41s/it][A
 50%|█████     | 2/4 [00:02<00:02,  1.42s/it][A
 75%|███████▌  | 3/4 [00:04<00:01,  1.41s/it][A
100%|██████████| 4/4 [00:05<00:00,  1.41s/it][A
2it [00:11,  5.65s/it]
  0%|          | 0/4 [00:00<?, ?it/s][A

/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg8/Xp_Sizes/



 25%|██▌       | 1/4 [00:01<00:04,  1.40s/it][A
 50%|█████     | 2/4 [00:02<00:02,  1.39s/it][A
 75%|███████▌  | 3/4 [00:04<00:01,  1.43s/it][A
100%|██████████| 4/4 [00:05<00:00,  1.42s/it][A
3it [00:16,  5.66s/it]
  0%|          | 0/4 [00:00<?, ?it/s][A

/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg8/Xp_Sizes/



 25%|██▌       | 1/4 [00:01<00:04,  1.42s/it][A
 50%|█████     | 2/4 [00:02<00:02,  1.41s/it][A
 75%|███████▌  | 3/4 [00:39<00:11, 11.93s/it][A
100%|██████████| 4/4 [00:40<00:00,  8.82s/it][A
4it [00:57, 16.22s/it]

/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg8/Xp_Sizes/





In [327]:
plot_percentages(names, t_maxs, legends, directories, step=1000, title=env)

KeyError: 32

In [324]:
plot_averages_b(names, t_maxs, legends, directories=directories, step=1000, title=env, spans=100, stds=False)

spline is deprecated in scipy 0.19.0, use Bspline class instead.


In [310]:
plot_states(names, t_maxs, legends, directories=directories, step=10000, title=env, color_offset=0)

16it [00:55,  3.44s/it]


In [250]:
action_selections(names, t_maxs, legends, directories=directories, step=10000, title=env, Frontier=5, Unknown=1, start=10)

0it [00:00, ?it/s]
0it [00:00, ?it/s][A
1it [00:00,  1.63it/s][A
2it [00:01,  1.61it/s][A
3it [00:01,  1.60it/s][A
4it [00:02,  1.50it/s][A
1it [00:02,  2.85s/it]
0it [00:00, ?it/s][A
1it [00:00,  1.56it/s][A
2it [00:01,  1.54it/s][A
3it [00:01,  1.52it/s][A
4it [00:02,  1.53it/s][A
2it [00:05,  2.82s/it]
0it [00:00, ?it/s][A
1it [00:00,  1.48it/s][A
2it [00:01,  1.43it/s][A
3it [00:02,  1.42it/s][A
4it [00:02,  1.35it/s][A
3it [00:08,  2.92s/it]
0it [00:00, ?it/s][A
1it [00:00,  1.36it/s][A
2it [00:01,  1.39it/s][A
3it [00:02,  1.35it/s][A
4it [00:02,  1.40it/s][A
4it [00:11,  2.95s/it]


In [251]:
action_selections(names, t_maxs, legends, directories=directories, step=10000, title=env, Frontier=5, Unknown=1, start=10, unknown_available=True)

0it [00:00, ?it/s]
0it [00:00, ?it/s][A
1it [00:00,  1.44it/s][A
2it [00:01,  1.47it/s][A
3it [00:02,  1.42it/s][A
4it [00:02,  1.44it/s][A
1it [00:02,  2.92s/it]
0it [00:00, ?it/s][A
1it [00:00,  1.54it/s][A
2it [00:01,  1.54it/s][A
3it [00:01,  1.56it/s][A
4it [00:02,  1.55it/s][A
2it [00:05,  2.85s/it]
0it [00:00, ?it/s][A
1it [00:00,  1.45it/s][A
2it [00:01,  1.43it/s][A
3it [00:02,  1.48it/s][A
4it [00:02,  1.50it/s][A
3it [00:08,  2.84s/it]
0it [00:00, ?it/s][A
1it [00:00,  1.39it/s][A
2it [00:01,  1.43it/s][A
3it [00:02,  1.37it/s][A
4it [00:02,  1.40it/s][A
4it [00:11,  2.89s/it]


In [252]:
action_selections(names, t_maxs, legends, directories=directories, step=10000, title=env, Frontier=100, Unknown=2, start=10, unknown_available=True)

0it [00:00, ?it/s]
0it [00:00, ?it/s][A
1it [00:00,  1.08it/s][A
2it [00:01,  1.09it/s][A
3it [00:02,  1.10it/s][A
4it [00:03,  1.14it/s][A
1it [00:03,  3.64s/it]
0it [00:00, ?it/s][A
1it [00:00,  1.26it/s][A
2it [00:01,  1.24it/s][A
3it [00:02,  1.23it/s][A
4it [00:03,  1.23it/s][A
2it [00:07,  3.56s/it]
0it [00:00, ?it/s][A
1it [00:00,  1.07it/s][A
2it [00:01,  1.05it/s][A
3it [00:02,  1.08it/s][A
4it [00:03,  1.07it/s][A
3it [00:10,  3.66s/it]
0it [00:00, ?it/s][A
1it [00:00,  1.14it/s][A
2it [00:01,  1.16it/s][A
3it [00:02,  1.13it/s][A
4it [00:03,  1.13it/s][A
4it [00:14,  3.67s/it]


In [253]:
action_selections(names, t_maxs, legends, directories=directories, step=10000, title=env, Frontier=100, Unknown=2, known_states=True, start=10)

0it [00:00, ?it/s]
0it [00:00, ?it/s][A
1it [00:01,  1.17s/it][A
2it [00:02,  1.17s/it][A
3it [00:03,  1.16s/it][A
4it [00:04,  1.18s/it][A
1it [00:04,  4.86s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.17s/it][A
2it [00:02,  1.18s/it][A
3it [00:03,  1.17s/it][A
4it [00:04,  1.17s/it][A
2it [00:09,  4.85s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.19s/it][A
2it [00:02,  1.20s/it][A
3it [00:03,  1.17s/it][A
4it [00:04,  1.16s/it][A
3it [00:14,  4.85s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.16s/it][A
2it [00:02,  1.16s/it][A
3it [00:03,  1.18s/it][A
4it [00:04,  1.20s/it][A
4it [00:19,  4.88s/it]


In [254]:
action_selections(names, t_maxs, legends, directories=directories, step=10000, title=env, Frontier=25, Unknown=5, start=10, unknown_available=True)

0it [00:00, ?it/s]
0it [00:00, ?it/s][A
1it [00:00,  1.19it/s][A
2it [00:01,  1.23it/s][A
3it [00:02,  1.26it/s][A
4it [00:03,  1.27it/s][A
1it [00:03,  3.26s/it]
0it [00:00, ?it/s][A
1it [00:00,  1.43it/s][A
2it [00:01,  1.45it/s][A
3it [00:02,  1.45it/s][A
4it [00:02,  1.43it/s][A
2it [00:06,  3.15s/it]
0it [00:00, ?it/s][A
1it [00:00,  1.39it/s][A
2it [00:01,  1.37it/s][A
3it [00:02,  1.39it/s][A
4it [00:02,  1.38it/s][A
3it [00:09,  3.13s/it]
0it [00:00, ?it/s][A
1it [00:00,  1.45it/s][A
2it [00:01,  1.32it/s][A
3it [00:02,  1.28it/s][A
4it [00:03,  1.31it/s][A
4it [00:12,  3.19s/it]


In [255]:
action_selections(names, t_maxs, legends, directories=directories, step=10000, title=env, Frontier=25, Unknown=5, start=10)

0it [00:00, ?it/s]
0it [00:00, ?it/s][A
1it [00:00,  1.24it/s][A
2it [00:01,  1.30it/s][A
3it [00:02,  1.36it/s][A
4it [00:02,  1.40it/s][A
1it [00:02,  2.95s/it]
0it [00:00, ?it/s][A
1it [00:00,  1.50it/s][A
2it [00:01,  1.46it/s][A
3it [00:02,  1.45it/s][A
4it [00:02,  1.42it/s][A
2it [00:05,  2.96s/it]
0it [00:00, ?it/s][A
1it [00:00,  1.42it/s][A
2it [00:01,  1.35it/s][A
3it [00:02,  1.36it/s][A
4it [00:02,  1.40it/s][A
3it [00:08,  2.99s/it]
0it [00:00, ?it/s][A
1it [00:00,  1.42it/s][A
2it [00:01,  1.41it/s][A
3it [00:02,  1.42it/s][A
4it [00:02,  1.38it/s][A
4it [00:12,  3.01s/it]


In [256]:
#frontier_times(names, t_maxs, legends, directories=directories, step=10000, title=env, Frontier=5, Unknown=1)