In [1]:
import numpy as np
import glob
import pandas as pd
from scipy.interpolate import interp1d
from scipy.interpolate import spline
from tqdm import tqdm
from bokeh.plotting import figure, output_notebook, show
output_notebook()

In [287]:
from bokeh.models import Legend
from bokeh.models.formatters import BasicTickFormatter, NumeralTickFormatter
from bokeh.models import HoverTool, TapTool
from bokeh.palettes import magma, inferno, Set1, Set3, Paired, Dark2, Category20
from bokeh.palettes import Reds, Greens, Blues

In [346]:
def plot_averages_b(names, t_maxs, legends, directories="Logs/", step=10, title="", stds=False, spans=1000, start_offset=10):
#     plt.figure(figsize=(10,10))
    p = figure(width=1200, height=800, x_range=(0, t_maxs[0]), title=title)
    p.toolbar_location = "above"
    p.yaxis.axis_label = "Episode Training Reward"
    p.xaxis.axis_label = "T"
#     p.xaxis.formatter = BasicTickFormatter(use_scientific=False)
    p.xaxis.formatter = NumeralTickFormatter(format="0,0")
               
    # Styling
    p.xaxis.axis_label_text_font_style = "normal"
    p.yaxis.axis_label_text_font_style = "normal"
               
    p.xaxis.axis_label_text_font_size = "16pt"
    p.yaxis.axis_label_text_font_size = "16pt"
    
    p.axis.major_label_text_font_size = "14pt"
#     p.yaxis.major_tick_text_font_size = "14pt"
               
#     p = figure(width=800, height=500)
    lines = []
    alpha_lines = []
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) >= 10:
        colors = Category20[len(names)]
    else:
        colors = Set1[len(names)]
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in zip(names, colors, t_maxs, legends, directories):
#         print(directory)
        DQN_Rewards_Path = directory + "*" + name + "*/logs/Episode_Rewards.txt"
        DQN_Lengths_Path = directory + "*" + name + "*/logs/Episode_Lengths.txt"
        DQN_Rewards = []
        DQN_Lengths = []
        DQN_Lengths_C = []
        for filename in glob.glob(DQN_Rewards_Path):
            run_logs = [0]
            with open(filename, "r") as f:
#                 print(f)
                for line in f:
                    try:
                        ep_r = float(line)
                    except:
                        print("Nothing here for:", filename)
                        continue
                    run_logs.append(ep_r)
            DQN_Rewards.append(run_logs)
        for filename in glob.glob(DQN_Lengths_Path):
            run_logs = [0]
        #     print(filename)
            with open(filename, "r") as f:
                for line in f:
                    try:
                        ep_r = float(line)
                    except:
                        continue
                    run_logs.append(ep_r)
            DQN_Lengths.append(run_logs)
            run_logs_c = np.cumsum(run_logs).astype(np.int32)
            DQN_Lengths_C.append(run_logs_c)
        DQN_Smoothed = []
        for times, rewards in zip(DQN_Lengths_C, DQN_Rewards):
#             linear = interp1d(times, rewards, kind="linear")
#             linear_rewards = linear([i for i in range(500000)])
#             DQN_Smoothed.append(linear_rewards)


            spline_rewards = spline(times, rewards, [i for i in range(step*start_offset, t_max, step)], order=1)
            DQN_Smoothed.append(spline_rewards)


        
        DQN_Means = np.mean(DQN_Smoothed, axis=0)
        DQN_Means = pd.Series(DQN_Means).ewm(span=spans).mean()
#         print(DQN_Means)
#         DQN_Means = ewm_smoothed.mean()
        DQN_Stds = np.std(DQN_Smoothed, axis=0)
        DQN_Stds = pd.Series(DQN_Stds).ewm(span=spans).mean()
#         DQN_Means = np.percentile(DQN_Smoothed, 50, axis=0)
#         lq = np.percentile(DQN_Smoothed, 25, axis=0)
#         uq = np.percentile(DQN_Smoothed, 75, axis=0)
        
#         if inter_quartile:
#             xs = [i for i in range(0, t_max, step)]
#             xs = xs + list(reversed(xs))
#     #         ys = np.concatenate([DQN_Means - DQN_Stds, np.flip(DQN_Means + DQN_Stds, axis=0)])
#             ys = np.concatenate([lq, np.flip(uq, axis=0)])
#             lls = p.patch(xs, ys, color=color, alpha=0.1)
        
        if stds:
    #         plt.fill_between([i for i in range(0, t_max, step)], DQN_Means - DQN_Stds, DQN_Means + DQN_Stds, color=color, alpha=0.2, edgecolor="white")
            # Error bars
            xs = [i for i in range(step*start_offset, t_max, step)]
            xs = xs + list(reversed(xs))
            ys = np.concatenate([DQN_Means - DQN_Stds, np.flip(DQN_Means + DQN_Stds, axis=0)])
#             ys = np.concatenate([np.min(DQN_Smoothed, axis=0), np.flip(np.max(DQN_Smoothed, axis=0), axis=0)])
            lls = p.patch(xs, ys, color=color, alpha=0.1)

#         if stds:
#             for x, y, yerr in zip(range(0, t_max, step), DQN_Means, DQN_Stds):
#                 lls = p.line([x, x], [y - yerr, y + yerr], color=color, alpha=0.1, hover_alpha=0.8)
#                 alpha_lines.append(lls)

#         print(DQN_Rewards)
    #         plt.plot([i for i in range(0, t_max, step)], DQN_Means, color=color)
        l = p.line([i for i in range(step*start_offset, t_max, step)], DQN_Means, color=color, line_width=5, alpha=0.8)
        lines.append(l)
    #         alpha_lines.append(lls)

#     plt.title("Episode_Reward")
#     plt.xlabel("T")
#     plt.ylabel("Reward")
#     if legends is not None:
#         plt.legend(legends)
#     plt.show()
#     p.line([0,1], [-1,3])
    new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(-400,-550))
    new_leg.label_text_font_size = "16pt"
    p.add_layout(new_leg, "right")
#     p.add_tools(HoverTool(renderers=alpha_lines))
#     p.add_tools(TapTool(renderers=alpha_lines))
    show(p)

In [342]:
def plot_states(names, t_maxs, legends, directories="Logs/", step=10, title="", stds=True, color_offset=0):
#     plt.figure(figsize=(10,10))
    p = figure(width=1200, height=800, title=title)
    p.toolbar_location = "above"
    p.yaxis.axis_label = "States visited"
    p.xaxis.axis_label = "T"
#     p.xaxis.formatter = BasicTickFormatter(use_scientific=False)
    
    p.xaxis.formatter = NumeralTickFormatter(format="0,0")
               
    # Styling
    p.xaxis.axis_label_text_font_style = "normal"
    p.yaxis.axis_label_text_font_style = "normal"
               
    p.xaxis.axis_label_text_font_size = "16pt"
    p.yaxis.axis_label_text_font_size = "16pt"
    
    p.axis.major_label_text_font_size = "14pt"
    
#     p = figure(width=800, height=500)
    lines = []
    alpha_lines = []
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) >= 10:
        colors = Category20[len(names)]
    else:
        colors = Set1[len(names) + color_offset][color_offset:]
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in tqdm(zip(names, colors, t_maxs, legends, directories)):
#         print(directory)
        States_Path = directory + "*" + name + "*/logs/Player_Positions.txt"
        States = []
        for filename in glob.glob(States_Path):
            run_logs = []
            with open(filename, "r") as f:
#                 print(f)
                for line in f:
                    try:
                        ep_r_str = str(line)
                        ss = ep_r_str.split()
                        x = int(ss[0])
                        y = int(ss[1])
                        ep_r = (x,y)
#                         print(ep_r)
                    except:
                        print("Nothing here for:", filename)
                        continue
                    run_logs.append(ep_r)
            States.append(run_logs)
        States_Visited = []
        xs = []
        for run in States:
            Visited = []
            States_Sets = set()
            xs = []
#             print(len(run))
            for ii, s in enumerate(run):
                if ii > t_max:
                    break
#                 print(s)
                States_Sets.add(s)
                if ii % step == 0 or ii == len(run) - 1:
                    Visited.append(len(States_Sets))
                    xs.append(ii)
#             print(len(States_Sets))
#             print(len(Visited))
            States_Visited.append(Visited)
#         print(np.array(States_Visited).shape)
        Means = np.mean(States_Visited, axis=0)
        Stds = np.std(States_Visited, axis=0)
#         Means = np.percentile(States_Visited, 50, axis=0)
#         lq = np.percentile(States_Visited, 25, axis=0)
#         uq = np.percentile(States_Visited, 75, axis=0)
#         print(Means.shape)
        
#         plt.fill_between([i for i in range(0, t_max, step)], DQN_Means - DQN_Stds, DQN_Means + DQN_Stds, color=color, alpha=0.2, edgecolor="white")
        # Error bars
#         xs = [i for i in range(0, t_max, step)]
        xs = xs + list(reversed(xs))
#         ys = np.concatenate([np.min(States_Visited, axis=0), np.flip(np.max(States_Visited, axis=0), axis=0)])
    
        if stds:
            ys = np.concatenate([Means - Stds, np.flip(Means + Stds, axis=0)])
#         else:
#             ys = np.concatenate([lq, np.flip(uq, axis=0)])
#         print(len(xs), len(ys))
        lls = p.patch(xs, ys, color=color, alpha=0.1)

        
#         for x, y, yerr in zip(range(0, t_max, step), DQN_Means, DQN_Stds):
#             lls = p.line([x, x], [y - yerr, y + yerr], color=color, alpha=0.1, hover_alpha=0.8)
#             alpha_lines.append(lls)
            

#         plt.plot([i for i in range(0, t_max, step)], DQN_Means, color=color)
        l = p.line([i * step for i in range(len(Means))], Means, color=color, line_width=5)
        lines.append(l)
#         alpha_lines.append(lls)

#     plt.title("Episode_Reward")
#     plt.xlabel("T")
#     plt.ylabel("Reward")
#     if legends is not None:
#         plt.legend(legends)
#     plt.show()
#     p.line([0,1], [-1,3])
    new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(-200,-700))
    new_leg.label_text_font_size = "16pt"
    p.add_layout(new_leg, "right")
#     p.add_tools(HoverTool(renderers=alpha_lines))
#     p.add_tools(TapTool(renderers=alpha_lines))
    show(p)

In [5]:
def plot_percentages(names, t_maxs, legends, directories="Logs/", step=10, title="", min_max=True, eps_average=3):
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) >= 10:
        colors = Category20[len(names)]
    else:
        colors = Set1[len(names)]
    p = figure(width=1200, height=800, title=title)
    p.toolbar_location = "above"
    p.yaxis.axis_label = "Episode Eval Reward"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)

    lines = []
    alpha_lines = []
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in tqdm(zip(names, colors, t_maxs, legends, directories)):

        Eval_Rewards_Path = directory + "*" + name + "*/logs/Eval_Rewards*.txt"
        Eval_Rewards = []

        for filename in glob.glob(Eval_Rewards_Path):
            run_logs = []
            with open(filename, "r") as f:
                for line in f:
                    try:
                        ep_r = float(line)
                    except:
                        print("Nothing here for:", filename)
                        continue
                    run_logs.append(ep_r)
            Eval_Rewards.append(run_logs)
            
#         print(Eval_Rewards)
        
        Win_Percentages = []
        for i in range(100):
            rr = 0
            for jj, reward in enumerate(Eval_Rewards):
                if len(reward) < 99:
                    if i == 0:
                        print(name)
                        print("Length of rewards is {} for #{}".format(len(reward), jj))
                        print(glob.glob(Eval_Rewards_Path)[jj])
                    continue
                rr += reward[i]
            
            if len(Eval_Rewards) == 0:
                print(name)
                print("Length of rewards is {} for #{}".format(len(reward), jj))
                print(glob.glob(Eval_Rewards_Path)[jj])
            Win_Percentages.append(rr / len(Eval_Rewards))

#         print(Win_Percentages)
#         Win_Percentages = []
#         for times, rewards in zip(DQN_Lengths_C, DQN_Rewards):
#             # Linear interpolation
#             spline_rewards = spline(times, rewards, [i for i in range(0, t_max, step)], order=1)
#             DQN_Smoothed.append(spline_rewards)
            
            
#         DQN_Means = np.mean(DQN_Smoothed, axis=0)
        
#         Win_Percentages = []
# #         print(len(DQN_Smoothed), len(DQN_Smoothed[0]))
#         np_smoothed = np.array(DQN_Rewards)
#         print(np_smoothed)
# #         print(np_smoothed.shape)
#         for tt in range(len(DQN_Means)):
# #             print(DQN_Smoothed)
#             rewards = np_smoothed[:,tt]
#             percentage = len([1 for r in rewards if r > 0])/(len(rewards) * 1.0)
#             Win_Percentages.append(percentage)

#         print(len(DQN_Means), len(Win_Percentages))
#         DQN_Stds = np.std(DQN_Smoothed, axis=0)

#         xs = [i for i in range(0, t_max, step)]
#         xs = xs + list(reversed(xs))
#         # Show error bars between min and max runs
#         if min_max:
#             ys = np.concatenate([np.min(DQN_Smoothed, axis=0), np.flip(np.max(DQN_Smoothed, axis=0), axis=0)])
#         # +- 1 Std
#         else:
#             ys = np.concatenate([DQN_Means - DQN_Stds, np.flip(DQN_Means + DQN_Stds, axis=0)])
#         lls = p.patch(xs, ys, color=color, alpha=0.1)
        l = p.line([i + 1 for i in range(100)], Win_Percentages, color=color, line_width=2)
#         l = p.line([i for i in range(0, t_max, step)], DQN_Means, color=color, line_width=2)
        lines.append(l)
    #         alpha_lines.append(lls)

#     plt.title("Episode_Reward")
#     plt.xlabel("T")
#     plt.ylabel("Reward")
#     if legends is not None:
#         plt.legend(legends)
#     plt.show()
#     p.line([0,1], [-1,3])
    new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")
#     p.add_tools(HoverTool(renderers=alpha_lines))
#     p.add_tools(TapTool(renderers=alpha_lines))
    show(p)

In [6]:
loaded_frontier_actions = {}

In [7]:
def load_frontier_stuff(names, directories):
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, directory in tqdm(zip(names, directories)):

        Action_Counts_Path = directory + "*" + name + "*/logs/Action_Counts.txt"
        
        Current_State_Counts = []
        Available_Action_Counts = []
        Chosen_Action_Counts = []

        for filename in tqdm(glob.glob(Action_Counts_Path)):
        
            currents = []
            availables = []
            chosen = []
        
            with open(filename, "r") as f:
                for line in f:
                    if line != "":
                        nums = line.split()
                        if nums == []:
                            continue
                        nums = [int(v) for v in nums]
                        currents.append(nums[0])
                        availables.append(nums[1:-1])
                        chosen.append(nums[-1])
            Current_State_Counts.append(currents)
            Available_Action_Counts.append(availables)
            Chosen_Action_Counts.append(chosen)
        print(directory)
        loaded_frontier_actions["{}_{}_{}".format(directory, name, "Current")] = Current_State_Counts
        loaded_frontier_actions["{}_{}_{}".format(directory, name, "Available")] = Available_Action_Counts
        loaded_frontier_actions["{}_{}_{}".format(directory, name, "Chosen")] = Chosen_Action_Counts

In [308]:
def action_selections(names, t_maxs, legends, directories="Logs/", step=10, title="", Frontier=10, Unknown=2, stds=True, known_states=False, unknown_available=False, start=1000):
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) > 10:
        colors = Paired[len(names)]
    else:
        colors = Set1[len(names)]
#     colors = [i for i in range(len(names))]
#     colors_n = len(colors)
#     colors_to_use = [Reds[9][:colors_n], Blues[9][:colors_n], Greens[9][:colors_n]]
#     title += " Frontier_{} Unknown_{}".format(Frontier, Unknown)
    p = figure(width=1200, height=800, title=title, y_range=(0,1))
    p.toolbar_location = "above"
    p.yaxis.axis_label = "% Frontier Actions"
    if unknown_available:
        p.yaxis.axis_label = "% Frontier Actions if Unknown state available"
    if known_states:
        unknown_available = True
    if known_states:
        p.yaxis.axis_label = "% Actions if Unknown state available"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = NumeralTickFormatter(format="0,0")
               
    # Styling
    p.xaxis.axis_label_text_font_style = "normal"
    p.yaxis.axis_label_text_font_style = "normal"
               
    p.xaxis.axis_label_text_font_size = "16pt"
    p.yaxis.axis_label_text_font_size = "16pt"
    
    p.axis.major_label_text_font_size = "14pt"

    lines = []
    alpha_lines = []
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in tqdm(zip(names, colors, t_maxs, legends, directories)):

        Action_Counts_Path = directory + "*" + name + "*/logs/Action_Counts.txt"
        
        Current_State_Counts = []
        Available_Action_Counts = []
        Chosen_Action_Counts = []
        
        Current_State_Counts = loaded_frontier_actions["{}_{}_{}".format(directory, name, "Current")]
        Available_Action_Counts = loaded_frontier_actions["{}_{}_{}".format(directory, name, "Available")]
        Chosen_Action_Counts = loaded_frontier_actions["{}_{}_{}".format(directory, name, "Chosen")]

#         for filename in tqdm(glob.glob(Action_Counts_Path)):
        
#             currents = []
#             availables = []
#             chosen = []
        
#             with open(filename, "r") as f:
#                 for line in f:
#                     if line != "":
#                         nums = line.split()
#                         if nums == []:
#                             continue
#                         nums = [int(v) for v in nums]
#                         currents.append(nums[0])
#                         availables.append(nums[1:-1])
#                         chosen.append(nums[-1])
#             Current_State_Counts.append(currents)
#             Available_Action_Counts.append(availables)
#             Chosen_Action_Counts.append(chosen)
            
        
        times_picked_unknown = []
        times_picked_frontier = []
        times_picked_known = []
        for cs, aacs, cacs in tqdm(zip(Current_State_Counts, Available_Action_Counts, Chosen_Action_Counts)):
            us = [0]
            fs = [0]
            ks = [0]
            total_frontier_actions = [1]
            for index, state_count, next_counts, chosen_count in zip([i for i in range(len(cs))], cs, aacs, cacs):
                list_to_add = [0,0,0]
                unknown_true = False
                if unknown_available:
                    if (not known_states and state_count <= Frontier) or known_states:
                        # At the frontier
                        if any(k < Unknown for k in next_counts):
                            #There is an unknown state
                            unknown_true = True
                if (not unknown_available and not known_states and state_count <= Frontier) or (known_states and state_count > Frontier) or unknown_true:
                    list_to_add = [0,0,1]
                    if index != 0:
                        total_frontier_actions.append(total_frontier_actions[-1] + 1)
                    if chosen_count <= Frontier and chosen_count >= Unknown:
                        list_to_add = [0,1,0]
                    elif chosen_count < Unknown:
                        list_to_add = [1,0,0]
                else:
                    total_frontier_actions.append(total_frontier_actions[-1])
                us.append(us[-1] + list_to_add[0])
                fs.append(fs[-1] + list_to_add[1])
                ks.append(ks[-1] + list_to_add[2])
            
            frontier_actions = np.array(total_frontier_actions)[:t_max]
                
            times_picked_unknown.append(np.array(us)[:t_max]/frontier_actions)
            times_picked_frontier.append(np.array(fs)[:t_max]/frontier_actions)
            times_picked_known.append(np.array(ks)[:t_max]/frontier_actions)
        
#         line_colors = [colors_to_use[0][color], colors_to_use[1][color], colors_to_use[2][color]]
        line_styles = ["solid", "dashed"]
        for g, cc in zip([times_picked_unknown, times_picked_frontier], line_styles):
            Means = np.mean(g, axis=0)[start:][::step]
            Stds = np.std(g, axis=0)[start:][::step]
#             print(len([i for i in range(0, t_max, step)]), len(means[::step]))
            l = p.line([i for i in range(start, t_max, step)], Means, color=color, line_width=3, line_dash=cc)
            lines.append(l)
            
            if stds:
                xs = [i for i in range(start, t_max, step)]
                xs = xs + list(reversed(xs))

                ys = np.concatenate([Means - Stds, np.flip(Means + Stds, axis=0)])

                lls = p.patch(xs, ys, color=color, alpha=0.1)
                        
    legend_names = ["Unknown", "Frontier"]
    zipped_legend_names = [name + " " + type_state for name in legends for type_state in legend_names]
    new_leg = Legend(items=[(name, [line]) for name, line in zip(zipped_legend_names, lines)], location=(-400,-30))
#     new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    new_leg.label_text_font_size = "16pt"
    p.add_layout(new_leg, "right")

    show(p)

In [209]:
def frontier_times(names, t_maxs, legends, directories="Logs/", step=10, title="", Frontier=10, Unknown=2, stds=True):
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) >= 10:
        colors = Paired[len(names)]
    else:
        colors = Set1[len(names)]
#     colors = [i for i in range(len(names))]
#     colors_n = len(colors)
#     colors_to_use = [Reds[9][:colors_n], Blues[9][:colors_n], Greens[9][:colors_n]]
    p = figure(width=1200, height=800, title=title, y_range=(0,1))
    p.toolbar_location = "above"
    p.yaxis.axis_label = "% Frontier States"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)

    lines = []
    alpha_lines = []
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in tqdm(zip(names, colors, t_maxs, legends, directories)):

        Action_Counts_Path = directory + "*" + name + "*/logs/Action_Counts.txt"
        
        Current_State_Counts = []
        Available_Action_Counts = []
        Chosen_Action_Counts = []
        
        Current_State_Counts = loaded_frontier_actions["{}_{}_{}".format(directory, name, "Current")]
        Available_Action_Counts = loaded_frontier_actions["{}_{}_{}".format(directory, name, "Available")]
        Chosen_Action_Counts = loaded_frontier_actions["{}_{}_{}".format(directory, name, "Chosen")]

#         for filename in tqdm(glob.glob(Action_Counts_Path)):
        
#             currents = []
#             availables = []
#             chosen = []
        
#             with open(filename, "r") as f:
#                 for line in f:
#                     if line != "":
#                         nums = line.split()
#                         if nums == []:
#                             continue
#                         nums = [int(v) for v in nums]
#                         currents.append(nums[0])
#                         availables.append(nums[1:-1])
#                         chosen.append(nums[-1])
#             Current_State_Counts.append(currents)
#             Available_Action_Counts.append(availables)
#             Chosen_Action_Counts.append(chosen)
            
        
        times_picked_unknown = []
        times_picked_frontier = []
        times_picked_known = []
        for cs, aacs, cacs in tqdm(zip(Current_State_Counts, Available_Action_Counts, Chosen_Action_Counts)):
            us = [0]
            fs = [0]
            ks = [0]
            total_states = [1]
            for index, state_count, next_counts, chosen_count in zip([i for i in range(len(cs))], cs, aacs, cacs):
                if state_count >= Unknown and state_count <= Frontier:
                    list_to_add = [0,1,0]
                elif state_count < Unknown:
                    list_to_add = [1, 0, 0]
                else:
                    list_to_add = [0, 0, 1]
                if index == 0:
                    total_states.append(1)
                else:
                    total_states.append(total_states[-1] + 1)
                us.append(us[-1] + list_to_add[0])
                fs.append(fs[-1] + list_to_add[1])
                ks.append(ks[-1] + list_to_add[2])
            
            frontier_actions = np.array(total_states)[1:t_max]
                
            times_picked_unknown.append(np.array(us)[1:t_max]/frontier_actions)
            times_picked_frontier.append(np.array(fs)[1:t_max]/frontier_actions)
            times_picked_known.append(np.array(ks)[1:t_max]/frontier_actions)
        
#         line_colors = [colors_to_use[0][color], colors_to_use[1][color], colors_to_use[2][color]]
        line_styles = ["solid", "dashed", "dotted"]
        for g, cc in zip([times_picked_unknown, times_picked_frontier, times_picked_known], line_styles):
            Means = np.mean(g, axis=0)[::step]
            Stds = np.std(g, axis=0)[::step]
#             print(len([i for i in range(0, t_max, step)]), len(means[::step]))
            l = p.line([i for i in range(1, t_max, step)], Means, color=color, line_width=2, line_dash=cc)
            lines.append(l)
            
            if stds:
                xs = [i for i in range(1, t_max, step)]
                xs = xs + list(reversed(xs))

                ys = np.concatenate([Means - Stds, np.flip(Means + Stds, axis=0)])

                lls = p.patch(xs, ys, color=color, alpha=0.1)
                        
    legend_names = ["Frontier", "Unknown", "Known"]
    zipped_legend_names = [name + " " + type_state for name in legends for type_state in legend_names]
    new_leg = Legend(items=[(name, [line]) for name, line in zip(zipped_legend_names, lines)], location=(0,-30))
#     new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")

    show(p)

In [116]:
def num_times_visited(names, t_maxs, legends, directories="Logs/", step=10, title="", Frontier=10, Unknown=2, stds=True, start=10, more=100):
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) > 10:
        colors = Paired[len(names)]
    else:
        colors = Set1[len(names)]
#     colors = [i for i in range(len(names))]
#     colors_n = len(colors)
#     colors_to_use = [Reds[9][:colors_n], Blues[9][:colors_n], Greens[9][:colors_n]]
#     title += " Frontier_{} Unknown_{}".format(Frontier, Unknown)
    p = figure(width=1200, height=800, title=title)
    p.toolbar_location = "above"
    p.yaxis.axis_label = "# States Visited >= {} times".format(more)
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)

    lines = []
    alpha_lines = []
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in (zip(names, colors, t_maxs, legends, directories)):
        
#         States_Path = directory + "*" + name + "*/logs/Player_Positions.txt"
#         States = []
#         for filename in glob.glob(States_Path):
#             run_logs = []
#             with open(filename, "r") as f:
# #                 print(f)
#                 for line in f:
#                     try:
#                         ep_r_str = str(line)
#                         ss = ep_r_str.split()
#                         x = int(ss[0])
#                         y = int(ss[1])
#                         ep_r = (x,y)
# #                         print(ep_r)
#                     except:
#                         print("Nothing here for:", filename)
#                         continue
#                     run_logs.append(ep_r)
#             States.append(run_logs)

        Action_Counts_Path = directory + "*" + name + "*/logs/Action_Counts.txt"
        
        Current_State_Counts = []
        Available_Action_Counts = []
        Chosen_Action_Counts = []
        
        Current_State_Counts = loaded_frontier_actions["{}_{}_{}".format(directory, name, "Current")]
        Available_Action_Counts = loaded_frontier_actions["{}_{}_{}".format(directory, name, "Available")]
        Chosen_Action_Counts = loaded_frontier_actions["{}_{}_{}".format(directory, name, "Chosen")]

#         for filename in tqdm(glob.glob(Action_Counts_Path)):
        
#             currents = []
#             availables = []
#             chosen = []
        
#             with open(filename, "r") as f:
#                 for line in f:
#                     if line != "":
#                         nums = line.split()
#                         if nums == []:
#                             continue
#                         nums = [int(v) for v in nums]
#                         currents.append(nums[0])
#                         availables.append(nums[1:-1])
#                         chosen.append(nums[-1])
#             Current_State_Counts.append(currents)
#             Available_Action_Counts.append(availables)
#             Chosen_Action_Counts.append(chosen)
            
        
#         more = 10
        num_states_visited_more = []
#         num = 0
        for cs in Current_State_Counts:
#             states_visited_more = set()
            num = 0
            num_visited_more = []
            for state_count in cs:
                if state_count > more:
                    num += 1
#                     states_visited_more.add(state_position)
                num_visited_more.append(num)
            num_visited_more = np.array(num_visited_more, dtype=np.uint)[:t_max]
            num_states_visited_more.append(num_visited_more)
                
#         num_states_visited_more = np.array(num_states_visited_more)
        print(name)
        print(len(num_states_visited_more), num_states_visited_more[0][:10])
        Means = np.mean(num_states_visited_more, axis=0)[start:][::step]
        Stds = np.std(num_states_visited_more, axis=0)[start:][::step]
        print(len(Means), len([i for i in range(start, t_max, step)]))
#             print(len([i for i in range(0, t_max, step)]), len(means[::step]))
        l = p.line([i for i in range(start, t_max, step)], Means, color=color, line_width=2)
        lines.append(l)

        if stds:
            xs = [i for i in range(start, t_max, step)]
            xs = xs + list(reversed(xs))

            ys = np.concatenate([Means - Stds, np.flip(Means + Stds, axis=0)])

            lls = p.patch(xs, ys, color=color, alpha=0.1)

    new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")
#     legend_names = ["Unknown", "Frontier", "Known"]
#     zipped_legend_names = [name + " " + type_state for name in legends for type_state in legend_names]
#     new_leg = Legend(items=[(name, [line]) for name, line in zip(zipped_legend_names, lines)], location=(0,-30))
# #     new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
#     p.add_layout(new_leg, "right")

    show(p)

In [11]:
#--- ThinMazeNeg 8 ---
ThinMazeNeg8_Runs = {}
ThinMazeNeg8_uid = 0

# Xp Sizes -- [0: 15]
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg8/Xp_Sizes/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg8_uid)
for xp in [10, 25, 50 , 100]:
    for beta in [0.01, 0.001]:
        for step in [1, 10]:
            folder_name = "Thin_Maze_8_Neg_{}_stp_*_Xp_{}k_*_Beta_{}_".format(step, xp, beta)
            legend_name = "Uniform {}k XP {} Beta {} Step".format(xp, beta, step)
            t_max = 500000
            direc_name = direc
            ThinMazeNeg8_Runs[ThinMazeNeg8_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg8_uid += 1
            
# Hyperparams -- [16: 31]
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg8/Hyperparams/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg8_uid)
for beta in [0.0001, 0.001, 0.01, 0.1]:
    for step in [1, 10, 100, 250]:
        folder_name = "Thin_Maze_8_Neg_{}_stp_*_Beta_{}_".format(step, beta)
        legend_name = "Uniform {}k XP {} Beta {} Step".format(100, beta, step)
        t_max = 500000
        direc_name = direc
        ThinMazeNeg8_Runs[ThinMazeNeg8_uid] = [folder_name, legend_name, t_max, direc_name]
        ThinMazeNeg8_uid += 1
        
# Bandit -- [32: 33]
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg8/Bandit/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg8_uid)
for step in [10, 100]:
    folder_name = "Thin_Maze_8_Neg_{}_stp_*_Xp_100k_Bandit".format(step)
    legend_name = "0.1 Bandit Uniform {}k XP {} Beta {} Step".format(100, 0.001, step)
    t_max = 400000
    direc_name = direc
    ThinMazeNeg8_Runs[ThinMazeNeg8_uid] = [folder_name, legend_name, t_max, direc_name]
    ThinMazeNeg8_uid += 1
    
# Recomputing Pseudocounts Prioritised -- [34: 45]
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg8/Recomputing_and_Prioritised/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg8_uid)
for alpha in [0.3, 0.5, 0.7]:
    for step in [1, 10]:
        for stale in [10, 100]:
            folder_name = "Thin_Maze_8_Neg_{}_stp_*_Prioritized_{}_Alpha_*_Stle_{}k_".format(step, alpha, stale)
            legend_name = "{} Alpha Prioritised {}k XP {}k Stale {} Beta {} Step".format(alpha, xp, stale, 0.001, step)
            t_max = 500000
            direc_name = direc
            ThinMazeNeg8_Runs[ThinMazeNeg8_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg8_uid += 1
            
print("***End*** -- ", ThinMazeNeg8_uid)

Xp_Sizes  --  0
Hyperparams  --  16
Bandit  --  32
Recomputing_and_Prioritised  --  34
***End*** --  46


In [12]:
#--- ThinMazeNeg 10 ---
ThinMazeNeg10_Runs = {}
ThinMazeNeg10_uid = 0

# Xp Sizes -- [0: 23]
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg10/Xp_Sizes/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg10_uid)
for xp in [10, 25, 50 , 100]:
    for beta in [0.01, 0.001]:
        for step in [1, 10, 100]:
            folder_name = "Thin_Maze_10_Neg_{}_stp_*_Xp_{}k_*_Beta_{}_".format(step, xp, beta)
            legend_name = "Uniform {}k XP {} Beta {} Step".format(xp, beta, step)
            t_max = 600000
            direc_name = direc
            ThinMazeNeg10_Runs[ThinMazeNeg10_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg10_uid += 1
            
# Bandit -- [24: 35]
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg10/Bandit/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg10_uid)
for scaler in [0.0001, 0.001, 0.01, 0.1, 1, 10]:
    for beta in [0.01, 0.1]:
        for step in [10]:
            folder_name = "Thin_Maze_10_Neg_{}_stp_*_Bandit_{}_*_Beta_{}_".format(step, scaler, beta)
            legend_name = "{} Bandit Uniform {}k XP {} Beta {} Step".format(scaler, 100, beta, step)
            t_max = 600000
            direc_name = direc
            ThinMazeNeg10_Runs[ThinMazeNeg10_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg10_uid += 1
            
# Prioritized and Recomputing -- [36: 51]
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg10/Recomputing_and_Prioritized/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg10_uid)
for beta in [0.01, 0.1]:
    for stale in [1, 100]:
        folder_name = "Thin_Maze_10_Neg_10_stp_*_100k_CEps_*_Stle_{}k_Beta_{}_".format(stale, beta)
        legend_name = "Uniform {}k XP {}k Stale {} Beta {} Step".format(100, stale, beta, 10)
        t_max = 600000
        direc_name = direc
        ThinMazeNeg10_Runs[ThinMazeNeg10_uid] = [folder_name, legend_name, t_max, direc_name]
        ThinMazeNeg10_uid += 1
for alpha in [0.3, 0.5, 0.7]:
    for beta in [0.01, 0.1]:
        for stale in [1, 100]:
            folder_name = "Thin_Maze_10_Neg_10_stp_*_Prioritized_{}_Alpha_*_Stle_{}k_Beta_{}_".format(alpha, stale, beta)
            legend_name = "Prioritized {} Alpha {}k XP {}k Stale {} Beta {} Step".format(alpha, 100, stale, beta, 10)
            t_max = 600000
            direc_name = direc
            ThinMazeNeg10_Runs[ThinMazeNeg10_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg10_uid += 1
            
# Xp Sizes Bigger Longer -- [52, 63]
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg10/Xp_Sizes_Bigger/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg10_uid)
for xp in [50, 100, 200, 300]:
    for beta in [0.001, 0.01, 0.1]:
        for step in [10]:
            folder_name = "Thin_Maze_10_Neg_{}_stp_*_Xp_{}k_*_Beta_{}_".format(step, xp, beta)
            legend_name = "Uniform {}k XP {} Beta {} Step".format(xp, beta, step)
            t_max = 1000000
            direc_name = direc
            ThinMazeNeg10_Runs[ThinMazeNeg10_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg10_uid += 1
            
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg10/Xp_Sizes_Bigger_part_2/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg10_uid)
for xp in [50, 100, 200, 300]:
    for beta in [0.001, 0.01, 0.1]:
        for step in [1]:
            folder_name = "Thin_Maze_10_Neg_{}_stp_*_Xp_{}k_*_Beta_{}_".format(step, xp, beta)
            legend_name = "Uniform {}k XP {} Beta {} Step".format(xp, beta, step)
            t_max = 1000000
            direc_name = direc
            ThinMazeNeg10_Runs[ThinMazeNeg10_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg10_uid += 1
            
# Bandit Bigger -- []
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg10/Bandit_Bigger/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg10_uid)
for scaler in [0.0001, 0.001, 0.01, 0.1, 1, 10]:
    for xp in [200, 300]:
        for step in [1, 10]:
            folder_name = "Thin_Maze_10_Neg_{}_stp_*_Xp_{}k_Bandit_{}_".format(step, xp, scaler)
            legend_name = "{} Bandit Uniform {}k XP {} Beta {} Step".format(scaler, xp, 0.1, step)
            t_max = 1000000
            direc_name = direc
            ThinMazeNeg10_Runs[ThinMazeNeg10_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg10_uid += 1
            
# Bandit Prioritised
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg10/Bandit_Prioritised/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg10_uid)
for alpha in [0.3, 0.5, 0.7]:
    for beta in [0.001]:
        for scaler in [0.01, 0.1]:
            folder_name = "Thin_Maze_10_Neg_1_stp_*_Prioritized_{}_Alpha_*_Bandit_{}_Scaler".format(alpha, scaler)
            legend_name = "Bandit {} Scaler Prioritized {} Alpha {}k XP {}k Stale {} Beta {} Step".format(scaler, alpha, 100, 100, 0.001, 1)
            t_max = 1000000
            direc_name = direc
            ThinMazeNeg10_Runs[ThinMazeNeg10_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg10_uid += 1
            
# Bandit 0.01 Beta -- []
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg10/Bandit_Lower_Beta/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg10_uid)
for scaler in [0.0001, 0.001, 0.01, 0.1, 1, 10]:
    for xp in [100, 200, 300]:
        for step in [1]:
            folder_name = "Thin_Maze_10_Neg_{}_stp_*_Xp_{}k_Bandit_{}_".format(step, xp, scaler)
            legend_name = "{} Bandit Uniform {}k XP {} Beta {} Step".format(scaler, xp, 0.01, step)
            t_max = 1000000
            direc_name = direc
            ThinMazeNeg10_Runs[ThinMazeNeg10_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg10_uid += 1
            
# Bandit 0.001 Beta -- []
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg10/Bandit_Lower_Lower_Beta/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg10_uid)
for scaler in [0.01, 0.1]:
    for xp in [100, 200, 300]:
        for step in [1]:
            folder_name = "Thin_Maze_10_Neg_{}_stp_*_Xp_{}k_Bandit_{}_".format(step, xp, scaler)
            legend_name = "{} Bandit Uniform {}k XP {} Beta {} Step".format(scaler, xp, 0.001, step)
            t_max = 1000000
            direc_name = direc
            ThinMazeNeg10_Runs[ThinMazeNeg10_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg10_uid += 1
            
# Smaller 2 layer DQN
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg10/Smaller_DQN_Bandit/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg10_uid)
for scaler in [0.01, 0.1, 0.001]:
    for lr in [0.001, 0.0001]:
        for beta in [0.01, 0.001]:
            folder_name = "Thin_Maze_10_Neg_*_LR_{}_*_Bandit_{}_*_Beta_{}_".format(lr, scaler, beta)
            legend_name = "{} Bandit Uniform {}k XP {} Beta {} LR".format(scaler, 300, beta, lr)
            t_max = 1000000
            direc_name = direc
            ThinMazeNeg10_Runs[ThinMazeNeg10_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg10_uid += 1
            
for lr in [0.001, 0.0001]:
    for beta in [0.01, 0.001]:
        folder_name = "Thin_Maze_10_Neg_*_LR_{}_*_CEps_*_Beta_{}_".format(lr, beta)
        legend_name = "Uniform {}k XP {} Beta {} LR".format(300, beta, lr)
        t_max = 1000000
        direc_name = direc
        ThinMazeNeg10_Runs[ThinMazeNeg10_uid] = [folder_name, legend_name, t_max, direc_name]
        ThinMazeNeg10_uid += 1
        
# 2 Replay stuff
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg10/2Replay_More/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg10_uid)
for xp in [1, 200]:
    for bonus_xp in [25, 50, 100]:
        for thr in [0.01, 0.005, 0.001]:
            folder_name = "Thin_Maze_10_Neg_*_Xp_{}k_*_{}k_2Replay_{}_Thr_".format(xp, bonus_xp, thr)
            legend_name = "{}k BonusReplay {} Thresh Uniform {}k XP {} Beta".format(bonus_xp, thr, xp, 0.001)
            t_max = 1000000
            direc_name = direc
            ThinMazeNeg10_Runs[ThinMazeNeg10_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg10_uid += 1
            
# Bandit different bonusees
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg10/Bandit_Bonus/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg10_uid)
for scaler in [0.001, 0.01, 0.1, 1]:
    for xp in [300]:
        for bandit_bonus in [0.25, 0.5, 1, 2]:
            folder_name = "Thin_Maze_10_Neg_*_{}_Bandit_{}_Scaler".format(bandit_bonus, scaler)
            legend_name = "{} Bandit {} Bonus Uniform {}k XP {} Beta".format(scaler, bandit_bonus, 300, 0.001)
            t_max = 1000000
            direc_name = direc
            ThinMazeNeg10_Runs[ThinMazeNeg10_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg10_uid += 1
            
## Model DQN with Q Values in lookahead
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg10/Model_DQNs/1_Step_Q_Vals/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg10_uid)
for model_loss in [0.25, 0.5, 0.75]:
    for lookahead in [False, True]:
        for scaler in [0.001, 0.01]:
            folder_name = "Thin_Maze_10_Neg_*_Model_{}_Loss_{}_Look_Bandit_{}_Scaler_".format(model_loss, lookahead, scaler)
            legend_name = "1 Step Q Val Look {} Model Loss {} Look {} Scaler".format(model_loss, lookahead, scaler)
            t_max = 1000000
            direc_name = direc
            ThinMazeNeg10_Runs[ThinMazeNeg10_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg10_uid += 1
            
## Model DQN with 0,1,2 lookahead, all pseudocounts along path
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg10/Model_DQNs/012_All_Counts/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg10_uid)
for model_loss in [0.25, 0.5]:
    for lookahead in [0, 1, 2]:
        for scaler in [0.001, 0.01]:
            folder_name = "Thin_Maze_10_Neg_*_Model_{}_Loss_{}_Look_0.5_Bandit_{}_Scaler_".format(model_loss, lookahead, scaler)
            legend_name = "AllCount {} Model Loss {} Depth {} Scaler".format(model_loss, lookahead, scaler)
            t_max = 1000000
            direc_name = direc
            ThinMazeNeg10_Runs[ThinMazeNeg10_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg10_uid += 1
            
print("***End*** -- ", ThinMazeNeg10_uid)

Xp_Sizes  --  0
Bandit  --  24
Recomputing_and_Prioritized  --  36
Xp_Sizes_Bigger  --  52
Xp_Sizes_Bigger_part_2  --  64
Bandit_Bigger  --  76
Bandit_Prioritised  --  100
Bandit_Lower_Beta  --  106
Bandit_Lower_Lower_Beta  --  124
Smaller_DQN_Bandit  --  130
2Replay_More  --  146
Bandit_Bonus  --  164
1_Step_Q_Vals  --  180
012_All_Counts  --  192
***End*** --  204


In [189]:
#--- ThinMazeNeg 12 ---
ThinMazeNeg12_Runs = {}
ThinMazeNeg12_uid = 0

# Xp Sizes -- [0: 26]
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg12/Xp_Sizes/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg12_uid)
for xp in [50, 100, 200]:
    for beta in [0.1, 0.01, 0.001]:
        for step in [1, 10, 100]:
            folder_name = "Thin_Maze_12_Neg_{}_stp_*_Xp_{}k_*_Beta_{}_".format(step, xp, beta)
            legend_name = "Uniform {}k XP {} Beta {} Step".format(xp, beta, step)
            t_max = 1000000
            direc_name = direc
            ThinMazeNeg12_Runs[ThinMazeNeg12_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg12_uid += 1
            
# Bandit test
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg12/Bandit_Test/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg12_uid)
for scaler in [0.001, 0.01, 0.1]:
    folder_name = "Thin_Maze_12_Neg_*_Bandit_{}_".format(scaler)
    legend_name = "{} Bandit Uniform {}k XP {} Beta {} Step".format(scaler, 300, 0.001, 1)
    t_max = 1200000
    direc_name = direc
    ThinMazeNeg12_Runs[ThinMazeNeg12_uid] = [folder_name, legend_name, t_max, direc_name]
    ThinMazeNeg12_uid += 1
    
folder_name = "Thin_Maze_12_Neg_*_CEps_"
legend_name = "Uniform {}k XP {} Beta {} Step".format(300, 0.001, 1)
t_max = 1200000
direc_name = direc
ThinMazeNeg12_Runs[ThinMazeNeg12_uid] = [folder_name, legend_name, t_max, direc_name]
ThinMazeNeg12_uid += 1

# 2 Replay stuff
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg12/2Replay/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg12_uid)
for xp in [1, 300]:
    for bonus_xp in [50, 100]:
        for thr in [0.0005, 0.001, 0.005, 0.01]:
            folder_name = "Thin_Maze_12_Neg_*_Xp_{}k_*_{}k_2Replay_{}_Thr_".format(xp, bonus_xp, thr)
            legend_name = "{}k BonusReplay {} Thresh Uniform {}k XP {} Beta".format(bonus_xp, thr, xp, 0.001)
            t_max = 1200000
            direc_name = direc
            ThinMazeNeg12_Runs[ThinMazeNeg12_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg12_uid += 1
            
# 2 Replay stuff with Bandit
direc = "/data/savitar/tabhid/Runs/Frontier/ThinMazeNeg12/2Replay_Bandit_1Batch/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg12_uid)
for xp in [1, 300]:
    for bandit in [0.001, 0.01, 0.1]:
        for thr in [0.0001, 0.001, 0.01, 0.1]:
            folder_name = "Thin_Maze_12_Neg_*_Xp_{}k_*_Bandit_{}_Scaler_{}k_2Replay_{}_Thr_".format(xp, bandit, 100, thr)
            legend_name = "{} Bandit {}k BonusReplay {} Thresh Uniform {}k XP {} Beta".format(bandit, 100, thr, xp, 0.001)
            t_max = 1200000
            direc_name = direc
            ThinMazeNeg12_Runs[ThinMazeNeg12_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg12_uid += 1
for xp in [1, 300]:
    for bonus_xp in [100]:
        for thr in [0.0005, 0.001, 0.005, 0.01]:
            folder_name = "Thin_Maze_12_Neg_*_Xp_{}k_CEps_*_{}k_2Replay_{}_Thr_".format(xp, bonus_xp, thr)
            legend_name = "{}k BonusReplay {} Thresh Uniform {}k XP {} Beta".format(bonus_xp, thr, xp, 0.001)
            t_max = 1200000
            direc_name = direc
            ThinMazeNeg12_Runs[ThinMazeNeg12_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg12_uid += 1
            
# Epsilon Schedule
direc = "/data/savitar/tabhid/Runs/Servers/Maze12_Epsilon_Schedule__2017_11_03/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg12_uid)
for anneal in ["",300, 600, 900]:
    folder_name = "Thin_Maze_12_Neg_*_Eps_1_0.05_{}k".format(anneal)
    legend_name = "Uniform {}k XP {} Beta {}k Anneal".format(300, 0.001, anneal)
    t_max = 1200000
    direc_name = direc
    ThinMazeNeg12_Runs[ThinMazeNeg12_uid] = [folder_name, legend_name, t_max, direc_name]
    ThinMazeNeg12_uid += 1
    
# 2 Replay stuff with Bandit
direc = "/data/savitar/tabhid/Runs/Servers/ThinMaze12_BonusReplayBandit_Reruns__2017_11_01/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg12_uid)
for xp in [1, 300]:
    for bandit in [0.001, 0.01]:
        for thr in [0.0005, 0.001]:
            folder_name = "Thin_Maze_12_Neg_*_Xp_{}k_*_Bandit_{}_Scaler_{}k_2Replay_{}_Thr_".format(xp, bandit, 100, thr)
            legend_name = "{} Bandit {}k BonusReplay {} Thresh Uniform {}k XP {} Beta".format(bandit, 100, thr, xp, 0.001)
            t_max = 1200000
            direc_name = direc
            ThinMazeNeg12_Runs[ThinMazeNeg12_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg12_uid += 1
for xp in [1, 300]:
    for bonus_xp in [100]:
        for thr in [0.0005, 0.001]:
            folder_name = "Thin_Maze_12_Neg_*_Xp_{}k_CEps_*_{}k_2Replay_{}_Thr_".format(xp, bonus_xp, thr)
            legend_name = "{}k BonusReplay {} Thresh Uniform {}k XP {} Beta".format(bonus_xp, thr, xp, 0.001)
            t_max = 1200000
            direc_name = direc
            ThinMazeNeg12_Runs[ThinMazeNeg12_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg12_uid += 1
            
# 2 Iters
direc = "/data/savitar/tabhid/Runs/Servers/ThinMaze12_2Iters__2017_11_01/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg12_uid)
for scaler in [0.01, 0.1]:
    folder_name = "Thin_Maze_12_Neg_*_Bandit_{}_".format(scaler)
    legend_name = "2 iters {} Bandit Uniform {}k XP {} Beta {} Step".format(scaler, 300, 0.001, 1)
    t_max = 1200000
    direc_name = direc
    ThinMazeNeg12_Runs[ThinMazeNeg12_uid] = [folder_name, legend_name, t_max, direc_name]
    ThinMazeNeg12_uid += 1
    
folder_name = "Thin_Maze_12_Neg_*_CEps_"
legend_name = "Uniform {}k XP {} Beta {} Step".format(300, 0.001, 1)
t_max = 1200000
direc_name = direc
ThinMazeNeg12_Runs[ThinMazeNeg12_uid] = [folder_name, legend_name, t_max, direc_name]
ThinMazeNeg12_uid += 1

# Epsilon Decay 
direc = "/data/savitar/tabhid/Runs/Servers/Maze12_Epsilon_Decay__2017_11_04/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg12_uid)
for decay in [0.9, 0.99, 0.999, 0.9999, 0.99999]:
    folder_name = "Thin_Maze_12_Neg_*_CEps_{}_".format(decay)
    legend_name = "Uniform {}k XP {} Beta {} Decay StateAction".format(300, 0.001, decay)
    t_max = 1200000
    direc_name = direc
    ThinMazeNeg12_Runs[ThinMazeNeg12_uid] = [folder_name, legend_name, t_max, direc_name]
    ThinMazeNeg12_uid += 1
    
# Epsilon Decay State Counts
direc = "/data/savitar/tabhid/Runs/Servers/Maze12_EpsilonDecay_StateCounts__2017_11_05/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg12_uid)
for decay in [0.9, 0.99, 0.999, 0.9999, 0.99999]:
    folder_name = "Thin_Maze_12_Neg_*_CEps_{}_".format(decay)
    legend_name = "Uniform {}k XP {} Beta {} Decay".format(300, 0.001, decay)
    t_max = 1200000
    direc_name = direc
    ThinMazeNeg12_Runs[ThinMazeNeg12_uid] = [folder_name, legend_name, t_max, direc_name]
    ThinMazeNeg12_uid += 1
    
# Xp Sizes
direc = "/data/savitar/tabhid/Runs/Servers/Maze12_XpSizes_StateCounts__2017_11_05/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg12_uid)
for xp in [50, 100, 300, 500, 700]:
        folder_name = "Thin_Maze_12_Neg_*_Xp_{}k_".format(xp)
        legend_name = "Uniform {}k XP {} Beta".format(xp, beta)
        t_max = 1200000
        direc_name = direc
        ThinMazeNeg12_Runs[ThinMazeNeg12_uid] = [folder_name, legend_name, t_max, direc_name]
        ThinMazeNeg12_uid += 1
        
# Epsilon Schedule
direc = "/data/savitar/tabhid/Runs/Servers/Maze12_EpsilonSchedule_StateCounts__2017_11_05/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg12_uid)
for anneal in ["",200, 400, 600, 800, 1000]:
    folder_name = "Thin_Maze_12_Neg_*_Eps_1_0.05_{}k".format(anneal)
    legend_name = "Uniform {}k XP {} Beta {}k Anneal".format(300, 0.001, anneal)
    t_max = 1200000
    direc_name = direc
    ThinMazeNeg12_Runs[ThinMazeNeg12_uid] = [folder_name, legend_name, t_max, direc_name]
    ThinMazeNeg12_uid += 1
    
# Epsilon Decay 2
direc = "/data/savitar/tabhid/Runs/Servers/Maze12_Epsilon_Decay__2017_11_08/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg12_uid)
for decay in [0.9, 0.99, 0.999, 0.9999, 0.99999]:
    folder_name = "Thin_Maze_12_Neg_*_CEps_{}_".format(decay)
    legend_name = "Uniform {}k XP {} Beta {} Decay".format(300, 0.001, decay)
    t_max = 1200000
    direc_name = direc
    ThinMazeNeg12_Runs[ThinMazeNeg12_uid] = [folder_name, legend_name, t_max, direc_name]
    ThinMazeNeg12_uid += 1
    
# 2 Replay stuff with Bandit v3
direc = "/data/savitar/tabhid/Runs/Servers/Maze12_Bonus_Replay_Bandits_v3__2017_11_06/"
print(direc.split(sep="/")[-2], " -- ", ThinMazeNeg12_uid)
for xp in [10, 50, 100]:
    for bandit in [0.001, 0.01]:
        for thr in [0.001, 0.005]:
            folder_name = "Thin_Maze_12_Neg_*_*_Bandit_{}_Scaler_{}k_2Replay_{}_Thr_".format(bandit, xp, thr)
            legend_name = "{} Bandit {}k BonusReplay {} Thresh Uniform {}k XP {} Beta 16 batch".format(bandit, xp, thr, 300, 0.001)
            t_max = 1200000
            direc_name = direc
            ThinMazeNeg12_Runs[ThinMazeNeg12_uid] = [folder_name, legend_name, t_max, direc_name]
            ThinMazeNeg12_uid += 1

for bonus_xp in [10, 50, 100]:
    for thr in [0.005, 0.001]:
        folder_name = "Thin_Maze_12_Neg_*_Xp_{}k_CEps_*_{}k_2Replay_{}_Thr_".format(300, bonus_xp, thr)
        legend_name = "{}k BonusReplay {} Thresh Uniform {}k XP {} Beta 16 batch".format(bonus_xp, thr, 300, 0.001)
        t_max = 1200000
        direc_name = direc
        ThinMazeNeg12_Runs[ThinMazeNeg12_uid] = [folder_name, legend_name, t_max, direc_name]
        ThinMazeNeg12_uid += 1
            
            
print("***End*** -- ", ThinMazeNeg12_uid)

Xp_Sizes  --  0
Bandit_Test  --  27
2Replay  --  31
2Replay_Bandit_1Batch  --  47
Maze12_Epsilon_Schedule__2017_11_03  --  79
ThinMaze12_BonusReplayBandit_Reruns__2017_11_01  --  83
ThinMaze12_2Iters__2017_11_01  --  95
Maze12_Epsilon_Decay__2017_11_04  --  98
Maze12_EpsilonDecay_StateCounts__2017_11_05  --  103
Maze12_XpSizes_StateCounts__2017_11_05  --  108
Maze12_EpsilonSchedule_StateCounts__2017_11_05  --  113
Maze12_Epsilon_Decay__2017_11_08  --  119
Maze12_Bonus_Replay_Bandits_v3__2017_11_06  --  124
***End*** --  142


In [60]:
#--- EmptyRoom 20 ---
Room_Runs = {}
Room_uid = 0
           
# Test
direc = "/data/savitar/tabhid/Runs/Servers/EmptyRoom20__2017_11_03/"
print(direc.split(sep="/")[-2], " -- ", Room_uid)
for scaler in [0.001, 0.01, 0.1, 1]:
    folder_name = "_Bandit_{}_Scaler".format(scaler)
    legend_name = "{} Bandit".format(scaler)
    t_max = 300000
    direc_name = direc
    Room_Runs[Room_uid] = [folder_name, legend_name, t_max, direc_name]
    Room_uid += 1

for anneal in ["", 100, 200]:
    folder_name = "300k_Count_*_Eps_1_0.05_{}k".format(anneal)
    legend_name = "{}k Anneal".format(anneal)
    t_max = 300000
    direc_name = direc
    Room_Runs[Room_uid] = [folder_name, legend_name, t_max, direc_name]
    Room_uid += 1
            
            
print("***End*** -- ", Room_uid)

EmptyRoom20__2017_11_03  --  0
***End*** --  7


In [14]:
#--- Doom ---
Doom_Runs = {}
Doom_uid = 0
           
# Bandit test
direc = "/data/savitar/tabhid/Runs/Frontier/Doom/Bandit_Test/"
print(direc.split(sep="/")[-2], " -- ", Doom_uid)
for scaler in [0.0001, 0.001, 0.01]:
    folder_name = "_Bandit_{}_Scaler".format(scaler)
    legend_name = "{} Bandit Uniform {}k XP {} Beta".format(scaler, 500, 0.001, 1)
    t_max = 4000000
    direc_name = direc
    Doom_Runs[Doom_uid] = [folder_name, legend_name, t_max, direc_name]
    Doom_uid += 1
    
folder_name = "_CEps_"
legend_name = "Uniform {}k XP {} Beta".format(500, 0.001, 1)
t_max = 4000000
direc_name = direc
Doom_Runs[Doom_uid] = [folder_name, legend_name, t_max, direc_name]
Doom_uid += 1
            
            
print("***End*** -- ", Doom_uid)

Bandit_Test  --  0
***End*** --  4


In [None]:
# TODO: Make t_max automatic

In [340]:
names = []
legends = []
t_maxs = []
directories = []

load_frontier = False

env = "ThinMazeNeg 12"
Runs_To_Use = ThinMazeNeg12_Runs

# --- ThinMazeNeg 8 Runs ---
# Xp Sizes [2, 5, 9, 14]
# Hyperparams [20, 25]
# Bandit [32]
# Prioritised Stale [35, 37, 41, 45]

# --- ThinMazeNeg 10 Runs ---
# Xp Sizes [13, 18, 19, 21]
# Bandit [24, 28, 30]
# Recomputing Unifrom [36, 37, 38, 39]
# Prioritized [40, 44, 48]
# Bigger Xp 1 step [73, 74]
# Bigger Xp 10 step [53, 56, 60, 63]
# Bandit Bigger, 0.1 Beta not good
# Bandit 0.01 Beta [114]
# Bandit 0.001 Beta, Better, [126, 129] only 300k ones


# --- ThinMazeNeg 12 Runs ---
# Xp Sizes [10, 13, 15, 19, 22, 24]
# Bandit [27, 28, 29, 30]
# 2 Replay [35, 36]
# 2 Replay bandit buggy [60,65,67,69,78]

items_to_graph = [88, 89, 90, 95, 97]

for key, val in [(k, Runs_To_Use[k]) for k in items_to_graph]:
    names += [val[0]]
    legends += [val[1] + " _ " + str(key)]
    t_maxs += [val[2]]
    directories += [val[3]]
    
# Hard coded
# legends = ["{}k".format(c) for c in [50, 100, 300, 500, 700]]
legends = ["0.001 Tau 100k FR 0.001 Thr", "0.01 Tau 100k FR 0.0005 Thr", "0.01 Tau 100k FR 0.001 Thr", "0.01 Tau 2 Iters", "0.9999 Decay 2 Iters"]

print("\n", "{} Runs".format(len(names)))


 5 Runs


In [229]:
if isinstance(directories, str):
    dd = directories
    directories2 = [dd for _ in names]
else:
    directories2 = directories
unloaded = False
for name, directory in zip(names, directories2):
    if not "{}_{}_{}".format(directory, name, "Current") in loaded_frontier_actions:
        unloaded=True
        print("{} frontier actions not loaded!".format(name))
#     loaded_frontier_actions.keys()
if not unloaded:
    print("All are already loaded")

All are already loaded


In [218]:
if load_frontier:
    load_frontier_stuff(names, directories)

0it [00:00, ?it/s]
  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:04<00:13,  4.51s/it][A
 50%|█████     | 2/4 [00:07<00:08,  4.12s/it][A
 75%|███████▌  | 3/4 [00:10<00:03,  3.85s/it][A
100%|██████████| 4/4 [00:59<00:00, 17.25s/it][A
1it [00:59, 59.47s/it]
  0%|          | 0/4 [00:00<?, ?it/s][A

/data/savitar/tabhid/Runs/Servers/Maze12_Epsilon_Decay__2017_11_08/



 25%|██▌       | 1/4 [00:03<00:09,  3.30s/it][A
 50%|█████     | 2/4 [00:06<00:06,  3.36s/it][A
 75%|███████▌  | 3/4 [00:10<00:03,  3.36s/it][A
100%|██████████| 4/4 [00:13<00:00,  3.36s/it][A
2it [01:12, 45.69s/it]
  0%|          | 0/4 [00:00<?, ?it/s][A

/data/savitar/tabhid/Runs/Servers/Maze12_Epsilon_Decay__2017_11_08/



 25%|██▌       | 1/4 [00:03<00:10,  3.48s/it][A
 50%|█████     | 2/4 [00:06<00:06,  3.45s/it][A
 75%|███████▌  | 3/4 [00:10<00:03,  3.43s/it][A
100%|██████████| 4/4 [00:13<00:00,  3.40s/it][A
3it [01:26, 36.05s/it]
  0%|          | 0/4 [00:00<?, ?it/s][A

/data/savitar/tabhid/Runs/Servers/Maze12_Epsilon_Decay__2017_11_08/



 25%|██▌       | 1/4 [00:03<00:10,  3.52s/it][A
 50%|█████     | 2/4 [00:06<00:06,  3.47s/it][A
 75%|███████▌  | 3/4 [00:16<00:05,  5.31s/it][A
100%|██████████| 4/4 [00:19<00:00,  4.73s/it][A
4it [01:46, 31.19s/it]
  0%|          | 0/4 [00:00<?, ?it/s][A

/data/savitar/tabhid/Runs/Servers/Maze12_Epsilon_Decay__2017_11_08/



 25%|██▌       | 1/4 [00:03<00:10,  3.38s/it][A
 50%|█████     | 2/4 [00:06<00:06,  3.38s/it][A
 75%|███████▌  | 3/4 [00:10<00:03,  3.37s/it][A
100%|██████████| 4/4 [00:13<00:00,  3.42s/it][A
5it [02:00, 25.94s/it]

/data/savitar/tabhid/Runs/Servers/Maze12_Epsilon_Decay__2017_11_08/





In [233]:
plot_percentages(names, t_maxs, legends, directories, step=1000, title=env)

4it [00:00, 147.42it/s]


In [347]:
plot_averages_b(names, t_maxs, legends, directories=directories, step=1000, title="", spans=100, stds=False)

spline is deprecated in scipy 0.19.0, use Bspline class instead.


In [325]:
plot_states(names, t_maxs, legends, directories=directories, step=10000, title="", color_offset=0)

4it [00:28,  7.03s/it]


In [203]:
action_selections(names, t_maxs, legends, directories=directories, step=10000, title=env, Frontier=10, Unknown=1, start=10)

0it [00:00, ?it/s]
0it [00:00, ?it/s][A
1it [00:02,  2.43s/it][A
2it [00:04,  2.23s/it][A
3it [00:06,  2.14s/it][A
4it [00:07,  2.00s/it][A
1it [00:08,  8.15s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.38s/it][A
2it [00:02,  1.39s/it][A
3it [00:04,  1.38s/it][A
4it [00:05,  1.39s/it][A
2it [00:14,  7.48s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.32s/it][A
2it [00:02,  1.34s/it][A
3it [00:04,  1.36s/it][A
4it [00:05,  1.36s/it][A
3it [00:19,  6.96s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.38s/it][A
2it [00:02,  1.38s/it][A
3it [00:04,  1.38s/it][A
4it [00:05,  1.38s/it][A
4it [00:25,  6.64s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.39s/it][A
2it [00:02,  1.39s/it][A
3it [00:04,  1.38s/it][A
4it [00:05,  1.38s/it][A
5it [00:31,  6.40s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.38s/it][A
2it [00:02,  1.38s/it][A
3it [00:04,  1.38s/it][A
4it [00:05,  1.39s/it][A
6it [00:37,  6.26s/it]


In [327]:
action_selections(names, t_maxs, legends, directories=directories, step=10000, title="", Frontier=10, Unknown=1, start=10, unknown_available=True)

0it [00:00, ?it/s]
0it [00:00, ?it/s][A
1it [00:01,  1.37s/it][A
2it [00:02,  1.38s/it][A
3it [00:04,  1.39s/it][A
4it [00:05,  1.41s/it][A
1it [00:05,  5.89s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.38s/it][A
2it [00:02,  1.38s/it][A
3it [00:04,  1.39s/it][A
4it [00:05,  1.39s/it][A
2it [00:11,  5.85s/it]


In [205]:
action_selections(names, t_maxs, legends, directories=directories, step=10000, title=env, Frontier=10, Unknown=1, start=10, unknown_available=True)

0it [00:00, ?it/s]
0it [00:00, ?it/s][A
1it [00:01,  1.49s/it][A
2it [00:02,  1.47s/it][A
3it [00:04,  1.45s/it][A
4it [00:05,  1.44s/it][A
1it [00:06,  6.09s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.40s/it][A
2it [00:02,  1.40s/it][A
3it [00:04,  1.41s/it][A
4it [00:05,  1.41s/it][A
2it [00:12,  6.04s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.40s/it][A
2it [00:02,  1.40s/it][A
3it [00:04,  1.40s/it][A
4it [00:05,  1.41s/it][A
3it [00:17,  6.02s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.51s/it][A
2it [00:03,  1.52s/it][A
3it [00:04,  1.49s/it][A
4it [00:05,  1.47s/it][A
4it [00:24,  6.10s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.43s/it][A
2it [00:02,  1.43s/it][A
3it [00:04,  1.42s/it][A
4it [00:05,  1.41s/it][A
5it [00:30,  6.05s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.40s/it][A
2it [00:02,  1.40s/it][A
3it [00:04,  1.40s/it][A
4it [00:05,  1.41s/it][A
6it [00:36,  6.03s/it]


In [206]:
action_selections(names, t_maxs, legends, directories=directories, step=10000, title=env, Frontier=20, Unknown=5, known_states=False, start=10, unknown_available=True)

0it [00:00, ?it/s]
0it [00:00, ?it/s][A
1it [00:01,  1.64s/it][A
2it [00:03,  1.73s/it][A
3it [00:05,  1.74s/it][A
4it [00:07,  1.87s/it][A
1it [00:07,  7.88s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.74s/it][A
2it [00:03,  1.66s/it][A
3it [00:04,  1.60s/it][A
4it [00:06,  1.56s/it][A
2it [00:14,  7.43s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.43s/it][A
2it [00:02,  1.42s/it][A
3it [00:04,  1.42s/it][A
4it [00:05,  1.43s/it][A
3it [00:20,  7.02s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.45s/it][A
2it [00:02,  1.46s/it][A
3it [00:04,  1.46s/it][A
4it [00:05,  1.45s/it][A
4it [00:26,  6.77s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.44s/it][A
2it [00:02,  1.46s/it][A
3it [00:04,  1.46s/it][A
4it [00:05,  1.47s/it][A
5it [00:32,  6.59s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.45s/it][A
2it [00:02,  1.45s/it][A
3it [00:04,  1.45s/it][A
4it [00:05,  1.45s/it][A
6it [00:38,  6.48s/it]


In [207]:
action_selections(names, t_maxs, legends, directories=directories, step=10000, title=env, Frontier=25, Unknown=5, start=10, unknown_available=True)

0it [00:00, ?it/s]
0it [00:00, ?it/s][A
1it [00:01,  1.70s/it][A
2it [00:03,  1.68s/it][A
3it [00:04,  1.66s/it][A
4it [00:06,  1.68s/it][A
1it [00:07,  7.04s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.51s/it][A
2it [00:02,  1.50s/it][A
3it [00:04,  1.49s/it][A
4it [00:05,  1.48s/it][A
2it [00:13,  6.81s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.46s/it][A
2it [00:02,  1.46s/it][A
3it [00:04,  1.48s/it][A
4it [00:06,  1.54s/it][A
3it [00:19,  6.68s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.47s/it][A
2it [00:02,  1.47s/it][A
3it [00:04,  1.47s/it][A
4it [00:05,  1.47s/it][A
4it [00:25,  6.55s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.46s/it][A
2it [00:02,  1.46s/it][A
3it [00:04,  1.46s/it][A
4it [00:05,  1.47s/it][A
5it [00:32,  6.45s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.49s/it][A
2it [00:02,  1.49s/it][A
3it [00:04,  1.48s/it][A
4it [00:05,  1.48s/it][A
6it [00:38,  6.38s/it]


In [208]:
action_selections(names, t_maxs, legends, directories=directories, step=1000, title=env, Frontier=25, Unknown=5, start=10)

0it [00:00, ?it/s]
0it [00:00, ?it/s][A
1it [00:01,  1.34s/it][A
2it [00:02,  1.36s/it][A
3it [00:04,  1.38s/it][A
4it [00:05,  1.39s/it][A
1it [00:05,  5.93s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.38s/it][A
2it [00:02,  1.39s/it][A
3it [00:04,  1.39s/it][A
4it [00:05,  1.40s/it][A
2it [00:11,  5.93s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.41s/it][A
2it [00:02,  1.41s/it][A
3it [00:04,  1.39s/it][A
4it [00:05,  1.37s/it][A
3it [00:17,  5.88s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.39s/it][A
2it [00:02,  1.40s/it][A
3it [00:04,  1.40s/it][A
4it [00:05,  1.41s/it][A
4it [00:23,  5.92s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.42s/it][A
2it [00:02,  1.42s/it][A
3it [00:04,  1.42s/it][A
4it [00:05,  1.43s/it][A
5it [00:29,  5.97s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.41s/it][A
2it [00:02,  1.42s/it][A
3it [00:04,  1.42s/it][A
4it [00:05,  1.43s/it][A
6it [00:35,  5.99s/it]


In [142]:
frontier_times(names, [100 * 1000 for _ in names], legends, directories=directories, step=100, title=env, Frontier=25, Unknown=5)

0it [00:00, ?it/s]
0it [00:00, ?it/s][A
1it [00:01,  1.59s/it][A
2it [00:03,  1.60s/it][A
3it [00:04,  1.60s/it][A
4it [00:06,  1.61s/it][A
1it [00:06,  6.63s/it]
0it [00:00, ?it/s][A
1it [00:01,  1.63s/it][A
2it [00:03,  1.64s/it][A
3it [00:04,  1.64s/it][A
4it [00:06,  1.64s/it][A
2it [00:13,  6.68s/it]


In [143]:
num_times_visited(names, t_maxs, legends, directories=directories, step=10000, title=env, more=100)

Thin_Maze_12_Neg_*_Bandit_0.01_
4 [ 1  2  3  4  5  6  7  8  9 10]
120 120
Thin_Maze_12_Neg_*_CEps_
4 [1 1 2 3 4 4 4 5 6 7]
120 120
