In [1]:
import numpy as np
import glob
import pandas as pd
from scipy.interpolate import interp1d
from scipy.interpolate import spline
from tqdm import tqdm
from bokeh.plotting import figure, output_notebook, show
output_notebook()

In [133]:
from bokeh.models import Legend
from bokeh.models.formatters import BasicTickFormatter
from bokeh.models import HoverTool, TapTool
from bokeh.palettes import magma, inferno, Set1, Set3, Paired, Dark2
from bokeh.palettes import Reds, Greens, Blues

In [3]:
def plot_averages_b(names, t_maxs, legends, directories="Logs/", step=10, title="", stds=False, spans=1000):
#     plt.figure(figsize=(10,10))
    p = figure(width=1200, height=800, y_range=(-1, 101), x_range=(0, t_maxs[0]), title=title)
    p.toolbar_location = "above"
    p.yaxis.axis_label = "% Agents Reaching the Goal"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)
               
    # Styling
    p.xaxis.axis_label_text_font_style = "normal"
    p.yaxis.axis_label_text_font_style = "normal"
               
    p.xaxis.axis_label_text_font_size = "14pt"
    p.yaxis.axis_label_text_font_size = "14pt"
               
#     p = figure(width=800, height=500)
    lines = []
    alpha_lines = []
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) > 10:
        colors = Paired[len(names)]
    else:
        colors = Set1[len(names)]
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in zip(names, colors, t_maxs, legends, directories):
#         print(directory)
        DQN_Rewards_Path = directory + "*" + name + "*/logs/Episode_Rewards.txt"
        DQN_Lengths_Path = directory + "*" + name + "*/logs/Episode_Lengths.txt"
        DQN_Rewards = []
        DQN_Lengths = []
        DQN_Lengths_C = []
        for filename in glob.glob(DQN_Rewards_Path):
            run_logs = [0]
            with open(filename, "r") as f:
#                 print(f)
                for line in f:
                    try:
                        ep_r = float(line) * 100.0
                    except:
                        print("Nothing here for:", filename)
                        continue
                    run_logs.append(ep_r)
            DQN_Rewards.append(run_logs)
        for filename in glob.glob(DQN_Lengths_Path):
            run_logs = [0]
        #     print(filename)
            with open(filename, "r") as f:
                for line in f:
                    try:
                        ep_r = float(line)
                    except:
                        continue
                    run_logs.append(ep_r)
            DQN_Lengths.append(run_logs)
            run_logs_c = np.cumsum(run_logs).astype(np.int32)
            DQN_Lengths_C.append(run_logs_c)
        DQN_Smoothed = []
        for times, rewards in zip(DQN_Lengths_C, DQN_Rewards):
#             linear = interp1d(times, rewards, kind="linear")
#             linear_rewards = linear([i for i in range(500000)])
#             DQN_Smoothed.append(linear_rewards)


            spline_rewards = spline(times, rewards, [i for i in range(0, t_max, step)], order=1)
            DQN_Smoothed.append(spline_rewards)


        
        DQN_Means = np.mean(DQN_Smoothed, axis=0)
        DQN_Means = pd.Series(DQN_Means).ewm(span=spans).mean()
#         print(DQN_Means)
#         DQN_Means = ewm_smoothed.mean()
        DQN_Stds = np.std(DQN_Smoothed, axis=0)
        DQN_Stds = pd.Series(DQN_Stds).ewm(span=spans).mean()
#         DQN_Means = np.percentile(DQN_Smoothed, 50, axis=0)
#         lq = np.percentile(DQN_Smoothed, 25, axis=0)
#         uq = np.percentile(DQN_Smoothed, 75, axis=0)
        
#         if inter_quartile:
#             xs = [i for i in range(0, t_max, step)]
#             xs = xs + list(reversed(xs))
#     #         ys = np.concatenate([DQN_Means - DQN_Stds, np.flip(DQN_Means + DQN_Stds, axis=0)])
#             ys = np.concatenate([lq, np.flip(uq, axis=0)])
#             lls = p.patch(xs, ys, color=color, alpha=0.1)
        
        if stds:
    #         plt.fill_between([i for i in range(0, t_max, step)], DQN_Means - DQN_Stds, DQN_Means + DQN_Stds, color=color, alpha=0.2, edgecolor="white")
            # Error bars
            xs = [i for i in range(0, t_max, step)]
            xs = xs + list(reversed(xs))
            ys = np.concatenate([DQN_Means - DQN_Stds, np.flip(DQN_Means + DQN_Stds, axis=0)])
#             ys = np.concatenate([np.min(DQN_Smoothed, axis=0), np.flip(np.max(DQN_Smoothed, axis=0), axis=0)])
            lls = p.patch(xs, ys, color=color, alpha=0.1)

#         if stds:
#             for x, y, yerr in zip(range(0, t_max, step), DQN_Means, DQN_Stds):
#                 lls = p.line([x, x], [y - yerr, y + yerr], color=color, alpha=0.1, hover_alpha=0.8)
#                 alpha_lines.append(lls)

#         print(DQN_Rewards)
    #         plt.plot([i for i in range(0, t_max, step)], DQN_Means, color=color)
        l = p.line([i for i in range(0, t_max, step)], DQN_Means, color=color, line_width=3, alpha=0.8)
        lines.append(l)
    #         alpha_lines.append(lls)

#     plt.title("Episode_Reward")
#     plt.xlabel("T")
#     plt.ylabel("Reward")
#     if legends is not None:
#         plt.legend(legends)
#     plt.show()
#     p.line([0,1], [-1,3])
    new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")
#     p.add_tools(HoverTool(renderers=alpha_lines))
#     p.add_tools(TapTool(renderers=alpha_lines))
    show(p)

In [361]:
def plot_states(names, t_maxs, legends, directories="Logs/", step=10, title="", stds=True, color_offset=0):
#     plt.figure(figsize=(10,10))
    p = figure(width=1200, height=800, title=title)
    p.toolbar_location = "above"
    p.yaxis.axis_label = "States visited"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)
#     p = figure(width=800, height=500)
    lines = []
    alpha_lines = []
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) > 10:
        colors = Paired[len(names)]
    else:
        colors = Set1[len(names) + color_offset][color_offset:]
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in zip(names, colors, t_maxs, legends, directories):
#         print(directory)
        States_Path = directory + "*" + name + "*/logs/Player_Positions.txt"
        States = []
        for filename in glob.glob(States_Path):
            run_logs = []
            with open(filename, "r") as f:
#                 print(f)
                for line in f:
                    try:
                        ep_r_str = str(line)
                        ss = ep_r_str.split()
                        x = int(ss[0])
                        y = int(ss[1])
                        ep_r = (x,y)
#                         print(ep_r)
                    except:
                        print("Nothing here for:", filename)
                        continue
                    run_logs.append(ep_r)
            States.append(run_logs)
        States_Visited = []
        xs = []
        for run in States:
            Visited = []
            States_Sets = set()
            xs = []
#             print(len(run))
            for ii, s in enumerate(run):
                if ii > t_max:
                    break
#                 print(s)
                States_Sets.add(s)
                if ii % step == 0 or ii == len(run) - 1:
                    Visited.append(len(States_Sets))
                    xs.append(ii)
#             print(len(States_Sets))
#             print(len(Visited))
            States_Visited.append(Visited)
#         print(np.array(States_Visited).shape)
        Means = np.mean(States_Visited, axis=0)
        Stds = np.std(States_Visited, axis=0)
#         Means = np.percentile(States_Visited, 50, axis=0)
#         lq = np.percentile(States_Visited, 25, axis=0)
#         uq = np.percentile(States_Visited, 75, axis=0)
#         print(Means.shape)
        
#         plt.fill_between([i for i in range(0, t_max, step)], DQN_Means - DQN_Stds, DQN_Means + DQN_Stds, color=color, alpha=0.2, edgecolor="white")
        # Error bars
#         xs = [i for i in range(0, t_max, step)]
        xs = xs + list(reversed(xs))
#         ys = np.concatenate([np.min(States_Visited, axis=0), np.flip(np.max(States_Visited, axis=0), axis=0)])
    
        if stds:
            ys = np.concatenate([Means - Stds, np.flip(Means + Stds, axis=0)])
#         else:
#             ys = np.concatenate([lq, np.flip(uq, axis=0)])
#         print(len(xs), len(ys))
        lls = p.patch(xs, ys, color=color, alpha=0.1)

        
#         for x, y, yerr in zip(range(0, t_max, step), DQN_Means, DQN_Stds):
#             lls = p.line([x, x], [y - yerr, y + yerr], color=color, alpha=0.1, hover_alpha=0.8)
#             alpha_lines.append(lls)
            

#         plt.plot([i for i in range(0, t_max, step)], DQN_Means, color=color)
        l = p.line([i * step for i in range(len(Means))], Means, color=color, line_width=2)
        lines.append(l)
#         alpha_lines.append(lls)

#     plt.title("Episode_Reward")
#     plt.xlabel("T")
#     plt.ylabel("Reward")
#     if legends is not None:
#         plt.legend(legends)
#     plt.show()
#     p.line([0,1], [-1,3])
    new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")
#     p.add_tools(HoverTool(renderers=alpha_lines))
#     p.add_tools(TapTool(renderers=alpha_lines))
    show(p)

In [154]:
def plot_percentages(names, t_maxs, legends, directories="Logs/", step=10, title="", min_max=True, eps_average=3):
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) > 10:
        colors = Paired[len(names)]
    else:
        colors = Set1[len(names)]
    p = figure(width=1200, height=800, y_range=(-0.01, 1.01), title=title)
    p.toolbar_location = "above"
    p.yaxis.axis_label = "% Successful Agents"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)

    lines = []
    alpha_lines = []
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in tqdm(zip(names, colors, t_maxs, legends, directories)):

        Eval_Rewards_Path = directory + "*" + name + "*/logs/Eval_Rewards*.txt"
        Eval_Rewards = []

        for filename in glob.glob(Eval_Rewards_Path):
            run_logs = [0]
            with open(filename, "r") as f:
                for line in f:
                    try:
                        ep_r = float(line)
                    except:
                        print("Nothing here for:", filename)
                        continue
                    run_logs.append(ep_r)
            Eval_Rewards.append(run_logs)
            
#         print(Eval_Rewards)
        
        Win_Percentages = []
        for i in range(100):
            rr = 0
            for reward in Eval_Rewards:
                rr += reward[i]
            Win_Percentages.append(rr / len(Eval_Rewards))

#         Win_Percentages = []
#         for times, rewards in zip(DQN_Lengths_C, DQN_Rewards):
#             # Linear interpolation
#             spline_rewards = spline(times, rewards, [i for i in range(0, t_max, step)], order=1)
#             DQN_Smoothed.append(spline_rewards)
            
            
#         DQN_Means = np.mean(DQN_Smoothed, axis=0)
        
#         Win_Percentages = []
# #         print(len(DQN_Smoothed), len(DQN_Smoothed[0]))
#         np_smoothed = np.array(DQN_Rewards)
#         print(np_smoothed)
# #         print(np_smoothed.shape)
#         for tt in range(len(DQN_Means)):
# #             print(DQN_Smoothed)
#             rewards = np_smoothed[:,tt]
#             percentage = len([1 for r in rewards if r > 0])/(len(rewards) * 1.0)
#             Win_Percentages.append(percentage)

#         print(len(DQN_Means), len(Win_Percentages))
#         DQN_Stds = np.std(DQN_Smoothed, axis=0)

#         xs = [i for i in range(0, t_max, step)]
#         xs = xs + list(reversed(xs))
#         # Show error bars between min and max runs
#         if min_max:
#             ys = np.concatenate([np.min(DQN_Smoothed, axis=0), np.flip(np.max(DQN_Smoothed, axis=0), axis=0)])
#         # +- 1 Std
#         else:
#             ys = np.concatenate([DQN_Means - DQN_Stds, np.flip(DQN_Means + DQN_Stds, axis=0)])
#         lls = p.patch(xs, ys, color=color, alpha=0.1)
        l = p.line([i + 1 for i in range(100)], Win_Percentages, color=color, line_width=2)
#         l = p.line([i for i in range(0, t_max, step)], DQN_Means, color=color, line_width=2)
        lines.append(l)
    #         alpha_lines.append(lls)

#     plt.title("Episode_Reward")
#     plt.xlabel("T")
#     plt.ylabel("Reward")
#     if legends is not None:
#         plt.legend(legends)
#     plt.show()
#     p.line([0,1], [-1,3])
    new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")
#     p.add_tools(HoverTool(renderers=alpha_lines))
#     p.add_tools(TapTool(renderers=alpha_lines))
    show(p)

In [313]:
def action_selections(names, t_maxs, legends, directories="Logs/", step=10, title="", Frontier=10, Unknown=2, stds=True, known_states=False, unknown_available=False, start=1000):
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) > 10:
        colors = Paired[len(names)]
    else:
        colors = Set1[len(names)]
#     colors = [i for i in range(len(names))]
#     colors_n = len(colors)
#     colors_to_use = [Reds[9][:colors_n], Blues[9][:colors_n], Greens[9][:colors_n]]
    p = figure(width=1200, height=800, title=title, y_range=(0,1))
    p.toolbar_location = "above"
    p.yaxis.axis_label = "% Frontier Actions"
    if known_states:
        p.yaxis.axis_label = "% Known Actions"
    if unknown_available:
        p.yaxis.axis_label = "% Frontier Actions if Unknown state available"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)

    lines = []
    alpha_lines = []
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in tqdm(zip(names, colors, t_maxs, legends, directories)):

        Action_Counts_Path = directory + "*" + name + "*/logs/Action_Counts.txt"
        
        Current_State_Counts = []
        Available_Action_Counts = []
        Chosen_Action_Counts = []

        for filename in tqdm(glob.glob(Action_Counts_Path)):
        
            currents = []
            availables = []
            chosen = []
        
            with open(filename, "r") as f:
                for line in f:
                    if line != "":
                        nums = line.split()
                        if nums == []:
                            continue
                        nums = [int(v) for v in nums]
                        currents.append(nums[0])
                        availables.append(nums[1:-1])
                        chosen.append(nums[-1])
            Current_State_Counts.append(currents)
            Available_Action_Counts.append(availables)
            Chosen_Action_Counts.append(chosen)
            
        
        times_picked_unknown = []
        times_picked_frontier = []
        times_picked_known = []
        for cs, aacs, cacs in zip(Current_State_Counts, Available_Action_Counts, Chosen_Action_Counts):
            us = [0]
            fs = [0]
            ks = [0]
            total_frontier_actions = [1]
            for index, state_count, next_counts, chosen_count in zip([i for i in range(len(cs))], cs, aacs, cacs):
                list_to_add = [0,0,0]
                unknown_true = False
                if unknown_available:
                    if state_count <= Frontier:
                        # At the frontier
                        if any(k < Unknown for k in next_counts):
                            #There is an unknown state
                            unknown_true = True
                if (not unknown_available and not known_states and state_count <= Frontier) or (known_states and state_count > Frontier) or unknown_true:
                    list_to_add = [0,0,1]
                    if index != 0:
                        total_frontier_actions.append(total_frontier_actions[-1] + 1)
                    if chosen_count <= Frontier and chosen_count >= Unknown:
                        list_to_add = [0,1,0]
                    elif chosen_count < Unknown:
                        list_to_add = [1,0,0]
                else:
                    total_frontier_actions.append(total_frontier_actions[-1])
                us.append(us[-1] + list_to_add[0])
                fs.append(fs[-1] + list_to_add[1])
                ks.append(ks[-1] + list_to_add[2])
            
            frontier_actions = np.array(total_frontier_actions)[:t_max]
                
            times_picked_unknown.append(np.array(us)[:t_max]/frontier_actions)
            times_picked_frontier.append(np.array(fs)[:t_max]/frontier_actions)
            times_picked_known.append(np.array(ks)[:t_max]/frontier_actions)
        
#         line_colors = [colors_to_use[0][color], colors_to_use[1][color], colors_to_use[2][color]]
        line_styles = ["solid", "dashed", "dotted"]
        for g, cc in zip([times_picked_unknown, times_picked_frontier, times_picked_known], line_styles):
            Means = np.mean(g, axis=0)[start:][::step]
            Stds = np.std(g, axis=0)[start:][::step]
#             print(len([i for i in range(0, t_max, step)]), len(means[::step]))
            l = p.line([i for i in range(start, t_max, step)], Means, color=color, line_width=2, line_dash=cc)
            lines.append(l)
            
            if stds:
                xs = [i for i in range(start, t_max, step)]
                xs = xs + list(reversed(xs))

                ys = np.concatenate([Means - Stds, np.flip(Means + Stds, axis=0)])

                lls = p.patch(xs, ys, color=color, alpha=0.1)
                        
    legend_names = ["Unknown", "Frontier", "Known"]
    zipped_legend_names = [name + " " + type_state for name in legends for type_state in legend_names]
    new_leg = Legend(items=[(name, [line]) for name, line in zip(zipped_legend_names, lines)], location=(0,-30))
#     new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")

    show(p)

In [None]:
def action_selections_state(names, t_maxs, legends, directories="Logs/", step=10, title="", Frontier=10, Unknown=2, stds=True, known_states=False, unknown_available=False, start=1000):
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) > 10:
        colors = Paired[len(names)]
    else:
        colors = Set1[len(names)]
#     colors = [i for i in range(len(names))]
#     colors_n = len(colors)
#     colors_to_use = [Reds[9][:colors_n], Blues[9][:colors_n], Greens[9][:colors_n]]
    p = figure(width=1200, height=800, title=title, y_range=(0,1))
    p.toolbar_location = "above"
    p.yaxis.axis_label = "% Frontier Actions"
    if known_states:
        p.yaxis.axis_label = "% Known Actions"
    if unknown_available:
        p.yaxis.axis_label = "% Frontier Actions if Unknown state available"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)

    lines = []
    alpha_lines = []
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in tqdm(zip(names, colors, t_maxs, legends, directories)):

        Action_Counts_Path = directory + "*" + name + "*/logs/Action_Counts.txt"
        
        Current_State_Counts = []
        Available_Action_Counts = []
        Chosen_Action_Counts = []

        for filename in tqdm(glob.glob(Action_Counts_Path)):
        
            currents = []
            availables = []
            chosen = []
        
            with open(filename, "r") as f:
                for line in f:
                    if line != "":
                        nums = line.split()
                        if nums == []:
                            continue
                        nums = [int(v) for v in nums]
                        currents.append(nums[0])
                        availables.append(nums[1:-1])
                        chosen.append(nums[-1])
            Current_State_Counts.append(currents)
            Available_Action_Counts.append(availables)
            Chosen_Action_Counts.append(chosen)
            
        
        times_picked_unknown = []
        times_picked_frontier = []
        times_picked_known = []
        for cs, aacs, cacs in zip(Current_State_Counts, Available_Action_Counts, Chosen_Action_Counts):
            us = [0]
            fs = [0]
            ks = [0]
            total_frontier_actions = [1]
            for index, state_count, next_counts, chosen_count in zip([i for i in range(len(cs))], cs, aacs, cacs):
                list_to_add = [0,0,0]
                unknown_true = False
                if unknown_available:
                    if state_count <= Frontier:
                        # At the frontier
                        if any(k < Unknown for k in next_counts):
                            #There is an unknown state
                            unknown_true = True
                if (not unknown_available and not known_states and state_count <= Frontier) or (known_states and state_count > Frontier) or unknown_true:
                    list_to_add = [0,0,1]
                    if index != 0:
                        total_frontier_actions.append(total_frontier_actions[-1] + 1)
                    if chosen_count <= Frontier and chosen_count >= Unknown:
                        list_to_add = [0,1,0]
                    elif chosen_count < Unknown:
                        list_to_add = [1,0,0]
                else:
                    total_frontier_actions.append(total_frontier_actions[-1])
                us.append(us[-1] + list_to_add[0])
                fs.append(fs[-1] + list_to_add[1])
                ks.append(ks[-1] + list_to_add[2])
            
            frontier_actions = np.array(total_frontier_actions)[:t_max]
                
            times_picked_unknown.append(np.array(us)[:t_max]/frontier_actions)
            times_picked_frontier.append(np.array(fs)[:t_max]/frontier_actions)
            times_picked_known.append(np.array(ks)[:t_max]/frontier_actions)
        
#         line_colors = [colors_to_use[0][color], colors_to_use[1][color], colors_to_use[2][color]]
        line_styles = ["solid", "dashed", "dotted"]
        for g, cc in zip([times_picked_unknown, times_picked_frontier, times_picked_known], line_styles):
            Means = np.mean(g, axis=0)[start:][::step]
            Stds = np.std(g, axis=0)[start:][::step]
#             print(len([i for i in range(0, t_max, step)]), len(means[::step]))
            l = p.line([i for i in range(start, t_max, step)], Means, color=color, line_width=2, line_dash=cc)
            lines.append(l)
            
            if stds:
                xs = [i for i in range(start, t_max, step)]
                xs = xs + list(reversed(xs))

                ys = np.concatenate([Means - Stds, np.flip(Means + Stds, axis=0)])

                lls = p.patch(xs, ys, color=color, alpha=0.1)
                        
    legend_names = ["Unknown", "Frontier", "Known"]
    zipped_legend_names = [name + " " + type_state for name in legends for type_state in legend_names]
    new_leg = Legend(items=[(name, [line]) for name, line in zip(zipped_legend_names, lines)], location=(0,-30))
#     new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")

    show(p)

In [216]:
def frontier_times(names, t_maxs, legends, directories="Logs/", step=10, title="", Frontier=10, Unknown=2, stds=True):
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) > 10:
        colors = Paired[len(names)]
    else:
        colors = Set1[len(names)]
#     colors = [i for i in range(len(names))]
#     colors_n = len(colors)
#     colors_to_use = [Reds[9][:colors_n], Blues[9][:colors_n], Greens[9][:colors_n]]
    p = figure(width=1200, height=800, title=title, y_range=(0,1))
    p.toolbar_location = "above"
    p.yaxis.axis_label = "% Frontier States"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)

    lines = []
    alpha_lines = []
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in tqdm(zip(names, colors, t_maxs, legends, directories)):

        Action_Counts_Path = directory + "*" + name + "*/logs/Action_Counts.txt"
        
        Current_State_Counts = []
        Available_Action_Counts = []
        Chosen_Action_Counts = []

        for filename in tqdm(glob.glob(Action_Counts_Path)):
        
            currents = []
            availables = []
            chosen = []
        
            with open(filename, "r") as f:
                for line in f:
                    if line != "":
                        nums = line.split()
                        if nums == []:
                            continue
                        nums = [int(v) for v in nums]
                        currents.append(nums[0])
                        availables.append(nums[1:-1])
                        chosen.append(nums[-1])
            Current_State_Counts.append(currents)
            Available_Action_Counts.append(availables)
            Chosen_Action_Counts.append(chosen)
            
        
        times_picked_unknown = []
        times_picked_frontier = []
        times_picked_known = []
        for cs, aacs, cacs in zip(Current_State_Counts, Available_Action_Counts, Chosen_Action_Counts):
            us = [0]
            fs = [0]
            ks = [0]
            total_states = [1]
            for index, state_count, next_counts, chosen_count in zip([i for i in range(len(cs))], cs, aacs, cacs):
                if state_count >= Unknown and state_count <= Frontier:
                    list_to_add = [0,1,0]
                elif state_count < Unknown:
                    list_to_add = [1, 0, 0]
                else:
                    list_to_add = [0, 0, 1]
                if index == 0:
                    total_states.append(1)
                else:
                    total_states.append(total_states[-1] + 1)
                us.append(us[-1] + list_to_add[0])
                fs.append(fs[-1] + list_to_add[1])
                ks.append(ks[-1] + list_to_add[2])
            
            frontier_actions = np.array(total_states)[1:t_max]
                
            times_picked_unknown.append(np.array(us)[1:t_max]/frontier_actions)
            times_picked_frontier.append(np.array(fs)[1:t_max]/frontier_actions)
            times_picked_known.append(np.array(ks)[1:t_max]/frontier_actions)
        
#         line_colors = [colors_to_use[0][color], colors_to_use[1][color], colors_to_use[2][color]]
        line_styles = ["solid", "dashed", "dotted"]
        for g, cc in zip([times_picked_unknown, times_picked_frontier, times_picked_known], line_styles):
            Means = np.mean(g, axis=0)[::step]
            Stds = np.std(g, axis=0)[::step]
#             print(len([i for i in range(0, t_max, step)]), len(means[::step]))
            l = p.line([i for i in range(1, t_max, step)], Means, color=color, line_width=2, line_dash=cc)
            lines.append(l)
            
            if stds:
                xs = [i for i in range(1, t_max, step)]
                xs = xs + list(reversed(xs))

                ys = np.concatenate([Means - Stds, np.flip(Means + Stds, axis=0)])

                lls = p.patch(xs, ys, color=color, alpha=0.1)
                        
    legend_names = ["Frontier", "Unknown", "Known"]
    zipped_legend_names = [name + " " + type_state for name in legends for type_state in legend_names]
    new_leg = Legend(items=[(name, [line]) for name, line in zip(zipped_legend_names, lines)], location=(0,-30))
#     new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")

    show(p)

In [347]:
# Maze
names = []
legends = []
t_maxs = []
      
# for step in [10, 100]:
#     for gamma in [0.99, 0.9999]:
#         names += ["_{}_stp_*_Gamma_{}_".format(step, gamma)]
#         legends += ["{} Step {} Gamma".format(step, gamma)]
#         t_maxs += [600001]

# for bandit in [0.0001, 0.001, 0.01, 0.1]:
#     names += ["Bandit_{}_".format(bandit)]
#     legends += ["{} Tau UCB".format(bandit)]
#     t_maxs += [800001]
    
# for bandit in [0.001, 0.01, 0.1]:
#     names += ["Bandit_{}_".format(bandit)]
#     legends += ["{} Tau UCB".format(bandit)]
#     t_maxs += [800001]
    
#     names += ["OptimisticAction_{}_Scaler".format(bandit)]
#     legends += ["{} Tau".format(bandit)]
#     t_maxs += [800001]

tt = 500000
for alpha in [0.3, 0.4, 0.5, 0.6, 0.7]:
    names += ["Prioritized_{}_*_Bandit_".format(alpha)]
    legends += ["{} Alpha Bandit".format(alpha)]
    t_maxs += [tt]
    
    names += ["Prioritized_{}_*_CEps_".format(alpha)]
    legends += ["{} Alpha".format(alpha)]
    t_maxs += [tt]
    
names += ["Big_Bandit_".format(alpha)]
legends += ["Bandit".format(alpha)]
t_maxs += [tt]

names += ["Big_CEps_".format(alpha)]
legends += ["CountEps".format(alpha)]
t_maxs += [tt]

# direc = "/home/tabz/tmp/logfiles/MedMaze12_Uniform_400_500/"
# direc = ["MedMaze12_Prioritised/" for _ in range(3)] + ["MedMaze12_Uniform/"]
# direc = ["/home/scratch/tabhid/Log_Files/" + d for d in direc]
# direc = "/home/scratch/tabhid/Log_Files/MedMaze14_Uniform_Test/"
direc = "/data/savitar/tabhid/Old_Logs/FrontierFocused/ThinMaze6/Test/"
env = "Thin Maze 6"
directories = direc
# directories = ["/data/savitar/tabhid/Old_Logs/FrontierFocused/ThinMaze14/UCB_Bandit/" for _ in range(4)]
# directories += ["/data/savitar/tabhid/Old_Logs/FrontierFocused/ThinMaze14/Epsilon_Schedule_FrontierActions/" for _ in range(3)]
    
print(names)
print(t_maxs)
print(directories)
print("\n", "{} Lines".format(len(names)))

['Prioritized_0.3_*_Bandit_', 'Prioritized_0.3_*_CEps_', 'Prioritized_0.4_*_Bandit_', 'Prioritized_0.4_*_CEps_', 'Prioritized_0.5_*_Bandit_', 'Prioritized_0.5_*_CEps_', 'Prioritized_0.6_*_Bandit_', 'Prioritized_0.6_*_CEps_', 'Prioritized_0.7_*_Bandit_', 'Prioritized_0.7_*_CEps_', 'Big_Bandit_', 'Big_CEps_']
[500000, 500000, 500000, 500000, 500000, 500000, 500000, 500000, 500000, 500000, 500000, 500000]
/data/savitar/tabhid/Old_Logs/FrontierFocused/ThinMaze6/Test/

 12 Lines


In [171]:
# Doom Stuff
names = []
legends = []
t_maxs = []
        
# names += ["_XpSize_500k_BonusClip"]
# legends += ["Uniform"]
# t_maxs += [5000001]

# for mpseudo in [0.1, 1]:          
#     names += ["_{}_MinusPseudo".format(mpseudo)]
#     legends += ["{} MinusPseudo".format(mpseudo)]
#     t_maxs += [3000001]
    
for bandit in [0.0001, 0]:
    names += ["OptimisticAction_{}_".format(bandit)]
    legends += ["{} Tau".format(bandit)]
    t_maxs += [3000001]
    
# names += ["500k_BonusClip"]
# legends += ["No Bandit"]
# t_maxs += [3000001]

# direc = "/home/tabz/tmp/logfiles/Doom_Uniform_1mil/"
# direc = "/home/scratch/tabhid/Log_Files/Doom_0.4_Alpha_Logs/"
direc = "/data/savitar/tabhid/Old_Logs/FrontierFocused/DoomMaze/Test/"
env = "Doom Maze"
directories = direc
# directories = [direc for _ in range(6)] + ["/data/savitar/tabhid/Old_Logs/FrontierFocused/Doom/Doom_Fixed_Ending_Uniform_Nscaling/"]
    
print(names)
print(t_maxs)
print(directories)
print("\n", "{} Lines".format(len(names)))

['OptimisticAction_0.0001_', 'OptimisticAction_0_']
[3000001, 3000001]
/data/savitar/tabhid/Old_Logs/FrontierFocused/DoomMaze/Test/

 2 Lines


In [348]:
plot_percentages(names, t_maxs, legends, directories, step=1000, title=env)

12it [00:00, 99.07it/s]


In [349]:
plot_averages_b(names, t_maxs, legends, directories=directories, step=1000, title=env, spans=200, stds=False)

spline is deprecated in scipy 0.19.0, use Bspline class instead.


In [362]:
plot_states(names, t_maxs, legends, directories=directories, step=10000, title=env, color_offset=0)

In [351]:
action_selections(names, t_maxs, legends, directories=directories, step=10000, title=env, Frontier=5, Unknown=1, start=10)

0it [00:00, ?it/s]
  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:01<00:05,  1.84s/it][A
 50%|█████     | 2/4 [00:03<00:03,  1.93s/it][A
 75%|███████▌  | 3/4 [00:05<00:01,  1.95s/it][A
100%|██████████| 4/4 [00:07<00:00,  1.93s/it][A
1it [00:10, 10.66s/it]
  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:02<00:06,  2.25s/it][A
 50%|█████     | 2/4 [00:04<00:04,  2.15s/it][A
 75%|███████▌  | 3/4 [00:06<00:02,  2.07s/it][A
100%|██████████| 4/4 [00:08<00:00,  2.06s/it][A
2it [00:22, 10.87s/it]
  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:02<00:07,  2.43s/it][A
 50%|█████     | 2/4 [00:04<00:04,  2.33s/it][A
 75%|███████▌  | 3/4 [00:06<00:02,  2.21s/it][A
100%|██████████| 4/4 [00:08<00:00,  2.22s/it][A
3it [00:34, 11.26s/it]
  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:02<00:07,  2.61s/it][A
 50%|█████     | 2/4 [00:04<00:04,  2.50s/it][A
 75%|███████▌  | 3/4 [00:07<00:02,  2.44s/it][A
100%|████████

In [352]:
action_selections(names, t_maxs, legends, directories=directories, step=10000, title=env, Frontier=5, Unknown=1, start=10, unknown_available=True)

0it [00:00, ?it/s]
  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:02<00:06,  2.21s/it][A
 50%|█████     | 2/4 [00:05<00:04,  2.40s/it][A
 75%|███████▌  | 3/4 [00:08<00:02,  2.68s/it][A
100%|██████████| 4/4 [00:10<00:00,  2.48s/it][A
1it [00:13, 13.61s/it]
  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:01<00:04,  1.48s/it][A
 50%|█████     | 2/4 [00:03<00:03,  1.69s/it][A
 75%|███████▌  | 3/4 [00:05<00:01,  1.88s/it][A
100%|██████████| 4/4 [00:07<00:00,  1.91s/it][A
2it [00:24, 12.77s/it]
  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:02<00:06,  2.09s/it][A
 50%|█████     | 2/4 [00:03<00:03,  2.00s/it][A
 75%|███████▌  | 3/4 [00:05<00:01,  1.98s/it][A
100%|██████████| 4/4 [00:07<00:00,  2.02s/it][A
3it [00:35, 12.19s/it]
  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:01<00:05,  1.75s/it][A
 50%|█████     | 2/4 [00:03<00:03,  1.86s/it][A
 75%|███████▌  | 3/4 [00:05<00:01,  1.91s/it][A
100%|████████

In [353]:
action_selections(names, t_maxs, legends, directories=directories, step=10000, title=env, Frontier=5, Unknown=1, known_states=True, start=10)

0it [00:00, ?it/s]
  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:02<00:06,  2.09s/it][A
 50%|█████     | 2/4 [00:04<00:04,  2.09s/it][A
 75%|███████▌  | 3/4 [00:06<00:02,  2.13s/it][A
100%|██████████| 4/4 [00:08<00:00,  2.05s/it][A
1it [00:11, 11.89s/it]
  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:02<00:06,  2.19s/it][A
 50%|█████     | 2/4 [00:04<00:04,  2.12s/it][A
 75%|███████▌  | 3/4 [00:06<00:02,  2.09s/it][A
100%|██████████| 4/4 [00:08<00:00,  2.04s/it][A
2it [00:23, 11.81s/it]
  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:01<00:05,  1.80s/it][A
 50%|█████     | 2/4 [00:03<00:03,  1.89s/it][A
 75%|███████▌  | 3/4 [00:06<00:01,  1.96s/it][A
100%|██████████| 4/4 [00:08<00:00,  2.22s/it][A
3it [00:36, 12.15s/it]
  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:01<00:05,  1.72s/it][A
 50%|█████     | 2/4 [00:03<00:03,  1.76s/it][A
 75%|███████▌  | 3/4 [00:06<00:02,  2.01s/it][A
100%|████████

KeyboardInterrupt: 

In [None]:
frontier_times(names, t_maxs, legends, directories=directories, step=10000, title=env, Frontier=5, Unknown=1)