In [1]:
import numpy as np
import glob
import pandas as pd
from scipy.interpolate import interp1d
from scipy.interpolate import spline
from tqdm import tqdm
from bokeh.plotting import figure, output_notebook, show
output_notebook()

In [2]:
from bokeh.models import Legend
from bokeh.models.formatters import BasicTickFormatter
from bokeh.models import HoverTool, TapTool
from bokeh.palettes import magma, inferno, Set1, Set3, Paired, Dark2

In [3]:
def plot_averages_b(names, t_maxs, legends, directories="Logs/", step=10, title="", stds=False, spans=1000):
#     plt.figure(figsize=(10,10))
    p = figure(width=1200, height=800, y_range=(-1, 101), x_range=(0, t_maxs[0]), title=title)
    p.toolbar_location = "above"
    p.yaxis.axis_label = "% Agents Reaching the Goal"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)
               
    # Styling
    p.xaxis.axis_label_text_font_style = "normal"
    p.yaxis.axis_label_text_font_style = "normal"
               
    p.xaxis.axis_label_text_font_size = "14pt"
    p.yaxis.axis_label_text_font_size = "14pt"
               
#     p = figure(width=800, height=500)
    lines = []
    alpha_lines = []
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) > 10:
        colors = Paired[len(names)]
    else:
        colors = Set1[len(names)]
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in zip(names, colors, t_maxs, legends, directories):
#         print(directory)
        DQN_Rewards_Path = directory + "*" + name + "*/logs/Episode_Rewards.txt"
        DQN_Lengths_Path = directory + "*" + name + "*/logs/Episode_Lengths.txt"
        DQN_Rewards = []
        DQN_Lengths = []
        DQN_Lengths_C = []
        for filename in glob.glob(DQN_Rewards_Path):
            run_logs = [0]
            with open(filename, "r") as f:
#                 print(f)
                for line in f:
                    try:
                        ep_r = float(line) * 100.0
                    except:
                        print("Nothing here for:", filename)
                        continue
                    run_logs.append(ep_r)
            DQN_Rewards.append(run_logs)
        for filename in glob.glob(DQN_Lengths_Path):
            run_logs = [0]
        #     print(filename)
            with open(filename, "r") as f:
                for line in f:
                    try:
                        ep_r = float(line)
                    except:
                        continue
                    run_logs.append(ep_r)
            DQN_Lengths.append(run_logs)
            run_logs_c = np.cumsum(run_logs).astype(np.int32)
            DQN_Lengths_C.append(run_logs_c)
        DQN_Smoothed = []
        for times, rewards in zip(DQN_Lengths_C, DQN_Rewards):
#             linear = interp1d(times, rewards, kind="linear")
#             linear_rewards = linear([i for i in range(500000)])
#             DQN_Smoothed.append(linear_rewards)


            spline_rewards = spline(times, rewards, [i for i in range(0, t_max, step)], order=1)
            DQN_Smoothed.append(spline_rewards)


        
        DQN_Means = np.mean(DQN_Smoothed, axis=0)
        DQN_Means = pd.Series(DQN_Means).ewm(span=spans).mean()
#         print(DQN_Means)
#         DQN_Means = ewm_smoothed.mean()
        DQN_Stds = np.std(DQN_Smoothed, axis=0)
        DQN_Stds = pd.Series(DQN_Stds).ewm(span=spans).mean()
#         DQN_Means = np.percentile(DQN_Smoothed, 50, axis=0)
#         lq = np.percentile(DQN_Smoothed, 25, axis=0)
#         uq = np.percentile(DQN_Smoothed, 75, axis=0)
        
#         if inter_quartile:
#             xs = [i for i in range(0, t_max, step)]
#             xs = xs + list(reversed(xs))
#     #         ys = np.concatenate([DQN_Means - DQN_Stds, np.flip(DQN_Means + DQN_Stds, axis=0)])
#             ys = np.concatenate([lq, np.flip(uq, axis=0)])
#             lls = p.patch(xs, ys, color=color, alpha=0.1)
        
        if stds:
    #         plt.fill_between([i for i in range(0, t_max, step)], DQN_Means - DQN_Stds, DQN_Means + DQN_Stds, color=color, alpha=0.2, edgecolor="white")
            # Error bars
            xs = [i for i in range(0, t_max, step)]
            xs = xs + list(reversed(xs))
            ys = np.concatenate([DQN_Means - DQN_Stds, np.flip(DQN_Means + DQN_Stds, axis=0)])
#             ys = np.concatenate([np.min(DQN_Smoothed, axis=0), np.flip(np.max(DQN_Smoothed, axis=0), axis=0)])
            lls = p.patch(xs, ys, color=color, alpha=0.1)

#         if stds:
#             for x, y, yerr in zip(range(0, t_max, step), DQN_Means, DQN_Stds):
#                 lls = p.line([x, x], [y - yerr, y + yerr], color=color, alpha=0.1, hover_alpha=0.8)
#                 alpha_lines.append(lls)

#         print(DQN_Rewards)
    #         plt.plot([i for i in range(0, t_max, step)], DQN_Means, color=color)
        l = p.line([i for i in range(0, t_max, step)], DQN_Means, color=color, line_width=3, alpha=0.8)
        lines.append(l)
    #         alpha_lines.append(lls)

#     plt.title("Episode_Reward")
#     plt.xlabel("T")
#     plt.ylabel("Reward")
#     if legends is not None:
#         plt.legend(legends)
#     plt.show()
#     p.line([0,1], [-1,3])
    new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")
#     p.add_tools(HoverTool(renderers=alpha_lines))
#     p.add_tools(TapTool(renderers=alpha_lines))
    show(p)

In [43]:
def plot_states(names, t_maxs, legends, directories="Logs/", step=10, title="", stds=True, color_offset=0):
#     plt.figure(figsize=(10,10))
    p = figure(width=1200, height=800, title=title)
    p.toolbar_location = "above"
    p.yaxis.axis_label = "States visited"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)
#     p = figure(width=800, height=500)
    lines = []
    alpha_lines = []
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) > 10:
        colors = Paired[len(names)]
    else:
        colors = Set1[len(names) + color_offset][color_offset:]
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in zip(names, colors, t_maxs, legends, directories):
#         print(directory)
        States_Path = directory + "*" + name + "*/logs/Player_Positions.txt"
        States = []
        for filename in glob.glob(States_Path):
            run_logs = []
            with open(filename, "r") as f:
#                 print(f)
                for line in f:
                    try:
                        ep_r = str(line)
#                         print(ep_r)
                    except:
                        print("Nothing here for:", filename)
                        continue
                    run_logs.append(ep_r)
            States.append(run_logs)
        States_Visited = []
        xs = []
        for run in States:
            Visited = []
            States_Sets = set()
            xs = []
#             print(len(run))
            for ii, s in enumerate(run):
                if ii > t_max:
                    break
#                 print(s)
                States_Sets.add(s)
                if ii % step == 0 or ii == len(run) - 1:
                    Visited.append(len(States_Sets))
                    xs.append(ii)
#             print(len(States_Sets))
#             print(len(Visited))
            States_Visited.append(Visited)
#         print(np.array(States_Visited).shape)
        Means = np.mean(States_Visited, axis=0)
        Stds = np.std(States_Visited, axis=0)
#         Means = np.percentile(States_Visited, 50, axis=0)
#         lq = np.percentile(States_Visited, 25, axis=0)
#         uq = np.percentile(States_Visited, 75, axis=0)
#         print(Means.shape)
        
#         plt.fill_between([i for i in range(0, t_max, step)], DQN_Means - DQN_Stds, DQN_Means + DQN_Stds, color=color, alpha=0.2, edgecolor="white")
        # Error bars
#         xs = [i for i in range(0, t_max, step)]
        xs = xs + list(reversed(xs))
#         ys = np.concatenate([np.min(States_Visited, axis=0), np.flip(np.max(States_Visited, axis=0), axis=0)])
    
        if stds:
            ys = np.concatenate([Means - Stds, np.flip(Means + Stds, axis=0)])
#         else:
#             ys = np.concatenate([lq, np.flip(uq, axis=0)])
#         print(len(xs), len(ys))
        lls = p.patch(xs, ys, color=color, alpha=0.1)

        
#         for x, y, yerr in zip(range(0, t_max, step), DQN_Means, DQN_Stds):
#             lls = p.line([x, x], [y - yerr, y + yerr], color=color, alpha=0.1, hover_alpha=0.8)
#             alpha_lines.append(lls)
            

#         plt.plot([i for i in range(0, t_max, step)], DQN_Means, color=color)
        l = p.line([i for i in range(0, t_max, step)], Means, color=color, line_width=2)
        lines.append(l)
#         alpha_lines.append(lls)

#     plt.title("Episode_Reward")
#     plt.xlabel("T")
#     plt.ylabel("Reward")
#     if legends is not None:
#         plt.legend(legends)
#     plt.show()
#     p.line([0,1], [-1,3])
    new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")
#     p.add_tools(HoverTool(renderers=alpha_lines))
#     p.add_tools(TapTool(renderers=alpha_lines))
    show(p)

In [5]:
def plot_percentages(names, t_maxs, legends, directories="Logs/", step=10, title="", min_max=True, eps_average=3):
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) > 10:
        colors = Paired[len(names)]
    else:
        colors = Set1[len(names)]
    p = figure(width=1200, height=800, y_range=(-0.01, 1.01), title=title)
    p.toolbar_location = "above"
    p.yaxis.axis_label = "% Successful Agents"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)

    lines = []
    alpha_lines = []
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in tqdm(zip(names, colors, t_maxs, legends, directories)):

        Eval_Rewards_Path = directory + "*" + name + "*/logs/Eval_Rewards*.txt"
        Eval_Rewards = []

        for filename in glob.glob(Eval_Rewards_Path):
            run_logs = [0]
            with open(filename, "r") as f:
                for line in f:
                    try:
                        ep_r = float(line)
                    except:
                        print("Nothing here for:", filename)
                        continue
                    run_logs.append(ep_r)
            Eval_Rewards.append(run_logs)
            
#         print(Eval_Rewards)
        
        Win_Percentages = []
        for i in range(100):
            rr = 0
            for reward in Eval_Rewards:
                rr += reward[i]
            Win_Percentages.append(rr / len(Eval_Rewards))

#         Win_Percentages = []
#         for times, rewards in zip(DQN_Lengths_C, DQN_Rewards):
#             # Linear interpolation
#             spline_rewards = spline(times, rewards, [i for i in range(0, t_max, step)], order=1)
#             DQN_Smoothed.append(spline_rewards)
            
            
#         DQN_Means = np.mean(DQN_Smoothed, axis=0)
        
#         Win_Percentages = []
# #         print(len(DQN_Smoothed), len(DQN_Smoothed[0]))
#         np_smoothed = np.array(DQN_Rewards)
#         print(np_smoothed)
# #         print(np_smoothed.shape)
#         for tt in range(len(DQN_Means)):
# #             print(DQN_Smoothed)
#             rewards = np_smoothed[:,tt]
#             percentage = len([1 for r in rewards if r > 0])/(len(rewards) * 1.0)
#             Win_Percentages.append(percentage)

#         print(len(DQN_Means), len(Win_Percentages))
#         DQN_Stds = np.std(DQN_Smoothed, axis=0)

#         xs = [i for i in range(0, t_max, step)]
#         xs = xs + list(reversed(xs))
#         # Show error bars between min and max runs
#         if min_max:
#             ys = np.concatenate([np.min(DQN_Smoothed, axis=0), np.flip(np.max(DQN_Smoothed, axis=0), axis=0)])
#         # +- 1 Std
#         else:
#             ys = np.concatenate([DQN_Means - DQN_Stds, np.flip(DQN_Means + DQN_Stds, axis=0)])
#         lls = p.patch(xs, ys, color=color, alpha=0.1)
        l = p.line([i + 1 for i in range(100)], Win_Percentages, color=color, line_width=2)
#         l = p.line([i for i in range(0, t_max, step)], DQN_Means, color=color, line_width=2)
        lines.append(l)
    #         alpha_lines.append(lls)

#     plt.title("Episode_Reward")
#     plt.xlabel("T")
#     plt.ylabel("Reward")
#     if legends is not None:
#         plt.legend(legends)
#     plt.show()
#     p.line([0,1], [-1,3])
    new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")
#     p.add_tools(HoverTool(renderers=alpha_lines))
#     p.add_tools(TapTool(renderers=alpha_lines))
    show(p)

In [101]:
def action_selections(names, t_maxs, legends, directories="Logs/", step=10, title="", Frontier=10, Unknown=2, stds=True):
    if len(names) <= 2:
        colors = ["red", "green"]
    elif len(names) > 10:
        colors = Paired[len(names)]
    else:
        colors = Set1[len(names)]
    p = figure(width=1200, height=800, title=title)
    p.toolbar_location = "above"
    p.yaxis.axis_label = "% Successful Agents"
    p.xaxis.axis_label = "T"
    p.xaxis.formatter = BasicTickFormatter(use_scientific=False)

    lines = []
    alpha_lines = []
    if isinstance(directories, str):
        dd = directories
        directories = [dd for _ in names]
    for name, color, t_max, legend, directory in tqdm(zip(names, colors, t_maxs, legends, directories)):

        Action_Counts_Path = directory + "*" + name + "*/logs/Action_Counts.txt"
        
        Current_State_Counts = []
        Available_Action_Counts = []
        Chosen_Action_Counts = []

        for filename in tqdm(glob.glob(Action_Counts_Path)):
        
            currents = []
            availables = []
            chosen = []
        
            with open(filename, "r") as f:
                for line in f:
                    if line != "":
                        nums = line.split()
                        if nums == []:
                            continue
                        nums = [int(v) for v in nums]
                        currents.append(nums[0])
                        availables.append(nums[1:-1])
                        chosen.append(nums[-1])
            Current_State_Counts.append(currents)
            Available_Action_Counts.append(availables)
            Chosen_Action_Counts.append(chosen)
            
        
        times_picked_unknown = []
        times_picked_frontier = []
        times_picked_known = []
        for cs, aacs, cacs in zip(Current_State_Counts, Available_Action_Counts, Chosen_Action_Counts):
            us = [0]
            fs = [0]
            ks = [0]
            total_frontier_actions = [1]
            for index, state_count, next_counts, chosen_count in zip([i for i in range(len(cs))], cs, aacs, cacs):
                list_to_add = [0,0,0]
                if state_count <= Frontier:
                    list_to_add = [0,0,1]
                    if index != 0:
                        total_frontier_actions.append(total_frontier_actions[-1] + 1)
                    if chosen_count <= Frontier and chosen_count >= Unknown:
                        list_to_add = [0,1,0]
                    elif chosen_count < Unknown:
                        list_to_add = [1,0,0]
                else:
                    total_frontier_actions.append(total_frontier_actions[-1])
                us.append(us[-1] + list_to_add[0])
                fs.append(fs[-1] + list_to_add[1])
                ks.append(ks[-1] + list_to_add[2])
            
            frontier_actions = np.array(total_frontier_actions)[:t_max]
                
            times_picked_unknown.append(np.array(us)[:t_max]/frontier_actions)
            times_picked_frontier.append(np.array(fs)[:t_max]/frontier_actions)
            times_picked_known.append(np.array(ks)[:t_max]/frontier_actions)
        
        for g, cc in zip([times_picked_unknown, times_picked_frontier, times_picked_known], ["red", "green", "blue"]):
            Means = np.mean(g, axis=0)[::step]
            Stds = np.std(g, axis=0)[::step]
#             print(len([i for i in range(0, t_max, step)]), len(means[::step]))
            l = p.line([i for i in range(0, t_max, step)], Means, color=cc, line_width=2)
            lines.append(l)
            
            if stds:
                xs = [i for i in range(0, t_max, step)]
                xs = xs + list(reversed(xs))

                ys = np.concatenate([Means - Stds, np.flip(Means + Stds, axis=0)])

                lls = p.patch(xs, ys, color=cc, alpha=0.1)
                        

    new_leg = Legend(items=[(name, [line]) for name, line in zip(legends, lines)], location=(0,-30))
    p.add_layout(new_leg, "right")

    show(p)

In [66]:
# Maze
names = []
legends = []
t_maxs = []
      

# for bandit in [0.0001, 0.001, 0.01, 0.1, 1, 10]:
#     names += ["OptimisticAction_{}_".format(bandit)]
#     legends += ["{} Tau".format(bandit)]
#     t_maxs += [1200001]

# names += ["Thin_Maze_14_*_XpSize_{}k".format(100)]
# legends += ["Maze 14 {}k Xp".format(100)]
# t_maxs += [1200001]

for xp in [100]:
    names += ["_XpSize_{}k".format(xp)]
    legends += ["{}k Xp".format(xp)]
    t_maxs += [1200001]

# for bulk in [5, 10, 20, 50]:
#     names += ["Big_BonusClip_*_{}k_Reward_{}k_RIters".format(bulk, bulk)]
#     legends += ["{}k Uniform".format(bulk)]
#     t_maxs += [1200001]
    
# for bulk in [5, 10, 20, 50]:
#     names += ["Big_Prioritized_*_{}k_Reward_{}k_RIters".format(bulk, bulk)]
#     legends += ["{}k Prioritised".format(bulk)]
#     t_maxs += [1200001]

# direc = "/home/tabz/tmp/logfiles/MedMaze12_Uniform_400_500/"
# direc = ["MedMaze12_Prioritised/" for _ in range(3)] + ["MedMaze12_Uniform/"]
# direc = ["/home/scratch/tabhid/Log_Files/" + d for d in direc]
# direc = "/home/scratch/tabhid/Log_Files/MedMaze14_Uniform_Test/"
direc = "/data/savitar/tabhid/Old_Logs/FrontierFocused/ThinMaze18_v2/Uniform_Xp_Sizes/"
env = "Thin Maze 18v2 Uniform Xp Sizes"
directories = direc
# directories = [direc for _ in range(8)] + ["/data/savitar/tabhid/Old_Logs/FrontierFocused/ThinMaze14/Epsilon_Decay/"]
    
print(names)
print(t_maxs)
print(directories)
print("\n", "{} Lines".format(len(names)))

['_XpSize_100k']
[1200001]
/data/savitar/tabhid/Old_Logs/FrontierFocused/ThinMaze18_v2/Uniform_Xp_Sizes/

 1 Lines


In [16]:
# Doom Stuff
names = []
legends = []
t_maxs = []
        
# names += ["_XpSize_500k_BonusClip"]
# legends += ["Uniform"]
# t_maxs += [5000001]

# for mpseudo in [0.1, 1]:          
#     names += ["_{}_MinusPseudo".format(mpseudo)]
#     legends += ["{} MinusPseudo".format(mpseudo)]
#     t_maxs += [3000001]
    
for bandit in [0.00001, 0.0001, 0.001, 0]:
    names += ["OptimisticAction_{}_".format(bandit)]
    legends += ["{} Tau".format(bandit)]
    t_maxs += [3000001]
    
# names += ["500k_BonusClip"]
# legends += ["No Bandit"]
# t_maxs += [3000001]

# direc = "/home/tabz/tmp/logfiles/Doom_Uniform_1mil/"
# direc = "/home/scratch/tabhid/Log_Files/Doom_0.4_Alpha_Logs/"
direc = "/data/savitar/tabhid/Old_Logs/FrontierFocused/DoomHard/Bandit/"

env = "Doom Hard Bandit"
directories = direc
# directories = [direc for _ in range(6)] + ["/data/savitar/tabhid/Old_Logs/FrontierFocused/Doom/Doom_Fixed_Ending_Uniform_Nscaling/"]
    
print(names)
print(t_maxs)
print(directories)
print("\n", "{} Lines".format(len(names)))

['OptimisticAction_1e-05_', 'OptimisticAction_0.0001_', 'OptimisticAction_0.001_', 'OptimisticAction_0_']
[3000001, 3000001, 3000001, 3000001]
/data/savitar/tabhid/Old_Logs/FrontierFocused/DoomHard/Bandit/

 4 Lines


In [60]:
plot_percentages(names, t_maxs, legends, directories, step=1000, title=env)

8it [00:00, 97.24it/s]


In [61]:
plot_averages_b(names, t_maxs, legends, directories=directories, step=1000, title=env, spans=200, stds=False)

spline is deprecated in scipy 0.19.0, use Bspline class instead.


In [62]:
plot_states(names, t_maxs, legends, directories=directories, step=10000, title=env, color_offset=0)

In [None]:
action_selections(names, t_maxs, legends, directories=directories, step=10000, title=env)

0it [00:00, ?it/s]
  0%|          | 0/8 [00:00<?, ?it/s][A
 12%|█▎        | 1/8 [00:03<00:24,  3.48s/it][A
 25%|██▌       | 2/8 [00:06<00:20,  3.48s/it][A
 38%|███▊      | 3/8 [00:11<00:19,  3.91s/it][A
 50%|█████     | 4/8 [00:15<00:15,  3.86s/it][A
 62%|██████▎   | 5/8 [00:20<00:12,  4.21s/it][A
 75%|███████▌  | 6/8 [00:24<00:07,  4.00s/it][A
 88%|████████▊ | 7/8 [00:27<00:03,  3.89s/it][A
100%|██████████| 8/8 [00:33<00:00,  4.33s/it][A
[A