In [None]:
from tqdm import tqdm
%run model_evaluation
import torch
from torch import nn, optim
from sklearn.metrics import accuracy_score, confusion_matrix
from collections import defaultdict

import matplotlib.pyplot as plt
import proplot as pplt
import umap

# model, obs_rms, kwargs = load_model_and_env('nav_auxiliary_tasks/nav_aux_wall_1', 0)
# env = gym.make('NavEnv-v0', **kwargs)

save = 'plots/representation_learning/'

%run representation_analysis
%run model_evaluation


def gaussian_smooth(pos, y, extent=(5, 295), num_grid=30, sigma=10,
                    ret_hasval=False):
    '''
    Gaussian-weighted average of scattered values on a square grid.

    pos: (N, 2) array of sample coordinates
    y: N values, one per sample
    extent: (low, high) coordinate range spanned by the grid on both axes
    num_grid: number of grid points per axis
    sigma: standard deviation of the Gaussian weighting kernel
    ret_hasval: if True, also return a grid that is 1 where at least one
        sample contributed and 0 elsewhere

    Only samples whose kernel weight exceeds 0.1 contribute to a grid point;
    grid points with no such sample are left at 0. Rows are ordered from the
    highest second coordinate down to the lowest.
    '''
    values = np.array(y)

    ticks = np.linspace(extent[0], extent[1], num_grid)
    grid_x, grid_y = np.meshgrid(ticks, ticks)
    grid_y = grid_y[::-1]  # first row holds the largest second coordinate

    smoothed = np.zeros(grid_x.shape)
    hasval = np.zeros(grid_x.shape)
    for row in range(num_grid):
        for col in range(num_grid):
            centre = np.array([grid_x[row, col], grid_y[row, col]])
            dists = np.sqrt(np.sum((pos - centre)**2, axis=1))
            weights = np.exp(-dists**2 / (2*sigma**2))

            # Keep only samples that carry a non-negligible weight here.
            near = weights > 0.1
            if not near.any():
                continue

            smoothed[row, col] = (np.sum(values[near] * weights[near])
                                  / np.sum(weights[near]))
            hasval[row, col] = 1

    if ret_hasval:
        return smoothed, hasval
    return smoothed


def clean_eps(eps, prune_first=5, activations_key='shared_activations',
             activations_layer=0, clip=False,
             save_inview=True, save_seen=True):
    '''
    Clean up an eps data dictionary collected from evalu for heatmapping.

    eps: dict with 'dones', 'activations' and a 'data' dict holding 'pos',
        'angle', 'poster_in_view' and 'poster_seen'
    prune_first: number of leading steps dropped from every episode
        (0 or None keeps every step)
    activations_key, activations_layer: which entry of the stacked
        activations, and which index along its first axis, to keep
    clip: if True, clip the activations to [0, 1]
    save_inview, save_seen: also store pos/activ/angles split by the
        poster-in-view / poster-seen flags

    Returns a dict with 'pos', 'activ', 'pinview', 'pseen' and 'angles',
    plus '<name>_inview' / '<name>_notinview' and '<name>_seen' /
    '<name>_notseen' entries (name in pos, activ, angles) when requested.
    '''
    dones = eps['dones'].copy()
    pos = np.vstack(eps['data']['pos'])
    stacked = stack_activations(eps['activations'])
    angles = eps['data']['angle']

    activ = stacked[activations_key][activations_layer, :, :].numpy()
    pinview = np.array(eps['data']['poster_in_view'])
    pseen = np.array(eps['data']['poster_seen'])

    if prune_first and prune_first > 0:
        # Use the caller's prune_first rather than a hard-coded 5.
        def prune(values, join):
            # Split into episodes, drop the first steps of each, re-join.
            return join([chunk[prune_first:]
                         for chunk in split_by_ep(values, dones)])

        activ = prune(activ, np.vstack)
        pos = prune(pos, np.vstack)
        pinview = prune(pinview, np.concatenate)
        angles = prune(angles, np.concatenate)
        pseen = prune(pseen, np.concatenate)

    # On the no-prune path angles may still be a plain list; the boolean-mask
    # indexing below needs an array.
    angles = np.asarray(angles)

    if clip:
        activ = np.clip(activ, 0, 1)

    result_dict = {
        'pos': pos,
        'activ': activ,
        'pinview': pinview,
        'pseen': pseen,
        'angles': angles,
    }

    if save_inview:
        result_dict.update({
            'pos_inview': pos[pinview],
            'pos_notinview': pos[~pinview],
            'activ_inview': activ[pinview],
            'activ_notinview': activ[~pinview],
            'angles_inview': angles[pinview],
            'angles_notinview': angles[~pinview],
        })
    if save_seen:
        result_dict.update({
            'pos_seen': pos[pseen],
            'pos_notseen': pos[~pseen],
            'activ_seen': activ[pseen],
            'activ_notseen': activ[~pseen],
            'angles_seen': angles[pseen],
            'angles_notseen': angles[~pseen],
        })

    return result_dict
    
    
def stack_all_ep(all_ep):
    '''
    Merge a list of results from separate evaluation calls into one dict
    laid out like a single result, ready to be passed to clean_eps.

    all_ep: list of dicts, each with 'dones', 'activations' and a 'data'
        dict holding 'pos', 'angle', 'poster_seen' and 'poster_in_view'

    Returns a dict with the same keys: 'dones' and the 'data' entries are
    joined into arrays, 'activations' into one flat list.
    '''
    def gather(join, key):
        # Join one 'data' field across every result.
        return join([ep['data'][key] for ep in all_ep])

    dones = np.concatenate([ep['dones'] for ep in all_ep])
    data = {
        'pos': gather(np.vstack, 'pos'),
        'angle': gather(np.concatenate, 'angle'),
        'poster_seen': gather(np.concatenate, 'poster_seen'),
        'poster_in_view': gather(np.concatenate, 'poster_in_view'),
    }
    activations = [step for ep in all_ep for step in ep['activations']]

    return {
        'dones': dones,
        'activations': activations,
        'data': data,
    }
    

    
def split_by_angle(target, angles):
    '''
    Partition target by which angular quadrant each entry of angles is in.

    target: array indexed along its first axis in step with angles
    angles: array-like of angles in radians

    Returns a dict keyed 0, 1, 3, 2:
        0: [-pi/4, pi/4)
        1: [pi/4, 3*pi/4)
        3: [-3*pi/4, -pi/4)
        2: everything else (the quadrant that wraps around +/-pi)

    The intervals are half-open so an angle exactly on a boundary is placed
    in one quadrant only.
    '''
    angles = np.asarray(angles)
    bounds = {
        0: (-np.pi/4, np.pi/4),
        1: (np.pi/4, 3*np.pi/4),
        3: (-3*np.pi/4, -np.pi/4),
    }
    assigned = np.zeros(angles.shape, dtype=bool)
    result = {}

    for s in [0, 1, 3]:
        low, high = bounds[s]
        split_idxs = (low <= angles) & (angles < high)
        assigned = assigned | split_idxs
        result[s] = target[split_idxs]

    # Whatever did not fall into the three explicit quadrants; handling the
    # wrap-around quadrant as the remainder avoids awkward bounds.
    result[2] = target[~assigned]

    return result
    
    
# def filter_all_ep_directness(all_ep, bound=0.9):
    
    
def _path_directness(ep, goal_loc):
    '''Fraction of distance-changing steps in ep that move closer to goal_loc.'''
    p = np.vstack(ep['data']['pos'])
    d = np.sqrt(np.sum((p - goal_loc)**2, axis=1))
    # The final distance change is left out of the count.
    dist_changes = np.diff(d)[:-1]
    num_moved = np.sum(dist_changes != 0)
    if num_moved == 0:
        # Distance never changed: directness is undefined.
        return np.nan
    return np.sum(dist_changes < 0) / num_moved


def compute_directness(all_ep=None, ep=None, goal_loc=(250, 70)):
    '''
    Compute the directness of paths taken either from an all_ep (split up
    eps generated from appending evalu() calls) or from a single ep.

    Directness is the number of steps whose distance to the goal decreased,
    divided by the number of steps whose distance changed at all.

    all_ep: list of ep dicts; takes precedence over ep when given
    ep: a single dict with positions under ep['data']['pos']
    goal_loc: (x, y) of the goal, defaults to (250, 70)

    Returns an array with one directness per entry for all_ep, or a single
    value for ep; nan where the distance to the goal never changed.
    Raises ValueError if neither all_ep nor ep is given.
    '''
    goal_loc = np.asarray(goal_loc)

    if all_ep is not None:
        return np.array([_path_directness(e, goal_loc) for e in all_ep])
    if ep is not None:
        return _path_directness(ep, goal_loc)
    raise ValueError('No proper parameters given')
            

# Running Experiments

In [None]:
# Load the width-4 poster-navigation model (second argument 0 is what later
# cells call `trial`) and collect evaluation data with activations via evalu.
# n=500 is presumably the number of episodes -- confirm against evalu.
model_name = 'nav_poster_netstructure/nav_pdistal_width4batch200'
model, obs_rms, kwargs = load_model_and_env(model_name, 0)
eps = evalu(model, obs_rms, env_kwargs=kwargs, n=500,
      data_callback=poster_data_callback, with_activations=True)

# Flatten the collected records into the globals used by the plotting cells.
stacked = stack_activations(eps['activations'])
actions = np.vstack(eps['actions']).squeeze()
pos = np.vstack(eps['data']['pos'])



In [None]:

# Compare smoothing kernel widths: heatmap of unit 0 of the first
# shared-activation layer for six values of sigma.
fig, ax = pplt.subplots(nrows=2, ncols=3)
sigmas = [2, 5, 10, 20, 40, 80]

# Raw f-string so '\s' is not treated as an (invalid) escape sequence, and
# mathtext delimiters so the title renders a sigma symbol.
ax.format(title=[rf'$\sigma$ = {sigma}' for sigma in sigmas])
for i, sigma in enumerate(sigmas):
    smoothed = gaussian_smooth(pos, stacked['shared_activations'][0, :, 0], sigma=sigma)
    ax[i].imshow(smoothed, cmap='div', extent=(0, 300, 0, 300))

In [None]:

# One row per unit (first four units of shared-activation layer 0); columns:
# heatmap of clipped activations, heatmap of raw activations, raw scatter.
fig, ax = pplt.subplots(ncols=3, nrows=4)
ax.format(toplabels=['Clipped Heatmap', 'Heatmap', 'Scatter'])

for i in range(4):
    # Scatter of positions coloured by the raw activation, with a colorbar.
    m = ax[i, 2].scatter(pos.T[0], pos.T[1], c=stacked['shared_activations'][0, :, i], alpha=0.2)
    ax[i, 2].colorbar(m)
    
    # Clip to [0, 1] before smoothing.
    a = np.clip(stacked['shared_activations'][0, :, i].numpy(), 0, 1)
    # heatmap, _, _ = np.histogram2d(pos.T[0], pos.T[1], weights=a, bins=30)
    heatmap = gaussian_smooth(pos, a)
    ax[i, 0].imshow(heatmap, extent=(0, 300, 0, 300), cmap='div', vmin=-1, vmax=1)

    # Same unit, smoothed without clipping.
    a = stacked['shared_activations'][0, :, i].numpy()
    # heatmap, _, _ = np.histogram2d(pos.T[0], pos.T[1], weights=a, bins=30)
    heatmap = gaussian_smooth(pos, a)
    ax[i, 1].imshow(heatmap, extent=(0, 300, 0, 300), cmap='div', vmin=-1, vmax=1)


In [None]:

# Compare the order of clipping and smoothing (sigma=15) for the first four
# units: clip then smooth, smooth only, smooth then clip, and the raw scatter.
fig, ax = pplt.subplots(ncols=4, nrows=4)
ax.format(toplabels=['Clip -> Heatmap', 'Heatmap', 'Heatmap -> Clip', 'Scatter'])

for i in range(4):
    # Raw scatter with a colorbar.
    m = ax[i, 3].scatter(pos.T[0], pos.T[1], c=stacked['shared_activations'][0, :, i], alpha=0.2)
    ax[i, 3].colorbar(m)
    
    # Clip to [0, 1], then smooth.
    a = np.clip(stacked['shared_activations'][0, :, i].numpy(), 0, 1)
    # heatmap, _, _ = np.histogram2d(pos.T[0], pos.T[1], weights=a, bins=30)
    heatmap = gaussian_smooth(pos, a, sigma=15)
    ax[i, 0].imshow(heatmap, extent=(0, 300, 0, 300), cmap='div', vmin=-1, vmax=1)

    # Smooth the raw activations.
    a = stacked['shared_activations'][0, :, i].numpy()
    # heatmap, _, _ = np.histogram2d(pos.T[0], pos.T[1], weights=a, bins=30)
    heatmap = gaussian_smooth(pos, a, sigma=15)
    ax[i, 1].imshow(heatmap, extent=(0, 300, 0, 300), cmap='div', vmin=-1, vmax=1)

    # Clip the already-smoothed map from the previous column.
    ax[i, 2].imshow(np.clip(heatmap, 0, 1), extent=(0, 300, 0, 300), cmap='div', vmin=-1, vmax=1)
    

## Split by whether poster is in view

Note: run the data-collection cell in the section above first.

In [None]:
# Scatter the first four units of shared-activation layer 0, split by the
# per-step 'poster_in_view' flag. Relies on `eps`, `stacked` and `pos` from
# the data-collection cell.
pinview = np.array(eps['data']['poster_in_view'])

activ = stacked['shared_activations'][0]

#split into poster in view and poster not in view, from scatter first
activ_inview = activ[pinview]
activ_notinview = activ[~pinview]
pos_inview = pos[pinview]
pos_notinview = pos[~pinview]

fig, ax = pplt.subplots(ncols=3, nrows=4)
ax.format(toplabels=['Poster in View', 'Poster not in View', 'Combined'])

for i in range(4):
    # Columns: in view, not in view, all steps.
    ax[i, 0].scatter(pos_inview.T[0], pos_inview.T[1], c=activ_inview[:, i], alpha=0.2)
    ax[i, 1].scatter(pos_notinview.T[0], pos_notinview.T[1], c=activ_notinview[:, i], alpha=0.2)
    ax[i, 2].scatter(pos.T[0], pos.T[1], c=activ[:, i], alpha=0.2)
    

In [None]:
# Prune the first `prune_first` timesteps of every episode, then scatter the
# first four units split by the per-step 'poster_in_view' flag.
dones = eps['dones']
activ = stacked['shared_activations'][0, :, :]
pinview = np.array(eps['data']['poster_in_view'])

# Per-episode chunks (ep_pos is reused by a later cell to plot start points).
ep_activ = split_by_ep(activ, dones)
ep_pos = split_by_ep(pos, dones)
ep_pinview = split_by_ep(pinview, dones)

prune_first = 5
pruned_ep_activ = [a[prune_first:] for a in ep_activ]
pruned_activ = torch.vstack(pruned_ep_activ)
pruned_ep_pos = [p[prune_first:] for p in ep_pos]
pruned_pos = np.vstack(pruned_ep_pos)
pruned_ep_pinview = [p[prune_first:] for p in ep_pinview]
pruned_pinview = np.concatenate(pruned_ep_pinview)

# Split the pruned data into poster in view / poster not in view.
activ_inview = pruned_activ[pruned_pinview]
activ_notinview = pruned_activ[~pruned_pinview]
pos_inview = pruned_pos[pruned_pinview]
pos_notinview = pruned_pos[~pruned_pinview]

fig, ax = pplt.subplots(ncols=3, nrows=4)
ax.format(toplabels=['Poster in View', 'Poster not in View', 'Combined'], 
         suptitle=f'Scatter Activations on Width 4, First {prune_first} Steps Pruned')

for i in range(4):
    ax[i, 0].scatter(pos_inview.T[0], pos_inview.T[1], c=activ_inview[:, i], alpha=0.2)
    ax[i, 1].scatter(pos_notinview.T[0], pos_notinview.T[1], c=activ_notinview[:, i], alpha=0.2)
    # The combined column uses the pruned data too, so all three columns
    # match the "steps pruned" title.
    ax[i, 2].scatter(pruned_pos.T[0], pruned_pos.T[1], c=pruned_activ[:, i], alpha=0.2)
    

In [None]:
# Run the current `eps` through clean_eps with its default arguments.
res = clean_eps(eps)

In [None]:
# Steps where the poster is not in view: scatter (left) next to the
# sigma=6 smoothed heatmap (right) for the first four units.
p = res['pos_notinview']
a = res['activ_notinview']

fig, ax = pplt.subplots(ncols=2, nrows=4)

for i in range(4):
    heatmap = gaussian_smooth(p, a[:, i], sigma=6)
    ax[i, 0].scatter(p.T[0], p.T[1], c=a[:, i], alpha=0.2)
    ax[i, 1].imshow(heatmap, extent=(0, 300, 0, 300))

In [None]:
# Show the starting point of every episode: the first position in each
# per-episode chunk of `ep_pos` (built in the pruning cell).
starting_pts = np.vstack([p[0] for p in ep_pos])
fig, ax = pplt.subplots()
ax.format(xlim=[0, 300], ylim=[0, 300])
ax.scatter(starting_pts.T[0], starting_pts.T[1])

In [None]:
# Network widths used in the model names below, and the (nrows, ncols)
# subplot grid used for each width; rows * cols equals the width.
widths = [2, 3, 4, 8, 16, 32, 64]
fig_sizes = [(1, 2), (1, 3), (2, 2), (2, 4), (4, 4), (4, 8), (8, 8)]

In [None]:
# Width sweep, n=0 (width 2): evaluate the model, keep the steps where the
# poster is not in view, and plot a sigma=6 heatmap per unit.
n = 0
num_trials = 3  # not used in this cell
trial = 0
width = widths[n]
model_name = f'nav_poster_netstructure/nav_pdistal_width{width}batch200'
model, obs_rms, kwargs = load_model_and_env(model_name, trial)

eps = evalu(model, obs_rms, env_kwargs=kwargs, n=500,
      data_callback=poster_data_callback, with_activations=True)

res = clean_eps(eps)
p = res['pos_notinview']
a = res['activ_notinview']
fig_size = fig_sizes[n]

fig, ax = pplt.subplots(nrows=fig_size[0], ncols=fig_size[1])

# One panel per unit.
for i in range(widths[n]):
    heatmap = gaussian_smooth(p, a[:, i], sigma=6)
    # ax[i, 0].scatter(p.T[0], p.T[1], c=a[:, i], alpha=0.2)
    ax[i].imshow(heatmap, extent=(0, 300, 0, 300))

In [None]:
# Width sweep, n=1 (width 3): not-in-view heatmaps (sigma=6), one per unit.
n = 1
trial = 0
width = widths[n]
fig_size = fig_sizes[n]

model_name = f'nav_poster_netstructure/nav_pdistal_width{width}batch200'
model, obs_rms, kwargs = load_model_and_env(model_name, trial)

eps = evalu(model, obs_rms, env_kwargs=kwargs, n=500,
      data_callback=poster_data_callback, with_activations=True)

res = clean_eps(eps)
p = res['pos_notinview']
a = res['activ_notinview']

fig, ax = pplt.subplots(nrows=fig_size[0], ncols=fig_size[1])

# One panel per unit.
for i in range(widths[n]):
    heatmap = gaussian_smooth(p, a[:, i], sigma=6)
    # ax[i, 0].scatter(p.T[0], p.T[1], c=a[:, i], alpha=0.2)
    ax[i].imshow(heatmap, extent=(0, 300, 0, 300))

In [None]:
# Width sweep, n=2 (width 4): not-in-view heatmaps (sigma=6), one per unit.
n = 2
trial = 0
width = widths[n]
fig_size = fig_sizes[n]

model_name = f'nav_poster_netstructure/nav_pdistal_width{width}batch200'
model, obs_rms, kwargs = load_model_and_env(model_name, trial)

eps = evalu(model, obs_rms, env_kwargs=kwargs, n=500,
      data_callback=poster_data_callback, with_activations=True)

res = clean_eps(eps)
p = res['pos_notinview']
a = res['activ_notinview']

fig, ax = pplt.subplots(nrows=fig_size[0], ncols=fig_size[1])

# One panel per unit.
for i in range(widths[n]):
    heatmap = gaussian_smooth(p, a[:, i], sigma=6)
    # ax[i, 0].scatter(p.T[0], p.T[1], c=a[:, i], alpha=0.2)
    ax[i].imshow(heatmap, extent=(0, 300, 0, 300))

In [None]:
# Width sweep, n=3 (width 8): not-in-view heatmaps (sigma=6), one per unit.
n = 3
trial = 0
width = widths[n]
fig_size = fig_sizes[n]

model_name = f'nav_poster_netstructure/nav_pdistal_width{width}batch200'
model, obs_rms, kwargs = load_model_and_env(model_name, trial)

eps = evalu(model, obs_rms, env_kwargs=kwargs, n=500,
      data_callback=poster_data_callback, with_activations=True)

res = clean_eps(eps)
p = res['pos_notinview']
a = res['activ_notinview']

fig, ax = pplt.subplots(nrows=fig_size[0], ncols=fig_size[1])

# One panel per unit.
for i in range(widths[n]):
    heatmap = gaussian_smooth(p, a[:, i], sigma=6)
    # ax[i, 0].scatter(p.T[0], p.T[1], c=a[:, i], alpha=0.2)
    ax[i].imshow(heatmap, extent=(0, 300, 0, 300))

In [None]:
# Width sweep, n=4 (width 16): not-in-view heatmaps (sigma=6), one per unit.
n = 4
trial = 0
width = widths[n]
fig_size = fig_sizes[n]

model_name = f'nav_poster_netstructure/nav_pdistal_width{width}batch200'
model, obs_rms, kwargs = load_model_and_env(model_name, trial)

eps = evalu(model, obs_rms, env_kwargs=kwargs, n=500,
      data_callback=poster_data_callback, with_activations=True)

res = clean_eps(eps)
p = res['pos_notinview']
a = res['activ_notinview']

fig, ax = pplt.subplots(nrows=fig_size[0], ncols=fig_size[1])

# One panel per unit.
for i in range(widths[n]):
    heatmap = gaussian_smooth(p, a[:, i], sigma=6)
    # ax[i, 0].scatter(p.T[0], p.T[1], c=a[:, i], alpha=0.2)
    ax[i].imshow(heatmap, extent=(0, 300, 0, 300))

In [None]:
# Width sweep, n=5 (width 32): not-in-view heatmaps (sigma=6), one per unit.
n = 5
trial = 0
width = widths[n]
fig_size = fig_sizes[n]

model_name = f'nav_poster_netstructure/nav_pdistal_width{width}batch200'
model, obs_rms, kwargs = load_model_and_env(model_name, trial)

eps = evalu(model, obs_rms, env_kwargs=kwargs, n=500,
      data_callback=poster_data_callback, with_activations=True)

res = clean_eps(eps)
p = res['pos_notinview']
a = res['activ_notinview']

fig, ax = pplt.subplots(nrows=fig_size[0], ncols=fig_size[1])

# One panel per unit.
for i in range(widths[n]):
    heatmap = gaussian_smooth(p, a[:, i], sigma=6)
    # ax[i, 0].scatter(p.T[0], p.T[1], c=a[:, i], alpha=0.2)
    ax[i].imshow(heatmap, extent=(0, 300, 0, 300))

In [None]:
# Width sweep, n=6 (width 64): not-in-view heatmaps (sigma=6), one per unit.
n = 6
trial = 0
width = widths[n]
fig_size = fig_sizes[n]

model_name = f'nav_poster_netstructure/nav_pdistal_width{width}batch200'
model, obs_rms, kwargs = load_model_and_env(model_name, trial)

eps = evalu(model, obs_rms, env_kwargs=kwargs, n=500,
      data_callback=poster_data_callback, with_activations=True)

res = clean_eps(eps)
p = res['pos_notinview']
a = res['activ_notinview']

fig, ax = pplt.subplots(nrows=fig_size[0], ncols=fig_size[1])

# One panel per unit.
for i in range(widths[n]):
    heatmap = gaussian_smooth(p, a[:, i], sigma=6)
    # ax[i, 0].scatter(p.T[0], p.T[1], c=a[:, i], alpha=0.2)
    ax[i].imshow(heatmap, extent=(0, 300, 0, 300))

In [None]:
# Width 64 again, this time using every (pruned) step rather than only the
# not-in-view ones; evalu is called with n=500 and heatmaps use sigma=6.
n = 6
trial = 0
width = widths[n]
fig_size = fig_sizes[n]

model_name = f'nav_poster_netstructure/nav_pdistal_width{width}batch200'
model, obs_rms, kwargs = load_model_and_env(model_name, trial)

eps = evalu(model, obs_rms, env_kwargs=kwargs, n=500,
      data_callback=poster_data_callback, with_activations=True)

res = clean_eps(eps, clip=False)
p = res['pos']
a = res['activ']

fig, ax = pplt.subplots(nrows=fig_size[0], ncols=fig_size[1])

# One panel per unit.
for i in range(widths[n]):
    heatmap = gaussian_smooth(p, a[:, i], sigma=6)
    # ax[i, 0].scatter(p.T[0], p.T[1], c=a[:, i], alpha=0.2)
    ax[i].imshow(heatmap, extent=(0, 300, 0, 300))

In [None]:
# Width 64, all (pruned) steps, with a smaller evalu call (n=200) and a
# wider smoothing kernel (sigma=10).
n = 6
trial = 0
width = widths[n]
fig_size = fig_sizes[n]

model_name = f'nav_poster_netstructure/nav_pdistal_width{width}batch200'
model, obs_rms, kwargs = load_model_and_env(model_name, trial)

eps = evalu(model, obs_rms, env_kwargs=kwargs, n=200,
      data_callback=poster_data_callback, with_activations=True)

res = clean_eps(eps, clip=False)
p = res['pos']
a = res['activ']

fig, ax = pplt.subplots(nrows=fig_size[0], ncols=fig_size[1])

# One panel per unit.
for i in range(widths[n]):
    heatmap = gaussian_smooth(p, a[:, i], sigma=10)
    # ax[i, 0].scatter(p.T[0], p.T[1], c=a[:, i], alpha=0.2)
    ax[i].imshow(heatmap, extent=(0, 300, 0, 300))

In [None]:
# Same as the previous width-64 plot but with width and grid size written
# out literally, and the evalu result kept under its own name
# (all_ep_randinit64) instead of overwriting `eps`.
n = 6  # not used below; width/fig_size are set directly
trial = 0
width = 64
fig_size = [8, 8]

model_name = f'nav_poster_netstructure/nav_pdistal_width{width}batch200'
model, obs_rms, kwargs = load_model_and_env(model_name, trial)

all_ep_randinit64 = evalu(model, obs_rms, env_kwargs=kwargs, n=200,
      data_callback=poster_data_callback, with_activations=True)

res = clean_eps(all_ep_randinit64, clip=False)
p = res['pos']
a = res['activ']

fig, ax = pplt.subplots(nrows=fig_size[0], ncols=fig_size[1])

# One panel per unit.
for i in range(64):
    heatmap = gaussian_smooth(p, a[:, i], sigma=10)
    # ax[i, 0].scatter(p.T[0], p.T[1], c=a[:, i], alpha=0.2)
    ax[i].imshow(heatmap, extent=(0, 300, 0, 300))

In [None]:
# Width 64, not-in-view steps, sigma=15. The evalu call is commented out, so
# this reuses whatever `eps` is currently in the kernel.
n = 6
trial = 0
width = widths[n]
fig_size = fig_sizes[n]

model_name = f'nav_poster_netstructure/nav_pdistal_width{width}batch200'
model, obs_rms, kwargs = load_model_and_env(model_name, trial)

# eps = evalu(model, obs_rms, env_kwargs=kwargs, n=500,
#       data_callback=poster_data_callback, with_activations=True)

res = clean_eps(eps)
p = res['pos_notinview']
a = res['activ_notinview']

fig, ax = pplt.subplots(nrows=fig_size[0], ncols=fig_size[1])

# One panel per unit.
for i in range(widths[n]):
    heatmap = gaussian_smooth(p, a[:, i], sigma=15)
    # ax[i, 0].scatter(p.T[0], p.T[1], c=a[:, i], alpha=0.2)
    ax[i].imshow(heatmap, extent=(0, 300, 0, 300))

In [None]:
# Width 64, not-in-view steps, sigma=15, with each heatmap clipped to [0, 1]
# after smoothing. Reuses the `eps` currently in the kernel (evalu call is
# commented out).
n = 6
trial = 0
width = widths[n]
fig_size = fig_sizes[n]

model_name = f'nav_poster_netstructure/nav_pdistal_width{width}batch200'
model, obs_rms, kwargs = load_model_and_env(model_name, trial)

# eps = evalu(model, obs_rms, env_kwargs=kwargs, n=500,
#       data_callback=poster_data_callback, with_activations=True)

res = clean_eps(eps, clip=False)
p = res['pos_notinview']
a = res['activ_notinview']

fig, ax = pplt.subplots(nrows=fig_size[0], ncols=fig_size[1])

# One panel per unit.
for i in range(widths[n]):
    heatmap = gaussian_smooth(p, a[:, i], sigma=15)
    heatmap = np.clip(heatmap, 0, 1)
    # ax[i, 0].scatter(p.T[0], p.T[1], c=a[:, i], alpha=0.2)
    ax[i].imshow(heatmap, extent=(0, 300, 0, 300))

In [None]:
# Width 64, all (pruned) steps, sigma=15, with each heatmap clipped to
# [0, 1] after smoothing. Reuses the `eps` currently in the kernel (evalu
# call is commented out).
n = 6
trial = 0
width = widths[n]
fig_size = fig_sizes[n]

model_name = f'nav_poster_netstructure/nav_pdistal_width{width}batch200'
model, obs_rms, kwargs = load_model_and_env(model_name, trial)

# eps = evalu(model, obs_rms, env_kwargs=kwargs, n=500,
#       data_callback=poster_data_callback, with_activations=True)

res = clean_eps(eps, clip=False)
p = res['pos']
a = res['activ']

fig, ax = pplt.subplots(nrows=fig_size[0], ncols=fig_size[1])

# One panel per unit.
for i in range(widths[n]):
    heatmap = gaussian_smooth(p, a[:, i], sigma=15)
    heatmap = np.clip(heatmap, 0, 1)
    # ax[i, 0].scatter(p.T[0], p.T[1], c=a[:, i], alpha=0.2)
    ax[i].imshow(heatmap, extent=(0, 300, 0, 300))

In [None]:
# Try to fill in the outer rim of points that are missing: build start
# positions along the four edges of the window (5 units in from each edge,
# every `step_size` units), each with a heading that points at the window
# centre (150, 150).
WINDOW_SIZE = (300, 300)
step_size = 10.
xs = np.arange(0+step_size, WINDOW_SIZE[0], step_size)
ys = np.arange(0+step_size, WINDOW_SIZE[1], step_size)
# thetas = np.linspace(0, 2*np.pi, 12, endpoint=False)
start_points = []
start_angles = []
# Edges at y=5 and y=295.
for x in xs:
    for y in [5., 295.]:
        point = np.array([x, y])
        angle = np.arctan2(150 - y, 150 - x)
        start_points.append(point)
        start_angles.append(angle)
# Edges at x=5 and x=295.
for y in ys:
    for x in [5, 295]:
        point = np.array([x, y])
        angle = np.arctan2(150 - y, 150 - x)
        start_points.append(point)
        start_angles.append(angle)
        
start_points = np.vstack(start_points)
# plt.scatter(points.T[0], points.T[1])
# Draw every start pose to check the layout.
fig, ax = plt.subplots()
ax.set_xlim([-5, 305])
ax.set_ylim([-5, 305])
for i in range(len(start_points)):
    draw_character(start_points[i], start_angles[i], ax=ax)

In [None]:
# Random-action rollouts from every rim start pose: each step's action is
# drawn uniformly from {0, 1, 2}, three episodes per start pose.
action_randomizer = lambda step: np.random.choice([0, 1, 2])

all_ep = []
# The rim poses live in start_points / start_angles (the names `points` and
# `angles` are not defined by the cell that builds them).
for i in range(len(start_points)):
    kw = kwargs.copy()
    # Copies so the stored start poses cannot be modified through the kwargs,
    # matching the other fixed_reset cells.
    kw['fixed_reset'] = [start_points[i].copy(), start_angles[i].copy()]
    ep = forced_action_evaluate(model, obs_rms, forced_actions=action_randomizer, 
                                seed=i*5, num_episodes=3, env_kwargs=kw, data_callback=poster_data_callback,
                                with_activations=True)
    # ep = clean_eps(ep)
    all_ep.append(ep)
    

In [None]:
# Clean each collected result separately with clean_eps defaults.
all_ep_cleaned = [clean_eps(ep) for ep in all_ep]

In [None]:
# Heatmaps of one unit (index 2) split by heading quadrant.
# Note: `i` is first the unit index and is then reused as the quadrant index
# by the plotting loop below.
i = 2
pos = np.vstack([ep['pos'] for ep in all_ep_cleaned])
angles = np.concatenate([ep['angles'] for ep in all_ep_cleaned])
activ = np.concatenate([ep['activ'][:, i] for ep in all_ep_cleaned])

angle_split_activ = split_by_angle(activ, angles)
angle_split_pos = split_by_angle(pos, angles)

# One panel per quadrant key (0..3) returned by split_by_angle.
fig, ax = pplt.subplots(nrows=2, ncols=2)
for i in range(4):
    p = angle_split_pos[i]
    a = angle_split_activ[i]
    heatmap = gaussian_smooth(p, a)
    
    ax[i].imshow(heatmap, extent=(5, 295, 5, 295))

## Forced action forward only

In [None]:
# Heatmaps of the first four units over all episodes in `all_ep`.
# NOTE(review): this reads ep['pos'] / ep['activ'], i.e. it expects entries
# already passed through clean_eps. The only `all_ep` built in the visible
# cells holds raw forced_action_evaluate results -- presumably this ran on a
# cleaned forward-only dataset that is not shown; confirm before re-running.
fig, ax = pplt.subplots(nrows=2, ncols=2)

for i in tqdm(range(4)):
    pos = np.vstack([ep['pos'] for ep in all_ep])
    activ = np.concatenate([ep['activ'][:, i] for ep in all_ep])
    # plt.scatter(pos.T[0], pos.T[1], alpha=0.1, c=activ)
    heatmap = gaussian_smooth(pos, activ)
    ax[i].imshow(heatmap, extent=(5, 295, 5, 295))

In [None]:
# Forced-action callback: ignores the step index and draws an action
# uniformly from {0, 1, 2}.
action_randomizer = lambda step: np.random.choice([0, 1, 2])


# Further Experiments
* Outer rim initial conditions
* Random actions
* Split by poster in view, poster seen
* Borrow actions from another policy

In [None]:
# Display the current value of model_name (notebook state check).
model_name

In [None]:

# One rollout per rim start pose with the currently loaded model. No
# forced_actions are passed here; data is recorded with nav_data_callback.
all_ep = []
for i in range(len(start_points)):
# for i in range():
    kw = kwargs.copy()
    # Fix the reset pose to the i-th rim position and heading.
    kw['fixed_reset'] = [start_points[i].copy(), start_angles[i].copy()]
    ep = forced_action_evaluate(model, obs_rms,
                                seed=0, num_episodes=1, env_kwargs=kw, data_callback=nav_data_callback,
                                with_activations=True)
    # ep = clean_eps(ep)
    all_ep.append(ep)
    

In [None]:
# Scatter every visited position from the rim rollouts, with the arena box
# drawn on top. `p` is reused by the next cell.
fig, ax = plt.subplots()
# p = np.vstack(all_ep[0]['data']['pos'])
p = np.vstack([ep['data']['pos'] for ep in all_ep])
ax.scatter(p.T[0], p.T[1], alpha=0.2)
draw_box(ax=ax)

In [None]:
# Stack layer-0 shared activations across all rim rollouts and plot a
# heatmap for each of the first four units. Uses `p` from the previous cell.
all_activ = [stack_activations(ep['activations'])['shared_activations'] for ep in all_ep]
all_activ = torch.vstack([activ[0, :, :] for activ in all_activ])

fig, ax = pplt.subplots(nrows=2, ncols=2)

for i in range(4):
    activ = all_activ[:, i]
    heatmap = gaussian_smooth(p, activ)
    ax[i].imshow(heatmap, extent=(5, 295, 5, 295))

In [None]:
# Keep the action sequence of every rim rollout so another model can replay it.
saved_actions = [ep['actions'] for ep in all_ep]

In [None]:
# Load the width-64 model and replay the saved action sequences from the
# same rim start poses, recording its activations.
model_name = 'nav_poster_netstructure/nav_pdistal_width64batch200'
model, obs_rms, kwargs = load_model_and_env(model_name, 0)

all_ep_copied = []
for i in range(len(start_points)):
    # Looks up `i` when called; it is only used inside this same iteration.
    action_copier = lambda step: saved_actions[i][step]
    
    kw = kwargs.copy()
    kw['fixed_reset'] = [start_points[i].copy(), start_angles[i].copy()]
    ep = forced_action_evaluate(model, obs_rms, forced_actions=action_copier,
                                seed=0, num_episodes=1, env_kwargs=kw, data_callback=poster_data_callback,
                                with_activations=True)
    # ep = clean_eps(ep)
    all_ep_copied.append(ep)
    

In [None]:
# Heatmaps of all 64 units over the copied-action rollouts (no pruning).
all_activ = [stack_activations(ep['activations'])['shared_activations'] for ep in all_ep_copied]
all_activ = torch.vstack([activ[0, :, :] for activ in all_activ])
p = np.vstack([ep['data']['pos'] for ep in all_ep_copied])

fig, ax = pplt.subplots(nrows=8, ncols=8)

for i in range(64):
    activ = all_activ[:, i]
    heatmap = gaussian_smooth(p, activ)
    ax[i].imshow(heatmap, extent=(5, 295, 5, 295))

In [None]:
# Merge the copied-action rollouts, clean them, and plot heatmaps of all 64
# units restricted to steps flagged 'poster_seen'.
eps = stack_all_ep(all_ep_copied)
cleaned = clean_eps(eps)

# all_activ = [stack_activations(ep['activations'])['shared_activations'] for ep in all_ep_randact64]
# all_activ = torch.vstack([activ[0, :, :] for activ in all_activ])
# p = np.vstack([ep['data']['pos'] for ep in all_ep_randact64])

all_activ = cleaned['activ_seen']
p = cleaned['pos_seen']

fig, ax = pplt.subplots(nrows=8, ncols=8)

for i in range(64):
    activ = all_activ[:, i]
    heatmap = gaussian_smooth(p, activ)
    ax[i].imshow(heatmap, extent=(5, 295, 5, 295))

In [None]:
# Re-clean the merged episodes, asking for no pruning of early steps.
cleaned = clean_eps(eps, prune_first=0)

In [None]:
# Position scatters: all steps, steps flagged 'poster_seen', and the rest.
fig, ax = pplt.subplots(ncols=3)
ax[0].scatter(cleaned['pos'].T[0], cleaned['pos'].T[1], alpha=0.3)
p = cleaned['pos_seen']
ax[1].scatter(p.T[0], p.T[1], alpha=0.3)
p = cleaned['pos_notseen']
ax[2].scatter(p.T[0], p.T[1], alpha=0.3)

In [None]:
# Width-64 model rolled out from every rim start pose. No forced_actions
# are passed to forced_action_evaluate in this cell.
model_name = 'nav_poster_netstructure/nav_pdistal_width64batch200'
model, obs_rms, kwargs = load_model_and_env(model_name, 0)

all_ep64 = []
for i in range(len(start_points)):
    # Defined but never passed to forced_action_evaluate below.
    action_copier = lambda step: saved_actions[i][step]
    
    kw = kwargs.copy()
    kw['fixed_reset'] = [start_points[i].copy(), start_angles[i].copy()]
    ep = forced_action_evaluate(model, obs_rms,
                                seed=0, num_episodes=1, env_kwargs=kw, data_callback=poster_data_callback,
                                with_activations=True)
    # ep = clean_eps(ep)
    all_ep64.append(ep)
    
    

In [None]:
# Merge and clean the width-64 rim rollouts, then plot a heatmap for each of
# the 64 units over all (pruned) steps.
# all_activ = [stack_activations(ep['activations'])['shared_activations'] for ep in all_ep64]
# all_activ = torch.vstack([activ[0, :, :] for activ in all_activ])
# p = np.vstack([ep['data']['pos'] for ep in all_ep64])
eps = stack_all_ep(all_ep64)
cleaned = clean_eps(eps)

p = cleaned['pos']
all_activ = cleaned['activ']
fig, ax = pplt.subplots(nrows=8, ncols=8)

for i in range(64):
    activ = all_activ[:, i]
    heatmap = gaussian_smooth(p, activ)
    ax[i].imshow(heatmap, extent=(5, 295, 5, 295))

In [None]:
# Width-64 model driven by random actions whose frequencies match the
# actions recorded in all_ep64, starting from every rim pose.
model_name = 'nav_poster_netstructure/nav_pdistal_width64batch200'
model, obs_rms, kwargs = load_model_and_env(model_name, 0)

all_ep_randact64 = []

#randomize actions following action distribution of original policy
all_act = torch.vstack([torch.vstack(ep['actions']) for ep in all_ep64])
# Empirical frequency of actions 0, 1, 2 ...
act_probs = [((all_act == i).sum() / len(all_act)).item() for i in range(3)]
# ... renormalised so the list sums to 1 for np.random.choice.
act_probs = [act_probs[i] / np.sum(act_probs) for i in range(3)]
action_randomizer = lambda step: np.random.choice([0, 1, 2], p=act_probs)

for i in range(len(start_points)):
    kw = kwargs.copy()
    kw['fixed_reset'] = [start_points[i].copy(), start_angles[i].copy()]
    # A different seed per start pose.
    ep = forced_action_evaluate(model, obs_rms, forced_actions=action_randomizer,
                                seed=i, num_episodes=1, env_kwargs=kw, data_callback=poster_data_callback,
                                with_activations=True)
    # ep = clean_eps(ep)
    all_ep_randact64.append(ep)
    

In [None]:
# Random-action rollouts: heatmaps of all 64 units restricted to steps
# flagged 'poster_seen'.
eps = stack_all_ep(all_ep_randact64)
cleaned = clean_eps(eps)

# all_activ = [stack_activations(ep['activations'])['shared_activations'] for ep in all_ep_randact64]
# all_activ = torch.vstack([activ[0, :, :] for activ in all_activ])
# p = np.vstack([ep['data']['pos'] for ep in all_ep_randact64])

all_activ = cleaned['activ_seen']
p = cleaned['pos_seen']

fig, ax = pplt.subplots(nrows=8, ncols=8)

for i in range(64):
    activ = all_activ[:, i]
    heatmap = gaussian_smooth(p, activ)
    ax[i].imshow(heatmap, extent=(5, 295, 5, 295))

In [None]:
# Random-action rollouts: heatmaps of all 64 units restricted to steps not
# flagged 'poster_seen'.
eps = stack_all_ep(all_ep_randact64)
cleaned = clean_eps(eps)

# all_activ = [stack_activations(ep['activations'])['shared_activations'] for ep in all_ep_randact64]
# all_activ = torch.vstack([activ[0, :, :] for activ in all_activ])
# p = np.vstack([ep['data']['pos'] for ep in all_ep_randact64])

all_activ = cleaned['activ_notseen']
p = cleaned['pos_notseen']

fig, ax = pplt.subplots(nrows=8, ncols=8)

for i in range(64):
    activ = all_activ[:, i]
    heatmap = gaussian_smooth(p, activ)
    ax[i].imshow(heatmap, extent=(5, 295, 5, 295))

In [None]:
# Position scatters for the current `cleaned` data: all steps, steps flagged
# 'poster_seen', and the rest.
fig, ax = pplt.subplots(ncols=3)
ax[0].scatter(cleaned['pos'].T[0], cleaned['pos'].T[1], alpha=0.3)
p = cleaned['pos_seen']
ax[1].scatter(p.T[0], p.T[1], alpha=0.3)
p = cleaned['pos_notseen']
ax[2].scatter(p.T[0], p.T[1], alpha=0.3)

# Notes

* There doesn't seem to be much use in filtering out when the poster is in view or not - it seems like this gets averaged out from all the other states where the poster is not in view
* Random actions really mess with the activations; it becomes much less clear that nodes have any principled activation pattern
    * Splitting by view angle does seem to show some symmetry - which makes sense, in general a node cannot break symmetry effectively without consistent cues... 
    * **Should think about this idea some more. Why can't the 64 network break the symmetry when given actions from the 4 network? Seems like it is very restricted to effective performance on its existing trajectories.**
    * **What would then happen if you give it a partway set of actions that cause hallucination, will it fail due to this out of distribution trajectory?**
    * Are there representations that are more resilient and can act in these more out-of-policy scenarios?
* Following policy and starting initial conditions about the boundary gives consistent activations, as the agent has conserved behavior
* Starting about random initial conditions of course makes the patterns more messy, but some of the same structures can be seen
    * This suggests that maybe it might make sense to average activation maps between these types of episodes
* Giving the 64 width network trajectories from the 4 width network creates the same "hallucinations" as the 4 width has. Does this happen in reverse? If this is the case, we might want to force the same actions in all cases so that we can appropriately compare generated activations


# Experiment Summary

Generate data with all meaningful experiments simultaneously

In [None]:
# Collect both datasets for the width-64 model in one place.
# NOTE(review): `trial` is not set in this cell; it comes from whichever
# earlier cell ran last (every visible assignment sets it to 0).
model_name = 'nav_poster_netstructure/nav_pdistal_width64batch200'
model, obs_rms, kwargs = load_model_and_env(model_name, trial)

#1. 200 Random initial condition episodes

# One single-episode call per seed 0..199, with no fixed reset pose.
all_ep_64 = []
for i in range(200):
    ep = forced_action_evaluate(model, obs_rms, env_kwargs=kwargs, 
                                num_episodes=1, data_callback=poster_data_callback, seed=i,
                                with_activations=True)
    all_ep_64.append(ep)
eps_64 = stack_all_ep(all_ep_64)

#Starting around rim - First generate start points and angles
# Same construction as the earlier rim cell: points 5 units in from each
# edge, every step_size units, headed towards the centre (150, 150).
WINDOW_SIZE = (300, 300)
step_size = 10.
xs = np.arange(0+step_size, WINDOW_SIZE[0], step_size)
ys = np.arange(0+step_size, WINDOW_SIZE[1], step_size)
# thetas = np.linspace(0, 2*np.pi, 12, endpoint=False)
start_points = []
start_angles = []
for x in xs:
    for y in [5., 295.]:
        point = np.array([x, y])
        angle = np.arctan2(150 - y, 150 - x)
        start_points.append(point)
        start_angles.append(angle)
for y in ys:
    for x in [5, 295]:
        point = np.array([x, y])
        angle = np.arctan2(150 - y, 150 - x)
        start_points.append(point)
        start_angles.append(angle)
        
start_points = np.vstack(start_points)

# 2. Starting around rim episodes
all_ep_rim64 = []
for i in range(len(start_points)):
    kw = kwargs.copy()
    kw['fixed_reset'] = [start_points[i].copy(), start_angles[i].copy()]
    ep = forced_action_evaluate(model, obs_rms, seed=0, num_episodes=1, 
                                env_kwargs=kw, data_callback=poster_data_callback,
                                with_activations=True)
    all_ep_rim64.append(ep)
eps_rim64 = stack_all_ep(all_ep_rim64)


# Keep the action sequences of both datasets for later replay.
saved_actions_rim64 = [ep['actions'] for ep in all_ep_rim64]
saved_actions_64 = [ep['actions'] for ep in all_ep_64]

In [None]:
# Three 8x8 heatmap panels in one figure: random-start episodes (top left),
# rim-start episodes (top right) and both pooled (bottom, centred).
array = [
    list(np.arange(1, 9) + 8 * row) + list(np.arange(65, 73) + 8 * row)
    for row in range(8)
]
for row in range(8):
    # Zero entries leave the grid slot empty, centring the third panel
    array.append([0] * 4 + list(np.arange(129, 137) + 8 * row) + [0] * 4)

fig, ax = pplt.subplots(array,
                        hspace=[0] * 7 + [10] + [0] * 7,
                        wspace=[0] * 7 + [3] + [0] * 7)

cleaned = clean_eps(eps_64)
p1, a1 = cleaned['pos'], cleaned['activ']

cleaned = clean_eps(eps_rim64)
p2, a2 = cleaned['pos'], cleaned['activ']

# Pooled data for the bottom panel
p3 = np.vstack([p1, p2])
a3 = np.vstack([a1, a2])

# Each panel occupies 64 consecutive axes, one per hidden unit
for first_axis, (panel_pos, panel_activ) in zip((0, 64, 128),
                                                ((p1, a1), (p2, a2), (p3, a3))):
    for i in range(64):
        heatmap = gaussian_smooth(panel_pos, panel_activ[:, i])
        ax[i + first_axis].imshow(heatmap, extent=(5, 295, 5, 295))

# 64 -> 4 forcing

In [None]:
# Width-4 network; `trial` is not set in this cell and must already exist.
model_name = 'nav_poster_netstructure/nav_pdistal_width4batch200'
model, obs_rms, kwargs = load_model_and_env(model_name, trial)

# 1. 200 random initial condition episodes, forced along the width-64 actions
all_ep_copy4 = []
for i in range(200):
    # Bind i as a default so the callable keeps this episode's action list
    # even if it is invoked after the loop variable has moved on.
    copied_actions = lambda step, i=i: saved_actions_64[i][step]
    ep = forced_action_evaluate(model, obs_rms, env_kwargs=kwargs,
                                num_episodes=1, data_callback=poster_data_callback, seed=i,
                                with_activations=True, forced_actions=copied_actions)
    all_ep_copy4.append(ep)
eps_copy4 = stack_all_ep(all_ep_copy4)

# 2. Rim-start episodes, forced along the width-64 rim actions
all_ep_rimcopy4 = []
for i in range(len(start_points)):
    kw = kwargs.copy()
    copied_actions = lambda step, i=i: saved_actions_rim64[i][step]

    kw['fixed_reset'] = [start_points[i].copy(), start_angles[i].copy()]
    ep = forced_action_evaluate(model, obs_rms, seed=0, num_episodes=1,
                                env_kwargs=kw, data_callback=poster_data_callback,
                                with_activations=True, forced_actions=copied_actions)
    all_ep_rimcopy4.append(ep)
eps_rimcopy4 = stack_all_ep(all_ep_rimcopy4)


# 3. 200 random initial condition episodes, width-4 network choosing its own actions
all_ep_4 = []
for i in range(200):
    ep = forced_action_evaluate(model, obs_rms, env_kwargs=kwargs,
                                num_episodes=1, data_callback=poster_data_callback, seed=i,
                                with_activations=True)
    all_ep_4.append(ep)
eps_4 = stack_all_ep(all_ep_4)

# 4. Rim-start episodes, width-4 network choosing its own actions
all_ep_rim4 = []
for i in range(len(start_points)):
    kw = kwargs.copy()
    kw['fixed_reset'] = [start_points[i].copy(), start_angles[i].copy()]
    ep = forced_action_evaluate(model, obs_rms, seed=0, num_episodes=1,
                                env_kwargs=kw, data_callback=poster_data_callback,
                                with_activations=True)
    all_ep_rim4.append(ep)
eps_rim4 = stack_all_ep(all_ep_rim4)

In [None]:
## 4 -> 64 Forcing

# Reload the width-64 network under separate names so the width-4 `model`
# stays available. Note that `kwargs` is overwritten with the width-64 env kwargs.
model_name64 = 'nav_poster_netstructure/nav_pdistal_width64batch200'
model64, obs_rms64, kwargs = load_model_and_env(model_name64, trial)

# Action sequences the width-4 network chose on its own
saved_actions_rim4 = [ep['actions'] for ep in all_ep_rim4]
saved_actions_4 = [ep['actions'] for ep in all_ep_4]


# 5. 200 random initial condition episodes, forced along the width-4 actions
all_ep_copy64 = []
for i in range(200):
    # Bind i as a default so the callable keeps this episode's action list
    # even if it is invoked after the loop variable has moved on.
    copied_actions = lambda step, i=i: saved_actions_4[i][step]
    ep = forced_action_evaluate(model64, obs_rms64, env_kwargs=kwargs,
                                num_episodes=1, data_callback=poster_data_callback, seed=i,
                                with_activations=True, forced_actions=copied_actions)
    all_ep_copy64.append(ep)
eps_copy64 = stack_all_ep(all_ep_copy64)

# 6. Rim-start episodes, forced along the width-4 rim actions
all_ep_rimcopy64 = []
for i in range(len(start_points)):
    kw = kwargs.copy()
    copied_actions = lambda step, i=i: saved_actions_rim4[i][step]

    kw['fixed_reset'] = [start_points[i].copy(), start_angles[i].copy()]
    ep = forced_action_evaluate(model64, obs_rms64, seed=0, num_episodes=1,
                                env_kwargs=kw, data_callback=poster_data_callback,
                                with_activations=True, forced_actions=copied_actions)
    all_ep_rimcopy64.append(ep)
eps_rimcopy64 = stack_all_ep(all_ep_rimcopy64)

## Hallucinations

Note that sometimes a cell which primarily shows goal specificity will "hallucinate" when moving in a path to a corner without the goal. Interestingly, not all cells do this in the 64 cell case, but many do. The ones that don't are actually far more rare, but those are probably the interesting ones that are actually learning something more specific and able to incorporate information better.

The 4 width network takes far more paths that induce these hallucinations. If we take the pathways from the 64 width network and feed them to the 4 width, we reduce the hallucinations. We can eliminate them completely by filtering out episodes where the path taken is not direct

Questions: 
* Why are certain nodes less prone to hallucinations?
    * Could simply be that there is agreement between activation near the platform and near the hallucination area (blue-blue or red-red)
    * We could probably classify this by the difference of hallucinating activations separated vs activations with those paths removed
* What does knowing about these hallucinations tell us?
    * Maybe it tells us something about why the paths taken influence activations?
    * **In relation to what we can do to classify the types of node representations... This tells us that the representations are highly dependent on paths taken. Maybe it only makes sense to classify with specific pathways - but now it is harder to name what these certain cells are doing? Maybe we need a multi-classifying system, with different combinations of classification along different pathways?**
* Can we modify the paths taken to eliminate the hallucinations? 
* Can we kick activations in a way as to "convince" the agent that the platform is not there?

Maybe instead it makes sense to classify a node's propensity for hallucination as it instead having a bias towards representing something about where the network thinks it is in relation to the goal. Nodes with a resistance to hallucination are really more static features instead. **We could question whether some representations are better for learning other tasks or not**

In [None]:

fig, ax = pplt.subplots(nrows=2, ncols=4, wspace=(0, 5, 0), hspace=0)

# Shared-layer heatmaps of the 4 units, rim starts.
# Columns 0-1: forced along width-64 actions; columns 2-3: the net's own actions.
for source_eps, col_offset in ((eps_rimcopy4, 0), (eps_rim4, 2)):
    cleaned = clean_eps(source_eps)
    p, a = cleaned['pos'], cleaned['activ']
    for n in range(4):
        i, j = divmod(n, 2)
        j += col_offset
        heatmap = gaussian_smooth(p, a[:, n])
        ax[i, j].imshow(heatmap, extent=(5, 295, 5, 295))

In [None]:

fig, ax = pplt.subplots(nrows=2, ncols=4, wspace=(0, 5, 0), hspace=0)

# Same layout as the shared-layer figure, but for actor layer 0.
# Columns 0-1: forced along width-64 actions; columns 2-3: the net's own actions.
for source_eps, col_offset in ((eps_rimcopy4, 0), (eps_rim4, 2)):
    cleaned = clean_eps(source_eps, activations_key='actor_activations',
                        activations_layer=0)
    p, a = cleaned['pos'], cleaned['activ']
    for n in range(4):
        i, j = divmod(n, 2)
        j += col_offset
        heatmap = gaussian_smooth(p, a[:, n])
        ax[i, j].imshow(heatmap, extent=(5, 295, 5, 295))

In [None]:

fig, ax = pplt.subplots(nrows=2, ncols=4, wspace=(0, 5, 0), hspace=0)

# Same layout again, for actor layer 1.
# Columns 0-1: forced along width-64 actions; columns 2-3: the net's own actions.
for source_eps, col_offset in ((eps_rimcopy4, 0), (eps_rim4, 2)):
    cleaned = clean_eps(source_eps, activations_key='actor_activations',
                        activations_layer=1)
    p, a = cleaned['pos'], cleaned['activ']
    for n in range(4):
        i, j = divmod(n, 2)
        j += col_offset
        heatmap = gaussian_smooth(p, a[:, n])
        ax[i, j].imshow(heatmap, extent=(5, 295, 5, 295))

In [None]:
# Heatmaps of the 4 shared-layer units for random-start episodes forced
# along the width-64 actions.
cleaned = clean_eps(eps_copy4)
p, a = cleaned['pos'], cleaned['activ']

fig, ax = pplt.subplots(nrows=2, ncols=2)
for i in range(4):
    unit_activ = a[:, i]
    heatmap = gaussian_smooth(p, unit_activ)
    ax[i].imshow(heatmap, extent=(5, 295, 5, 295))

In [None]:
# Scatter every visited position of the width-64 rim episodes (nothing pruned)
cleaned = clean_eps(eps_rim64, prune_first=0)
p = cleaned['pos']
plt.scatter(p[:, 0], p[:, 1], alpha=0.2)

In [None]:
# Directness of each all_ep_rim64 trajectory: among the steps that change the
# distance to the goal (the final step is ignored), the fraction that reduce it.
# There are 7 trajectories that do not use the direct approach out of 116.

goal_loc = np.array([250, 70])

directnesses = []
for rim_episode in all_ep_rim64:
    p = np.vstack(rim_episode['data']['pos'])
    d = np.sqrt(np.square(p - goal_loc).sum(axis=1))
    dist_changes = np.diff(d)
    counted = dist_changes[:-1]
    directness = (counted < 0).sum() / (counted != 0).sum()
    directnesses.append(directness)

In [None]:
# Episodes whose width-64 trajectory was not direct (directness < 0.9)
ep_idxs = np.argwhere(np.array(directnesses) < 0.9).squeeze()

# Partition the forced width-4 episodes by that index set.
# (Swap in all_ep_rim64 here to look at the unforced width-64 episodes instead.)
ep1 = []  # non halluc
ep2 = []  # halluc
for i, forced_ep in enumerate(all_ep_rimcopy4):
    (ep2 if i in ep_idxs else ep1).append(forced_ep)

ep1 = clean_eps(stack_all_ep(ep1), prune_first=0)
ep2 = clean_eps(stack_all_ep(ep2), prune_first=0)

# Top row: visited positions; bottom row: heatmap of unit 1
fig, ax = pplt.subplots(ncols=2, nrows=2)
for panel, group in enumerate((ep1, ep2)):
    ax[panel].scatter(group['pos'][:, 0], group['pos'][:, 1], alpha=0.2)

for panel, group in ((2, ep1), (3, ep2)):
    heatmap = gaussian_smooth(group['pos'], group['activ'][:, 1])
    ax[panel].imshow(heatmap, extent=(5, 295, 5, 295))

In [None]:
# Redraw the unit-1 heatmaps of both groups on axes 2 and 3 of the current `ax`
for panel, group in ((2, ep1), (3, ep2)):
    heatmap = gaussian_smooth(group['pos'], group['activ'][:, 1])
    ax[panel].imshow(heatmap, extent=(5, 295, 5, 295))

In [None]:
# Indices of width-64 rim episodes with directness below 0.9
ep_idxs = np.argwhere(np.array(directnesses) < 0.9).squeeze()

ep1 = []  # non halluc
ep2 = []  # halluc
for i, rim_episode in enumerate(all_ep_rim64):
    (ep2 if i in ep_idxs else ep1).append(rim_episode)

ep1 = clean_eps(stack_all_ep(ep1), prune_first=5)
ep2 = clean_eps(stack_all_ep(ep2), prune_first=5)

# Two 8x8 unit grids side by side, separated by a wider gap after column 8
fig, ax = pplt.subplots(ncols=16, nrows=8, wspace=(0,) * 7 + (10,) + (0,) * 7)

# Left grid: direct episodes; right grid: indirect episodes
for col_offset, group in ((0, ep1), (8, ep2)):
    for n in range(64):
        i, j = divmod(n, 8)
        heatmap = gaussian_smooth(group['pos'], group['activ'][:, n])
        ax[i, j + col_offset].imshow(heatmap, extent=(5, 295, 5, 295))

In [None]:
# Indices of width-64 rim episodes with directness below 0.9
ep_idxs = np.argwhere(np.array(directnesses) < 0.9).squeeze()

ep1 = []  # non halluc
ep2 = []  # halluc
for i, rim_episode in enumerate(all_ep_rim64):
    (ep2 if i in ep_idxs else ep1).append(rim_episode)

# Same figure as the pruned variant, but with prune_first=0
ep1 = clean_eps(stack_all_ep(ep1), prune_first=0)
ep2 = clean_eps(stack_all_ep(ep2), prune_first=0)

# Two 8x8 unit grids side by side, separated by a wider gap after column 8
fig, ax = pplt.subplots(ncols=16, nrows=8, wspace=(0,) * 7 + (10,) + (0,) * 7)

# Left grid: direct episodes; right grid: indirect episodes
for col_offset, group in ((0, ep1), (8, ep2)):
    for n in range(64):
        i, j = divmod(n, 8)
        heatmap = gaussian_smooth(group['pos'], group['activ'][:, n])
        ax[i, j + col_offset].imshow(heatmap, extent=(5, 295, 5, 295))

In [None]:
# Split the forced width-4 rim episodes (left as raw, unstacked episodes)
# by whether the matching entry of `directnesses` is below 0.9.
ep_idxs = np.argwhere(np.array(directnesses) < 0.9).squeeze()

ep1 = []  # non halluc
ep2 = []  # halluc
for i, forced_ep in enumerate(all_ep_rimcopy4):
    (ep2 if i in ep_idxs else ep1).append(forced_ep)

In [None]:
# Overlay every ep2 trajectory on one axis, marking each start pose
fig, ax = pplt.subplots()
ax.format(xlim=[0, 300], ylim=[0, 300])
for i, halluc_ep in enumerate(ep2):
    p = np.vstack(halluc_ep['data']['pos'])
    angle = halluc_ep['data']['angle']
    draw_character(p[0], angle[0], ax=ax)
    ax.plot(p[:, 0], p[:, 1])

draw_box(ax=ax)

In [None]:
# Display the first element of ep2 for manual inspection
ep2[0]

In [None]:
# One axis per ep2 trajectory, with the character drawn at every step
fig, ax = pplt.subplots(nrows=2, ncols=4)
ax.format(xlim=[0, 300], ylim=[0, 300])
for i, halluc_ep in enumerate(ep2):
    p = np.vstack(halluc_ep['data']['pos'])
    angle = halluc_ep['data']['angle']
    for j, step_pos in enumerate(p):
        draw_character(step_pos, angle[j], ax=ax[i])
    ax[i].plot(p[:, 0], p[:, 1])
    draw_box(ax=ax[i])

In [None]:
goal_loc = np.array([250, 70])

# Directness of the width-4 network's own rim trajectories: among the steps that
# change the distance to the goal (final step ignored), the fraction that reduce it.
directnesses4 = []
for native_ep in all_ep_rim4:
    p = np.vstack(native_ep['data']['pos'])
    d = np.sqrt(np.square(p - goal_loc).sum(axis=1))
    dist_changes = np.diff(d)
    counted = dist_changes[:-1]
    directness = (counted < 0).sum() / (counted != 0).sum()
    directnesses4.append(directness)

ep_idxs = np.argwhere(np.array(directnesses4) < 0.9).squeeze()

# Split the width-64 episodes that were forced along those width-4 actions
ep1 = []  # non halluc
ep2 = []  # halluc
for i, forced_ep in enumerate(all_ep_rimcopy64):
    (ep2 if i in ep_idxs else ep1).append(forced_ep)

# One axis per indirect episode; the path line leaves out the final position
fig, ax = pplt.subplots(nrows=5, ncols=5)

ax.format(xlim=[0, 300], ylim=[0, 300])
for i, halluc_ep in enumerate(ep2):
    p = np.vstack(halluc_ep['data']['pos'])
    angle = halluc_ep['data']['angle']
    for j, step_pos in enumerate(p):
        draw_character(step_pos, angle[j], ax=ax[i])
    ax[i].plot(p[:-1, 0], p[:-1, 1])
    draw_box(ax=ax[i])

In [None]:
# Display ep2 for manual inspection
ep2

### Computing hallucination rates in 4 -> 64 forcing

In [None]:
# Directness of the width-4 network's own rim trajectories (all_ep_rim4): among
# the steps that change the distance to the goal (final step ignored), the
# fraction that reduce it.

goal_loc = np.array([250, 70])

directnesses4 = []
for native_ep in all_ep_rim4:
    p = np.vstack(native_ep['data']['pos'])
    d = np.sqrt(np.square(p - goal_loc).sum(axis=1))
    dist_changes = np.diff(d)
    counted = dist_changes[:-1]
    directness = (counted < 0).sum() / (counted != 0).sum()
    directnesses4.append(directness)

ep_idxs = np.argwhere(np.array(directnesses4) < 0.9).squeeze()

# Split the width-64 episodes forced along those width-4 actions
ep1 = []  # non halluc
ep2 = []  # halluc
for i, forced_ep in enumerate(all_ep_rimcopy64):
    (ep2 if i in ep_idxs else ep1).append(forced_ep)

ep1 = clean_eps(stack_all_ep(ep1), prune_first=0)
ep2 = clean_eps(stack_all_ep(ep2), prune_first=0)

# Row 0: visited positions (last one dropped); rows 1-2: heatmaps of units 58 and 33.
# Left column is the direct group, right column the indirect group.
fig, ax = pplt.subplots(ncols=2, nrows=3)
for panel, group in enumerate((ep1, ep2)):
    ax[panel].plot(group['pos'][:-1, 0], group['pos'][:-1, 1], alpha=0.2, c='blue2')

for row, unit in enumerate((58, 33)):
    heatmap1 = gaussian_smooth(ep1['pos'], ep1['activ'][:, unit])
    ax[2 * row + 2].imshow(heatmap1, extent=(5, 295, 5, 295))
    heatmap2, hasval2 = gaussian_smooth(ep2['pos'], ep2['activ'][:, unit], ret_hasval=True)
    ax[2 * row + 3].imshow(heatmap2, extent=(5, 295, 5, 295))

In [None]:
# Per unit: mean absolute difference between the ep1 and ep2 heatmaps, taken
# only over grid cells where the ep2 heatmap actually has data.
halucination_diffs = []
for i in range(64):
    heatmap1 = gaussian_smooth(ep1['pos'], ep1['activ'][:, i])
    heatmap2, hasval2 = gaussian_smooth(ep2['pos'], ep2['activ'][:, i], ret_hasval=True)

    idxs = hasval2 == 1
    abs_gap = np.abs(heatmap1[idxs] - heatmap2[idxs])
    halucination_diffs.append(abs_gap.sum() / idxs.sum())

In [None]:
# Index of the largest entry in halucination_diffs
np.argmax(halucination_diffs)

In [None]:
# Index of the smallest entry in halucination_diffs
np.argmin(halucination_diffs)

In [None]:
# Display the full list of values
halucination_diffs

In [None]:
# Two 8x8 unit grids side by side, separated by a wider gap after column 8
fig, ax = pplt.subplots(ncols=16, nrows=8, wspace=(0,) * 7 + (10,) + (0,) * 7)

goal_loc = np.array([250, 70])


def _approach_fractions(episodes):
    '''Per episode: among steps that change the distance to goal_loc (final
    step ignored), the fraction that reduce it'''
    fractions = []
    for episode in episodes:
        track = np.vstack(episode['data']['pos'])
        dist = np.sqrt(np.square(track - goal_loc).sum(axis=1))
        counted = np.diff(dist)[:-1]
        fractions.append((counted < 0).sum() / (counted != 0).sum())
    return fractions


def _split_on_indirect(episodes, indirect_idxs):
    '''Return (direct, indirect) episode lists, preserving episode order'''
    direct, indirect = [], []
    for k, episode in enumerate(episodes):
        (indirect if k in indirect_idxs else direct).append(episode)
    return direct, indirect


# Left grid: width-64 net on its own rim trajectories, direct episodes only
directnesses = _approach_fractions(all_ep_rim64)
ep_idxs = np.argwhere(np.array(directnesses) < 0.9).squeeze()

ep1, ep2 = _split_on_indirect(all_ep_rim64, ep_idxs)
ep1 = clean_eps(stack_all_ep(ep1), prune_first=0)
ep2 = clean_eps(stack_all_ep(ep2), prune_first=0)

for n in range(64):
    i, j = divmod(n, 8)
    heatmap = gaussian_smooth(ep1['pos'], ep1['activ'][:, n])
    ax[i, j].imshow(heatmap, extent=(5, 295, 5, 295))

# Right grid: width-64 net forced along width-4 actions, keeping only episodes
# whose width-4 source trajectory was direct
directnesses4 = _approach_fractions(all_ep_rim4)
ep_idxs = np.argwhere(np.array(directnesses4) < 0.9).squeeze()

ep1, ep2 = _split_on_indirect(all_ep_rimcopy64, ep_idxs)
ep1 = clean_eps(stack_all_ep(ep1), prune_first=0)
ep2 = clean_eps(stack_all_ep(ep2), prune_first=0)

for n in range(64):
    i, j = divmod(n, 8)
    heatmap = gaussian_smooth(ep1['pos'], ep1['activ'][:, n])
    ax[i, j + 8].imshow(heatmap, extent=(5, 295, 5, 295))

In [None]:
# Two 8x8 unit grids side by side, separated by a wider gap after column 8.
# Heatmaps are clipped to [0, 1] and share one colour scale.
fig, ax = pplt.subplots(ncols=16, nrows=8, wspace=(0,) * 7 + (10,) + (0,) * 7)

goal_loc = np.array([250, 70])


def _approach_fractions(episodes):
    '''Per episode: among steps that change the distance to goal_loc (final
    step ignored), the fraction that reduce it'''
    fractions = []
    for episode in episodes:
        track = np.vstack(episode['data']['pos'])
        dist = np.sqrt(np.square(track - goal_loc).sum(axis=1))
        counted = np.diff(dist)[:-1]
        fractions.append((counted < 0).sum() / (counted != 0).sum())
    return fractions


def _split_on_indirect(episodes, indirect_idxs):
    '''Return (direct, indirect) episode lists, preserving episode order'''
    direct, indirect = [], []
    for k, episode in enumerate(episodes):
        (indirect if k in indirect_idxs else direct).append(episode)
    return direct, indirect


# Left grid: width-64 net on its own rim trajectories, direct episodes only
directnesses = _approach_fractions(all_ep_rim64)
ep_idxs = np.argwhere(np.array(directnesses) < 0.9).squeeze()

ep1, ep2 = _split_on_indirect(all_ep_rim64, ep_idxs)
ep1 = clean_eps(stack_all_ep(ep1), prune_first=0)
ep2 = clean_eps(stack_all_ep(ep2), prune_first=0)

for n in range(64):
    i, j = divmod(n, 8)
    heatmap = np.clip(gaussian_smooth(ep1['pos'], ep1['activ'][:, n]), 0, 1)
    ax[i, j].imshow(heatmap, extent=(5, 295, 5, 295), vmin=0, vmax=1)

# Right grid: width-64 net forced along width-4 actions, keeping only episodes
# whose width-4 source trajectory was direct
directnesses4 = _approach_fractions(all_ep_rim4)
ep_idxs = np.argwhere(np.array(directnesses4) < 0.9).squeeze()

ep1, ep2 = _split_on_indirect(all_ep_rimcopy64, ep_idxs)
ep1 = clean_eps(stack_all_ep(ep1), prune_first=0)
ep2 = clean_eps(stack_all_ep(ep2), prune_first=0)

for n in range(64):
    i, j = divmod(n, 8)
    heatmap = np.clip(gaussian_smooth(ep1['pos'], ep1['activ'][:, n]), 0, 1)
    ax[i, j + 8].imshow(heatmap, extent=(5, 295, 5, 295), vmin=0, vmax=1)

In [None]:
# Scratch check of how to merge two dicts. `a + b` raises TypeError because
# dict does not support `+`, so merge by unpacking (keys in b win on conflict).
a = {'five': 5, 'six': 6}
b = {'seven': 7}
{**a, **b}

In [None]:
# Split the forced width-4 rim episodes (left as raw, unstacked episodes)
# by whether the matching entry of `directnesses` is below 0.9.
ep_idxs = np.argwhere(np.array(directnesses) < 0.9).squeeze()

ep1 = []  # non halluc
ep2 = []  # halluc
for i, forced_ep in enumerate(all_ep_rimcopy4):
    (ep2 if i in ep_idxs else ep1).append(forced_ep)

In [None]:
# Gather layer-0 shared activations and positions from every episode in
# all_ep_randact64, then draw one heatmap per unit.
layer0_chunks = []
pos_chunks = []
for rand_ep in all_ep_randact64:
    ep_stacked = stack_activations(rand_ep['activations'])
    layer0_chunks.append(ep_stacked['shared_activations'][0, :, :])
    pos_chunks.append(rand_ep['data']['pos'])
all_activ = torch.vstack(layer0_chunks)
p = np.vstack(pos_chunks)

fig, ax = pplt.subplots(nrows=8, ncols=8)

for i in range(64):
    activ = all_activ[:, i]
    heatmap = gaussian_smooth(p, activ)
    ax[i].imshow(heatmap, extent=(5, 295, 5, 295))

In [None]:
# Scratch cell: reload trial 0 of `model_name` and run 5 episodes from one fixed
# start pose, with actions supplied by `action_randomizer`.
# NOTE(review): `points`, `angles`, `action_randomizer` and the index `i` are not
# defined in this cell; presumably they are left over from earlier cells - confirm
# before running.
model, obs_rms, kwargs = load_model_and_env(model_name, 0)
kw = kwargs.copy()
kw['fixed_reset'] = [points[i], angles[i]]
ep = forced_action_evaluate(model, obs_rms, forced_actions=action_randomizer, 
                            seed=i*5, num_episodes=5, env_kwargs=kw, data_callback=poster_data_callback,
                            with_activations=True)

# Collect trajectories for clustering

In [None]:
# Keep only the random-start episodes with directness above 0.9 and draw
# one heatmap per unit from them.
d = compute_directness(all_ep_64)
idxs = d > 0.9
d_ep = []
for i, candidate_ep in enumerate(all_ep_64):
    if idxs[i]:
        d_ep.append(candidate_ep)
cleaned = clean_eps(stack_all_ep(d_ep))

fig, ax = pplt.subplots(nrows=8, ncols=8)
for i in range(64):
    unit_activ = cleaned['activ'][:, i]
    heatmap = gaussian_smooth(cleaned['pos'], unit_activ)
    ax[i].imshow(heatmap, extent=(5, 295, 5, 295))

In [None]:
# Clean the stacked width-64 rim episodes with nothing pruned and with the
# save_inview / save_seen options switched off.
cleaned = clean_eps(stack_all_ep(all_ep_rim64), prune_first=0, save_inview=False,
                   save_seen=False)

In [None]:
# Write `cleaned` to disk; the context manager closes (and flushes) the file,
# which a bare open() inside the call never did.
with open('data/cleaned', 'wb') as out_file:
    pickle.dump(cleaned, out_file)

In [None]:
# Network widths and number of trials per width to sweep over
widths = [2, 3, 4, 8, 16, 32, 64]
num_trials = 3

# Start poses around the rim: points at coordinate 5 or 295 along each wall,
# spaced every `step_size`, with the angle of the vector pointing at (150, 150).
WINDOW_SIZE = (300, 300)
step_size = 10.
xs = np.arange(step_size, WINDOW_SIZE[0], step_size)
ys = np.arange(step_size, WINDOW_SIZE[1], step_size)

# Same ordering as two nested loops: first the y=5/295 walls, then x=5/295
rim_xy = [(x, y) for x in xs for y in (5., 295.)]
rim_xy.extend((x, y) for y in ys for x in (5, 295))

start_points = []
start_angles = []
for x, y in rim_xy:
    point = np.array([x, y])
    angle = np.arctan2(150 - y, 150 - x)
    start_points.append(point)
    start_angles.append(angle)

start_points = np.vstack(start_points)


def filter_all_ep_directness(all_ep, bound=0.9):
    '''Keep only the episodes whose directness is strictly above `bound`

    all_ep: list of episodes, scored one-to-one by compute_directness
    bound: directness threshold (was previously ignored in favour of a
        hard-coded 0.9; the default keeps that behaviour)

    Returns the surviving episodes as a list, in their original order.
    '''
    d = compute_directness(all_ep)
    idxs = d > bound
    d_ep = [ep for i, ep in enumerate(all_ep) if idxs[i]]
    return d_ep

# For every network width and trial: run one episode per rim start pose, then
# pickle the cleaned data twice - all episodes, and only the direct ones.
for width in tqdm(widths):
    for trial in range(num_trials):
        model_name = f'nav_poster_netstructure/nav_pdistal_width{width}batch200'
        model, obs_rms, kwargs = load_model_and_env(model_name, trial)

        all_ep = []
        for i in range(len(start_points)):
            kw = kwargs.copy()
            kw['fixed_reset'] = [start_points[i].copy(), start_angles[i].copy()]
            ep = forced_action_evaluate(model, obs_rms, seed=0, num_episodes=1,
                                        env_kwargs=kw, data_callback=poster_data_callback,
                                        with_activations=True)
            all_ep.append(ep)

        all_ep_f = filter_all_ep_directness(all_ep)
        eps_f = clean_eps(stack_all_ep(all_ep_f), prune_first=0, save_inview=False, save_seen=False)
        eps = clean_eps(stack_all_ep(all_ep), prune_first=0, save_inview=False, save_seen=False)

        # Context managers close each file; the bare open() calls left two
        # handles dangling on every iteration of the sweep.
        with open(f'data/pdistal_rim_heatmap/width{width}_t{trial}', 'wb') as out_file:
            pickle.dump(eps, out_file)
        with open(f'data/pdistal_rim_heatmap/width{width}_filt_t{trial}', 'wb') as out_file:
            pickle.dump(eps_f, out_file)