# Embedding Space Analysis



## Set Up

In [77]:
import sys 
sys.path.append('..')
import torch 
import json 
from src.decision_transformer.utils import (
    load_decision_transformer,
    # get_max_len_from_model_type,
)
from src.environments.registration import register_envs
from src.environments.environments import make_env

register_envs()


In [78]:
from src.config import EnvironmentConfig

model_path = "../models/MiniGrid-MemoryS7FixedStart-v0/WorkingModel.pt"
state_dict = torch.load(model_path)

env_config = state_dict["environment_config"]
env_config = EnvironmentConfig(**json.loads(env_config))

env = make_env(env_config, seed=4200, idx=0, run_name="dev")
env = env()

dt = load_decision_transformer(
    model_path, env, tlens_weight_processing=True
)



To do:

- [x] Get labels for each vector. 
- [x] Get weight l2 norms, plot.
- [x] Get cosine similarity matrix, plot
- [ ] Cluster it -> interpret clusters. 


## Clustering of Embedding Vectors

In [79]:
embedding = dt.state_embedding.weight.detach().T
print(embedding.shape)

torch.Size([980, 256])


In [114]:
# get the labels. 
from src.streamlit_app.constants import SPARSE_CHANNEL_NAMES
import itertools 

all_index_labels = [
    SPARSE_CHANNEL_NAMES,
    list(range(7)),
    list(range(7)),
]
indices = list(itertools.product(*all_index_labels))
index_labels = ["{0}, ({1},{2})".format(*index) for index in indices]
print(index_labels[:4])

# extract just the channels
channel_labels = [label.split(",")[0] for label in index_labels]
print(channel_labels[:4])

['unseen, (0,0)', 'unseen, (0,1)', 'unseen, (0,2)', 'unseen, (0,3)']
['unseen', 'unseen', 'unseen', 'unseen']


In [115]:
import plotly.express as px
# plot the l2 norm of each vector
norms = torch.norm(embedding, dim=1)
fig = px.strip(x = channel_labels,
               y=norms, 
               labels={"y": "L2 Norm", "x": "Channel"}, 
                color = channel_labels,
                hover_name=index_labels,
                orientation="v")
# hide the legend
fig.update_layout(showlegend=False)
fig.show()

In [116]:
# make a df with the l2 norms
import pandas as pd
df_norms = pd.DataFrame({"L2 Norm": norms, "Index": index_labels})
df_norms[df_norms.Index.str.contains("ball, (0,6)", regex=False)]

Unnamed: 0,L2 Norm,Index
300,2.710176,"ball, (0,6)"


This is interesting. It shows us what is getting represented in out observations:
1. Keys/Balls/Green
2. Empty/Unseen/Wall/grey/red 

In [117]:
# randomly initialize 980 vectors of length 256 
# and plot the l2 norm of each vector
random_embedding = torch.randn(980, 256)
norms = torch.norm(random_embedding, dim=1) * 0.02
# change the std
fig = px.strip(y=norms, labels={"x": "L2 Norm"},
                hover_name=index_labels,
                title="L2 Norm of Randomly Initialized Vectors",
                orientation="v",
                template="plotly_dark")
fig.show() # sqrt of 256 is 16, so the l2 norm is around 16. Independent of std? why?

In [173]:
# now we can get the cossine similarity matrix. but first let's filter for the channels we care about
# channels_we_care_about = ["key", "ball", "unseen"]#, "empty", "green", "grey", "red"]
channels_we_care_about = ["unseen"]#, "empty", "green", "grey", "red"]
index_mask = [label in channels_we_care_about for label in channel_labels]
print(sum(index_mask)) # 7*7*7 = 343 channels

49


In [174]:
restricted_embeddings = embedding[index_mask]
restricted_labels = [label for label, mask in zip(index_labels, index_mask) if mask]
print(restricted_embeddings.shape)
print(len(restricted_labels))
cosine_similarity_matrix = cosine_similarity(restricted_embeddings)

torch.Size([49, 256])
49


In [175]:
px.ecdf(np.abs(cosine_similarity_matrix.flatten()))

In [176]:
# get cosine similarity matrix
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd


df = pd.DataFrame(cosine_similarity_matrix, columns=restricted_labels, index=restricted_labels)
print(cosine_similarity_matrix.shape)

# plot the cosine similarity matrix
fig = fig = px.imshow(
        df,
        color_continuous_scale="RdBu",
        # title="Pairwise Cosine Similarity Heatmap",
        color_continuous_midpoint=0.0,
        labels={"color": "Cosine Similarity"},
    )
fig.update_xaxes(
    tickmode="array",
    tickvals=list(range(len(restricted_labels))),
    ticktext=restricted_labels,
    showgrid=False,
)
fig.update_yaxes(
    tickmode="array",
    tickvals=list(range(len(restricted_labels))),
    ticktext=restricted_labels,
    showgrid=False,
)
fig.update_xaxes(
    visible=False,
)
fig.update_yaxes(
    visible=False,
)

# hide legend
fig.update_layout(showlegend=False)
# make it much larger
fig.update_layout(
    autosize=False,
    width=1000,
    height=1000,
)

# hide colorbar
fig.update_layout(coloraxis_showscale=False)

fig.show()

(49, 49)


In [177]:
random_cosine_similarity_matrix = cosine_similarity(random_embedding[:343])
random_cosine_similarity_matrix_df = pd.DataFrame(random_cosine_similarity_matrix, columns=restricted_labels, index=restricted_labels)


# plot the cosine similarity matrix
fig = px.imshow(
        random_cosine_similarity_matrix_df,
        color_continuous_scale="RdBu",
        title="Random Pairwise Cosine Similarity Heatmap",
        color_continuous_midpoint=0.0,
        labels={"color": "Cosine Similarity"},
    )
fig.update_xaxes(
    tickmode="array",
    tickvals=list(range(len(restricted_labels))),
    ticktext=restricted_labels,
    showgrid=False,
)
fig.update_yaxes(
    tickmode="array",
    tickvals=list(range(len(restricted_labels))),
    ticktext=restricted_labels,
    showgrid=False,
)
fig.update_xaxes(
    visible=False,
)
fig.update_yaxes(
    visible=False,
)
fig.show()

ValueError: Shape of passed values is (343, 343), indices imply (49, 49)

In [178]:
# flatten the cosine similarity matrix and plot the distribution
# it's a pandas dataframe so we can go wide to long
cosine_similarity_matrix = pd.melt(df, ignore_index=False).reset_index()
# rename the columns
cosine_similarity_matrix.columns = ["channel_1", "channel_2", "cosine_similarity"]
# remove the diagonal
cosine_similarity_matrix = cosine_similarity_matrix[cosine_similarity_matrix["channel_1"] != cosine_similarity_matrix["channel_2"]]
# remove any values less than 0.05
cosine_similarity_matrix = cosine_similarity_matrix[cosine_similarity_matrix["cosine_similarity"].abs() > 0.05]
# remove any values equal to 1
# cosine_similarity_matrix = cosine_similarity_matrix[cosine_similarity_matrix["cosine_similarity"] != 1]
# cosine_similarity_matrix = cosine_similarity_matrix[cosine_similarity_matrix != 0]


# add a column for if the channels are the same or not
cosine_similarity_matrix["same_channel"] = cosine_similarity_matrix["channel_1"].str.split(",").str[0] == cosine_similarity_matrix["channel_2"].str.split(",").str[0]

# add a column for if the positions are the same or not
cosine_similarity_matrix["same_position"] = cosine_similarity_matrix["channel_1"].str.split("(").str[1] == cosine_similarity_matrix["channel_2"].str.split("(").str[1]

# combine the two so we can have different rows for each combination
cosine_similarity_matrix["category"] = cosine_similarity_matrix["same_channel"].astype(str) + ", " + cosine_similarity_matrix["same_position"].astype(str)

# merge df_norms to get l2 norm of either vector
cosine_similarity_matrix = cosine_similarity_matrix.merge(df_norms, left_on="channel_1", right_on="Index")
cosine_similarity_matrix = cosine_similarity_matrix.merge(df_norms, left_on="channel_2", right_on="Index")
cosine_similarity_matrix = cosine_similarity_matrix.drop(columns=["Index_x", "Index_y"])
# rename the columns
cosine_similarity_matrix.columns = ["channel_1", "channel_2", "cosine_similarity", "same_channel", "same_position", "category", "l2_norm_1", "l2_norm_2"]

fig = px.strip(
    cosine_similarity_matrix,
    x="cosine_similarity",
    title="Embedding Cosine Similarity Distribution by Channel/Position Parity",
    hover_data=["channel_1", "channel_2", "l2_norm_1", "l2_norm_2"],
    color="category",
    labels={"category": "Same Channel, Same Position"},
    template="plotly_dark",
)
fig.show()

# fig = px.strip(
#     cosine_similarity_matrix,
#     x="cosine_similarity",
#     title="Cosine Similarity Distribution",
#     hover_data=["channel_1", "channel_2"],
#     color="same_position",
#     template="plotly_dark",
# )
# fig.show()

Observations:
- Key/Balls representations are interesting. Specific positions appear to be highly aligned. 
- Correlated observations tend to be aligned. 

In [179]:
# get cosine similarity matrix
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
from scipy.cluster import hierarchy
import numpy as np 

df = pd.DataFrame(cosine_similarity(restricted_embeddings), columns=restricted_labels, index=restricted_labels)

data_array = df.to_numpy()
linkage = hierarchy.linkage(data_array)
dendrogram = hierarchy.dendrogram(
    linkage, no_plot=True, color_threshold=-np.inf
)
reordered_ind = dendrogram["leaves"]
# reorder df by ind
df = df.iloc[reordered_ind, reordered_ind]
# data_array = df.to_numpy()

# plot the cosine similarity matrix
fig = fig = px.imshow(
        df,
        color_continuous_scale="RdBu",
        title="Reordered - Pairwise Cosine Similarity Heatmap",
        color_continuous_midpoint=0.0,
        labels={"color": "Cosine Similarity"},
    )
fig.update_xaxes(
    tickmode="array",
    tickvals=list(range(len(restricted_labels))),
    ticktext=restricted_labels,
    showgrid=False,
)
fig.update_yaxes(
    tickmode="array",
    tickvals=list(range(len(restricted_labels))),
    ticktext=restricted_labels,
    showgrid=False,
)
fig.update_xaxes(
    visible=False,
)
fig.update_yaxes(
    visible=False,
)
fig.show()

In [180]:
# the most pandas idiomatic way to filter for a string is to use .str.contains()
# this will return a boolean series that we can use to filter the dataframe
# we can also use the ~ operator to negate the boolean series
# this will return all rows that do not contain the string

embedding_string = "ball, (1,2)"
criteria_one = cosine_similarity_matrix["channel_1"].str.contains(embedding_string, regex=False) | \
    cosine_similarity_matrix["channel_2"].str.contains(embedding_string, regex=False)
print(sum(criteria_one))
criteria_two = cosine_similarity_matrix["cosine_similarity"].abs() > 0.5
print(sum(criteria_two))
mask = criteria_one & criteria_two
print(sum(mask))
cosine_similarity_matrix[mask].sort_values(by="cosine_similarity", ascending=False)


0
32
0


Unnamed: 0,channel_1,channel_2,cosine_similarity,same_channel,same_position,category,l2_norm_1,l2_norm_2


In [181]:
# plot histogram of cosine similarity values
fig = px.strip(
    cosine_similarity_matrix[mask],
    x="cosine_similarity",
    title="Cosine Similarity Distribution",
    template="plotly_dark",
    hover_data=["channel_1", "channel_2", "l2_norm_1", "l2_norm_2"],
)
fig.show()

# Compositionality Experiment

What would be useful here is to pick an embedding and then get back everything that has a high norm with that embedding and to have that highlighted on the graph. 



In [61]:
# vector arithmetic experiment.

# get embedding vectors for ball, (5,2), ball (5,6), key (5,2), key (5,6)
ball_5_2 = restricted_embeddings[restricted_labels.index("ball, (5,2)")] # ball right, agent at start
ball_5_6 = restricted_embeddings[restricted_labels.index("ball, (5,6)")] # ball right, agent at end
key_5_2 = restricted_embeddings[restricted_labels.index("key, (5,2)")] # key right, agent at start
key_5_6 = restricted_embeddings[restricted_labels.index("key, (5,6)")] # key right, agent at end
ball_1_2 = restricted_embeddings[restricted_labels.index("ball, (1,2)")] # ball left, agent at start
ball_1_6 = restricted_embeddings[restricted_labels.index("ball, (1,6)")] # ball left, agent at end
key_1_2 = restricted_embeddings[restricted_labels.index("key, (1,2)")] # key left, agent at start
key_1_6 = restricted_embeddings[restricted_labels.index("key, (1,6)")] # key left, agent at end

# print magnitude of each vector
for vector in [ball_5_2, ball_5_6, key_5_2, key_5_6, ball_1_2, ball_1_6, key_1_2, key_1_6]:
    print(torch.norm(vector).item(), end = ", ")

# hypothesis is that key, (5,2) - key, (5,6) = ball, (5,2) - ball, (5,6)
start_minus_end_hyp_1 = ball_5_2 - ball_5_6 # start not end via ball on right
start_minus_end_hyp_2 = key_5_2 - key_5_6 # start not end via key on right
start_minus_end_hyp_3 = ball_1_2 - ball_1_6 # start not end via ball on left
start_minus_end_hyp_4 = key_1_2 - key_1_6 # start not end via key on left

# These are a positive control, there shouldn't be any semantic differences. 
ball_right_hyp_1 = ball_5_2 - key_5_2 # ball right via right position at start
ball_right_hyp_2 = key_1_2 - ball_1_2 # ball right via left position at start
ball_right_hyp_3 = ball_5_6 - key_5_6 # ball right via right position at end
ball_right_hyp_4 = key_1_6 - ball_1_6 # ball right via left position at end


# get the cosine similarity between each combination of vectors, concatenate and then do cosine similarity
# between the concatenated vectors
hypotheses = torch.stack([start_minus_end_hyp_1, start_minus_end_hyp_2, start_minus_end_hyp_3, start_minus_end_hyp_4,
    ball_right_hyp_1, ball_right_hyp_2, ball_right_hyp_3, ball_right_hyp_4], dim=0)
x_labels = [
    "ball, (5,2) - ball, (5,6)", 
    "key, (5,2) - key, (5,6)",
    "ball, (1,2) - ball, (1,6)", 
    "key, (1,2) - key, (1,6)",
    "ball, (5,2) - key, (5,2)", 
    "key, (1,2) - ball, (1,2)", 
    "ball, (5,6) - key, (5,6)", 
    "key, (1,6) - ball, (1,6)"]
y_labels = [
    "Start - End (Ball, Right)",
    "Start - End (Key, Right)",
    "Start - End (Ball, Left)",
    "Start - End (Key, Left)",
    "Ball Right (Right, Start))",
    "Ball Right (Left, Start))",
    "Ball Right (Right, End)",
    "Ball Right (Left, End)",
]
# make a bar chart of the magnitude of the vectors
hypotheses_magnitude = torch.norm(hypotheses, dim=1)
fig = px.bar(
    x=list(range(len(hypotheses_magnitude))),
    y=hypotheses_magnitude,
    labels = {"x": "Hypothesis", "y": "Magnitude"},
    title="Magnitude of Difference Vectors",
    template="plotly_dark",
    text=y_labels,
)
fig.show()


fig = px.imshow(cosine_similarity(hypotheses, hypotheses),
    color_continuous_scale="RdBu",
    color_continuous_midpoint=0.0,
    title="Cosine Similarity Between Hypotheses",
    labels={"color": "Cosine Similarity"},
    x=y_labels,
    y=y_labels,
    template="plotly_dark",
)
fig.update_xaxes(
    tickmode="array",
    tickvals=list(range(len(x_labels))),
    ticktext=y_labels,
)
fig.update_yaxes(
    tickmode="array",
    tickvals=list(range(len(y_labels))),
    ticktext=y_labels,
)
fig.show()

1.0940918922424316, 2.064030647277832, 1.088807463645935, 1.314337134361267, 1.1246469020843506, 0.8653411269187927, 1.0730576515197754, 1.5535975694656372, 

In [62]:
data_array  = cosine_similarity(hypotheses, hypotheses)
linkage = hierarchy.linkage(data_array)
dendrogram = hierarchy.dendrogram(
    linkage, no_plot=True, color_threshold=-np.inf
)
reordered_ind = dendrogram["leaves"]

# reorder the hypotheses
hypotheses = hypotheses[reordered_ind]
y_labels = [y_labels[i] for i in reordered_ind]
x_labels = [x_labels[i] for i in reordered_ind]



fig = px.imshow(cosine_similarity(hypotheses, hypotheses),
    color_continuous_scale="RdBu",
    color_continuous_midpoint=0.0,
    title="Cosine Similarity Between Hypotheses",
    labels={"color": "Cosine Similarity"},
    x=x_labels,
    y=x_labels,
    template="plotly_dark",
)
fig.update_xaxes(
    tickmode="array",
    tickvals=list(range(len(x_labels))),
    ticktext=x_labels,
)
fig.update_yaxes(
    tickmode="array",
    tickvals=list(range(len(y_labels))),
    ticktext=x_labels,
)
fig.show()

In [63]:
fig = px.imshow(cosine_similarity(hypotheses, hypotheses),
    color_continuous_scale="RdBu",
    color_continuous_midpoint=0.0,
    title="Cosine Similarity Between Hypotheses",
    labels={"color": "Cosine Similarity"},
    x=y_labels,
    y=y_labels,
    template="plotly_dark",
)
fig.update_xaxes(
    tickmode="array",
    tickvals=list(range(len(y_labels))),
    ticktext=y_labels,
)
fig.update_yaxes(
    tickmode="array",
    tickvals=list(range(len(y_labels))),
    ticktext=y_labels,
)
fig.show()

Maybe we skipped ahead. Let's get a cluster of strong pos vs negative similarity, plot/cluster it. 

In [72]:
embedding_string = "ball, (1,2)"
criteria_one = cosine_similarity_matrix["channel_1"].str.contains(embedding_string, regex=False) | \
    cosine_similarity_matrix["channel_2"].str.contains(embedding_string, regex=False)
print(sum(criteria_one))
criteria_two = cosine_similarity_matrix["cosine_similarity"].abs() > 0.5
print(sum(criteria_two))
mask = criteria_one & criteria_two
print(sum(mask))
masked_matrix = cosine_similarity_matrix[mask].sort_values(by="cosine_similarity", ascending=False)

# get all unique channel_1 and channel_2 values and put them in a list
vocab_items = list(set(list(masked_matrix["channel_1"].unique()) + list(masked_matrix["channel_2"].unique())))

index_mask = [True if item in vocab_items else False for item in index_labels]
print(sum(index_mask))
print(len(index_mask))
restricted_embeddings = embedding[index_mask]
print(restricted_embeddings.shape)
restricted_labels = [label for label, mask in zip(index_labels, index_mask) if mask]
len(restricted_labels)



df = pd.DataFrame(cosine_similarity(restricted_embeddings), columns=restricted_labels, index=restricted_labels)
data_array = df.to_numpy()
linkage = hierarchy.linkage(data_array)
dendrogram = hierarchy.dendrogram(
    linkage, no_plot=True, color_threshold=-np.inf
)
reordered_ind = dendrogram["leaves"]
# reorder df by ind
df = df.iloc[reordered_ind, reordered_ind]
# data_array = df.to_numpy()

# plot the cosine similarity matrix
fig = fig = px.imshow(
        df,
        color_continuous_scale="RdBu",
        title="Reordered - Pairwise Cosine Similarity Heatmap",
        color_continuous_midpoint=0.0,
        labels={"color": "Cosine Similarity"},
    )
fig.update_xaxes(
    tickmode="array",
    tickvals=list(range(len(restricted_labels))),
    ticktext=df.columns,
    showgrid=False,
)
fig.update_yaxes(
    tickmode="array",
    tickvals=list(range(len(restricted_labels))),
    ticktext=df.columns,
    showgrid=False,
)
# fig.update_xaxes(
#     visible=False,
# )
# fig.update_yaxes(
#     visible=False,
# )
fig.show()

392
522
6
4
980
torch.Size([4, 256])


In [128]:
# get all unique channel_1 and channel_2 values and put them in a list

#target vocab items
vocab_items = ['key, (1,2)', 'ball, (1,2)', 'key, (5,2)', 'ball, (5,2)'] + ['ball, (5,6)', 'key, (5,6)', 'key, (1,6)', 'ball, (1,6)']

# instruction vocab items
# vocab_items = ['key, (2,6)', 'ball, (2,6)', 'key, (0,5)', 'ball, (0,5)', 'key, (4,2)', 'ball, (4,2)']
index_mask = [True if item in vocab_items else False for item in index_labels]
restricted_embeddings = embedding[index_mask]
restricted_labels = [label for label, mask in zip(index_labels, index_mask) if mask]


df = pd.DataFrame(cosine_similarity(restricted_embeddings), columns=restricted_labels, index=restricted_labels)
data_array = df.to_numpy()
linkage = hierarchy.linkage(data_array)
dendrogram = hierarchy.dendrogram(
    linkage, no_plot=True, color_threshold=-np.inf
)
reordered_ind = dendrogram["leaves"]
# reorder df by ind
df = df.iloc[reordered_ind, reordered_ind]
# data_array = df.to_numpy()

# plot the cosine similarity matrix
fig = fig = px.imshow(
        df,
        color_continuous_scale="RdBu",
        # title="Reordered - Pairwise Cosine Similarity Heatmap",
        color_continuous_midpoint=0.0,
        labels={"color": "Cosine Similarity"},
    )
fig.update_xaxes(
    tickmode="array",
    tickvals=list(range(len(restricted_labels))),
    ticktext=df.columns,
    showgrid=False,
)
fig.update_yaxes(
    tickmode="array",
    tickvals=list(range(len(restricted_labels))),
    ticktext=df.columns,
    showgrid=False,
)

# Remove the colorbar
fig.update_layout(coloraxis_showscale=False)

fig.show()

In [165]:
# flatten the cosine similarity matrix and plot the distribution
# it's a pandas dataframe so we can go wide to long
cosine_similarity_matrix = pd.melt(df, ignore_index=False).reset_index()

# remove the second half of rows
cosine_similarity_matrix = cosine_similarity_matrix[cosine_similarity_matrix["index"] < cosine_similarity_matrix["variable"]]

# rename the columns
cosine_similarity_matrix.columns = ["channel_1", "channel_2", "cosine_similarity"]
# remove the diagonal
cosine_similarity_matrix = cosine_similarity_matrix[cosine_similarity_matrix["channel_1"] != cosine_similarity_matrix["channel_2"]]
# remove any values less than 0.05
cosine_similarity_matrix = cosine_similarity_matrix[cosine_similarity_matrix["cosine_similarity"].abs() > 0.05]
# remove any values equal to 1
# cosine_similarity_matrix = cosine_similarity_matrix[cosine_similarity_matrix["cosine_similarity"] != 1]
# cosine_similarity_matrix = cosine_similarity_matrix[cosine_similarity_matrix != 0]

# add a column for if the channels are the same or not
# cosine_similarity_matrix["same_channel"] = cosine_similarity_matrix["channel_1"].str.split(",").str[0] == cosine_similarity_matrix["channel_2"].str.split(",").str[0]

# # add a column for if the positions are the same or not
# cosine_similarity_matrix["same_position"] = cosine_similarity_matrix["channel_1"].str.split("(").str[1] == cosine_similarity_matrix["channel_2"].str.split("(").str[1]

# # combine the two so we can have different rows for each combination
# cosine_similarity_matrix["category"] = cosine_similarity_matrix["same_channel"].astype(str) + ", " + cosine_similarity_matrix["same_position"].astype(str)

# add a column for if the positions are the same or not
cosine_similarity_matrix["corridoor_position_1"] =  cosine_similarity_matrix["channel_1"].str.contains("6").astype(str)
cosine_similarity_matrix["corridoor_position_2"] =  cosine_similarity_matrix["channel_2"].str.contains("6").astype(str)
cosine_similarity_matrix["matching_agent_position"] = cosine_similarity_matrix["corridoor_position_1"] == cosine_similarity_matrix["corridoor_position_2"]

# add a column for the same value of B
cosine_similarity_matrix["value_B_1"] = cosine_similarity_matrix["channel_1"].str.contains("ball, (5", regex=False) | \
                                            cosine_similarity_matrix["channel_1"].str.contains("ball, (1", regex=False)
cosine_similarity_matrix["value_B_2"] = cosine_similarity_matrix["channel_2"].str.contains("ball, (5", regex=False) | \
                                            cosine_similarity_matrix["channel_2"].str.contains("ball, (1", regex=False)
cosine_similarity_matrix["matching_value_B"] = cosine_similarity_matrix["value_B_1"] == cosine_similarity_matrix["value_B_2"]

cosine_similarity_matrix["category"] = cosine_similarity_matrix["matching_agent_position"].astype(str) + ", " + cosine_similarity_matrix["matching_value_B"].astype(str)
# cosine_similarity_matrix
# merge df_norms to get l2 norm of either vector
# cosine_similarity_matrix = cosine_similarity_matrix.merge(df_norms, left_on="channel_1", right_on="Index")
# cosine_similarity_matrix = cosine_similarity_matrix.merge(df_norms, left_on="channel_2", right_on="Index")
# cosine_similarity_matrix = cosine_similarity_matrix.drop(columns=["Index_x", "Index_y"])
# rename the columns
# cosine_similarity_matrix.columns = ["channel_1", "channel_2", "cosine_similarity", "same_channel", "same_position", "category", "l2_norm_1", "l2_norm_2"]

fig = px.strip(
    cosine_similarity_matrix,
    y="cosine_similarity",
    x = "corridoor_position_1",
    color="category",
    title="Embedding Cosine Similarity Distribution by Channel/Position Parity",
    hover_data=["channel_1", "channel_2"],
    # color="category",
    # labels={"category": "Same Channel, Same Position"},
    # template="plotly_dark",
    orientation="v",
)
fig.show()


In [172]:
px.strip(cosine_similarity_matrix, 
         y=cosine_similarity_matrix["cosine_similarity"].abs(), 
        #  x = "corridoor_position_1", 
         color= cosine_similarity_matrix["cosine_similarity"] > 0,
         title="Embedding Cosine Similarity Distribution by Channel/Position Parity", 
         hover_data=["channel_1", "channel_2"], orientation="v")

In [65]:
embedding_string = "ball, (1,6)"
criteria_one = cosine_similarity_matrix["channel_1"].str.contains(embedding_string, regex=False) | \
    cosine_similarity_matrix["channel_2"].str.contains(embedding_string, regex=False)
print(sum(criteria_one))
criteria_two = cosine_similarity_matrix["cosine_similarity"].abs() > 0.5
print(sum(criteria_two))
mask = criteria_one & criteria_two
print(sum(mask))
cosine_similarity_matrix[mask].sort_values(by="cosine_similarity", ascending=False)


402
522
6


Unnamed: 0,channel_1,channel_2,cosine_similarity,same_channel,same_position,category,l2_norm_1,l2_norm_2
13066,"key, (5,6)","ball, (1,6)",0.698591,False,False,"False, False",1.314337,0.865341
33576,"ball, (1,6)","key, (5,6)",0.698591,False,False,"False, False",0.865341,1.314337
12944,"ball, (5,6)","ball, (1,6)",-0.532309,True,False,"True, False",2.06403,0.865341
35823,"ball, (1,6)","ball, (5,6)",-0.532309,True,False,"True, False",0.865341,2.06403
12925,"key, (1,6)","ball, (1,6)",-0.582975,False,True,"False, True",1.553598,0.865341
46349,"ball, (1,6)","key, (1,6)",-0.582975,False,True,"False, True",0.865341,1.553598


In [103]:
embedding_string = "ball, (1,6)"
criteria_one = cosine_similarity_matrix["channel_1"].str.contains(embedding_string, regex=False) | \
    cosine_similarity_matrix["channel_2"].str.contains(embedding_string, regex=False)
print(sum(criteria_one))
criteria_two = cosine_similarity_matrix["cosine_similarity"].abs() > 0.5
print(sum(criteria_two))
mask = criteria_one & criteria_two
print(sum(mask))
masked_matrix = cosine_similarity_matrix[mask].sort_values(by="cosine_similarity", ascending=False)

# get all unique channel_1 and channel_2 values and put them in a list
vocab_items = list(set(list(masked_matrix["channel_1"].unique()) + list(masked_matrix["channel_2"].unique())))
print((vocab_items))
index_mask = [True if item in vocab_items else False for item in index_labels]
print(sum(index_mask))
print(len(index_mask))
restricted_embeddings = embedding[index_mask]
print(restricted_embeddings.shape)
restricted_labels = [label for label, mask in zip(index_labels, index_mask) if mask]
len(restricted_labels)



df = pd.DataFrame(cosine_similarity(restricted_embeddings), columns=restricted_labels, index=restricted_labels)
data_array = df.to_numpy()
linkage = hierarchy.linkage(data_array)
dendrogram = hierarchy.dendrogram(
    linkage, no_plot=True, color_threshold=-np.inf
)
reordered_ind = dendrogram["leaves"]
# reorder df by ind
df = df.iloc[reordered_ind, reordered_ind]
# data_array = df.to_numpy()

# plot the cosine similarity matrix
fig = fig = px.imshow(
        df,
        color_continuous_scale="RdBu",
        title="Reordered - Pairwise Cosine Similarity Heatmap",
        color_continuous_midpoint=0.0,
        labels={"color": "Cosine Similarity"},
    )
fig.update_xaxes(
    tickmode="array",
    tickvals=list(range(len(restricted_labels))),
    ticktext=df.columns,
    showgrid=False,
)
fig.update_yaxes(
    tickmode="array",
    tickvals=list(range(len(restricted_labels))),
    ticktext=df.columns,
    showgrid=False,
)
# fig.update_xaxes(
#     visible=False,
# )
# fig.update_yaxes(
#     visible=False,
# )
fig.show()

112
362
6
['ball, (5,6)', 'key, (5,6)', 'key, (1,6)', 'ball, (1,6)']
4
980
torch.Size([4, 256])


In [107]:
embedding_string = "ball, (2,6)"
criteria_one = cosine_similarity_matrix["channel_1"].str.contains(embedding_string, regex=False) | \
    cosine_similarity_matrix["channel_2"].str.contains(embedding_string, regex=False)
print(sum(criteria_one))
criteria_two = cosine_similarity_matrix["cosine_similarity"].abs() > 0.5
print(sum(criteria_two))
mask = criteria_one & criteria_two
print(sum(mask))
masked_matrix = cosine_similarity_matrix[mask].sort_values(by="cosine_similarity", ascending=False)

# get all unique channel_1 and channel_2 values and put them in a list
vocab_items = list(set(list(masked_matrix["channel_1"].unique()) + list(masked_matrix["channel_2"].unique())))

index_mask = [True if item in vocab_items else False for item in index_labels]
print(sum(index_mask))
print(len(index_mask))
restricted_embeddings = embedding[index_mask]
print(restricted_embeddings.shape)
restricted_labels = [label for label, mask in zip(index_labels, index_mask) if mask]
len(restricted_labels)



df = pd.DataFrame(cosine_similarity(restricted_embeddings), columns=restricted_labels, index=restricted_labels)
data_array = df.to_numpy()
linkage = hierarchy.linkage(data_array)
dendrogram = hierarchy.dendrogram(
    linkage, no_plot=True, color_threshold=-np.inf
)
reordered_ind = dendrogram["leaves"]
# reorder df by ind
df = df.iloc[reordered_ind, reordered_ind]
# data_array = df.to_numpy()

# plot the cosine similarity matrix
fig = fig = px.imshow(
        df,
        color_continuous_scale="RdBu",
        title="Reordered - Pairwise Cosine Similarity Heatmap",
        color_continuous_midpoint=0.0,
        labels={"color": "Cosine Similarity"},
    )
fig.update_xaxes(
    tickmode="array",
    tickvals=list(range(len(restricted_labels))),
    ticktext=df.columns,
    showgrid=False,
)
fig.update_yaxes(
    tickmode="array",
    tickvals=list(range(len(restricted_labels))),
    ticktext=df.columns,
    showgrid=False,
)
# fig.update_xaxes(
#     visible=False,
# )
# fig.update_yaxes(
#     visible=False,
# )
fig.show()

134
362
24
13
980
torch.Size([13, 256])


In [183]:
embedding_string = "unseen, (4,6)"
criteria_one = cosine_similarity_matrix["channel_1"].str.contains(embedding_string, regex=False) | \
    cosine_similarity_matrix["channel_2"].str.contains(embedding_string, regex=False)
print(sum(criteria_one))
criteria_two = cosine_similarity_matrix["cosine_similarity"].abs() > 0.5
print(sum(criteria_two))
mask = criteria_one & criteria_two
print(sum(mask))
masked_matrix = cosine_similarity_matrix[mask].sort_values(by="cosine_similarity", ascending=False)

# get all unique channel_1 and channel_2 values and put them in a list
vocab_items = list(set(list(masked_matrix["channel_1"].unique()) + list(masked_matrix["channel_2"].unique())))

index_mask = [True if item in vocab_items else False for item in index_labels]
print(sum(index_mask))
print(len(index_mask))
restricted_embeddings = embedding[index_mask]
print(restricted_embeddings.shape)
restricted_labels = [label for label, mask in zip(index_labels, index_mask) if mask]
len(restricted_labels)



df = pd.DataFrame(cosine_similarity(restricted_embeddings), columns=restricted_labels, index=restricted_labels)
data_array = df.to_numpy()
linkage = hierarchy.linkage(data_array)
dendrogram = hierarchy.dendrogram(
    linkage, no_plot=True, color_threshold=-np.inf
)
reordered_ind = dendrogram["leaves"]
# reorder df by ind
df = df.iloc[reordered_ind, reordered_ind]
# data_array = df.to_numpy()

# plot the cosine similarity matrix
fig = fig = px.imshow(
        df,
        color_continuous_scale="RdBu",
        color_continuous_midpoint=0.0,
        labels={"color": "Cosine Similarity"},
    )
fig.update_xaxes(
    tickmode="array",
    tickvals=list(range(len(restricted_labels))),
    ticktext=df.columns,
    showgrid=False,
)
fig.update_yaxes(
    tickmode="array",
    tickvals=list(range(len(restricted_labels))),
    ticktext=df.columns,
    showgrid=False,
)

# remove colorbar
fig.update_layout(
    coloraxis_showscale=False
)

fig.show()

72
32
10
6
980
torch.Size([6, 256])


# RTG Embedding

In [25]:
dt.reward_embedding[0].weight.reshape(1, -1).norm(dim=1)

tensor([0.7832], grad_fn=<NormBackward1>)

# scratch pad while reviewing Lucy's work

In [26]:
in_bias = dt.transformer.blocks[2].mlp.b_in.detach()

# make a boolean vector for color and set it to 0 except at position 79, 132, 235, 255, 1, 108, 132, 158, 169 and 204
color_vector = np.zeros(len(in_bias))
color_vector[[79, 132, 235, 255, 1, 108, 132, 158, 169, 204]] = 1
color_vector = color_vector.astype(bool)
px.scatter(y=in_bias, x=range(len(in_bias)), color=color_vector)



In [27]:
weight_norm = dt.transformer.blocks[2].mlp.W_in.norm(dim=1).detach()
px.scatter(y=weight_norm, x=range(len(weight_norm)), color=color_vector)