In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell executes (see the IPython autoreload docs), so edits to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload 
%autoreload 2

In [None]:
from collections import defaultdict
import copy
import json
import glob
import os
import pickle
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [None]:
# Seed Python's built-in `random` module so any use of it is repeatable.
# NOTE(review): this does not seed NumPy; the MDS cell passes its own random_state.
random.seed(42)
# Directory searched for "*-new.pkl" kNN-graph pickles by the loading cell.
# Note the trailing slash: code that appends "/<name>" produces a harmless "//".
KNN_GRAPHS_DIR = "../results/knn_graphs/"

In [None]:
# Load every "*-new.pkl" pickle under KNN_GRAPHS_DIR and index its graphs as
# knn_graphs_data[model_name][layer_frac].
#
# SECURITY NOTE: pickle.load can execute arbitrary code while deserializing;
# only point KNN_GRAPHS_DIR at files you produced or otherwise trust.
layer_fracs = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
knn_graphs_data = defaultdict(dict)
# sorted() makes the load order (and hence dict insertion order) independent of
# the arbitrary order in which glob lists the directory.
for fname in tqdm(sorted(glob.glob(os.path.join(KNN_GRAPHS_DIR, "*-new.pkl")))):
    with open(fname, "rb") as f:
        knn_data = pickle.load(f)
    model_name = knn_data["model_name"]
    relative_layers = sorted(knn_data["knn_graphs"].keys())
    # Fractions are assigned positionally to the sorted layer keys. zip() would
    # silently truncate on a length mismatch, dropping or mislabelling layers
    # and only failing later with an opaque KeyError, so fail loudly here.
    if len(relative_layers) != len(layer_fracs):
        raise ValueError(
            f"{fname}: expected {len(layer_fracs)} layers in 'knn_graphs', "
            f"found {len(relative_layers)}"
        )
    for layer_frac, relative_layer_idx in zip(layer_fracs, relative_layers):
        knn_graphs_data[model_name][layer_frac] = knn_data["knn_graphs"][relative_layer_idx]


In [None]:
# Display the nested mapping built by the loading cell:
# model name -> layer fraction -> kNN graph.
knn_graphs_data

In [None]:
# For every layer fraction and every ordered pair of models (self-pairs
# included), compute the overlap of their kNN graphs as
#     IoU = nnz(A elementwise-times B) / nnz(A + B)
# and collect the results column-wise in pairwise_model_data
# (keys: "layer_frac", "model_a", "model_b", "knn_iou").
# NOTE(review): assumes the graphs are scipy.sparse matrices with non-negative
# entries, so that nnz of the sum counts the union of edges -- TODO confirm.
pairwise_model_data = defaultdict(list)
model_names = list(knn_graphs_data.keys())
# A progress bar replaces the former per-pair print, which emitted one line per
# (layer, model_a, model_b) triple; the values are all kept in the table.
for layer_frac in tqdm([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], desc="layer_frac"):
    for model_a in model_names:
        graph_a = knn_graphs_data[model_a][layer_frac]
        for model_b in model_names:
            graph_b = knn_graphs_data[model_b][layer_frac]
            intersection_nnz = graph_a.multiply(graph_b).nnz
            union_nnz = (graph_a + graph_b).nnz
            # Two empty graphs have no defined IoU; record NaN rather than
            # aborting the whole sweep with ZeroDivisionError.
            iou = intersection_nnz / union_nnz if union_nnz else float("nan")
            pairwise_model_data["layer_frac"].append(layer_frac)
            pairwise_model_data["model_a"].append(model_a)
            pairwise_model_data["model_b"].append(model_b)
            pairwise_model_data["knn_iou"].append(iou)

In [None]:
# Turn the column-wise dict of lists into a long-format table: one row per
# (layer_frac, model_a, model_b) with its knn_iou.
pairwise_model_df = pd.DataFrame(pairwise_model_data)

In [None]:
# Display the long-format pairwise table (layer_frac, model_a, model_b, knn_iou).
pairwise_model_df

In [None]:

from sklearn.decomposition import PCA
from sklearn.manifold import MDS

# Square model-by-model IoU table restricted to layer_frac == 0.8.
similarity_df = (
    pairwise_model_df.loc[pairwise_model_df["layer_frac"] == 0.8]
    .pivot(index="model_a", columns="model_b", values="knn_iou")
)

# MDS with dissimilarity='precomputed' takes dissimilarities, so turn the
# similarity s into 1 - s.
dissimilarity_matrix = 1 - similarity_df.to_numpy()

# Project the models into two dimensions; random_state pins the result.
mds = MDS(n_components=2, dissimilarity='precomputed', random_state=42)
mds_embedding = mds.fit_transform(dissimilarity_matrix)

# Wrap the coordinates in a frame whose rows are labelled by model name.
embedding_df = pd.DataFrame(
    mds_embedding,
    index=similarity_df.index,
    columns=['Component_1', 'Component_2'],
)

embedding_df.keys()


In [None]:
# Serialize the 2-D MDS coordinates to a JSON string. Called without a path and
# as the cell's last expression, it is displayed rather than written to disk.
embedding_df.to_json()

In [None]:
# Scatter the 2-D MDS coordinates on fixed axis limits and label each point
# with its model name.
fig, ax = plt.subplots(figsize=(10, 8))
ax.set_xlim(-0.6, 0.8)
ax.set_ylim(-0.8, 0.8)
sns.scatterplot(data=embedding_df, x='Component_1', y='Component_2', ax=ax)

for point in embedding_df.itertuples():
    # Offset the label slightly so it does not sit on top of its marker.
    ax.text(
        point.Component_1 + 0.02,
        point.Component_2 + 0.02,
        point.Index,
        fontdict=dict(color='black', size=9),
        bbox=dict(facecolor='white', alpha=0.5, edgecolor='none'),
    )


In [None]:
# Heatmap of cross-model IoU at layer_frac == 1.0. Self-pairs are filtered out,
# so the diagonal of the pivoted table is empty. The figure is saved as a PDF.
at_frac_1 = pairwise_model_df["layer_frac"] == 1.0
different_models = pairwise_model_df["model_a"] != pairwise_model_df["model_b"]
cross_model_iou = pairwise_model_df[different_models & at_frac_1].pivot(
    index="model_a", columns="model_b", values="knn_iou"
)
heatmap_ax = sns.heatmap(cross_model_iou, vmin=0.2, vmax=0.7)
heatmap_ax.set_xlabel('')
heatmap_ax.set_ylabel('')
plt.savefig("model_similarity-1.0.pdf", format="pdf", bbox_inches="tight")


In [None]:
# Display the long-format pairwise table (layer_frac, model_a, model_b, knn_iou).
pairwise_model_df

In [None]:
# One frame per layer fraction, in ascending fraction order; each is sorted by
# (model_a, model_b) and re-indexed from 0.
layer_frac_dfs = [
    pairwise_model_df[pairwise_model_df["layer_frac"] == layer_frac]
    .sort_values(by=["model_a", "model_b"], ascending=[True, True])
    .reset_index(drop=True)
    for layer_frac in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
]
layer_frac_dfs

In [None]:
# Persist the list of per-fraction frames to the working directory.
with open("pairwise_knn_layer_dfs.pkl", "wb") as out_file:
    pickle.dump(layer_frac_dfs, out_file)

In [None]:
# Display the long-format pairwise table (layer_frac, model_a, model_b, knn_iou).
pairwise_model_df

In [None]:
# Draw one model-by-model IoU heatmap per layer fraction in the sweep.
# A single loop keeps the plotted fractions in step with the rest of the
# notebook: every fraction from 0.1 to 1.0 is shown exactly once.
for layer_frac in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
    iou_matrix = pairwise_model_df[pairwise_model_df["layer_frac"] == layer_frac].pivot(
        index="model_a", columns="model_b", values="knn_iou"
    )
    # A fresh figure per fraction; otherwise successive heatmaps would be
    # drawn onto the same axes.
    fig, ax = plt.subplots()
    sns.heatmap(iou_matrix, ax=ax)
    ax.set_title(f"knn_iou, layer_frac = {layer_frac}")
    plt.show()