In [1]:
# uncomment on Colab
#%pip install datasets scikit-learn torchinfo

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import scipy as sp
import torch
import torch.nn.functional as F

from datasets import load_dataset
from matplotlib.ticker import PercentFormatter
from sklearn.cluster import HDBSCAN
from sklearn.manifold import TSNE
from sklearn.metrics import calinski_harabasz_score, davies_bouldin_score, silhouette_score
from torchinfo import summary
from torch.nn import Identity
from tqdm import tqdm
from transformers import AutoConfig, AutoModel, AutoTokenizer

In [3]:
def stack_padded(ndarray_list, pad_value=0):
    """Stack arrays of equal rank but differing shapes into one padded array.

    Each input is copied into the leading corner of its slot along a new first
    axis; every position not covered by an input holds ``pad_value``.

    Args:
        ndarray_list: Non-empty sequence of array-likes that all have the same
            number of dimensions.
        pad_value: Fill value for the padding.

    Returns:
        ``np.ndarray`` of shape ``(len(ndarray_list), max_0, ..., max_k)``, where
        ``max_d`` is the largest extent of any input along dimension ``d``. The
        dtype is the common (promoted) dtype of all inputs.

    Raises:
        AssertionError: if the list is empty or the inputs differ in rank.
    """
    assert ndarray_list  # not empty
    arrays = [np.asarray(a) for a in ndarray_list]
    ndim = arrays[0].ndim
    assert all(a.ndim == ndim for a in arrays), "all arrays must have the same number of dimensions"

    target_shape = (len(arrays), *(max(a.shape[dim] for a in arrays) for dim in range(ndim)))
    # Promote over every input: taking the first array's dtype would silently
    # truncate later arrays with a wider dtype (e.g. floats into an int array).
    dtype = np.result_type(*(a.dtype for a in arrays))
    stacked_array = np.full(target_shape, fill_value=pad_value, dtype=dtype)
    for i, a in enumerate(arrays):
        # Explicit tuple index; the starred form inside [] requires Python >= 3.11.
        stacked_array[(i, *(slice(0, extent) for extent in a.shape))] = a
    return stacked_array

In [4]:
def build_model(model_id, random_params, no_dense_layers, num_hidden_layers):
    """Build the single-attention-head encoder used by the experiments.

    Args:
        model_id: Identifier passed to ``AutoConfig`` / ``AutoModel``.
        random_params: If true, instantiate the model from the config only
            (randomly initialised); otherwise load the pretrained weights.
        no_dense_layers: If true, zero the attention dense layers via
            ``disable_dense_layers``.
        num_hidden_layers: Optional depth override; falsy values keep the depth
            from the config. Only meaningful for models that share parameters
            across layers.

    Returns:
        The model, always in evaluation mode.
    """
    config = AutoConfig.from_pretrained(model_id, num_attention_heads=1)
    if num_hidden_layers:  # override depth (only works for models with shared params)
        config.num_hidden_layers = num_hidden_layers
    if random_params:  # initialise params randomly
        model = AutoModel.from_config(config)
    else:
        model = AutoModel.from_pretrained(model_id, config=config)
    if no_dense_layers:  # set dense layer params to zero
        disable_dense_layers(model)
    # from_pretrained() returns the model in eval mode but from_config() does not,
    # so without this the random-parameter runs would execute with dropout active
    # whenever the config has non-zero dropout probabilities.
    model.eval()
    return model

In [5]:
def disable_dense_layers(model):
    model_type = model.config.model_type
    match model_type:
        case "albert":
            with torch.no_grad():
                model.encoder.albert_layer_groups[0].albert_layers[0].attention.dense.weight.fill_(0.0)
                model.encoder.albert_layer_groups[0].albert_layers[0].attention.dense.bias.fill_(0.0)
        case "bert":
            for i in range(len(model.encoder.layer)):
                with torch.no_grad():
                    model.encoder.layer[i].attention.output.dense.weight.fill_(0.0)
                    model.encoder.layer[i].attention.output.dense.bias.fill_(0.0)

In [6]:
def make_outdir(model, dataset, use_queries_and_keys, random_params, no_dense_layers):
    """Create (if needed) and return the raw-results directory for one run.

    The run id is assembled from the model name, the dataset name (or
    "uniform-tokens" when ``dataset`` is None), the similarity variant, the
    parameter source, the dense-layer setting and the number of hidden layers.

    Args:
        model: Model whose ``config._name_or_path`` and
            ``config.num_hidden_layers`` are read.
        dataset: Object exposing ``info.dataset_name``, or None.
        use_queries_and_keys: Selects the "⟨Qx,Ky⟩" vs. "⟨x,y⟩" tag.
        random_params: Selects the "random-params" vs. "trained-params" tag.
        no_dense_layers: Selects the "dense-off" vs. "dense-on" tag.

    Returns:
        The path ``rawsults/<run id>``; the directory exists on return.
    """
    runid = "_".join([
        f"{model.config._name_or_path}",
        "uniform-tokens" if dataset is None else dataset.info.dataset_name,
        "⟨Qx,Ky⟩" if use_queries_and_keys else "⟨x,y⟩",
        "random-params" if random_params else "trained-params",
        "dense-off" if no_dense_layers else "dense-on",
        f"{model.config.num_hidden_layers}-layers",
    ])
    # Namespaced ids ("org/name") would otherwise create nested directories, and
    # the plotting loop treats every direct child of rawsults/ as one run id.
    runid = runid.replace("/", "--")
    outdir = f"rawsults/{runid}"
    os.makedirs(outdir, exist_ok=True)
    return outdir

In [7]:
def get_random_input(dataset, tokeniser):
    """Sample texts until one tokenises to at least 100 tokens.

    With a dataset, a uniformly random record's "text" field is used. Without
    one (``dataset is None``), ``tokeniser.model_max_length`` token ids are
    drawn uniformly from ``range(tokeniser.vocab_size)`` and decoded to text.

    Args:
        dataset: Indexable collection of records with a "text" entry, or None.
        tokeniser: Tokeniser providing ``__call__``, ``decode``,
            ``batch_decode``, ``vocab_size`` and ``model_max_length``.

    Returns:
        Tuple ``(encoding, original_text, tokenised_text)`` where
        ``tokenised_text`` shows every token wrapped in parentheses.
    """
    min_num_tokens = 100
    encoding = None
    # Rejection sampling: redraw until the tokenised sample is long enough.
    while encoding is None or encoding.input_ids.shape[1] < min_num_tokens:
        if dataset is None:
            random_ids = torch.randint(tokeniser.vocab_size, (tokeniser.model_max_length,))
            original_text = tokeniser.decode(random_ids)
        else:
            index = torch.randint(low=0, high=len(dataset), size=(1,)).item()
            original_text = dataset[index]["text"]
        encoding = tokeniser(original_text, return_tensors="pt", truncation=True, return_attention_mask=False)

    # Decode each id on its own so token boundaries stay visible.
    pieces = tokeniser.batch_decode(encoding.input_ids.squeeze())
    tokenised_text = "(" + ")(".join(pieces) + ")"
    return encoding, original_text, tokenised_text

In [8]:
def compute_correlations(hidden_states, queries, keys):
    """Compute one cosine-similarity matrix between projected tokens per layer.

    For every ``(X, query, key)`` triple the rows of ``query(X)`` and
    ``key(X)`` are L2-normalised and multiplied, giving an N x N matrix whose
    entry ``(i, j)`` is the cosine similarity of query row ``i`` and key row ``j``.

    Args:
        hidden_states: Iterable of 2-D tensors (N x d), one per layer.
        queries: Iterable of callables mapping N x d to N x d'.
        keys: Iterable of callables mapping N x d to N x d'.

    Returns:
        List of detached N x N tensors, one per zipped triple.
    """
    tolerance = 1e-5
    similarity_matrices = []
    for layer_states, project_query, project_key in zip(hidden_states, queries, keys):
        unit_queries = F.normalize(project_query(layer_states), dim=1)  # N x d'
        unit_keys = F.normalize(project_key(layer_states), dim=1)  # N x d'
        cosines = unit_queries @ unit_keys.transpose(0, 1)  # N x N
        # Sanity check: cosines must lie in [-1, 1] up to rounding error.
        assert (cosines.abs() < 1 + tolerance).all()
        similarity_matrices.append(cosines.detach())
    return similarity_matrices

In [9]:
def extract_queries_and_keys(model, replace_with_identity=False):
    """Return the per-layer query and key projections of a model.

    Args:
        model: Model exposing ``config.num_hidden_layers``,
            ``config.model_type`` and the matching encoder layout.
        replace_with_identity: If true, skip the model's projections and return
            identity maps instead (one shared ``Identity`` per list).

    Returns:
        Tuple ``(queries, keys)`` of two lists. For "albert" each list repeats
        the projection of layer group 0 / layer 0 ``num_hidden_layers`` times;
        for "bert" each list holds one projection per encoder layer.

    Raises:
        NotImplementedError: for any other model type (unless identities were
            requested).
    """
    num_layers = model.config.num_hidden_layers
    if replace_with_identity:
        return num_layers * [Identity()], num_layers * [Identity()]

    model_type = model.config.model_type
    if model_type == "albert":
        shared_attention = model.encoder.albert_layer_groups[0].albert_layers[0].attention
        return num_layers * [shared_attention.query], num_layers * [shared_attention.key]
    if model_type == "bert":
        self_attentions = [layer.attention.self for layer in model.encoder.layer]
        return [attn.query for attn in self_attentions], [attn.key for attn in self_attentions]
    raise NotImplementedError("Unsupported model type", model_type)

In [10]:
def compute_clustering(hidden_states):
    clustereval_arrays = []
    label_arrays = []
    for X in hidden_states:
        # median_dist = np.median(pairwise_distances(layer_latent, metric=metric))
        # eps = median_dist * (3/4 if metric == "euclidean" else 2/3 if metric == "cosine" else None)
        # clustering = DBSCAN(eps=eps, min_samples=2, metric=metric).fit(layer_latent)
        X = X.numpy()
        clusterer = HDBSCAN(min_cluster_size=4)
        clustering = clusterer.fit(X)
        label_arrays.append(clustering.labels_)
        
        num_clusters = clustering.labels_.max() + 1
        outlier_rate = (clustering.labels_ == -1).sum() / len(clustering.labels_)
        try:
            sil_score = silhouette_score(X, clustering.labels_)
            cal_score = calinski_harabasz_score(X, clustering.labels_)
            dav_score = davies_bouldin_score(X, clustering.labels_)
        except ValueError:  # single-cluster case
            sil_score = cal_score = dav_score = float("nan")
        clustereval_arrays.append(np.array([num_clusters, outlier_rate, sil_score, cal_score, dav_score]))
    return clustereval_arrays, label_arrays

In [11]:
def compute_tsne_embeddings(hidden_states, perplexity=5, random_state=None):
    """Embed every layer's points into 2-D with t-SNE.

    Args:
        hidden_states: Iterable of 2-D array-likes, one per layer.
        perplexity: t-SNE perplexity (must be smaller than the number of
            points in each array). Defaults to the previously hard-coded 5.
        random_state: Seed forwarded to ``TSNE``; pass an int to make the
            embeddings reproducible. ``None`` keeps the unseeded behaviour.

    Returns:
        List with one ``(num_points, 2)`` embedding per input array.
    """
    return [
        TSNE(n_components=2, perplexity=perplexity, random_state=random_state).fit_transform(X)
        for X in hidden_states
    ]

In [12]:
def run_experiment(dataset, model_id, use_queries_and_keys=False, random_params=False, no_dense_layers=False,
                   num_hidden_layers=None, sample_size=10, num_bins=100):
    """Run one experiment configuration and save its raw results to disk.

    For ``sample_size`` random inputs the model's hidden states are collected
    (``output.hidden_states[1:]``, i.e. skipping index 0), pairwise cosine
    similarities are computed per layer, and both the hidden states ("X") and
    the similarity matrices ("XX.T") are clustered and embedded with t-SNE.

    Args:
        dataset: Text dataset, or None to use uniformly random tokens.
        model_id: Model identifier for ``build_model`` / ``AutoTokenizer``.
        use_queries_and_keys: Compare ``Qx`` with ``Ky`` instead of ``x`` with ``y``.
        random_params: Use randomly initialised instead of pretrained weights.
        no_dense_layers: Zero the attention dense layers.
        num_hidden_layers: Optional depth override.
        sample_size: Number of random inputs to analyse.
        num_bins: Number of histogram bins on [-1, 1].

    Side effects:
        Writes ``0verview.csv`` and one ``<key>.npy`` per result key into the
        directory returned by ``make_outdir``.
    """
    # Keep this first: locals() must contain nothing but the call arguments.
    print("experiment params:")
    print("\n".join(map(lambda x: f"{x[0]}: {x[1]}", locals().items())))

    model = build_model(model_id, random_params, no_dense_layers, num_hidden_layers)
    tokeniser = AutoTokenizer.from_pretrained(model_id)
    outdir = make_outdir(model, dataset, use_queries_and_keys, random_params, no_dense_layers)
    # The projections depend only on the model, so fetch them once.
    queries, keys = extract_queries_and_keys(model, replace_with_identity=not use_queries_and_keys)

    overview_rows = []
    results = {
        "histograms": [],
        "sim_matrices": []
    } | {
        f"{key}_{target}": []
        for key in ["clustereval", "labels", "tsne"] for target in ["X", "XX.T"]
    }

    for _ in tqdm(range(sample_size), desc="Analysing each sample"):
        encoding, original_text, tokenised_text = get_random_input(dataset, tokeniser)
        overview_rows.append((original_text, tokenised_text, encoding.input_ids.numel()))

        # Inference only: without no_grad the forward pass would keep the
        # activations of every layer alive for a backward pass that never happens.
        with torch.no_grad():
            output = model(**encoding, output_hidden_states=True)
            hidden_states = [X.squeeze(0).clone().detach() for X in output.hidden_states[1:]]
            sim_list = compute_correlations(hidden_states, queries, keys)

        # histograms
        results["histograms"].append(np.stack([
            np.histogram(correls.flatten(), bins=num_bins, range=(-1, 1), density=True)[0]
            for correls in sim_list
        ]))

        # similarity matrices (float16 to keep the files small)
        results["sim_matrices"].append(np.stack(sim_list).astype(np.float16))

        # clustering and T-SNE
        for target in ["X", "XX.T"]:
            data = hidden_states if target == "X" else sim_list
            clustereval_arrays, label_arrays = compute_clustering(data)
            tsne_arrays = compute_tsne_embeddings(data)
            results[f"clustereval_{target}"].append(np.stack(clustereval_arrays))
            results[f"labels_{target}"].append(np.stack(label_arrays))
            results[f"tsne_{target}"].append(np.stack(tsne_arrays))

    # Build the frame once instead of growing it row by row.
    overview_df = pd.DataFrame(overview_rows, columns=["original_text", "tokenised_text", "num_tokens"])
    overview_df.to_csv(f"{outdir}/0verview.csv", index_label="sample_idx")  # (num_samples, 3 cols)
    for key, val in results.items():
        # Samples differ in token count, so pad to a common shape before saving.
        np.save(f"{outdir}/{key}.npy", stack_padded(val))

In [24]:
def plot_histograms(run_id, conf_level=0.99):
    """Plot per-layer cosine-similarity histograms averaged over samples.

    Reads ``rawsults/<run_id>/histograms.npy`` (samples x layers x bins) and
    writes ``visualisation/<run_id>/histograms.pdf``. Each panel shows the
    mean density over samples with a two-sided t-distribution confidence band.

    Args:
        run_id: Name of the run directory under ``rawsults/``.
        conf_level: Confidence level of the band.
    """
    outdir = f"visualisation/{run_id}"
    os.makedirs(outdir, exist_ok=True)

    count_tensor = np.load(f"rawsults/{run_id}/histograms.npy")
    num_samples = count_tensor.shape[0]
    count_mean = np.mean(count_tensor, axis=0)  # shape num_layer x num_bins
    t_score = sp.stats.t.ppf(q=1 - (1 - conf_level) / 2, df=num_samples - 1)
    # t-interval half-width: the t quantile with n-1 degrees of freedom belongs
    # with the *sample* standard deviation (ddof=1), not the population one.
    count_conf = t_score * np.std(count_tensor, axis=0, ddof=1) / np.sqrt(num_samples)
    num_bins = count_tensor.shape[-1]
    max_density = count_tensor.max()

    grid_rows, grid_cols = 6, 4
    num_layers = count_mean.shape[0]
    # Ceil division, at least 1: never a zero step (fewer layers than panels)
    # and never more selected layers than the grid has panels.
    stride = max(1, -(-num_layers // (grid_rows * grid_cols)))

    bin_edges = np.linspace(-1, 1, num_bins + 1)
    bin_centres = np.linspace(-1 + 1 / num_bins, 1 - 1 / num_bins, num_bins)

    plt.figure(figsize=(12, 16))
    plt.suptitle(f"cos similarity histograms\n({run_id})\n")
    for i, layer in enumerate(range(stride, num_layers + 1, stride)):
        idx = layer - 1  # layers are reported 1-based
        plt.subplot(grid_rows, grid_cols, i + 1)
        plt.stairs(count_mean[idx], bin_edges)
        plt.fill_between(bin_centres, count_mean[idx] - count_conf[idx], count_mean[idx] + count_conf[idx],
                         alpha=0.5, step="mid")
        plt.xlim(-1, 1)
        plt.ylim(0, max_density)  # set a consistent y-axis limit
        plt.title(f"after layer {layer}")
    plt.tight_layout()
    plt.savefig(f"{outdir}/histograms.pdf")
    plt.close()

In [14]:
def plot_heatmaps(run_id):
    """Plot per-layer cosine-similarity heatmaps, one PDF per sample.

    Reads ``rawsults/<run_id>/sim_matrices.npy`` and the ``num_tokens`` column
    of ``rawsults/<run_id>/0verview.csv``; writes
    ``visualisation/<run_id>/heatmaps_sample<k>.pdf``.

    Args:
        run_id: Name of the run directory under ``rawsults/``.
    """
    outdir = f"visualisation/{run_id}"
    os.makedirs(outdir, exist_ok=True)

    heatmap_tensor = np.load(f"rawsults/{run_id}/sim_matrices.npy")
    seq_lens = pd.read_csv(f"rawsults/{run_id}/0verview.csv", index_col=0)["num_tokens"]

    grid_rows, grid_cols = 6, 4
    num_layers = heatmap_tensor.shape[1]
    # Ceil division, at least 1: never a zero step (fewer layers than panels)
    # and never more selected layers than the grid has panels.
    stride = max(1, -(-num_layers // (grid_rows * grid_cols)))

    for sample in range(heatmap_tensor.shape[0]):
        seq_len = seq_lens[sample]
        plt.figure(figsize=(12, 16))
        plt.suptitle(f"cos similarity heatmaps\n({run_id})\n")
        for i, layer in enumerate(range(stride, num_layers + 1, stride)):
            # Crop the padding that was added to reach a common matrix size.
            sim_matrix = heatmap_tensor[sample, layer - 1, :seq_len, :seq_len]
            plt.subplot(grid_rows, grid_cols, i + 1)
            plt.imshow(sim_matrix, cmap="coolwarm", vmin=-1, vmax=1)
            plt.colorbar(label="cosine similarity")
            plt.xlabel("token j")
            plt.ylabel("token i")
            plt.title(f"after layer {layer}")
        plt.tight_layout()
        plt.savefig(f"{outdir}/heatmaps_sample{sample}.pdf")
        plt.close()

In [15]:
def plot_tsne(run_id):
    """Plot per-layer t-SNE scatter plots coloured by cluster label.

    For each target ("X" and "XX.T") reads ``tsne_<target>.npy``,
    ``labels_<target>.npy`` and ``0verview.csv`` from ``rawsults/<run_id>/``
    and writes ``visualisation/<run_id>/tsne_<target>/tsne_sample<k>.pdf``.

    Args:
        run_id: Name of the run directory under ``rawsults/``.
    """
    grid_rows, grid_cols = 6, 4
    for target in ["X", "XX.T"]:
        outdir = f"visualisation/{run_id}/tsne_{target}"
        os.makedirs(outdir, exist_ok=True)

        tsne_tensor = np.load(f"rawsults/{run_id}/tsne_{target}.npy")
        labels_tensor = np.load(f"rawsults/{run_id}/labels_{target}.npy")
        seq_lens = pd.read_csv(f"rawsults/{run_id}/0verview.csv", index_col=0)["num_tokens"]

        num_layers = tsne_tensor.shape[1]
        # Ceil division, at least 1: never a zero step (fewer layers than
        # panels) and never more selected layers than the grid has panels.
        stride = max(1, -(-num_layers // (grid_rows * grid_cols)))

        for sample in range(tsne_tensor.shape[0]):
            seq_len = seq_lens[sample]
            plt.figure(figsize=(12, 16))
            plt.suptitle(f"t-SNE visualisations\n({run_id})\n")
            for i, layer in enumerate(range(stride, num_layers + 1, stride)):
                # Drop the rows that only exist because of padding.
                embeds = tsne_tensor[sample, layer - 1, :seq_len]
                labels = labels_tensor[sample, layer - 1, :seq_len]
                plt.subplot(grid_rows, grid_cols, i + 1)
                plt.scatter(embeds[:, 0], embeds[:, 1], s=1, c=labels, cmap="viridis")
                plt.title(f"after layer {layer}")
                plt.xlim(-100, 100)
                plt.ylim(-100, 100)
            plt.tight_layout()
            plt.savefig(f"{outdir}/tsne_sample{sample}.pdf")
            plt.close()

In [16]:
def plot_clustering(run_id):
    """Plot HDBSCAN evaluation curves and cluster-size bar charts per sample.

    For each target ("X" and "XX.T") reads ``clustereval_<target>.npy``,
    ``labels_<target>.npy`` and ``0verview.csv`` from ``rawsults/<run_id>/``
    and writes two PDFs per sample into
    ``visualisation/<run_id>/clustereval_<target>/``:
    ``cluster_evaluation_sample<k>.pdf`` (five metrics over layers) and
    ``cluster_size_sample<k>.pdf`` (sorted cluster sizes for selected layers).

    Args:
        run_id: Name of the run directory under ``rawsults/``.
    """
    metric_titles = ["number of clusters", "outlier rate", "Silhouette score", "Calinski-Harabasz score",
                     "Davies-Bouldin score"]
    grid_rows, grid_cols = 6, 4
    for target in ["X", "XX.T"]:
        outdir = f"visualisation/{run_id}/clustereval_{target}"
        os.makedirs(outdir, exist_ok=True)

        clustereval_tensor = np.load(f"rawsults/{run_id}/clustereval_{target}.npy")
        labels_tensor = np.load(f"rawsults/{run_id}/labels_{target}.npy")
        seq_lens = pd.read_csv(f"rawsults/{run_id}/0verview.csv", index_col=0)["num_tokens"]

        num_layers = clustereval_tensor.shape[1]
        # Ceil division, at least 1: never a zero step (fewer layers than
        # panels) and never more selected layers than the grid has panels.
        stride = max(1, -(-num_layers // (grid_rows * grid_cols)))

        for sample in range(clustereval_tensor.shape[0]):
            plt.figure(figsize=(6, 8))
            plt.suptitle(f"HDBSCAN cluster evaluation\n({run_id})\n")
            for i, title in enumerate(metric_titles):
                plt.subplot(3, 2, 1 + i)
                plt.title(title)
                plt.plot(clustereval_tensor[sample, :, i])
                plt.xlabel("layer")
                if i == 1:  # the outlier rate is a fraction; show it as a percentage
                    plt.gca().yaxis.set_major_formatter(PercentFormatter(1.0, decimals=1))
            plt.tight_layout()
            plt.savefig(f"{outdir}/cluster_evaluation_sample{sample}.pdf")
            plt.close()

            plt.figure(figsize=(12, 16))
            plt.suptitle(f"HDBSCAN cluster sizes\n({run_id})\n")
            for i, layer in enumerate(range(stride, num_layers + 1, stride)):
                labels = labels_tensor[sample, layer - 1, :seq_lens[sample]]
                cluster_sizes = np.bincount(labels[labels != -1])  # ignore outlier bin
                cluster_sizes = np.sort(cluster_sizes)[::-1]  # sort by decreasing size
                ranks = range(1, len(cluster_sizes) + 1)
                plt.subplot(grid_rows, grid_cols, i + 1)
                plt.bar(ranks, cluster_sizes)
                plt.title(f"after layer {layer}")
                plt.xlabel("k-th largest cluster")
                plt.xticks(ranks)
                if i % grid_cols == 0:  # label the y-axis on the first column only
                    plt.ylabel("cluster size")
            plt.tight_layout()
            plt.savefig(f"{outdir}/cluster_size_sample{sample}.pdf")
            plt.close()

In [17]:
# Load both text corpora. `split="all"` asks for every available split at once
# (presumably merged into a single dataset -- confirm against the datasets docs).
# Both names are passed to run_experiment as the `dataset` argument in later cells.
wikitext = load_dataset("wikitext", "wikitext-103-v1", split="all")
imdb = load_dataset("stanfordnlp/imdb", split="all")
print(imdb[0]["text"])  # sanity check: show the first IMDB record

I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. I also heard that at first it was seized by U.S. customs if it ever tried to enter this country, therefore being a fan of films considered "controversial" I really had to see this for myself.<br /><br />The plot is centered around a young Swedish drama student named Lena who wants to learn everything she can about life. In particular she wants to focus her attentions to making some sort of documentary on what the average Swede thought about certain political issues such as the Vietnam War and race issues in the United States. In between asking politicians and ordinary denizens of Stockholm about their opinions on politics, she has sex with her drama teacher, classmates, and married men.<br /><br />What kills me about I AM CURIOUS-YELLOW is that 40 years ago, this was considered pornographic. Really, the sex and nudity scenes are few and far between, eve

In [18]:
# Stand-alone model/tokeniser pair for inspecting the configuration and layer
# structure in this cell and the next; run_experiment builds its own model via
# build_model and does not read these globals.
MODEL_ID = "albert-xlarge-v2"  # alternative: "bert-large-uncased"
NUM_ATTN_HEADS = 1  # same single-head override that build_model hard-codes
tokeniser = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModel.from_pretrained(MODEL_ID, num_attention_heads=NUM_ATTN_HEADS)
print(model.config)

AlbertConfig {
  "_name_or_path": "albert-xlarge-v2",
  "architectures": [
    "AlbertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0,
  "bos_token_id": 2,
  "classifier_dropout_prob": 0.1,
  "down_scale_factor": 1,
  "embedding_size": 128,
  "eos_token_id": 3,
  "gap_size": 0,
  "hidden_act": "gelu_new",
  "hidden_dropout_prob": 0,
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "inner_group_num": 1,
  "intermediate_size": 8192,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "albert",
  "net_structure_type": 0,
  "num_attention_heads": 1,
  "num_hidden_groups": 1,
  "num_hidden_layers": 24,
  "num_memory_blocks": 0,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.41.1",
  "type_vocab_size": 2,
  "vocab_size": 30000
}



In [19]:
# Note: ALBERT shares its parameters across depth, so the summary lists a
# single AlbertLayer; that same layer is called repeatedly in the forward pass.
print(summary(model, depth=6))

Layer (type:depth-idx)                                  Param #
AlbertModel                                             --
├─AlbertEmbeddings: 1-1                                 --
│    └─Embedding: 2-1                                   3,840,000
│    └─Embedding: 2-2                                   65,536
│    └─Embedding: 2-3                                   256
│    └─LayerNorm: 2-4                                   256
│    └─Dropout: 2-5                                     --
├─AlbertTransformer: 1-2                                --
│    └─Linear: 2-6                                      264,192
│    └─ModuleList: 2-7                                  --
│    │    └─AlbertLayerGroup: 3-1                       --
│    │    │    └─ModuleList: 4-1                        --
│    │    │    │    └─AlbertLayer: 5-1                  --
│    │    │    │    │    └─LayerNorm: 6-1               4,096
│    │    │    │    │    └─AlbertAttention: 6-2         16,789,504
│    │    │    │    │ 

In [20]:
# Single run: pretrained ALBERT unrolled to 192 layers on WikiText, plain
# hidden-state similarities (no query/key projections), dense layers active.
run_experiment(
    wikitext,
    "albert-xlarge-v2",
    use_queries_and_keys=False,
    random_params=False,
    no_dense_layers=False,
    num_hidden_layers=192,
    sample_size=10,
)

experiment params:
dataset: Dataset({
    features: ['text'],
    num_rows: 1809468
})
model_id: albert-xlarge-v2
use_queries_and_keys: False
random_params: False
no_dense_layers: False
num_hidden_layers: 192
sample_size: 10
num_bins: 100


Analysing each sample: 100%|██████████| 10/10 [25:43<00:00, 154.32s/it]


In [25]:
# Render every figure for each run found under rawsults/.
# Only directories are run outputs; stray files (e.g. .DS_Store) would make
# np.load fail. Sorting gives a deterministic processing order.
for run_id in tqdm(sorted(
        entry for entry in os.listdir("rawsults/")
        if os.path.isdir(os.path.join("rawsults", entry)))):
    plot_histograms(run_id)
    plot_heatmaps(run_id)
    plot_tsne(run_id)
    plot_clustering(run_id)

100%|██████████| 1/1 [01:17<00:00, 77.17s/it]


In [22]:
# for dataset in [None, wikitext, imdb]:
#     for model_id in ["albert-xlarge-v2", "bert-large-uncased"]:
#         for use_queries_and_keys in [False, True]:
#             for random_params in [False, True]:
#                 for no_dense_layers in [False, True]:
#                     run_experiment(dataset, model_id, use_queries_and_keys, random_params, no_dense_layers)

In [23]:
# for dataset in [imdb]:
#     for num_hidden_layers in [192]:
#         for use_queries_and_keys in [False, True]:
#             run_experiment(dataset, "albert-xlarge-v2", use_queries_and_keys=use_queries_and_keys,
#                            num_hidden_layers=num_hidden_layers)