# Investigation of Cosine Similarity of Block Intermediates

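> Examines how the norms of block intermediates (`proj_out` and `ffwd_out`) vary across layers, to decide whether cosine similarity is a better measure than Euclidean distance for comparing them.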

In [None]:
# | hide
from pathlib import Path
from typing import Callable, Dict, List, Optional, Iterable, Protocol, Sequence, Tuple, TypeVar, Type

In [None]:
#| hide
from fastcore.test import *
from matplotlib.axes import Axes
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
import torch
from torch.nn import functional as F
from tqdm.auto import tqdm

In [None]:
# | hide

from transformer_experiments.common.substring_generator import all_unique_substrings
from transformer_experiments.common.text_analysis import (
    build_next_token_map,
    SubstringFrequencyAnalysis,
    top_nonzero_tokens
)
from transformer_experiments.common.utils import (
    aggregate_by_string_key,
    DataWrapper,
    topk_across_batches,
)
from transformer_experiments.dataset_split import split_text_dataset
from transformer_experiments.datasets.tinyshakespeare import (
    TinyShakespeareDataSet,
)
from transformer_experiments.models.transformer import (
    n_layer,
    TransformerLanguageModel
)
from transformer_experiments.models.transformer_helpers import (
    unsqueeze_emb,
    EncodingHelpers,
    LogitsWrapper,
    TransformerAccessors
)
from transformer_experiments.trained_models.tinyshakespeare_transformer import (
    create_model_and_tokenizer
)
from transformer_experiments.experiments.block_internals import (
    BlockInternalsAccessors,
    BlockInternalsExperiment,
    BatchedBlockInternalsExperiment,
    BatchedBlockInternalsExperimentSlicer,
    BlockInternalsAnalysis,
)
from transformer_experiments.experiments.similar_strings import (
    SimilarStringsData,
    SimilarStringsExperiment,
    SimilarStringsResult
)
from transformer_experiments.experiments.logit_lens import LogitLens

In [None]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Paths below are relative, so this cell must run from the notebook's directory.
ts = TinyShakespeareDataSet(cache_file='../artifacts/input.txt')
m, tokenizer = create_model_and_tokenizer(
    saved_model_filename='../artifacts/shakespeare.pt',
    dataset=ts,
    device=device,
)
# Only the second element of the split is kept (presumably the validation
# portion, given train_pct=0.9 -- confirm against split_text_dataset).
_, val_data = split_text_dataset(ts.text, tokenizer, train_pct=0.9)
# Helper objects passed to the experiment constructed later in the notebook.
encoding_helpers = EncodingHelpers(tokenizer, device)
accessors = TransformerAccessors(m, device)

In [None]:
# Show which device ('cuda' or 'cpu') the notebook is running on.
print(f"device is {device}")

device is cpu


In [None]:
# Directory that should contain the pre-generated slen10 block-internals files.
slen10_dir = Path('../artifacts/block_internals_results/large_files/slen10/')
# `any` stops at the first directory entry instead of building the full listing
# just to compare it against an empty list.
if not any(slen10_dir.glob('*')):
    print("Run `make block_internals_slen10_dataset` in the project root to generate the required dataset")

In [None]:
# Build the set of input strings for the experiment from the dataset text.
# The second argument (10) is presumably the substring length -- confirm
# against all_unique_substrings.
strings10 = all_unique_substrings(ts.text, 10)

In [None]:
# Experiment object over `strings10`. `output_dir` is the same directory that
# the earlier cell checks for pre-generated results.
exp10 = BatchedBlockInternalsExperiment(
    eh=encoding_helpers,
    accessors=accessors,
    strings=strings10,
    output_dir=Path('../artifacts/block_internals_results/large_files/slen10/'),
    batch_size=10000,
)

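First, investigate whether there is a lot of variance in the norms of the block intermediates. If so, it suggests that cosine similarity may be a better measure than Euclidean distance: Euclidean distance is sensitive to vector magnitude, whereas cosine similarity depends only on direction.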

In [None]:
# For every block, load the first saved batch (batch_idx=0) of block outputs
# and report the mean and standard deviation of the per-row L2 norms.
for block_idx in range(n_layer):
    # mmap=True asks torch.load to memory-map the saved file rather than read it all up front.
    proj_out_batch = torch.load(str(exp10._block_output_filename(batch_idx=0, block_idx=block_idx)), mmap=True)
    # Take index -1 along dim 1 (presumably the last token position -- confirm
    # the saved layout). torch.norm is deprecated; torch.linalg.vector_norm
    # computes the same 2-norm over the last dimension.
    proj_out_norms = torch.linalg.vector_norm(proj_out_batch[:, -1, :], dim=-1)
    print(f"Layer {block_idx}: mean {proj_out_norms.mean()}, std {proj_out_norms.std()}")


Layer 0: mean 27.912761688232422, std 1.1137561798095703
Layer 1: mean 33.36977767944336, std 1.7041479349136353
Layer 2: mean 39.782466888427734, std 2.0023622512817383
Layer 3: mean 46.48314666748047, std 3.300010919570923
Layer 4: mean 53.44303894042969, std 6.607938289642334
Layer 5: mean 61.70024871826172, std 11.696634292602539


In [None]:
# For every block, load the first saved batch (batch_idx=0) of feed-forward
# outputs and report the mean and standard deviation of the per-row L2 norms.
for block_idx in range(n_layer):
    # mmap=True asks torch.load to memory-map the saved file rather than read it all up front.
    ffwd_out_batch = torch.load(str(exp10._ffwd_output_filename(batch_idx=0, block_idx=block_idx)), mmap=True)
    # Take index -1 along dim 1 (presumably the last token position -- confirm
    # the saved layout). torch.norm is deprecated; torch.linalg.vector_norm
    # computes the same 2-norm over the last dimension.
    ffwd_out_norms = torch.linalg.vector_norm(ffwd_out_batch[:, -1, :], dim=-1)
    print(f"Layer {block_idx}: mean {ffwd_out_norms.mean()}, std {ffwd_out_norms.std()}")


Layer 0: mean 6.409949779510498, std 1.142516851425171
Layer 1: mean 8.440470695495605, std 0.9452682137489319
Layer 2: mean 9.34270191192627, std 1.0641635656356812
Layer 3: mean 11.903395652770996, std 1.5840272903442383
Layer 4: mean 13.59791374206543, std 2.9059391021728516
Layer 5: mean 19.13654136657715, std 5.285085201263428


OK, so for both `proj_out` and `ffwd_out`, the mean norm increases in the later layers and so does its standard deviation. So, cosine similarity is probably a better measure than Euclidean distance.