In [4]:
# Import stuff
import torch as t
import numpy as np
# Plotly needs a different renderer for VSCode/Notebooks vs Colab argh
import plotly.io as pio
pio.renderers.default = "notebook_connected"
import plotly.express as px
import einops
import plotly.graph_objects as go 
from functools import partial
import tqdm.auto as tqdm
import datasets

import transformer_lens
from transformer_lens import HookedTransformer, HookedTransformerConfig, FactoredMatrix, ActivationCache

# import torch.nn as nn
# import torch.nn.functional as F
# import torch.optim as optim
# from fancy_einsum import einsum
# import random
# from pathlib import Path
# from torch.utils.data import DataLoader

# from jaxtyping import Float, Int
# from typing import List, Union, Optional
# import copy

# import itertools
# from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer
# import dataclasses
# from IPython.display import HTML
# import transformer_lens
# import transformer_lens.utils as utils
# from transformer_lens.hook_points import (
#     HookedRootModule,
#     HookPoint,
# )  # Hooking utilities


# import circuitsvis as cv
# # Testing that the library works
# cv.examples.hello("Neel")


Let's start here with a clean setup, looking a little deeper at TL's capabilities. Builds on stuff figured out in "EasyTransformer.ipynb"
This doc relies on https://github.com/neelnanda-io/TransformerLens/blob/99e8a599f244dcdd70a5cf7005b47c7a057f4681/demos/Main_Demo.ipynb
which is up-to-date, detailed, and well-annotated 

In [5]:
## turn off AD to save memory, since we're focusing on model inference here 
t.set_grad_enabled(False)

device = 'cuda' if t.cuda.is_available() else 'cpu'
model = HookedTransformer.from_pretrained('gpt2-small', device=device)


Using pad_token, but it is not set yet.


Loaded pretrained model gpt2-small into HookedTransformer


We can return the loss (cross-entropy), logits, both, or none (for storing intermediate activations) by specifying "return_type"

In [6]:
text1 = "This is my input to gpt2-small"
logits, loss = model(text1, return_type = 'both')

## LG experiment: the loss should decrease with more context
text2 = text1 + "The more context I give it, the lower the loss I will achieve"
text3 = text2 + "Let's see how low we can make it while still only giving it a reasonable number of prompts."
text4 = text3 + "It's still not very low but at least it's going down..."
texts = [text1,text2,text3,text4]
test_losses = [model(text, return_type = 'loss') for text in texts]
print('cross-entropy losses with increasing context: ', test_losses)

cross-entropy losses with increasing context:  [tensor(5.6510), tensor(5.1699), tensor(4.5359), tensor(4.2693)]


In [7]:
## can run on text or tokens 
gpt2_text = text1
gpt2_tokens = model.to_tokens(gpt2_text)
print(gpt2_tokens.device) ## should be 'cpu'
gpt2_logits, gpt2_cache = model.run_with_cache(gpt2_tokens, remove_batch_dim=True)

# check that the loss on tokens is the same as on text
assert model(gpt2_tokens, return_type='loss') ==model(gpt2_text, return_type='loss')



cpu


IOI demo

In [9]:
#Plotting functions
# This is mostly a bunch of over-engineered mess to hack Plotly into producing 
# the pretty pictures I want, I recommend not reading too closely unless you 
# want Plotly hacking practice
def to_numpy(tensor, flat=False):
    if type(tensor)!=t.Tensor:
        return tensor
    if flat:
        return tensor.flatten().detach().cpu().numpy()
    else:
        return tensor.detach().cpu().numpy()
def imshow(tensor, xaxis=None, yaxis=None, animation_name='Snapshot', **kwargs):
    tensor = t.squeeze(tensor)
    px.imshow(to_numpy(tensor, flat=False), 
              labels={'x':xaxis, 'y':yaxis, 'animation_name':animation_name}, 
              **kwargs).show()
# Set default colour scheme
# Creates good defaults for showing divergent colour scales (ie with both 
# positive and negative values, where 0 is white)
imshow = partial(imshow, color_continuous_scale='RdBu', color_continuous_midpoint=0.0)

def line(x, y=None, hover=None, xaxis='', yaxis='', **kwargs):
    if type(y)==t.Tensor:
        y = to_numpy(y, flat=True)
    if type(x)==t.Tensor:
        x=to_numpy(x, flat=True)
    fig = px.line(x, y=y, hover_name=hover, **kwargs)
    fig.update_layout(xaxis_title=xaxis, yaxis_title=yaxis)
    fig.show()
def scatter(x, y, **kwargs):
    px.scatter(x=to_numpy(x, flat=True), y=to_numpy(y, flat=True), **kwargs).show()
def lines(lines_list, x=None, mode='lines', labels=None, xaxis='', yaxis='', title = '', log_y=False, hover=None, **kwargs):
    # Helper function to plot multiple lines
    if type(lines_list)==t.Tensor:
        lines_list = [lines_list[i] for i in range(lines_list.shape[0])]
    if x is None:
        x=np.arange(len(lines_list[0]))
    fig = go.Figure(layout={'title':title})
    fig.update_xaxes(title=xaxis)
    fig.update_yaxes(title=yaxis)
    for c, line in enumerate(lines_list):
        if type(line)==t.Tensor:
            line = to_numpy(line)
        if labels is not None:
            label = labels[c]
        else:
            label = c
        fig.add_trace(go.Scatter(x=x, y=line, mode=mode, name=label, hovertext=hover, **kwargs))
    if log_y:
        fig.update_layout(yaxis_type="log")
    fig.show()

Important points of the IOI problem: 
- It is nontrivial enough to learn something from (an actual circuit you might care about, from a non toy model)
- It's simple enough to have an algorithmic solution
- Sees two names at the start, then eliminates the one that appears again to return the second 

Exercise: Check that this works but giving it 10 examples of the task and check that they work but looking at the logits 



In [94]:
names1 = [' John', ' Mary', ' Lori', ' john', ' mary', ' lori']
names2 = [' Andres', ' Laurel', ' Mariel', ' andres', ' laurel', ' mariel']
places = ['store', 'bar', 'apartment', 'park']
things = ['some beer', 'some milk', 'an apple', 'a pencil']

import random
prompt_dict = {'After John and Mary went to the store, Mary handed a bottle of milk to': ' John'}
logits_diff = []


for i in range(10):
    n1 = random.choice(names1)
    # get index of first token of name
    n1_index = model.tokenizer.encode(n1)[0]
    n2 = random.choice(names2)
    n2_index = model.tokenizer.encode(n2)[0]
    prompt1 = f"After{n1} and{n2} went to the {random.choice(places)},{n1} handed {random.choice(things)} to"
    answer = n2
    prompt_dict[prompt1] = answer
    prompt2 = f"After{n1} and{n2} went to the {random.choice(places)},{n2} handed {random.choice(things)} to"
    answer = n1
    prompt_dict[prompt2] = answer
    ## get logit difference for each pair of names in prompts1 and prompts2
    logits1 = model(prompt1)
    logits2 = model(prompt2)
    logit1_diff = logits1[0,-1,n1_index] - logits1[0,-1,n2_index]
    logits_diff.append(logit1_diff)
    logit2_diff = logits2[0,-1,n1_index] - logits2[0,-1,n2_index]
    logits_diff.append(logit2_diff)


def get_top_preds(prompt, answer, top_n):
    prompt_str_toks = model.to_str_tokens(prompt,prepend_bos = True)
    answer_str_toks = model.to_str_tokens(answer, prepend_bos = False)
    both_str_toks = model.to_str_tokens(prompt + answer,prepend_bos = True)
    print('tokenized prompt: ', prompt_str_toks)
    print('tokenized answer: ', answer_str_toks)
    #print('tokenized total: ', both_str_toks)
    prompt_len = len(prompt_str_toks)
    answer_len = len(answer_str_toks)
    logits = model(prompt + answer) #logits for full sentence 
    ## loop over the answer tokens
    for idx in range(prompt_len, prompt_len + answer_len):
        print("Logits for token:", answer_str_toks[idx - prompt_len])
        # get prediction of next token from the token index before idx
        token_logits = logits[0, idx - 1]
        probs = t.nn.functional.softmax(token_logits, dim = -1)
        # sort the probabilities in descending order 
        vals, ids = token_logits.sort(descending = True)
        for i in range(top_n):
            print(f"Top {i}th logit. Logit = {vals[i]}, prob = {probs[ids[i]].item():.2%}, token = {model.tokenizer.decode(ids[i])}")



In [43]:
logits[0,10].sort(descending = True)

torch.return_types.sort(
values=tensor([13.2429, 13.0791, 11.8180,  ..., -9.4433, -9.7380, -9.7437]),
indices=tensor([   13,    12,    11,  ..., 40242, 33434, 45449]))

In [95]:
test =list(prompt_dict.items())[1]
print(test)
get_top_preds(test[0], test[1],5)

('After John and Andres went to the apartment, John handed an apple to', ' Andres')
tokenized prompt:  ['<|endoftext|>', 'After', ' John', ' and', ' And', 'res', ' went', ' to', ' the', ' apartment', ',', ' John', ' handed', ' an', ' apple', ' to']
tokenized answer:  [' And', 'res']
Logits for token:  And
Top 0th logit. Logit = 17.644664764404297, prob = 72.57%, token =  And
Top 1th logit. Logit = 14.795616149902344, prob = 4.20%, token =  John
Top 2th logit. Logit = 14.460119247436523, prob = 3.00%, token =  the
Top 3th logit. Logit = 14.068599700927734, prob = 2.03%, token =  them
Top 4th logit. Logit = 13.848136901855469, prob = 1.63%, token =  his
Logits for token: res
Top 0th logit. Logit = 27.33884620666504, prob = 99.89%, token = res
Top 1th logit. Logit = 19.31625747680664, prob = 0.03%, token = ré
Top 2th logit. Logit = 19.03575325012207, prob = 0.02%, token = ros
Top 3th logit. Logit = 18.689151763916016, prob = 0.02%, token = r
Top 4th logit. Logit = 17.922199249267578, prob

Takeaway, it seems to do pretty well! The main issues are probably attached to tokenization
- For short words (like the example of John, Mary, store, milk), it gives the right answer  high (>80%) prob
- For longer names ("Laurel"), the probability for the first token is much lower (and sometimes does not work at all), but later tokens in the name are predicted with higher probabilities.
- For lower-case names, it also has a harder time, with the top choice usually given as "her" or "the", though all of the probabilities for the top ~3 logits are around the same. This makes sense 
- Also for longer names (and sometimes shorter ones too), the tokenization of the names as they appear in the prompt is different than the tokenization in the answer. It usually guesses the tokenization as it's shown in the prompt, but the way we've done things here (continuously prompting from the answer tokens) changes its path a little. We would probably get a better probability estimate if we appended the first guessed token logit to the logits instead of iterating through the answer tokens, but this would only work if the first guess is "correct" (which it's not always.)
- I think this is further evidence that the IOI circuit is really copying tokens (that are upper case!) from the prompt, and that this isn't always consistent with the tokenization of the model. 

In [101]:
logits.shape

torch.Size([1, 11, 50257])

In [115]:
### looking at the logit differences (logits[first name] - logits[second name])

assert len(logits_diff) == len(list(prompt_dict.keys())[1:])
keys_list = list(prompt_dict.keys())[1:]
for ind in range(len(logits_diff)):
    print(f"Input: {keys_list[ind]}, logit diff = {logits_diff[ind]}")
    ## for the first example in the pair, the second name should have higher probability, so the difference should be negative: 
    if ind%2 ==0 and logits_diff[ind] >0:
        test =list(prompt_dict.items())[ind+1]

        print(f"Difference Greater than Zero!, Top pred = {get_top_preds(test[0], test[1],1)}")
       #assert logits_diff[ind] < 0
    elif ind%2 !=0 and logits_diff[ind] < 0:
        print(f"Difference Less Than Zero!")

# logit differences make sense unless there are two lower cased names... 



Input: After John and Andres went to the apartment, John handed an apple to, logit diff = -2.849048614501953
Input: After John and Andres went to the apartment, Andres handed some beer to, logit diff = 0.9219741821289062
Input: After Mary and Mariel went to the store, Mary handed a pencil to, logit diff = -0.9477148056030273
Input: After Mary and Mariel went to the apartment, Mariel handed a pencil to, logit diff = 3.476107597351074
Input: After john and Laurel went to the bar, john handed some beer to, logit diff = -3.8186416625976562
Input: After john and Laurel went to the bar, Laurel handed some beer to, logit diff = 5.98280143737793
Input: After Lori and andres went to the store, Lori handed some milk to, logit diff = -2.0365066528320312
Input: After Lori and andres went to the bar, andres handed an apple to, logit diff = 1.2254600524902344
Input: After mary and andres went to the bar, mary handed some beer to, logit diff = 0.3899421691894531
tokenized prompt:  ['<|endoftext|>', '

Exercise from notebook (Aside): 

In [61]:
get_top_preds("The Big Government Organisation (BGO) organised the Complicated Bureaucratic Dalliance (", "CBD). ", 2)
get_top_preds("The Big Government Organisation (BGO) organised the Complicated Bureaucratic Dalliance (CBD)", ". ", 2)

### Doesn't guess "."

tokenized prompt:  ['<|endoftext|>', 'The', ' Big', ' Government', ' Organisation', ' (', 'B', 'GO', ')', ' organised', ' the', ' Compl', 'icated', ' Bureau', 'cr', 'atic', ' D', 'all', 'iance', ' (']
tokenized answer:  ['C', 'BD', ').', ' ']
Logits for token: C
Top 0th logit. Logit = 18.716259002685547, prob = 47.92%, token = C
Top 1th logit. Logit = 17.525310516357422, prob = 14.56%, token = CC
Logits for token: BD
Top 0th logit. Logit = 16.978485107421875, prob = 33.23%, token = BD
Top 1th logit. Logit = 15.589770317077637, prob = 8.29%, token = BO
Logits for token: ).
Top 0th logit. Logit = 17.5432186126709, prob = 77.99%, token = )
Top 1th logit. Logit = 15.107534408569336, prob = 6.83%, token = ),
Logits for token:  
Top 0th logit. Logit = 18.157094955444336, prob = 24.18%, token = 

Top 1th logit. Logit = 17.881729125976562, prob = 18.36%, token =  The
tokenized prompt:  ['<|endoftext|>', 'The', ' Big', ' Government', ' Organisation', ' (', 'B', 'GO', ')', ' organised', ' the', 

Comparing Activations: 

Let's compare the activations for a pair of prompts (where the logit diff is pos/neg). 
- Naive thing to try: look at the norm of the difference of internal activations within the model on the two inputs, thinking that the activations shouldn't be that similar. 
Exercise: Flaw in this approach: the norm of the difference is always positive, so won't give the relative importance of the activations. 
This also only works if the prompts are the same length (depending on the length of the name, and tokenization, this won't be true, and if it's not the approach will be mismatched (you think you are comparing the "john" and "Andres" tokens since the rest will be similar)). It's also probably not as good a measure as comparing the 

In [197]:
## choose two examples that are the "reverse" of one another AND the same size 

# example = 'After John and Mary went to the store, Mary handed a bottle of milk to'
# reverse = 'After John and Mary went to the store, John handed a bottle of milk to'

example = list(prompt_dict.keys())[5]
reverse = list(prompt_dict.keys())[6]

example_logits, example_cache = model.run_with_cache(example, remove_batch_dim= True)
reverse_logits, reverse_cache = model.run_with_cache(reverse, remove_batch_dim= True)

example_len = len(model.to_tokens(example, prepend_bos =True)[0])
reverse_len = len(model.to_tokens(reverse, prepend_bos =True)[0])

assert example_len == reverse_len
def get_norm_diff(cache1, cache2, example_len, hook):
    # initialize
    norm_diff = t.zeros(model.cfg.n_layers, example_len)
    model.cfg.n_layers
    for layer in range(model.cfg.n_layers):
        example_result = cache1[f"blocks.{layer}.{hook}"]
        reverse_result = cache2[f"blocks.{layer}.{hook}"]

        result_diff = example_result - reverse_result
        #normalize the residual difference by the average size of the original
        norm_diff[layer] = result_diff.norm(dim = -1)/ t.sqrt(example_result.norm(dim=-1)*reverse_result.norm(dim=-1))
    example_str_toks = model.to_str_tokens(example, prepend_bos =True)
    x_labels = [f"{n}.{tok}" for n, tok in enumerate(example_str_toks)]
    imshow(norm_diff, yaxis = "Layer", title = f'Norm of Difference in: {hook}', x = x_labels)
## norm in d_model (embedding dimension)
get_norm_diff(example_cache, reverse_cache, example_len, 'hook_resid_pre')


In [271]:
print(example_cache['blocks.0.mlp.hook_pre'].shape,example_cache['blocks.0.mlp.hook_post'].shape,example_cache['blocks.0.hook_mlp_out'].shape)
model.cfg.d_mlp / model.cfg.d_model #Why?? 
#model


torch.Size([15, 3072]) torch.Size([15, 3072]) torch.Size([15, 768])


4.0

In [198]:
## for the attention layers 
### Also the residual stream after the attention layer, so summing over embedding dimension (represents features?)
get_norm_diff(example_cache, reverse_cache, example_len, 'hook_attn_out')


In [199]:
#print(model.blocks[0])
## hook_pre and hook_post are summing over d_mlp, mlp_out is summing over d_model 
get_norm_diff(example_cache, reverse_cache, example_len, 'mlp.hook_pre')
get_norm_diff(example_cache, reverse_cache, example_len, 'mlp.hook_post')
get_norm_diff(example_cache, reverse_cache, example_len, 'hook_mlp_out')



In the above plots, the token with the greatest difference in attention for the two examples is the name token in the second half of the sentence, during the early layers of the network, which makes sense. At later layers, the last token "to" also becomes important, and the difference also becomes larger, since each sentence will learn difference names for what comes next. 

Instead of looking at the difference across layers for each token in the context, next we'll plot the difference of the attention patterns across layers and attention heads. The pattern is not super clear to me, but it seems like the n

In [280]:
print(example_cache[f"blocks.0.attn.hook_pattern"].shape) ## (n_heads, n_context, n_context)
print(example_cache[f"blocks.0.hook_attn_out"].shape) ## (n_heads, d_model)
print(example_cache[f"blocks.0.hook_resid_mid"].shape) ## (n_heads, d_model)


torch.Size([12, 15, 15])
torch.Size([15, 768])
torch.Size([15, 768])


In [274]:
example_cache.keys()

dict_keys(['hook_embed', 'hook_pos_embed', 'blocks.0.hook_resid_pre', 'blocks.0.ln1.hook_scale', 'blocks.0.ln1.hook_normalized', 'blocks.0.attn.hook_q', 'blocks.0.attn.hook_k', 'blocks.0.attn.hook_v', 'blocks.0.attn.hook_attn_scores', 'blocks.0.attn.hook_pattern', 'blocks.0.attn.hook_z', 'blocks.0.hook_attn_out', 'blocks.0.hook_resid_mid', 'blocks.0.ln2.hook_scale', 'blocks.0.ln2.hook_normalized', 'blocks.0.mlp.hook_pre', 'blocks.0.mlp.hook_post', 'blocks.0.hook_mlp_out', 'blocks.0.hook_resid_post', 'blocks.1.hook_resid_pre', 'blocks.1.ln1.hook_scale', 'blocks.1.ln1.hook_normalized', 'blocks.1.attn.hook_q', 'blocks.1.attn.hook_k', 'blocks.1.attn.hook_v', 'blocks.1.attn.hook_attn_scores', 'blocks.1.attn.hook_pattern', 'blocks.1.attn.hook_z', 'blocks.1.hook_attn_out', 'blocks.1.hook_resid_mid', 'blocks.1.ln2.hook_scale', 'blocks.1.ln2.hook_normalized', 'blocks.1.mlp.hook_pre', 'blocks.1.mlp.hook_post', 'blocks.1.hook_mlp_out', 'blocks.1.hook_resid_post', 'blocks.2.hook_resid_pre', 'block

In [257]:


norm_diff = t.zeros(model.cfg.n_layers, model.cfg.n_heads)
dims = [-2,-1]
hook = 'attn.hook_pattern'
for layer in range(model.cfg.n_layers):
    example_result = example_cache[f"blocks.{layer}.{hook}"]
    reverse_result = reverse_cache[f"blocks.{layer}.{hook}"]
    diff = example_result - reverse_result 
    a = norm_multi_dim(diff, dims)
    b = norm_multi_dim(example_result, dims)
    c = norm_multi_dim(reverse_result, dims)
    #print(a.shape,b.shape, c.shape, example_result.shape)
    norm_diff[layer] = a/t.sqrt(b*c)
imshow(norm_diff, yaxis = 'Layer', xaxis = 'Heads', title = "Norm of difference of Attention Pattern")

model.cfg.n_layers, model.cfg.n_heads


(12, 12)

Ablating Layers

In [None]:
def ablate_layer_hook(layer_out, hook):
    layer_out[:] = 0
    return layer_out
def 
attn_ablation = []
for layer in range(model.cfg.n_layers):
    logits = model.run_with_hooks(example_prompt, fwd_hooks = [(f"blocks.{layer}.hook_attn_out", ablate_layer_hook)])
    ablated_logit_diff = get_logit_diff(logits)


In [85]:
from transformers import GPT2Tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

list(tokenizer.encoder.keys())




['!',
 '"',
 '#',
 '$',
 '%',
 '&',
 "'",
 '(',
 ')',
 '*',
 '+',
 ',',
 '-',
 '.',
 '/',
 '0',
 '1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 ':',
 ';',
 '<',
 '=',
 '>',
 '?',
 '@',
 'A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T',
 'U',
 'V',
 'W',
 'X',
 'Y',
 'Z',
 '[',
 '\\',
 ']',
 '^',
 '_',
 '`',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z',
 '{',
 '|',
 '}',
 '~',
 '¡',
 '¢',
 '£',
 '¤',
 '¥',
 '¦',
 '§',
 '¨',
 '©',
 'ª',
 '«',
 '¬',
 '®',
 '¯',
 '°',
 '±',
 '²',
 '³',
 '´',
 'µ',
 '¶',
 '·',
 '¸',
 '¹',
 'º',
 '»',
 '¼',
 '½',
 '¾',
 '¿',
 'À',
 'Á',
 'Â',
 'Ã',
 'Ä',
 'Å',
 'Æ',
 'Ç',
 'È',
 'É',
 'Ê',
 'Ë',
 'Ì',
 'Í',
 'Î',
 'Ï',
 'Ð',
 'Ñ',
 'Ò',
 'Ó',
 'Ô',
 'Õ',
 'Ö',
 '×',
 'Ø',
 'Ù',
 'Ú',
 'Û',
 'Ü',
 'Ý',
 'Þ',
 'ß',
 'à',
 'á',
 'â',
 'ã',
 'ä',
 'å',
 'æ',
 'ç',
 'è',
 'é',
 'ê

In [12]:
# for later
model.tokens_to_residual_directions()



print(type(gpt2_cache))
## get the attention pattern for layer 0 
attn_pattern = gpt2_cache['pattern', 0, 'attn']
print('shape of attention pattern: ', attn_pattern.shape)
# get a list of all the tokens in string form
gpt2_string_tokens = model.to_str_tokens(gpt2_text)

#Visualize the attention pattern for layer 0 
print("Layer 0 attention head patterns: ")
cv.attention.attention_patterns(tokens = gpt2_string_tokens, attention = attn_pattern)

<class 'transformer_lens.ActivationCache.ActivationCache'>
shape of attention pattern:  torch.Size([12, 11, 11])
Layer 0 attention head patterns: 


NameError: name 'cv' is not defined

In [13]:
## looking at the unembed bias that folding in introduces (default)
unembed_bias = model.unembed.b_U
bias_vals, bias_idx = unembed_bias.sort()

top_k = 10
print(f"Bottom {top_k} values")
for i in range(top_k):
    print(f"{bias_vals[i].item():.2f} {repr(model.to_string(bias_idx[i]))}")

print(f"Top {top_k} values")
for i in range(top_k,0,-1):
    print(f"{bias_vals[-i].item():.2f} {repr(model.to_string(bias_idx[-i]))}")

Bottom 10 values
-11.96 'Gordon'
-11.64 ' touchscreen'
-11.52 ' pipes'
-11.35 ' dental'
-10.99 'waters'
-10.96 ' extravag'
-10.71 ' Sorce'
-10.53 'stores'
-10.51 ' pricey'
Top 10 values
15.98 'ed'
15.99 '5'
16.07 '4'
16.59 '�'
16.71 '3'
17.22 'b'
17.82 '/'
17.85 ' of'
17.86 '\x00'
18.16 '1'
