In [116]:
import torch
from tqdm.auto import tqdm
from transformer_lens import HookedTransformer, ActivationCache, utils, patching
from jaxtyping import Float, Int, Bool
from torch import Tensor
from tqdm.auto import tqdm
import plotly.io as pio
import ipywidgets as widgets
from IPython.display import display, clear_output
from einops import einsum
import plotly.express as px
import numpy as np
import pandas as pd

# Plotly renderer used for figures displayed in this notebook.
pio.renderers.default = "notebook_connected"
device = "cuda" if torch.cuda.is_available() else "cpu"
# Inference only: switch autograd off globally. `torch.autograd.set_grad_enabled`
# is the same switch as `torch.set_grad_enabled`, so a single call is enough.
torch.set_grad_enabled(False)

from haystack_utils import get_mlp_activations
import haystack_utils

%reload_ext autoreload
%autoreload 2

ImportError: cannot import name 'HookPoints' from 'transformer_lens.hook_points' (/opt/conda/lib/python3.10/site-packages/transformer_lens/hook_points.py)

In [2]:
# Housekeeping helper from haystack_utils; presumably releases cached (GPU) memory — TODO confirm.
haystack_utils.clean_cache()

In [3]:
# Intervention target. Declared before the activation collection so that the
# layer is specified exactly once (it used to be hard-coded a second time as
# range(3, 4), which broke with a KeyError as soon as the constant changed).
LAYER_TO_ABLATE = 3
NEURONS_TO_ABLATE = [669]

model = HookedTransformer.from_pretrained("EleutherAI/pythia-70m",
    center_unembed=True,
    center_writing_weights=True,
    fold_ln=True,
    device=device)

# Keep only the first 200 examples of each corpus.
german_data = haystack_utils.load_json_data("data/german_europarl.json")[:200]
english_data = haystack_utils.load_json_data("data/english_europarl.json")[:200]

# MLP activations keyed by layer index; only the intervention layer is needed.
# mean=False presumably keeps per-token activations instead of their mean — TODO confirm.
english_activations = {
    LAYER_TO_ABLATE: get_mlp_activations(english_data, LAYER_TO_ABLATE, model, mean=False)
}
german_activations = {
    LAYER_TO_ABLATE: get_mlp_activations(german_data, LAYER_TO_ABLATE, model, mean=False)
}

# Average activation of the target neuron(s) on German text ("active") and on
# English text ("inactive"); the neuron hooks write these values.
MEAN_ACTIVATION_ACTIVE = german_activations[LAYER_TO_ABLATE][:, NEURONS_TO_ABLATE].mean()
MEAN_ACTIVATION_INACTIVE = english_activations[LAYER_TO_ABLATE][:, NEURONS_TO_ABLATE].mean()

def _overwrite_target_neurons(activations, fill_value):
    """Write `fill_value` into the NEURONS_TO_ABLATE slots of `activations` in place and return it."""
    activations[:, :, NEURONS_TO_ABLATE] = fill_value
    return activations


def deactivate_neurons_hook(value, hook):
    """Forward hook: pin the target neurons to MEAN_ACTIVATION_INACTIVE at every position."""
    return _overwrite_target_neurons(value, MEAN_ACTIVATION_INACTIVE)


def activate_neurons_hook(value, hook):
    """Forward hook: pin the target neurons to MEAN_ACTIVATION_ACTIVE at every position."""
    return _overwrite_target_neurons(value, MEAN_ACTIVATION_ACTIVE)


# Ready-made `fwd_hooks` lists for `model.hooks(...)`, both attached to the
# post-activation hook point of the MLP in LAYER_TO_ABLATE.
deactivate_neurons_fwd_hooks = [(f'blocks.{LAYER_TO_ABLATE}.mlp.hook_post', deactivate_neurons_hook)]
activate_neurons_fwd_hooks = [(f'blocks.{LAYER_TO_ABLATE}.mlp.hook_post', activate_neurons_hook)]

all_ignore, not_ignore = haystack_utils.get_weird_tokens(model, plot_norms=False)

Using pad_token, but it is not set yet.


Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer
data/german_europarl.json: Loaded 2000 examples with 152 to 2000 characters each.
data/english_europarl.json: Loaded 2000 examples with 165 to 2000 characters each.


  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

In [124]:
# Token pool for filler text, drawn from the German sample while skipping `all_ignore`
# (k=50 presumably caps the pool at the 50 most frequent tokens — TODO confirm).
common_tokens = haystack_utils.get_common_tokens(german_data, model, all_ignore, k=50)
# 200 prompts built around the word " Vorschlägen"; presumably `length` random common
# tokens followed by the word's tokens — TODO confirm in haystack_utils.
prompts = haystack_utils.generate_random_prompts(" Vorschlägen", model, common_tokens, 200, length=20)

  0%|          | 0/200 [00:00<?, ?it/s]

## Find important previous-token heads

In [5]:
# Generate 100 prompts around " Vorschlägen", then drop the last four token
# positions of every prompt.
prompts_with_word = haystack_utils.generate_random_prompts(" Vorschlägen", model, common_tokens, 100, length=20)
random_prompts = prompts_with_word[:, :-4]
print(random_prompts.shape)
first_prompt_tokens = model.to_str_tokens(random_prompts[0])
print(first_prompt_tokens)

torch.Size([100, 20])
[' W', ' zu', 'ge', ' auf', 'ge', ' in', 'icht', 'ß', ' W', 'ig', ' in', 'g', ' er', 'gen', ' W', ' das', 'in', ' er', 'ch', 'in']


In [38]:
# With the default return type the first value returned by run_with_cache is the
# logits (not a loss); only the cache is needed here.
_, cache = model.run_with_cache(random_prompts)

# Per-layer mean of every head's `hook_z` output, averaged over batch and position.
# The layer count comes from the model config instead of a hard-coded 6.
mean_attention_activations = torch.stack([
    cache[f'blocks.{layer}.attn.hook_z'].mean((0, 1))
    for layer in range(model.cfg.n_layers)
])
print(mean_attention_activations.shape) # layer, head, d_head

torch.Size([6, 8, 64])


In [39]:
def get_ablate_attention_hook(layer, head, mean_activation, pos=-2):
    """Build a forward hook that overwrites one attention head's output at one position.

    Args:
        layer: Layer index; selects both the hook point and the first index into
            `mean_activation`.
        head: Head index within that layer.
        mean_activation: Tensor indexed as [layer, head, :] that supplies the
            replacement vector.
        pos: Sequence position to overwrite (default -2).

    Returns:
        A `(hook_name, hook_fn)` tuple, where `hook_name` is
        `blocks.{layer}.attn.hook_z` and `hook_fn` edits its input in place.
    """
    hook_name = f'blocks.{layer}.attn.hook_z'

    def ablate_attention_head(value, hook):
        # Only the (pos, head) slice is replaced; every other entry is untouched.
        replacement = mean_activation[layer, head, :]
        value[:, pos, head, :] = replacement
        return value

    return hook_name, ablate_attention_head

In [55]:
pos = -2


def _final_token_losses(tokens):
    """Per-prompt loss at the last position, returned as a plain Python list."""
    per_token_loss = model(tokens, return_type="loss", loss_per_token=True)
    return per_token_loss[:, -1].tolist()


# One entry per (layer, head), ordered layer-major: L0H0, L0H1, ..., L5H7.
losses = []
for layer in range(6):
    for head in range(8):
        ablate_head_hook = [get_ablate_attention_hook(layer, head, mean_attention_activations, pos=pos)]
        with model.hooks(fwd_hooks=ablate_head_hook):
            loss = _final_token_losses(prompts)
            losses.append(loss)

# Baseline without any hook.
original_loss = _final_token_losses(prompts)

In [56]:
# `losses` holds one series per head for 6 layers x 8 heads, so the labels must
# cover all 6 layers (range(5) left the last layer's 8 series without a label).
names = ["Original"] + [f"L{layer}H{head}" for layer in range(6) for head in range(8)]
all_losses = [original_loss] + losses
assert len(names) == len(all_losses), "every loss series needs exactly one label"

haystack_utils.plot_barplot(all_losses, names, legend=False, title=f"Loss for ablated attention heads for pos {pos}")

- If the head boosts "gen" directly, it is a skip bigram.
- Otherwise, we don't know.

In [72]:
# Clean run: cache every activation for the full prompts.
_, cache = model.run_with_cache(prompts)

# Same run with head L0H7 overwritten at position -2 by its mean activation.
ablate_head_hook = [get_ablate_attention_hook(layer=0, head=7, mean_activation=mean_attention_activations, pos=-2)]
with model.hooks(fwd_hooks=ablate_head_hook):
    _, ablated_cache = model.run_with_cache(prompts)

def resid_to_logprob(cache, layer=0, pos=-2):
    """Decode an intermediate residual stream into batch-averaged log-probabilities.

    Reads `blocks.{layer}.hook_resid_post` from `cache`, passes it through the
    model's final layer norm and unembedding, takes the log-softmax at sequence
    position `pos` and averages the result over dim 0.
    """
    normalized = model.ln_final(cache[f'blocks.{layer}.hook_resid_post'])
    logits_at_pos = model.unembed(normalized)[:, pos]
    return logits_at_pos.log_softmax(-1).mean(0)

# Decode the residual stream after block 1, with and without the L0H7 ablation.
original_logprob = resid_to_logprob(cache, 1)
ablated_logprob = resid_to_logprob(ablated_cache, 1)

# Positive entries are tokens whose log-probability drops when the head is ablated.
diffs = original_logprob - ablated_logprob

# Show the 20 tokens with the largest drop, then the value for "gen" specifically.
top_diff, top_tokens = diffs.topk(20)
print(model.to_str_tokens(top_tokens))
print(diffs[model.to_single_token("gen")])

['izophren', ' fathers', ' fights', 'arman', 'aland', 'harma', 'apiro', 'ariat', 'etine', 'yr', ' lane', 'ability', 'ijn', 'enix', ' championships', 'agram', 'pires', 'omore', 'amond', 'icism']
tensor(-2.2559, device='cuda:0')


In [133]:
# Residual stream head activation value on gen token:
# L0H7's z vector at position -2, mapped through its W_O slice and averaged over prompts.
gen_z = cache['blocks.0.attn.hook_z'][:, -2, 7, :]
gen_res = einsum(gen_z, model.W_O[0, 7], "batch d_head, d_head d_model -> batch d_model").mean(0)

# Dot products with mlp layers: one value per neuron, stored per layer as a numpy array.
res = {}
for layer in range(6):
    neuron_alignment = einsum(gen_res, model.W_in[layer], "d_model, d_model d_mlp -> d_mlp")
    res[layer] = neuron_alignment.cpu().numpy()
    print(f"Layer {layer}: {neuron_alignment.abs().mean()}")


Layer 0: 0.05415569990873337
Layer 1: 0.15168869495391846
Layer 2: 0.09743906557559967
Layer 3: 0.11201384663581848
Layer 4: 0.11475804448127747
Layer 5: 0.12647250294685364


In [135]:
# Sorted layer-1 values from `res`, to eyeball how many neurons stand out from the bulk.
px.line(np.sort(res[1]))


In [132]:
# For each later layer, count and list the neurons whose value in `res` exceeds the threshold.
threshold = 0.5
for layer in range(1, 6):
    neurons_above = np.argwhere(res[layer] > threshold).flatten()
    print(f"Layer {layer}: {len(neurons_above)}", neurons_above.tolist())

Layer 1: 22 [32, 163, 472, 493, 544, 726, 733, 959, 1019, 1089, 1190, 1203, 1227, 1280, 1287, 1448, 1502, 1782, 1794, 1899, 1980, 1985]
Layer 2: 0 []
Layer 3: 3 [96, 272, 2014]
Layer 4: 1 [265]
Layer 5: 4 [166, 188, 881, 1885]


In [125]:
# Mean final-token loss, clean vs. L0H7 ablated. No activations are read back,
# so the model is called directly instead of building (and discarding) a cache.
loss = model(prompts, return_type="loss", loss_per_token=True)[:, -1].mean()

ablate_head_hook = [get_ablate_attention_hook(layer=0, head=7, mean_activation=mean_attention_activations, pos=-2)]
with model.hooks(fwd_hooks=ablate_head_hook):
    ablated_loss = model(prompts, return_type="loss", loss_per_token=True)[:, -1].mean()

print(loss, ablated_loss)

tensor(1.2433, device='cuda:0') tensor(4.3510, device='cuda:0')


In [165]:
# Get the ablated cache (run with `ablate_head_hook` applied).
# Caveat: this isn't actually clean path patching.
# E.g. when activating MLP5, that layer sees the activated activations of all earlier layers, not just the attention head.


with model.hooks(fwd_hooks=ablate_head_hook):
    _, ablated_cache = model.run_with_cache(prompts, return_type="loss", loss_per_token=True)



# Clean reference run: per-token loss plus the cache used to restore components.
original_loss, original_cache = model.run_with_cache(prompts, return_type="loss", loss_per_token=True)

# First series: clean per-prompt loss at the final position.
losses = [original_loss[:, -1].cpu().tolist()]
def ablate_component_hook(value, hook):
    """Forward hook: copy position -2 of this hook point from `ablated_cache` into `value`.

    The cache entry is looked up by `hook.name`. `value` is edited in place and
    returned; all other positions keep what the current run computed.
    """
    ablated_activation = ablated_cache[hook.name]
    value[:, -2] = ablated_activation[:, -2]
    return value

def activate_component_hook(value, hook):
    """Forward hook: copy position -2 of this hook point from `original_cache` into `value`.

    The cache entry is looked up by `hook.name`. `value` is edited in place and
    returned; all other positions keep what the current run computed.
    """
    clean_activation = original_cache[hook.name]
    value[:, -2] = clean_activation[:, -2]
    return value




# Reference for the printed deltas: the clean run captured in this cell, rather
# than a `loss` variable left behind by whichever cell happened to run last.
baseline_loss = original_loss[:, -1].mean()

for layer in range(1, 6):
    component = f"blocks.{layer}.mlp.hook_post"
    ablate_mlp_hook = [(component, ablate_component_hook)]

    # Total effect: this MLP's post-activation at position -2 is replaced by its
    # value from the ablated run; everything downstream is recomputed freely.
    with model.hooks(fwd_hooks=ablate_mlp_hook):
        ablated_loss = model(prompts, return_type="loss", loss_per_token=True)[:, -1].mean()
    # ":+.2f" emits the sign itself, so negative deltas no longer print as "+-0.80".
    print(f"Total effect MLP{layer}: {ablated_loss:.2f} ({ablated_loss - baseline_loss:+.2f})")

    # Direct effect: same patch, but the other MLPs and all attention outputs
    # in layers 1-5 are restored to their clean values at position -2.
    activate_layers = [i for i in range(1, 6) if i != layer]
    activate_mlp_hooks = [(f"blocks.{i}.mlp.hook_post", activate_component_hook) for i in activate_layers]
    activate_attn_hooks = [(f"blocks.{i}.hook_attn_out", activate_component_hook) for i in range(1, 6)]
    with model.hooks(fwd_hooks=ablate_mlp_hook+activate_mlp_hooks+activate_attn_hooks):
        ablated_loss = model(prompts, return_type="loss", loss_per_token=True)[:, -1]
    # Keep the per-prompt values (not the mean) for the bar plot.
    losses.append(ablated_loss.cpu().tolist())
    

Total effect MLP1: 4.28 (+3.03)
Total effect MLP2: 2.14 (+0.90)
Total effect MLP3: 0.44 (+-0.80)
Total effect MLP4: 1.07 (+-0.18)
Total effect MLP5: 3.14 (+1.89)


In [164]:
# Per-prompt final-token loss: the clean run first, then one series per MLP layer 1-5 from the patching loop.
haystack_utils.plot_barplot(losses, ["Original"] + [f"MLP{layer}" for layer in range(1, 6)], legend=False, title=f"Loss when patching ablated L0H7 information to later MLP layers")

In [249]:
# CURSED PATCHING FOR DIRECT EFFECTS

# Mean-ablate everything except the previous token and MLP1+2+5.
# Goal: make sure they are independent trigram tables.
# Per-head attention results are needed because `head_7_hook` below patches `hook_result`.
model.set_use_attn_result(True)
original_loss, original_cache = model.run_with_cache(prompts, return_type="loss", loss_per_token=True)
_, random_cache = model.run_with_cache(random_prompts)
def mean_ablate_component_hook(value, hook):
    """Forward hook: overwrite every position except the last three with the mean
    (over dims 0 and 1) of the same hook point in `random_cache`. Edits in place."""
    value[:, :-3] = random_cache[hook.name].mean((0, 1))
    return value

# Get the ablated cache (run with `ablate_head_hook` applied).
with model.hooks(fwd_hooks=ablate_head_hook):
    ablated_loss, ablated_cache = model.run_with_cache(prompts, return_type="loss", loss_per_token=True)
    print("Ablated loss", ablated_loss[:, -1].mean().item())

# Mean-ablation hooks for every MLP post-activation and every attention `hook_z`, layers 0-5.
mean_ablate_everything_hooks = [(f"blocks.{layer}.mlp.hook_post", mean_ablate_component_hook) for layer in range(0, 6)] +  [(f"blocks.{layer}.attn.hook_z", mean_ablate_component_hook) for layer in range(0, 6)]

# Activate L0H7
def activate_head_7_hook(value, hook):
    """Forward hook: restore head 7 at position -2 from `original_cache`. Edits in place."""
    value[:, -2, 7, :] = original_cache[hook.name][:, -2, 7, :] 
    return value
head_7_hook = [(f"blocks.0.attn.hook_result", activate_head_7_hook)]

def ablate_component_hook(value, hook):
    """Forward hook: copy position -2 of this hook point from `ablated_cache`. Edits in place."""
    value[:, -2] = ablated_cache[hook.name][:, -2]
    return value

# Semi-clean cache:
# leave L0H7 active, patch every attention `hook_z` and MLP post-activation from the ablated run.
ablate_all_hook = [(f"blocks.{i}.attn.hook_z", ablate_component_hook) for i in range(0, 6)] + [(f"blocks.{i}.mlp.hook_post", ablate_component_hook) for i in range(0, 6)]
with model.hooks(fwd_hooks=ablate_all_hook+head_7_hook):
    half_ablated_loss, half_ablated_cache = model.run_with_cache(prompts, return_type="loss", loss_per_token=True)
    print(half_ablated_loss[:, -1].mean())

def half_activate_component_hook(value, hook):
    """Forward hook: copy position -2 of this hook point from `half_ablated_cache`. Edits in place."""
    value[:, -2] = half_ablated_cache[hook.name][:, -2]
    return value


# Clean mean final-token loss, used as the reference for the delta printed below.
loss = original_loss[:, -1].mean()

# MLP1/2/5 receive their pre-activation from the semi-clean run.
patch_important = [(f"blocks.{layer}.mlp.hook_pre", half_activate_component_hook) for layer in [1, 2, 5]] # Grab the value before the post hooks
# The remaining MLPs (0, 3, 4) and every attention `hook_z` take position -2 from the ablated run.
ablate_mlp_hook = [(f"blocks.{layer}.mlp.hook_post", ablate_component_hook) for layer in [0, 3, 4]]
ablate_attn_hooks = [(f"blocks.{layer}.attn.hook_z", ablate_component_hook) for layer in range(0, 6)]
with model.hooks(fwd_hooks=ablate_mlp_hook+ablate_attn_hooks+mean_ablate_everything_hooks+patch_important+head_7_hook):
    ablated_loss, _ = model.run_with_cache(prompts, return_type="loss", loss_per_token=True)
    ablated_loss = ablated_loss[:, -1].mean()
    print(f"Patched loss: {ablated_loss:.2f} (+{ablated_loss-loss:.2f})")

Ablated loss 4.35098123550415
tensor(5.1810, device='cuda:0')
Patched loss: 3.21 (+1.97)


In [239]:
# Display the cache to see which hook names it recorded.
original_cache

ActivationCache with keys ['hook_embed', 'blocks.0.hook_resid_pre', 'blocks.0.ln1.hook_scale', 'blocks.0.ln1.hook_normalized', 'blocks.0.attn.hook_q', 'blocks.0.attn.hook_k', 'blocks.0.attn.hook_v', 'blocks.0.attn.hook_rot_q', 'blocks.0.attn.hook_rot_k', 'blocks.0.attn.hook_attn_scores', 'blocks.0.attn.hook_pattern', 'blocks.0.attn.hook_z', 'blocks.0.hook_attn_out', 'blocks.0.ln2.hook_scale', 'blocks.0.ln2.hook_normalized', 'blocks.0.mlp.hook_pre', 'blocks.0.mlp.hook_post', 'blocks.0.hook_mlp_out', 'blocks.0.hook_resid_post', 'blocks.1.hook_resid_pre', 'blocks.1.ln1.hook_scale', 'blocks.1.ln1.hook_normalized', 'blocks.1.attn.hook_q', 'blocks.1.attn.hook_k', 'blocks.1.attn.hook_v', 'blocks.1.attn.hook_rot_q', 'blocks.1.attn.hook_rot_k', 'blocks.1.attn.hook_attn_scores', 'blocks.1.attn.hook_pattern', 'blocks.1.attn.hook_z', 'blocks.1.hook_attn_out', 'blocks.1.ln2.hook_scale', 'blocks.1.ln2.hook_normalized', 'blocks.1.mlp.hook_pre', 'blocks.1.mlp.hook_post', 'blocks.1.hook_mlp_out', 'bloc

In [194]:
# Patch position -2 of every MLP (layers 0-5) and every attention output
# (layers 1-5) from the ablated run, on top of the mean-ablation hooks.
# Only the loss is read, so no activation cache is built.
original_loss = model(prompts, return_type="loss", loss_per_token=True)
loss = original_loss[:, -1].mean()

ablate_mlp_hook = [(f"blocks.{layer}.mlp.hook_post", ablate_component_hook) for layer in [0, 1, 2, 3, 4, 5]]
ablate_attn_hooks = [(f"blocks.{layer}.hook_attn_out", ablate_component_hook) for layer in range(1, 6)]
with model.hooks(fwd_hooks=ablate_mlp_hook+ablate_attn_hooks+mean_ablate_everything_hooks):
    ablated_loss = model(prompts, return_type="loss", loss_per_token=True)[:, -1].mean()
# ":+.2f" prints the sign itself, so a negative delta cannot show up as "+-".
print(f"Loss: {ablated_loss:.2f} ({ablated_loss - loss:+.2f})")

Loss: 5.18 (+3.94)


In [180]:
# Same experiment with MLP2 left unpatched: MLPs 0, 1, 3, 4, 5 and attention
# outputs 1-5 take position -2 from the ablated run.
# Only the loss is read, so no activation cache is built.
original_loss = model(prompts, return_type="loss", loss_per_token=True)
loss = original_loss[:, -1].mean()

ablate_mlp_hook = [(f"blocks.{layer}.mlp.hook_post", ablate_component_hook) for layer in [0, 1, 3, 4, 5]]
ablate_attn_hooks = [(f"blocks.{layer}.hook_attn_out", ablate_component_hook) for layer in range(1, 6)]
with model.hooks(fwd_hooks=ablate_mlp_hook+ablate_attn_hooks+mean_ablate_everything_hooks):
    ablated_loss = model(prompts, return_type="loss", loss_per_token=True)[:, -1].mean()
# ":+.2f" prints the sign itself, so a negative delta cannot show up as "+-".
print(f"Loss: {ablated_loss:.2f} ({ablated_loss - loss:+.2f})")

Loss: 4.56 (+3.32)


In [181]:
# Same experiment with MLP1 left unpatched: MLPs 0, 2, 3, 4, 5 and attention
# outputs 1-5 take position -2 from the ablated run.
# Only the loss is read, so no activation cache is built.
original_loss = model(prompts, return_type="loss", loss_per_token=True)
loss = original_loss[:, -1].mean()

ablate_mlp_hook = [(f"blocks.{layer}.mlp.hook_post", ablate_component_hook) for layer in [0, 2, 3, 4, 5]]
ablate_attn_hooks = [(f"blocks.{layer}.hook_attn_out", ablate_component_hook) for layer in range(1, 6)]
with model.hooks(fwd_hooks=ablate_mlp_hook+ablate_attn_hooks+mean_ablate_everything_hooks):
    ablated_loss = model(prompts, return_type="loss", loss_per_token=True)[:, -1].mean()
# ":+.2f" prints the sign itself, so a negative delta cannot show up as "+-".
print(f"Loss: {ablated_loss:.2f} ({ablated_loss - loss:+.2f})")

Loss: 3.35 (+2.10)


In [None]:

_, random_cache = model.run_with_cache(random_prompts)
def mean_ablate_component_hook(value, hook):
    """Forward hook: overwrite every position except the last three with the mean
    (over dims 0 and 1) of the same hook point in `random_cache`. Edits in place."""
    value[:, :-3] = random_cache[hook.name].mean((0, 1))
    return value

# Mean-ablation hooks for every MLP post-activation and every attention output, layers 0-5.
mean_ablate_everything_hooks = [(f"blocks.{layer}.mlp.hook_post", mean_ablate_component_hook) for layer in range(0, 6)] +  [(f"blocks.{layer}.hook_attn_out", mean_ablate_component_hook) for layer in range(0, 6)]


# NOTE(review): the word here lacks the leading " V" used when building `prompts`;
# confirm that this is the intended test variation and not a typo.
test_prompts = haystack_utils.generate_random_prompts("orschlägen", model, common_tokens, 200, length=20)

# Only the loss is read, so no activation cache is built for these two runs.
original_loss = model(test_prompts, return_type="loss", loss_per_token=True)
loss = original_loss[:, -1].mean()

# MLP5 is the only MLP left unpatched.
ablate_mlp_hook = [(f"blocks.{layer}.mlp.hook_post", ablate_component_hook) for layer in [0, 1, 2, 3, 4]]
ablate_attn_hooks = [(f"blocks.{layer}.hook_attn_out", ablate_component_hook) for layer in range(1, 6)]
with model.hooks(fwd_hooks=ablate_mlp_hook+ablate_attn_hooks+mean_ablate_everything_hooks):
    ablated_loss = model(test_prompts, return_type="loss", loss_per_token=True)[:, -1].mean()
# ":+.2f" prints the sign itself, so a negative delta cannot show up as "+-".
print(f"Loss: {ablated_loss:.2f} ({ablated_loss - loss:+.2f})")

In [152]:
# Clean reference computed here on `prompts`. Relying on a global `loss` is
# fragile: when the notebook runs top to bottom, the preceding cell leaves
# `loss` holding the baseline of a different prompt set.
baseline_loss = model(prompts, return_type="loss", loss_per_token=True)[:, -1].mean()

for layer in range(1, 6):
    component = f"blocks.{layer}.hook_attn_out"
    ablate_attn_hook = [(component, ablate_component_hook)]

    # Total effect: this attention output at position -2 is taken from the
    # ablated run; everything downstream is recomputed freely.
    with model.hooks(fwd_hooks=ablate_attn_hook):
        ablated_loss = model(prompts, return_type="loss", loss_per_token=True)[:, -1].mean()
    # ":+.2f" emits the sign itself, so negative deltas no longer print as "+-0.24".
    print(f"Total effect Attn{layer}: {ablated_loss:.2f} ({ablated_loss - baseline_loss:+.2f})")

    # Direct effect: same patch, but all MLPs and the other attention outputs
    # in layers 1-5 are restored to their clean values at position -2.
    activate_layers = [i for i in range(1, 6) if i != layer]
    activate_mlp_hooks = [(f"blocks.{i}.mlp.hook_post", activate_component_hook) for i in range(1, 6)]
    activate_attn_hooks = [(f"blocks.{i}.hook_attn_out", activate_component_hook) for i in activate_layers]
    with model.hooks(fwd_hooks=ablate_attn_hook+activate_mlp_hooks+activate_attn_hooks):
        ablated_loss = model(prompts, return_type="loss", loss_per_token=True)[:, -1].mean()
    print(f"Direct effect Attn{layer}: {ablated_loss:.2f} ({ablated_loss - baseline_loss:+.2f})")

Total effect Attn1: 1.00 (+-0.24)
Direct effect Attn1: 1.00 (+-0.24)
Total effect Attn2: 1.03 (+-0.22)
Direct effect Attn2: 1.28 (+0.03)
Total effect Attn3: 1.10 (+-0.15)
Direct effect Attn3: 1.28 (+0.04)
Total effect Attn4: 1.24 (+-0.00)
Direct effect Attn4: 1.24 (+-0.00)
Total effect Attn5: 1.26 (+0.02)
Direct effect Attn5: 1.26 (+0.02)


In [153]:
def get_ablate_neurons_hook(neurons):
    """Build a forward hook that patches selected neurons at position -2.

    Args:
        neurons: Index (e.g. a LongTensor) selecting entries along the last
            dimension of the hooked activation.

    Returns:
        A hook function that copies `ablated_cache[hook.name][:, -2, neurons]`
        into its input in place and returns it.
    """
    def ablate_neurons_hook(value, hook):
        ablated_activation = ablated_cache[hook.name]
        value[:, -2, neurons] = ablated_activation[:, -2, neurons]
        return value

    return ablate_neurons_hook
# Only the loss is read in this cell, so the model is called directly instead
# of building (and discarding) an activation cache.
loss = model(prompts, return_type="loss", loss_per_token=True)[:, -1].mean()


component = "blocks.1.mlp.hook_post"
# The layer-1 neurons listed by the threshold scan (values above 0.5).
neurons = torch.LongTensor([32, 163, 472, 493, 544, 726, 733, 959, 1019, 1089, 1190, 1203, 1227, 1280, 1287, 1448, 1502, 1782, 1794, 1899, 1980, 1985])
ablate_neurons_hook = [(component, get_ablate_neurons_hook(neurons))]

# Not applied in the run below (only the neuron hook is); it is reset here to
# L0H7 because the next cell builds its ablated cache with `ablate_head_hook`.
ablate_head_hook = [get_ablate_attention_hook(layer=0, head=7, mean_activation=mean_attention_activations, pos=-2)]
with model.hooks(fwd_hooks=ablate_neurons_hook):
    ablated_loss = model(prompts, return_type="loss", loss_per_token=True)[:, -1].mean()

print(loss, ablated_loss)

tensor(1.2433, device='cuda:0') tensor(0.6377, device='cuda:0')


MLP1 MLP2 direct effect

In [160]:
# Get the ablated cache: L0H7 ablated and the target neuron(s) deactivated.
with model.hooks(fwd_hooks=ablate_head_hook+deactivate_neurons_fwd_hooks):
    _, ablated_cache = model.run_with_cache(prompts, return_type="loss", loss_per_token=True)

# Reference run: target neuron(s) deactivated only. Logits are requested here;
# `loss_per_token` presumably has no effect for this return type — TODO confirm.
with model.hooks(fwd_hooks=deactivate_neurons_fwd_hooks):
    original_logits, original_cache = model.run_with_cache(prompts, return_type="logits", loss_per_token=True)

def to_logprobs(logits, pos=-2):
    """Return log-probabilities over the last dimension at one sequence position.

    Args:
        logits: Tensor indexed as [batch, position, ..., vocab].
        pos: Index along dim 1 to keep (default -2).

    Returns:
        `log_softmax(logits, dim=-1)[:, pos]`.
    """
    if logits.dim() < 3:
        # Dim 1 is also the last dim here, so normalise before indexing to keep
        # the original result for low-rank inputs.
        return logits.log_softmax(dim=-1)[:, pos]
    # The softmax is independent per position, so slice first and avoid
    # normalising (and allocating) every position of the sequence.
    return logits[:, pos].log_softmax(dim=-1)

# Reference log-probabilities at position -2 (the default `pos` of to_logprobs).
original_logprobs = to_logprobs(original_logits)

def ablate_component_hook(value, hook):
    """Forward hook: copy position -2 of this hook point from `ablated_cache`. Edits in place."""
    value[:, -2] = ablated_cache[hook.name][:, -2]
    return value

def activate_component_hook(value, hook):
    """Forward hook: copy position -2 of this hook point from `original_cache`. Edits in place."""
    value[:, -2] = original_cache[hook.name][:, -2]
    return value

# Tokens that are already very unlikely in the reference run are excluded from
# the ranking. The mask does not depend on the layer, so it is computed once.
rare_token_mask = original_logprobs.mean(0) < -7

for layer in range(1, 6):
    # Patch this MLP from the ablated run and restore the other MLPs and all
    # attention outputs (layers 1-5) to the reference run at position -2.
    component = f"blocks.{layer}.mlp.hook_post"
    ablate_mlp_hook = [(component, ablate_component_hook)]
    activate_layers = [i for i in range(1, 6) if i != layer]
    activate_mlp_hooks = [(f"blocks.{i}.mlp.hook_post", activate_component_hook) for i in activate_layers]
    activate_attn_hooks = [(f"blocks.{i}.hook_attn_out", activate_component_hook) for i in range(1, 6)]
    # Only the logits are used, so no activation cache is built.
    with model.hooks(fwd_hooks=ablate_mlp_hook+activate_mlp_hooks+activate_attn_hooks+deactivate_neurons_fwd_hooks):
        ablated_logits = model(prompts, return_type="logits")
    ablated_logprobs = to_logprobs(ablated_logits)

    # Mean drop in log-probability per token, with ignored and rare tokens zeroed out.
    diffs = (original_logprobs - ablated_logprobs).mean(0)
    diffs[all_ignore] = 0
    diffs[rare_token_mask] = 0

    # Report up to ten tokens with the largest positive drop.
    sorted_diffs, sorted_indices = diffs.sort(descending=True)
    top_diffs, top_indices = sorted_diffs[:10], sorted_indices[:10]
    is_positive = top_diffs > 0
    print("Layer", layer)
    print(top_diffs[is_positive].tolist())
    print(model.to_str_tokens(top_indices[is_positive]))
    
    

Layer 1
[3.295524835586548, 2.1306657791137695]
['gen', 'ge']
Layer 2
[2.3385634422302246, 2.1701858043670654, 0.8951563835144043]
['ges', 'gen', 'ge']
Layer 3
[2.1852922439575195, 1.3046529293060303, 1.0308555364608765]
['ges', 'ge', 'gs']
Layer 4
[5.201571941375732, 2.4203109741210938, 2.1195180416107178, 1.4416520595550537]
['gar', 'g', 'ges', 'gs']
Layer 5
[2.6164331436157227, 2.348926067352295, 0.8594949245452881, 0.13929252326488495]
['ges', 'gs', 'ge', 'gen']
