In [1]:
import os

import numpy as np
from sklearn.decomposition import PCA

from utils import load_hidden_representations_from_hdf5, read_templates_from_file

----

In [2]:
# params
log_dir = "/logfiles"
model = "bigscience-T0" # bigscience-T0_B or bigscience-T0
module = "decoder"
task = "rte"
# task = "cb"
# task = "wic"

## Prompts

In [3]:
df = read_templates_from_file(f"/t0-analysis/prompts/{task}/fixed_prompt.csv")
# df = read_templates_from_file(f"/t0-analysis/prompts/{task}/fixed_target_yes_no.csv")
display(df)

Unnamed: 0,name,template,category,includes_targets,targets,target_ids,shuffle
0,gpt_3_yes_no_with_targets,{premise} Question: {hypothesis} Yes or No?,instructive,True,"▁Yes, ▁No","0, 1",False
1,gpt_3_true_false_with_targets,{premise} Question: {hypothesis} True or False?,instructive,True,"▁True, ▁Fal","0, 1",False
2,gpt_3_cat_dog_with_targets,{premise} Question: {hypothesis} Cat or Dog?,instructive,True,"▁Cat, ▁Dog","0, 1",False
3,gpt_3_yes_no_without_targets,{premise} Question: {hypothesis}?,instructive,False,"▁Yes, ▁No","0, 1",False


## PCA

In [4]:
def unison_shuffled_copies(a, b):
    # from: https://stackoverflow.com/questions/4601373/better-way-to-shuffle-two-numpy-arrays-in-unison
    assert len(a) == len(b)
    p = np.random.permutation(len(a))
    return a[p], b[p]

In [5]:
if task == 'rte':
    use_pattern = [
        "gpt_3_yes_no_with_targets",
        "gpt_3_true_false_with_targets",
        "gpt_3_cat_dog_with_targets",
        "gpt_3_yes_no_without_targets",
    ]
elif task == 'cb':
    use_pattern = [
        "null_pattern",
        "null_pattern_reversed",
        "gpt_3_true_false_neither",
        "gpt_3_yes_no_maybe",
        "mnli_crowdsource",
        "always_sometimes_never",
        "based_on_previous_passage",
        "infer",
        "claim",
        "consider",
        "follow",
        "imply",
        "guaranteed",
        "guaranteed_possible",
        "justified",
        "must_be_true",
        "should_assume",
        "take_the_following",
    ]
elif task == 'wic':
    use_pattern = [
        "gpt_3",
        "gpt_3_yes_no",
        "affirmation",
        "grammar_homework",
        "polysemous",
        "question_context",
        "question_meaning",
        "question_meaning_yes_no",
        "same_sense",
        "similar_sense",
        "similar_sense_yes_no",
    ]

In [6]:
for t in range(2):
    # for layer in range(0, 10):
    for layer in range(0, 25):
    # for layer in range(24, 25):
        print(f"token: {t}\tlayer: {layer}")
        file_names, prompt_names = [], []

        for _, row in df.iterrows():
            if row['name'] in use_pattern:
                file_names.append(f"{task}/{model}/{module}/{row['name']}/hidden_represenations_t{t}_layer{layer}_avg.hdf5",)
                prompt_names.append(row['name'])


        # load hidden representations from hdf5 file
        representations = None
        classes = []
        n_sequences = 0

        for idx, file_name in enumerate(file_names):
            hidden_representations = load_hidden_representations_from_hdf5(os.path.join(log_dir, file_name))
            # print(hidden_representations.shape)
            n_sequences = hidden_representations.shape[0]

            if representations is None:
                representations = hidden_representations
            else:
                representations = np.concatenate((representations, hidden_representations), axis=0)

            classes += n_sequences * [idx] # assign representations to classes
        
        classes = np.asarray(classes)

        # shuffle representations and classes
        X, y = unison_shuffled_copies(representations, classes)
        print(X.shape, y.shape)

        # perform PCA on hidden representations
        print('PCA for prompts:', prompt_names)

        for n_components in range(1, 4):
            pca = PCA(n_components=n_components)
            pca.fit(X)

            # variance explained by each of the principal components
            print(f"model:{model}; module:{module}; token:{t}; layer:{layer}; n_components: {n_components}; variance explained: {pca.explained_variance_ratio_}")
            print(np.sum(pca.explained_variance_ratio_))
        print('\n')


token: 0	layer: 0


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5045.59it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4181.94it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5111.38it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5506.81it/s]
  self.explained_variance_ / total_var.sum()
  self.explained_variance_ / total_var.sum()


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:0; n_components: 1; variance explained: [nan]
nan
model:bigscience-T0; module:decoder; token:0; layer:0; n_components: 2; variance explained: [nan nan]
nan


  self.explained_variance_ / total_var.sum()


model:bigscience-T0; module:decoder; token:0; layer:0; n_components: 3; variance explained: [nan nan nan]
nan


token: 0	layer: 1


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4144.63it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5331.98it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4954.40it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5512.01it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:1; n_components: 1; variance explained: [0.91972893]
0.91972893
model:bigscience-T0; module:decoder; token:0; layer:1; n_components: 2; variance explained: [0.91972893 0.01331339]
0.93304235
model:bigscience-T0; module:decoder; token:0; layer:1; n_components: 3; variance explained: [0.91972893 0.01331339 0.00667707]
0.93971944


token: 0	layer: 2


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4224.27it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5555.10it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5527.12it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5545.48it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:2; n_components: 1; variance explained: [0.9320126]
0.9320126
model:bigscience-T0; module:decoder; token:0; layer:2; n_components: 2; variance explained: [0.9320125  0.03345007]
0.96546257
model:bigscience-T0; module:decoder; token:0; layer:2; n_components: 3; variance explained: [0.9320125  0.03345009 0.00689409]
0.9723567


token: 0	layer: 3


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4046.20it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4823.88it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5532.64it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4913.19it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:3; n_components: 1; variance explained: [0.9607144]
0.9607144
model:bigscience-T0; module:decoder; token:0; layer:3; n_components: 2; variance explained: [0.9607144  0.01350653]
0.97422093
model:bigscience-T0; module:decoder; token:0; layer:3; n_components: 3; variance explained: [0.9607144  0.01350654 0.00312071]
0.97734165


token: 0	layer: 4


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3944.48it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5447.53it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3352.03it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5421.80it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:4; n_components: 1; variance explained: [0.96854216]
0.96854216
model:bigscience-T0; module:decoder; token:0; layer:4; n_components: 2; variance explained: [0.96854216 0.01049002]
0.97903216
model:bigscience-T0; module:decoder; token:0; layer:4; n_components: 3; variance explained: [0.96854186 0.01049002 0.00187797]
0.9809098


token: 0	layer: 5


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3375.35it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5436.90it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5217.38it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5341.76it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:5; n_components: 1; variance explained: [0.96704036]
0.96704036
model:bigscience-T0; module:decoder; token:0; layer:5; n_components: 2; variance explained: [0.9670402  0.01021816]
0.9772583
model:bigscience-T0; module:decoder; token:0; layer:5; n_components: 3; variance explained: [0.9670402  0.01021816 0.00195991]
0.97921824


token: 0	layer: 6


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3507.31it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5389.31it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5570.23it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5455.10it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:6; n_components: 1; variance explained: [0.9745324]
0.9745324
model:bigscience-T0; module:decoder; token:0; layer:6; n_components: 2; variance explained: [0.97453237 0.00750657]
0.9820389
model:bigscience-T0; module:decoder; token:0; layer:6; n_components: 3; variance explained: [0.9745324  0.00750658 0.00218515]
0.98422414


token: 0	layer: 7


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3562.64it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5716.03it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5039.33it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5421.60it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:7; n_components: 1; variance explained: [0.9913897]
0.9913897
model:bigscience-T0; module:decoder; token:0; layer:7; n_components: 2; variance explained: [0.9913896 0.002824 ]
0.9942136
model:bigscience-T0; module:decoder; token:0; layer:7; n_components: 3; variance explained: [0.9913896  0.002824   0.00160309]
0.99581665


token: 0	layer: 8


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3658.23it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5477.30it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5488.22it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5597.63it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:8; n_components: 1; variance explained: [0.9919332]
0.9919332
model:bigscience-T0; module:decoder; token:0; layer:8; n_components: 2; variance explained: [0.99193347 0.00197831]
0.9939118
model:bigscience-T0; module:decoder; token:0; layer:8; n_components: 3; variance explained: [0.9919332  0.00197831 0.0014532 ]
0.9953648


token: 0	layer: 9


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3581.29it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5517.33it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5265.02it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4055.25it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:9; n_components: 1; variance explained: [0.99209225]
0.99209225
model:bigscience-T0; module:decoder; token:0; layer:9; n_components: 2; variance explained: [0.99209225 0.00178917]
0.9938814
model:bigscience-T0; module:decoder; token:0; layer:9; n_components: 3; variance explained: [0.99209225 0.00178917 0.00148302]
0.9953644


token: 0	layer: 10


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3366.20it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5275.66it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5453.44it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5482.80it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:10; n_components: 1; variance explained: [0.99220496]
0.99220496
model:bigscience-T0; module:decoder; token:0; layer:10; n_components: 2; variance explained: [0.99220496 0.00165896]
0.99386394
model:bigscience-T0; module:decoder; token:0; layer:10; n_components: 3; variance explained: [0.99220496 0.00165896 0.00149969]
0.99536365


token: 0	layer: 11


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4184.42it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5232.82it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5235.44it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5562.10it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:11; n_components: 1; variance explained: [0.9924607]
0.9924607
model:bigscience-T0; module:decoder; token:0; layer:11; n_components: 2; variance explained: [0.99246055 0.00163718]
0.9940977
model:bigscience-T0; module:decoder; token:0; layer:11; n_components: 3; variance explained: [0.9924607  0.00163718 0.00146603]
0.9955639


token: 0	layer: 12


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3745.01it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5171.22it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5192.80it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5479.44it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:12; n_components: 1; variance explained: [0.9925776]
0.9925776
model:bigscience-T0; module:decoder; token:0; layer:12; n_components: 2; variance explained: [0.9925776  0.00165891]
0.9942365
model:bigscience-T0; module:decoder; token:0; layer:12; n_components: 3; variance explained: [0.9925776  0.00165891 0.00141059]
0.99564713


token: 0	layer: 13


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4212.00it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5467.63it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5061.17it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5584.23it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:13; n_components: 1; variance explained: [0.9894708]
0.9894708
model:bigscience-T0; module:decoder; token:0; layer:13; n_components: 2; variance explained: [0.9894708  0.00171337]
0.9911842
model:bigscience-T0; module:decoder; token:0; layer:13; n_components: 3; variance explained: [0.989471   0.00171337 0.00151224]
0.99269664


token: 0	layer: 14


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4054.66it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5519.74it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5488.73it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5437.41it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:14; n_components: 1; variance explained: [0.9892674]
0.9892674
model:bigscience-T0; module:decoder; token:0; layer:14; n_components: 2; variance explained: [0.9892674  0.00174743]
0.99101484
model:bigscience-T0; module:decoder; token:0; layer:14; n_components: 3; variance explained: [0.9892674  0.00174743 0.00153754]
0.9925524


token: 0	layer: 15


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3697.27it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5631.00it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4032.87it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5398.33it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:15; n_components: 1; variance explained: [0.988247]
0.988247
model:bigscience-T0; module:decoder; token:0; layer:15; n_components: 2; variance explained: [0.988247   0.00185204]
0.990099
model:bigscience-T0; module:decoder; token:0; layer:15; n_components: 3; variance explained: [0.988247   0.00185204 0.00150914]
0.99160814


token: 0	layer: 16


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3399.70it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5783.38it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4917.91it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5163.40it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:16; n_components: 1; variance explained: [0.97942847]
0.97942847
model:bigscience-T0; module:decoder; token:0; layer:16; n_components: 2; variance explained: [0.9794287  0.00585025]
0.98527896
model:bigscience-T0; module:decoder; token:0; layer:16; n_components: 3; variance explained: [0.97942847 0.00585025 0.00228076]
0.9875595


token: 0	layer: 17


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3676.56it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4798.00it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5468.81it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5326.09it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:17; n_components: 1; variance explained: [0.9784722]
0.9784722
model:bigscience-T0; module:decoder; token:0; layer:17; n_components: 2; variance explained: [0.97847193 0.00519226]
0.9836642
model:bigscience-T0; module:decoder; token:0; layer:17; n_components: 3; variance explained: [0.9784722  0.00519226 0.00238504]
0.9860495


token: 0	layer: 18


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3768.76it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5455.10it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5178.71it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5417.10it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:18; n_components: 1; variance explained: [0.91452456]
0.91452456
model:bigscience-T0; module:decoder; token:0; layer:18; n_components: 2; variance explained: [0.9145247  0.01442506]
0.9289497
model:bigscience-T0; module:decoder; token:0; layer:18; n_components: 3; variance explained: [0.9145247  0.01442506 0.00687208]
0.9358218


token: 0	layer: 19


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3377.97it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5116.09it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5485.96it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5511.02it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:19; n_components: 1; variance explained: [0.7389529]
0.7389529
model:bigscience-T0; module:decoder; token:0; layer:19; n_components: 2; variance explained: [0.738953   0.03401883]
0.7729718
model:bigscience-T0; module:decoder; token:0; layer:19; n_components: 3; variance explained: [0.738953   0.03401882 0.01450585]
0.7874777


token: 0	layer: 20


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 2869.91it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4933.87it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4869.37it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5353.43it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:20; n_components: 1; variance explained: [0.5659779]
0.5659779
model:bigscience-T0; module:decoder; token:0; layer:20; n_components: 2; variance explained: [0.5659777 0.0846751]
0.65065277
model:bigscience-T0; module:decoder; token:0; layer:20; n_components: 3; variance explained: [0.5659779  0.0846751  0.04102185]
0.6916748


token: 0	layer: 21


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3341.82it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4287.34it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4887.68it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4901.85it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:21; n_components: 1; variance explained: [0.5605061]
0.5605061
model:bigscience-T0; module:decoder; token:0; layer:21; n_components: 2; variance explained: [0.5605061  0.08886828]
0.64937437
model:bigscience-T0; module:decoder; token:0; layer:21; n_components: 3; variance explained: [0.56050617 0.08886831 0.03730233]
0.6866768


token: 0	layer: 22


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3339.91it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5235.79it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4794.84it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5436.11it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:22; n_components: 1; variance explained: [0.6300503]
0.6300503
model:bigscience-T0; module:decoder; token:0; layer:22; n_components: 2; variance explained: [0.6300506  0.06979188]
0.69984245
model:bigscience-T0; module:decoder; token:0; layer:22; n_components: 3; variance explained: [0.6300503  0.06979188 0.04271581]
0.742558


token: 0	layer: 23


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3767.37it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5504.52it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5342.72it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5267.24it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:23; n_components: 1; variance explained: [0.62818605]
0.62818605
model:bigscience-T0; module:decoder; token:0; layer:23; n_components: 2; variance explained: [0.62818605 0.07512952]
0.70331556
model:bigscience-T0; module:decoder; token:0; layer:23; n_components: 3; variance explained: [0.62818605 0.07512959 0.0367535 ]
0.7400691


token: 0	layer: 24


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3639.24it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5591.73it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5555.16it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5480.63it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:0; layer:24; n_components: 1; variance explained: [0.87068355]
0.87068355
model:bigscience-T0; module:decoder; token:0; layer:24; n_components: 2; variance explained: [0.87068355 0.02679551]
0.89747906
model:bigscience-T0; module:decoder; token:0; layer:24; n_components: 3; variance explained: [0.87068355 0.02679548 0.01043749]
0.90791655


token: 1	layer: 0


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3900.17it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5509.11it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4898.55it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4958.08it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:0; n_components: 1; variance explained: [0.32597634]
0.32597634
model:bigscience-T0; module:decoder; token:1; layer:0; n_components: 2; variance explained: [0.3259764 0.2266581]
0.5526345
model:bigscience-T0; module:decoder; token:1; layer:0; n_components: 3; variance explained: [0.3259764  0.22665812 0.13435166]
0.6869862


token: 1	layer: 1


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 2897.41it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5651.08it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5071.71it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5661.40it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:1; n_components: 1; variance explained: [0.33147904]
0.33147904
model:bigscience-T0; module:decoder; token:1; layer:1; n_components: 2; variance explained: [0.33147913 0.22899392]
0.5604731
model:bigscience-T0; module:decoder; token:1; layer:1; n_components: 3; variance explained: [0.33147913 0.22899364 0.13159114]
0.6920639


token: 1	layer: 2


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4371.19it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5556.80it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5323.26it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5335.63it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:2; n_components: 1; variance explained: [0.32681495]
0.32681495
model:bigscience-T0; module:decoder; token:1; layer:2; n_components: 2; variance explained: [0.3268151  0.24740723]
0.5742223
model:bigscience-T0; module:decoder; token:1; layer:2; n_components: 3; variance explained: [0.32681513 0.24740735 0.12817276]
0.7023952


token: 1	layer: 3


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4091.05it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5543.02it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5182.91it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5517.46it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:3; n_components: 1; variance explained: [0.49900874]
0.49900874
model:bigscience-T0; module:decoder; token:1; layer:3; n_components: 2; variance explained: [0.49900883 0.18779676]
0.6868056
model:bigscience-T0; module:decoder; token:1; layer:3; n_components: 3; variance explained: [0.49900907 0.1877967  0.09631664]
0.7831224


token: 1	layer: 4


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3483.14it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5473.74it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5517.43it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5608.66it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:4; n_components: 1; variance explained: [0.71420056]
0.71420056
model:bigscience-T0; module:decoder; token:1; layer:4; n_components: 2; variance explained: [0.71420056 0.10283156]
0.8170321
model:bigscience-T0; module:decoder; token:1; layer:4; n_components: 3; variance explained: [0.7142008  0.10283158 0.05529137]
0.87232375


token: 1	layer: 5


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4235.85it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 1826.61it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4868.02it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4845.45it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:5; n_components: 1; variance explained: [0.79267067]
0.79267067
model:bigscience-T0; module:decoder; token:1; layer:5; n_components: 2; variance explained: [0.79267067 0.07159963]
0.86427027
model:bigscience-T0; module:decoder; token:1; layer:5; n_components: 3; variance explained: [0.79267067 0.0715997  0.03950733]
0.9037777


token: 1	layer: 6


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3645.53it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5497.56it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5036.31it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5481.04it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:6; n_components: 1; variance explained: [0.8458302]
0.8458302
model:bigscience-T0; module:decoder; token:1; layer:6; n_components: 2; variance explained: [0.8458302  0.05332671]
0.8991569
model:bigscience-T0; module:decoder; token:1; layer:6; n_components: 3; variance explained: [0.8458302  0.05332672 0.02810195]
0.92725885


token: 1	layer: 7


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4138.87it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5565.30it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5363.66it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5430.42it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:7; n_components: 1; variance explained: [0.906834]
0.906834
model:bigscience-T0; module:decoder; token:1; layer:7; n_components: 2; variance explained: [0.906834   0.03123203]
0.93806607
model:bigscience-T0; module:decoder; token:1; layer:7; n_components: 3; variance explained: [0.906834   0.031232   0.01654864]
0.95461464


token: 1	layer: 8


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3332.77it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5601.28it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4774.81it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5488.63it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:8; n_components: 1; variance explained: [0.9203834]
0.9203834
model:bigscience-T0; module:decoder; token:1; layer:8; n_components: 2; variance explained: [0.9203834  0.02662104]
0.94700444
model:bigscience-T0; module:decoder; token:1; layer:8; n_components: 3; variance explained: [0.9203834  0.02662104 0.01394355]
0.960948


token: 1	layer: 9


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4256.38it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5436.19it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5650.45it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5141.40it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:9; n_components: 1; variance explained: [0.9298281]
0.9298281
model:bigscience-T0; module:decoder; token:1; layer:9; n_components: 2; variance explained: [0.929828   0.02506897]
0.9548969
model:bigscience-T0; module:decoder; token:1; layer:9; n_components: 3; variance explained: [0.929828   0.02506899 0.01161601]
0.966513


token: 1	layer: 10


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3806.72it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5769.30it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5259.49it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5481.25it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:10; n_components: 1; variance explained: [0.9346072]
0.9346072
model:bigscience-T0; module:decoder; token:1; layer:10; n_components: 2; variance explained: [0.9346073  0.02313813]
0.95774543
model:bigscience-T0; module:decoder; token:1; layer:10; n_components: 3; variance explained: [0.934607   0.02313814 0.01028337]
0.96802855


token: 1	layer: 11


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3396.95it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5541.46it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4732.76it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5093.48it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:11; n_components: 1; variance explained: [0.932689]
0.932689
model:bigscience-T0; module:decoder; token:1; layer:11; n_components: 2; variance explained: [0.932689   0.02449386]
0.9571829
model:bigscience-T0; module:decoder; token:1; layer:11; n_components: 3; variance explained: [0.932689   0.02449389 0.0091443 ]
0.9663272


token: 1	layer: 12


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3544.30it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5462.64it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5324.70it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4802.05it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:12; n_components: 1; variance explained: [0.92174554]
0.92174554
model:bigscience-T0; module:decoder; token:1; layer:12; n_components: 2; variance explained: [0.92174554 0.02848436]
0.9502299
model:bigscience-T0; module:decoder; token:1; layer:12; n_components: 3; variance explained: [0.92174536 0.02848436 0.01033071]
0.96056044


token: 1	layer: 13


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3033.64it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5525.99it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5141.83it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5421.45it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:13; n_components: 1; variance explained: [0.8982214]
0.8982214
model:bigscience-T0; module:decoder; token:1; layer:13; n_components: 2; variance explained: [0.8982215  0.03625587]
0.9344774
model:bigscience-T0; module:decoder; token:1; layer:13; n_components: 3; variance explained: [0.8982214  0.03625586 0.0137269 ]
0.9482041


token: 1	layer: 14


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3954.92it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5578.39it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5700.46it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5572.98it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:14; n_components: 1; variance explained: [0.88551915]
0.88551915
model:bigscience-T0; module:decoder; token:1; layer:14; n_components: 2; variance explained: [0.88551915 0.03835036]
0.9238695
model:bigscience-T0; module:decoder; token:1; layer:14; n_components: 3; variance explained: [0.885519   0.03835038 0.01587257]
0.93974197


token: 1	layer: 15


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3551.50it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5562.36it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4558.13it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5541.09it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:15; n_components: 1; variance explained: [0.8507134]
0.8507134
model:bigscience-T0; module:decoder; token:1; layer:15; n_components: 2; variance explained: [0.85071325 0.04802972]
0.898743
model:bigscience-T0; module:decoder; token:1; layer:15; n_components: 3; variance explained: [0.85071325 0.04802975 0.02116202]
0.919905


token: 1	layer: 16


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3500.80it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5116.69it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4773.74it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5207.56it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:16; n_components: 1; variance explained: [0.77713555]
0.77713555
model:bigscience-T0; module:decoder; token:1; layer:16; n_components: 2; variance explained: [0.77713555 0.06656523]
0.84370077
model:bigscience-T0; module:decoder; token:1; layer:16; n_components: 3; variance explained: [0.77713567 0.06656526 0.03343173]
0.87713265


token: 1	layer: 17


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4130.70it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5507.49it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4569.16it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5434.56it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:17; n_components: 1; variance explained: [0.66346735]
0.66346735
model:bigscience-T0; module:decoder; token:1; layer:17; n_components: 2; variance explained: [0.66346735 0.09476217]
0.7582295
model:bigscience-T0; module:decoder; token:1; layer:17; n_components: 3; variance explained: [0.66346735 0.09476215 0.05176894]
0.80999845


token: 1	layer: 18


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3455.93it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5435.68it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5047.34it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5362.42it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:18; n_components: 1; variance explained: [0.56817317]
0.56817317
model:bigscience-T0; module:decoder; token:1; layer:18; n_components: 2; variance explained: [0.5681731  0.11993741]
0.68811053
model:bigscience-T0; module:decoder; token:1; layer:18; n_components: 3; variance explained: [0.56817317 0.11993745 0.06452591]
0.7526365


token: 1	layer: 19


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3473.22it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5494.55it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5710.52it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4736.95it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:19; n_components: 1; variance explained: [0.47461566]
0.47461566
model:bigscience-T0; module:decoder; token:1; layer:19; n_components: 2; variance explained: [0.47461584 0.13625516]
0.610871
model:bigscience-T0; module:decoder; token:1; layer:19; n_components: 3; variance explained: [0.47461578 0.1362552  0.07650257]
0.6873735


token: 1	layer: 20


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3349.05it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5518.53it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4795.71it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5450.37it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:20; n_components: 1; variance explained: [0.48221672]
0.48221672
model:bigscience-T0; module:decoder; token:1; layer:20; n_components: 2; variance explained: [0.4822168  0.12931912]
0.6115359
model:bigscience-T0; module:decoder; token:1; layer:20; n_components: 3; variance explained: [0.4822168  0.12931907 0.08474886]
0.6962848


token: 1	layer: 21


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3546.46it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5011.07it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5510.13it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5636.49it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:21; n_components: 1; variance explained: [0.45150796]
0.45150796
model:bigscience-T0; module:decoder; token:1; layer:21; n_components: 2; variance explained: [0.45150802 0.12431841]
0.5758264
model:bigscience-T0; module:decoder; token:1; layer:21; n_components: 3; variance explained: [0.45150813 0.12431841 0.0957318 ]
0.6715583


token: 1	layer: 22


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4320.34it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4377.64it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5282.40it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5371.97it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:22; n_components: 1; variance explained: [0.26628307]
0.26628307
model:bigscience-T0; module:decoder; token:1; layer:22; n_components: 2; variance explained: [0.266283  0.1741751]
0.44045812
model:bigscience-T0; module:decoder; token:1; layer:22; n_components: 3; variance explained: [0.26628307 0.17417528 0.11407206]
0.55453044


token: 1	layer: 23


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3359.20it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3992.06it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5588.45it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5444.11it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:23; n_components: 1; variance explained: [0.38359633]
0.38359633
model:bigscience-T0; module:decoder; token:1; layer:23; n_components: 2; variance explained: [0.38359624 0.1520612 ]
0.5356574
model:bigscience-T0; module:decoder; token:1; layer:23; n_components: 3; variance explained: [0.38359615 0.15206118 0.08985174]
0.6255091


token: 1	layer: 24


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3806.86it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5631.85it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4996.57it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4125.58it/s]


(1108, 4096) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0; module:decoder; token:1; layer:24; n_components: 1; variance explained: [0.76760083]
0.76760083
model:bigscience-T0; module:decoder; token:1; layer:24; n_components: 2; variance explained: [0.7676007  0.10794663]
0.87554735
model:bigscience-T0; module:decoder; token:1; layer:24; n_components: 3; variance explained: [0.7676007  0.10794659 0.03061317]
0.9061605


