In [1]:
import os

import numpy as np
from sklearn.decomposition import PCA

from utils import load_hidden_representations_from_hdf5, read_templates_from_file

----

In [2]:
# params
log_dir = "/logfiles"
model = "bigscience-T0_3B" # bigscience-T0_B or bigscience-T0
module = "decoder"
task = "rte"
# task = "cb"
# task = "wic"

## Prompts

In [3]:
df = read_templates_from_file(f"/t0-analysis/prompts/{task}/fixed_prompt.csv")
# df = read_templates_from_file(f"/t0-analysis/prompts/{task}/fixed_target_yes_no.csv")
display(df)

Unnamed: 0,name,template,category,includes_targets,targets,target_ids,shuffle
0,gpt_3_yes_no_with_targets,{premise} Question: {hypothesis} Yes or No?,instructive,True,"▁Yes, ▁No","0, 1",False
1,gpt_3_true_false_with_targets,{premise} Question: {hypothesis} True or False?,instructive,True,"▁True, ▁False","0, 1",False
2,gpt_3_cat_dog_with_targets,{premise} Question: {hypothesis} Cat or Dog?,instructive,True,"▁Cat, ▁Dog","0, 1",False
3,gpt_3_yes_no_without_targets,{premise} Question: {hypothesis}?,instructive,False,"▁Yes, ▁No","0, 1",False


## PCA

In [4]:
def unison_shuffled_copies(a, b):
    # from: https://stackoverflow.com/questions/4601373/better-way-to-shuffle-two-numpy-arrays-in-unison
    assert len(a) == len(b)
    p = np.random.permutation(len(a))
    return a[p], b[p]

In [5]:
if task == 'rte':
    use_pattern = [
        "gpt_3_yes_no_with_targets",
        "gpt_3_true_false_with_targets",
        "gpt_3_cat_dog_with_targets",
        "gpt_3_yes_no_without_targets",
    ]
elif task == 'cb':
    use_pattern = [
        "null_pattern",
        "null_pattern_reversed",
        "gpt_3_true_false_neither",
        "gpt_3_yes_no_maybe",
        "mnli_crowdsource",
        "always_sometimes_never",
        "based_on_previous_passage",
        "infer",
        "claim",
        "consider",
        "follow",
        "imply",
        "guaranteed",
        "guaranteed_possible",
        "justified",
        "must_be_true",
        "should_assume",
        "take_the_following",
    ]
elif task == 'wic':
    use_pattern = [
        "gpt_3",
        "gpt_3_yes_no",
        "affirmation",
        "grammar_homework",
        "polysemous",
        "question_context",
        "question_meaning",
        "question_meaning_yes_no",
        "same_sense",
        "similar_sense",
        "similar_sense_yes_no",
    ]

In [6]:
for t in range(2):
    # for layer in range(0, 10):
    for layer in range(0, 25):
    # for layer in range(24, 25):
        print(f"token: {t}\tlayer: {layer}")
        file_names, prompt_names = [], []

        for _, row in df.iterrows():
            if row['name'] in use_pattern:
                file_names.append(f"{task}/{model}/{module}/{row['name']}/hidden_represenations_t{t}_layer{layer}_avg.hdf5",)
                prompt_names.append(row['name'])


        # load hidden representations from hdf5 file
        representations = None
        classes = []
        n_sequences = 0

        for idx, file_name in enumerate(file_names):
            hidden_representations = load_hidden_representations_from_hdf5(os.path.join(log_dir, file_name))
            # print(hidden_representations.shape)
            n_sequences = hidden_representations.shape[0]

            if representations is None:
                representations = hidden_representations
            else:
                representations = np.concatenate((representations, hidden_representations), axis=0)

            classes += n_sequences * [idx] # assign representations to classes
        
        classes = np.asarray(classes)

        # shuffle representations and classes
        X, y = unison_shuffled_copies(representations, classes)
        print(X.shape, y.shape)

        # perform PCA on hidden representations
        print('PCA for prompts:', prompt_names)

        for n_components in range(1, 4):
            pca = PCA(n_components=n_components)
            pca.fit(X)

            # variance explained by each of the principal components
            print(f"model:{model}; module:{module}; token:{t}; layer:{layer}; n_components: {n_components}; variance explained: {pca.explained_variance_ratio_}")
            print(np.sum(pca.explained_variance_ratio_))
        print('\n')


token: 0	layer: 0


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4909.41it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4222.26it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5737.76it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5864.15it/s]
  self.explained_variance_ / total_var.sum()
  self.explained_variance_ / total_var.sum()
  self.explained_variance_ / total_var.sum()


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:0; n_components: 1; variance explained: [nan]
nan
model:bigscience-T0_3B; module:decoder; token:0; layer:0; n_components: 2; variance explained: [nan nan]
nan
model:bigscience-T0_3B; module:decoder; token:0; layer:0; n_components: 3; variance explained: [nan nan nan]
nan


token: 0	layer: 1


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4265.92it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5600.90it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5468.89it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5681.59it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:1; n_components: 1; variance explained: [0.9793358]
0.9793358
model:bigscience-T0_3B; module:decoder; token:0; layer:1; n_components: 2; variance explained: [0.9793358  0.00613705]
0.98547286
model:bigscience-T0_3B; module:decoder; token:0; layer:1; n_components: 3; variance explained: [0.9793358  0.00613705 0.00237561]
0.98784846


token: 0	layer: 2


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3448.92it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5550.86it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5677.78it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5675.90it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:2; n_components: 1; variance explained: [0.99270725]
0.99270725
model:bigscience-T0_3B; module:decoder; token:0; layer:2; n_components: 2; variance explained: [0.99270725 0.00193185]
0.9946391
model:bigscience-T0_3B; module:decoder; token:0; layer:2; n_components: 3; variance explained: [0.99270725 0.00193186 0.00111519]
0.9957543


token: 0	layer: 3


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3396.45it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5629.47it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5877.35it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5689.68it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:3; n_components: 1; variance explained: [0.99729466]
0.99729466
model:bigscience-T0_3B; module:decoder; token:0; layer:3; n_components: 2; variance explained: [9.9729455e-01 5.6458561e-04]
0.9978591
model:bigscience-T0_3B; module:decoder; token:0; layer:3; n_components: 3; variance explained: [9.9729455e-01 5.6458538e-04 4.9405295e-04]
0.9983532


token: 0	layer: 4


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 2460.14it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5766.78it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5052.17it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5779.40it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:4; n_components: 1; variance explained: [0.9974827]
0.9974827
model:bigscience-T0_3B; module:decoder; token:0; layer:4; n_components: 2; variance explained: [9.974827e-01 8.015798e-04]
0.9982843
model:bigscience-T0_3B; module:decoder; token:0; layer:4; n_components: 3; variance explained: [9.9748290e-01 8.0158020e-04 4.0019894e-04]
0.99868464


token: 0	layer: 5


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4694.55it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5456.97it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5635.45it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5661.87it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:5; n_components: 1; variance explained: [0.99667495]
0.99667495
model:bigscience-T0_3B; module:decoder; token:0; layer:5; n_components: 2; variance explained: [0.9966751  0.00147543]
0.9981505
model:bigscience-T0_3B; module:decoder; token:0; layer:5; n_components: 3; variance explained: [9.9667495e-01 1.4754346e-03 3.9233809e-04]
0.9985427


token: 0	layer: 6


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4203.32it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5550.04it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5724.93it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4910.97it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:6; n_components: 1; variance explained: [0.9967464]
0.9967464
model:bigscience-T0_3B; module:decoder; token:0; layer:6; n_components: 2; variance explained: [0.9967464  0.00139639]
0.99814284
model:bigscience-T0_3B; module:decoder; token:0; layer:6; n_components: 3; variance explained: [9.9674642e-01 1.3963885e-03 3.7716641e-04]
0.99852


token: 0	layer: 7


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4445.63it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5270.06it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5694.96it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5816.41it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:7; n_components: 1; variance explained: [0.99927235]
0.99927235
model:bigscience-T0_3B; module:decoder; token:0; layer:7; n_components: 2; variance explained: [9.9927235e-01 6.2097504e-04]
0.9998933
model:bigscience-T0_3B; module:decoder; token:0; layer:7; n_components: 3; variance explained: [9.9927235e-01 6.2097487e-04 2.5095336e-05]
0.9999184


token: 0	layer: 8


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4657.61it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5375.55it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5619.02it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5710.41it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:8; n_components: 1; variance explained: [0.9992556]
0.9992556
model:bigscience-T0_3B; module:decoder; token:0; layer:8; n_components: 2; variance explained: [9.9925566e-01 6.2647316e-04]
0.9998821
model:bigscience-T0_3B; module:decoder; token:0; layer:8; n_components: 3; variance explained: [9.9925566e-01 6.2647351e-04 2.4874504e-05]
0.99990696


token: 0	layer: 9


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3834.09it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5602.73it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5875.24it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5848.88it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:9; n_components: 1; variance explained: [0.9992064]
0.9992064
model:bigscience-T0_3B; module:decoder; token:0; layer:9; n_components: 2; variance explained: [9.9920654e-01 6.6280115e-04]
0.99986935
model:bigscience-T0_3B; module:decoder; token:0; layer:9; n_components: 3; variance explained: [9.9920642e-01 6.6280103e-04 2.3487144e-05]
0.9998927


token: 0	layer: 10


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3168.15it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5466.37it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5556.27it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5695.43it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:10; n_components: 1; variance explained: [0.9990725]
0.9990725
model:bigscience-T0_3B; module:decoder; token:0; layer:10; n_components: 2; variance explained: [9.9907249e-01 7.6821516e-04]
0.99984074
model:bigscience-T0_3B; module:decoder; token:0; layer:10; n_components: 3; variance explained: [9.9907249e-01 7.6821516e-04 2.7088341e-05]
0.9998678


token: 0	layer: 11


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4590.48it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5387.36it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5806.85it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5898.95it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:11; n_components: 1; variance explained: [0.99861383]
0.99861383
model:bigscience-T0_3B; module:decoder; token:0; layer:11; n_components: 2; variance explained: [0.99861383 0.00120914]
0.999823
model:bigscience-T0_3B; module:decoder; token:0; layer:11; n_components: 3; variance explained: [9.9861389e-01 1.2091356e-03 3.4425571e-05]
0.9998575


token: 0	layer: 12


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4727.99it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5085.79it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5727.13it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5881.13it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:12; n_components: 1; variance explained: [0.9983611]
0.9983611
model:bigscience-T0_3B; module:decoder; token:0; layer:12; n_components: 2; variance explained: [0.9983613  0.00145354]
0.9998148
model:bigscience-T0_3B; module:decoder; token:0; layer:12; n_components: 3; variance explained: [9.9836111e-01 1.4535425e-03 3.2697615e-05]
0.99984735


token: 0	layer: 13


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4262.83it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5369.09it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5601.98it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5746.33it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:13; n_components: 1; variance explained: [0.99753654]
0.99753654
model:bigscience-T0_3B; module:decoder; token:0; layer:13; n_components: 2; variance explained: [0.99753654 0.00183825]
0.9993748
model:bigscience-T0_3B; module:decoder; token:0; layer:13; n_components: 3; variance explained: [9.9753648e-01 1.8382476e-03 1.4676536e-04]
0.9995215


token: 0	layer: 14


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4625.68it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5473.76it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5470.20it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5707.63it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:14; n_components: 1; variance explained: [0.9971152]
0.9971152
model:bigscience-T0_3B; module:decoder; token:0; layer:14; n_components: 2; variance explained: [0.9971152  0.00215407]
0.99926925
model:bigscience-T0_3B; module:decoder; token:0; layer:14; n_components: 3; variance explained: [9.9711519e-01 2.1540679e-03 1.6254191e-04]
0.9994318


token: 0	layer: 15


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4713.22it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5438.20it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5490.34it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5972.67it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:15; n_components: 1; variance explained: [0.99591947]
0.99591947
model:bigscience-T0_3B; module:decoder; token:0; layer:15; n_components: 2; variance explained: [0.99591947 0.0021096 ]
0.99802905
model:bigscience-T0_3B; module:decoder; token:0; layer:15; n_components: 3; variance explained: [0.99591947 0.0021096  0.00105026]
0.99907935


token: 0	layer: 16


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4466.59it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5410.27it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5684.53it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5449.55it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:16; n_components: 1; variance explained: [0.9928606]
0.9928606
model:bigscience-T0_3B; module:decoder; token:0; layer:16; n_components: 2; variance explained: [0.99286073 0.0026184 ]
0.9954791
model:bigscience-T0_3B; module:decoder; token:0; layer:16; n_components: 3; variance explained: [0.99286073 0.0026184  0.00133107]
0.9968102


token: 0	layer: 17


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4436.43it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5531.28it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5036.40it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4514.89it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:17; n_components: 1; variance explained: [0.99199384]
0.99199384
model:bigscience-T0_3B; module:decoder; token:0; layer:17; n_components: 2; variance explained: [0.99199384 0.0026178 ]
0.9946116
model:bigscience-T0_3B; module:decoder; token:0; layer:17; n_components: 3; variance explained: [0.9919937  0.0026178  0.00148036]
0.99609184


token: 0	layer: 18


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4251.13it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4927.61it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5424.06it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5895.93it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:18; n_components: 1; variance explained: [0.9800054]
0.9800054
model:bigscience-T0_3B; module:decoder; token:0; layer:18; n_components: 2; variance explained: [0.9800054  0.00545339]
0.9854588
model:bigscience-T0_3B; module:decoder; token:0; layer:18; n_components: 3; variance explained: [0.9800054  0.00545339 0.00313694]
0.9885957


token: 0	layer: 19


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4645.43it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5178.50it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5558.00it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5909.18it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:19; n_components: 1; variance explained: [0.94800496]
0.94800496
model:bigscience-T0_3B; module:decoder; token:0; layer:19; n_components: 2; variance explained: [0.9480051  0.01246161]
0.9604667
model:bigscience-T0_3B; module:decoder; token:0; layer:19; n_components: 3; variance explained: [0.94800496 0.01246161 0.00926181]
0.96972835


token: 0	layer: 20


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4712.07it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4917.33it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5650.17it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5705.70it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:20; n_components: 1; variance explained: [0.86167365]
0.86167365
model:bigscience-T0_3B; module:decoder; token:0; layer:20; n_components: 2; variance explained: [0.86167365 0.03745803]
0.89913166
model:bigscience-T0_3B; module:decoder; token:0; layer:20; n_components: 3; variance explained: [0.86167365 0.03745805 0.02467528]
0.923807


token: 0	layer: 21


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4685.86it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5553.72it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5886.94it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4675.40it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:21; n_components: 1; variance explained: [0.7608182]
0.7608182
model:bigscience-T0_3B; module:decoder; token:0; layer:21; n_components: 2; variance explained: [0.7608182  0.04810777]
0.8089259
model:bigscience-T0_3B; module:decoder; token:0; layer:21; n_components: 3; variance explained: [0.7608183  0.04810778 0.03248793]
0.84141403


token: 0	layer: 22


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4579.35it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5271.47it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5898.95it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5926.06it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:22; n_components: 1; variance explained: [0.9019495]
0.9019495
model:bigscience-T0_3B; module:decoder; token:0; layer:22; n_components: 2; variance explained: [0.9019495  0.01591462]
0.91786414
model:bigscience-T0_3B; module:decoder; token:0; layer:22; n_components: 3; variance explained: [0.9019495  0.01591463 0.01015158]
0.9280157


token: 0	layer: 23


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4504.99it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5420.06it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5656.00it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5545.88it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:23; n_components: 1; variance explained: [0.68323934]
0.68323934
model:bigscience-T0_3B; module:decoder; token:0; layer:23; n_components: 2; variance explained: [0.6832395  0.05124732]
0.7344868
model:bigscience-T0_3B; module:decoder; token:0; layer:23; n_components: 3; variance explained: [0.6832395  0.05124734 0.02409185]
0.7585787


token: 0	layer: 24


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3648.16it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5706.48it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5745.79it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4812.49it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:0; layer:24; n_components: 1; variance explained: [0.7363083]
0.7363083
model:bigscience-T0_3B; module:decoder; token:0; layer:24; n_components: 2; variance explained: [0.7363082  0.08087158]
0.8171798
model:bigscience-T0_3B; module:decoder; token:0; layer:24; n_components: 3; variance explained: [0.7363083  0.08087157 0.03402222]
0.8512021


token: 1	layer: 0


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3780.68it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5714.23it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5951.78it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5879.85it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:0; n_components: 1; variance explained: [0.33654103]
0.33654103
model:bigscience-T0_3B; module:decoder; token:1; layer:0; n_components: 2; variance explained: [0.33654103 0.17927758]
0.5158186
model:bigscience-T0_3B; module:decoder; token:1; layer:0; n_components: 3; variance explained: [0.33654115 0.17927758 0.11174927]
0.627568


token: 1	layer: 1


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3137.99it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5414.53it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4124.34it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5840.71it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:1; n_components: 1; variance explained: [0.36220828]
0.36220828
model:bigscience-T0_3B; module:decoder; token:1; layer:1; n_components: 2; variance explained: [0.3622085  0.17854315]
0.54075164
model:bigscience-T0_3B; module:decoder; token:1; layer:1; n_components: 3; variance explained: [0.3622087  0.17854315 0.10485899]
0.6456108


token: 1	layer: 2


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4637.18it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5302.12it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5621.79it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5581.26it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:2; n_components: 1; variance explained: [0.3821908]
0.3821908
model:bigscience-T0_3B; module:decoder; token:1; layer:2; n_components: 2; variance explained: [0.3821908  0.18429364]
0.56648445
model:bigscience-T0_3B; module:decoder; token:1; layer:2; n_components: 3; variance explained: [0.3821909  0.18429354 0.09259874]
0.6590832


token: 1	layer: 3


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4680.39it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5656.86it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5591.27it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5729.67it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:3; n_components: 1; variance explained: [0.5267851]
0.5267851
model:bigscience-T0_3B; module:decoder; token:1; layer:3; n_components: 2; variance explained: [0.52678496 0.15082821]
0.67761314
model:bigscience-T0_3B; module:decoder; token:1; layer:3; n_components: 3; variance explained: [0.5267851  0.15082811 0.0656416 ]
0.7432548


token: 1	layer: 4


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4428.37it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5507.83it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5620.16it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5607.60it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:4; n_components: 1; variance explained: [0.62293273]
0.62293273
model:bigscience-T0_3B; module:decoder; token:1; layer:4; n_components: 2; variance explained: [0.62293273 0.12134928]
0.744282
model:bigscience-T0_3B; module:decoder; token:1; layer:4; n_components: 3; variance explained: [0.62293273 0.12134929 0.04973058]
0.7940126


token: 1	layer: 5


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4430.95it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5452.70it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5540.90it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5593.27it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:5; n_components: 1; variance explained: [0.68717915]
0.68717915
model:bigscience-T0_3B; module:decoder; token:1; layer:5; n_components: 2; variance explained: [0.68717897 0.10915637]
0.79633534
model:bigscience-T0_3B; module:decoder; token:1; layer:5; n_components: 3; variance explained: [0.68717897 0.10915639 0.04104463]
0.83738


token: 1	layer: 6


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4820.38it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4128.84it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5782.89it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5745.28it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:6; n_components: 1; variance explained: [0.75479645]
0.75479645
model:bigscience-T0_3B; module:decoder; token:1; layer:6; n_components: 2; variance explained: [0.75479627 0.0830625 ]
0.8378588
model:bigscience-T0_3B; module:decoder; token:1; layer:6; n_components: 3; variance explained: [0.75479627 0.08306252 0.03256497]
0.8704238


token: 1	layer: 7


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4277.54it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5668.28it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5695.32it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5604.65it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:7; n_components: 1; variance explained: [0.9510407]
0.9510407
model:bigscience-T0_3B; module:decoder; token:1; layer:7; n_components: 2; variance explained: [0.9510407 0.0169748]
0.9680155
model:bigscience-T0_3B; module:decoder; token:1; layer:7; n_components: 3; variance explained: [0.9510407  0.01697479 0.00619441]
0.9742099


token: 1	layer: 8


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3984.85it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4217.63it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5605.49it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5644.38it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:8; n_components: 1; variance explained: [0.95102715]
0.95102715
model:bigscience-T0_3B; module:decoder; token:1; layer:8; n_components: 2; variance explained: [0.95102715 0.01690965]
0.9679368
model:bigscience-T0_3B; module:decoder; token:1; layer:8; n_components: 3; variance explained: [0.951027   0.01690967 0.00616446]
0.97410107


token: 1	layer: 9


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3933.62it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5306.31it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5674.51it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5651.96it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:9; n_components: 1; variance explained: [0.94811386]
0.94811386
model:bigscience-T0_3B; module:decoder; token:1; layer:9; n_components: 2; variance explained: [0.94811386 0.016416  ]
0.9645299
model:bigscience-T0_3B; module:decoder; token:1; layer:9; n_components: 3; variance explained: [0.94811386 0.01641601 0.0075998 ]
0.97212964


token: 1	layer: 10


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4691.08it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5521.39it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5185.60it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5620.27it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:10; n_components: 1; variance explained: [0.9412988]
0.9412988
model:bigscience-T0_3B; module:decoder; token:1; layer:10; n_components: 2; variance explained: [0.94129896 0.01937332]
0.96067226
model:bigscience-T0_3B; module:decoder; token:1; layer:10; n_components: 3; variance explained: [0.9412988  0.01937334 0.00769094]
0.9683631


token: 1	layer: 11


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3946.07it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5766.30it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5763.12it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5432.45it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:11; n_components: 1; variance explained: [0.91718054]
0.91718054
model:bigscience-T0_3B; module:decoder; token:1; layer:11; n_components: 2; variance explained: [0.91718054 0.02620845]
0.943389
model:bigscience-T0_3B; module:decoder; token:1; layer:11; n_components: 3; variance explained: [0.9171807  0.02620846 0.01135956]
0.95474875


token: 1	layer: 12


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4702.17it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5099.85it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4993.67it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5550.06it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:12; n_components: 1; variance explained: [0.90168214]
0.90168214
model:bigscience-T0_3B; module:decoder; token:1; layer:12; n_components: 2; variance explained: [0.90168214 0.03151163]
0.9331938
model:bigscience-T0_3B; module:decoder; token:1; layer:12; n_components: 3; variance explained: [0.90168214 0.03151165 0.01457632]
0.9477701


token: 1	layer: 13


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4525.06it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5576.46it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5627.73it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5914.00it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:13; n_components: 1; variance explained: [0.875403]
0.875403
model:bigscience-T0_3B; module:decoder; token:1; layer:13; n_components: 2; variance explained: [0.875403   0.04021634]
0.9156193
model:bigscience-T0_3B; module:decoder; token:1; layer:13; n_components: 3; variance explained: [0.875403   0.04021634 0.01938534]
0.93500465


token: 1	layer: 14


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4233.06it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5721.99it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5618.04it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5891.47it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:14; n_components: 1; variance explained: [0.8336765]
0.8336765
model:bigscience-T0_3B; module:decoder; token:1; layer:14; n_components: 2; variance explained: [0.83367664 0.05111016]
0.8847868
model:bigscience-T0_3B; module:decoder; token:1; layer:14; n_components: 3; variance explained: [0.83367664 0.05111017 0.02857251]
0.9133593


token: 1	layer: 15


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4489.61it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5322.11it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4466.47it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4901.69it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:15; n_components: 1; variance explained: [0.7560332]
0.7560332
model:bigscience-T0_3B; module:decoder; token:1; layer:15; n_components: 2; variance explained: [0.75603336 0.07091186]
0.82694525
model:bigscience-T0_3B; module:decoder; token:1; layer:15; n_components: 3; variance explained: [0.75603336 0.07091183 0.05212753]
0.8790727


token: 1	layer: 16


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4236.79it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4891.88it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5348.79it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5584.23it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:16; n_components: 1; variance explained: [0.6411451]
0.6411451
model:bigscience-T0_3B; module:decoder; token:1; layer:16; n_components: 2; variance explained: [0.6411452  0.09926774]
0.74041295
model:bigscience-T0_3B; module:decoder; token:1; layer:16; n_components: 3; variance explained: [0.6411452  0.09926774 0.0905076 ]
0.8309206


token: 1	layer: 17


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4551.08it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5361.58it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5534.67it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5074.32it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:17; n_components: 1; variance explained: [0.52200425]
0.52200425
model:bigscience-T0_3B; module:decoder; token:1; layer:17; n_components: 2; variance explained: [0.5220041  0.13480185]
0.656806
model:bigscience-T0_3B; module:decoder; token:1; layer:17; n_components: 3; variance explained: [0.52200407 0.13480185 0.11880279]
0.7756087


token: 1	layer: 18


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3035.22it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5679.70it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4062.80it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5812.54it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:18; n_components: 1; variance explained: [0.35093805]
0.35093805
model:bigscience-T0_3B; module:decoder; token:1; layer:18; n_components: 2; variance explained: [0.35093814 0.19303565]
0.5439738
model:bigscience-T0_3B; module:decoder; token:1; layer:18; n_components: 3; variance explained: [0.35093823 0.19303574 0.15172097]
0.6956949


token: 1	layer: 19


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4417.24it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5535.91it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5339.94it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5548.45it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:19; n_components: 1; variance explained: [0.22249432]
0.22249432
model:bigscience-T0_3B; module:decoder; token:1; layer:19; n_components: 2; variance explained: [0.22249407 0.20711406]
0.4296081
model:bigscience-T0_3B; module:decoder; token:1; layer:19; n_components: 3; variance explained: [0.22249402 0.20711406 0.15660514]
0.58621323


token: 1	layer: 20


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4092.75it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4215.81it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 6067.46it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5983.81it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:20; n_components: 1; variance explained: [0.311665]
0.311665
model:bigscience-T0_3B; module:decoder; token:1; layer:20; n_components: 2; variance explained: [0.3116652  0.20963104]
0.52129626
model:bigscience-T0_3B; module:decoder; token:1; layer:20; n_components: 3; variance explained: [0.31166497 0.20963083 0.13055159]
0.65184736


token: 1	layer: 21


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4431.49it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4605.75it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5863.49it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5805.31it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:21; n_components: 1; variance explained: [0.49190488]
0.49190488
model:bigscience-T0_3B; module:decoder; token:1; layer:21; n_components: 2; variance explained: [0.49190465 0.15841812]
0.6503228
model:bigscience-T0_3B; module:decoder; token:1; layer:21; n_components: 3; variance explained: [0.49190488 0.15841812 0.08789162]
0.7382147


token: 1	layer: 22


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3409.02it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5383.69it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5887.09it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5903.84it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:22; n_components: 1; variance explained: [0.57567465]
0.57567465
model:bigscience-T0_3B; module:decoder; token:1; layer:22; n_components: 2; variance explained: [0.5756748  0.14866985]
0.7243446
model:bigscience-T0_3B; module:decoder; token:1; layer:22; n_components: 3; variance explained: [0.57567495 0.14866976 0.06921825]
0.793563


token: 1	layer: 23


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4679.73it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5730.40it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4761.37it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4432.37it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:23; n_components: 1; variance explained: [0.6629282]
0.6629282
model:bigscience-T0_3B; module:decoder; token:1; layer:23; n_components: 2; variance explained: [0.66292816 0.13095386]
0.793882
model:bigscience-T0_3B; module:decoder; token:1; layer:23; n_components: 3; variance explained: [0.6629281  0.13095388 0.04393126]
0.83781326


token: 1	layer: 24


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4663.85it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5132.52it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5468.92it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5878.00it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:decoder; token:1; layer:24; n_components: 1; variance explained: [0.86665285]
0.86665285
model:bigscience-T0_3B; module:decoder; token:1; layer:24; n_components: 2; variance explained: [0.86665285 0.0853231 ]
0.95197594
model:bigscience-T0_3B; module:decoder; token:1; layer:24; n_components: 3; variance explained: [0.86665285 0.08532301 0.01280805]
0.96478385


