In [1]:
import os

import numpy as np
from sklearn.decomposition import PCA

from utils import load_hidden_representations_from_hdf5, read_templates_from_file

----

In [2]:
# params
log_dir = "/logfiles"  # root directory; joined with the per-layer hdf5 file paths when loading
model = "bigscience-T0_3B" # alternatives noted by the author: bigscience-T0_B (possibly a typo for T0_3B -- TODO confirm) or bigscience-T0
module = "encoder" # only "encoder" is accepted for now (enforced by the assert that follows)
# Task whose prompt templates and hidden representations are analysed;
# uncomment exactly one of the alternatives below to switch task.
task = "rte"
# task = "cb"
# task = "wic"

In [3]:
# Fail fast if a module other than the encoder is selected; decoder is not supported yet.
assert module == "encoder" # TODO(mm): support decoder as well

## Prompts

In [4]:
# Load the prompt templates for the selected task from the task-specific CSV file.
df = read_templates_from_file(f"/t0-analysis/prompts/{task}/fixed_prompt.csv")
# Alternative template file for the same task:
# df = read_templates_from_file(f"/t0-analysis/prompts/{task}/fixed_target_yes_no.csv")
# Render the loaded templates so the available names can be checked by eye.
display(df)

Unnamed: 0,name,template,category,includes_targets,targets,target_ids,shuffle
0,gpt_3_yes_no_with_targets,{premise} Question: {hypothesis} Yes or No?,instructive,True,"▁Yes, ▁No","0, 1",False
1,gpt_3_true_false_with_targets,{premise} Question: {hypothesis} True or False?,instructive,True,"▁True, ▁False","0, 1",False
2,gpt_3_cat_dog_with_targets,{premise} Question: {hypothesis} Cat or Dog?,instructive,True,"▁Cat, ▁Dog","0, 1",False
3,gpt_3_yes_no_without_targets,{premise} Question: {hypothesis}?,instructive,False,"▁Yes, ▁No","0, 1",False


## PCA

In [5]:
def unison_shuffled_copies(a, b, rng=None):
    """Return shuffled copies of ``a`` and ``b`` that share one random permutation.

    Element ``i`` of the first result and element ``i`` of the second result
    always come from the same original index, so pairings (e.g. samples and
    their labels) are preserved. The inputs themselves are not modified.

    Adapted from:
    https://stackoverflow.com/questions/4601373/better-way-to-shuffle-two-numpy-arrays-in-unison

    Args:
        a: array-like, shuffled along its first axis.
        b: array-like with the same length as ``a`` along its first axis.
        rng: optional integer seed or ``np.random.Generator`` for a
            reproducible shuffle. When ``None`` (default) the global NumPy
            random state is used, exactly as before.

    Returns:
        Tuple ``(a_shuffled, b_shuffled)`` of NumPy arrays.

    Raises:
        AssertionError: if ``a`` and ``b`` differ in length.
    """
    # Accept plain sequences too; integer-array indexing below needs ndarrays.
    a = np.asarray(a)
    b = np.asarray(b)
    assert len(a) == len(b), "a and b must have the same length"

    if rng is None:
        p = np.random.permutation(len(a))
    else:
        # default_rng passes an existing Generator through and seeds a new one from an int
        p = np.random.default_rng(rng).permutation(len(a))

    # indexing with an integer array always produces new arrays (copies)
    return a[p], b[p]

In [6]:
# Names of the prompt templates to include in the analysis, keyed by task.
PATTERNS_BY_TASK = {
    'rte': [
        "gpt_3_yes_no_with_targets",
        "gpt_3_true_false_with_targets",
        "gpt_3_cat_dog_with_targets",
        "gpt_3_yes_no_without_targets",
    ],
    'cb': [
        "null_pattern",
        "null_pattern_reversed",
        "gpt_3_true_false_neither",
        "gpt_3_yes_no_maybe",
        "mnli_crowdsource",
        "always_sometimes_never",
        "based_on_previous_passage",
        "infer",
        "claim",
        "consider",
        "follow",
        "imply",
        "guaranteed",
        "guaranteed_possible",
        "justified",
        "must_be_true",
        "should_assume",
        "take_the_following",
    ],
    'wic': [
        "gpt_3",
        "gpt_3_yes_no",
        "affirmation",
        "grammar_homework",
        "polysemous",
        "question_context",
        "question_meaning",
        "question_meaning_yes_no",
        "same_sense",
        "similar_sense",
        "similar_sense_yes_no",
    ],
}

# Fail here with a clear message instead of leaving `use_pattern` undefined
# (or stale from an earlier run of this cell) when the task is not supported.
if task not in PATTERNS_BY_TASK:
    raise ValueError(
        f"unsupported task '{task}'; expected one of {sorted(PATTERNS_BY_TASK)}"
    )
use_pattern = PATTERNS_BY_TASK[task]

In [7]:
# For every layer: gather the averaged hidden representations of all selected
# prompts, then report how much variance the first 1-3 principal components explain.
# Narrow the range (e.g. range(0, 10) or range(24, 25)) to inspect fewer layers.
for layer in range(0, 25):
    print('layer=', layer)

    # keep only the templates selected for this task, in the order they appear in df
    prompt_names = [name for name in df['name'] if name in use_pattern]
    # NOTE: "represenations" is spelled this way in the path on purpose; it
    # presumably matches the files on disk -- rename the files before fixing it.
    file_names = [
        f"{task}/{model}/{module}/{name}/hidden_represenations_layer{layer}_avg.hdf5"
        for name in prompt_names
    ]
    if not file_names:
        raise ValueError(
            f"no template in df matches use_pattern for task '{task}'; nothing to load"
        )

    # load hidden representations from hdf5 files (one file per prompt)
    per_prompt_representations = []
    classes = []
    for idx, file_name in enumerate(file_names):
        hidden_representations = load_hidden_representations_from_hdf5(os.path.join(log_dir, file_name))
        per_prompt_representations.append(hidden_representations)
        # assign representations to classes: every row gets the index of its prompt
        classes += hidden_representations.shape[0] * [idx]

    # concatenate once, rather than re-copying the growing array for every file
    representations = np.concatenate(per_prompt_representations, axis=0)
    classes = np.asarray(classes)

    # shuffle representations and classes with the same permutation
    X, y = unison_shuffled_copies(representations, classes)
    print(X.shape, y.shape)

    # perform PCA on hidden representations
    print('PCA for prompts:', prompt_names)

    for n_components in range(1, 4):
        pca = PCA(n_components=n_components)
        pca.fit(X)

        # variance explained by each of the principal components
        print(f"model:{model}; module:{module}; layer:{layer}; n_components: {n_components}; variance explained: {pca.explained_variance_ratio_}")
        # total variance explained by the retained components
        print(np.sum(pca.explained_variance_ratio_))
    print('\n')


layer= 0


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5467.12it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5852.48it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5667.95it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4997.21it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:0; n_components: 1; variance explained: [0.91322815]
0.91322815
model:bigscience-T0_3B; module:encoder; layer:0; n_components: 2; variance explained: [0.91322803 0.00506137]
0.9182894
model:bigscience-T0_3B; module:encoder; layer:0; n_components: 3; variance explained: [0.91322815 0.00506137 0.00405438]
0.9223439


layer= 1


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4776.25it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4792.64it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5847.68it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 6017.06it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:1; n_components: 1; variance explained: [0.23550335]
0.23550335
model:bigscience-T0_3B; module:encoder; layer:1; n_components: 2; variance explained: [0.23550317 0.1462696 ]
0.38177276
model:bigscience-T0_3B; module:encoder; layer:1; n_components: 3; variance explained: [0.2355033  0.14626974 0.07257798]
0.45435104


layer= 2


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4306.36it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5644.22it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5593.19it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5748.89it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:2; n_components: 1; variance explained: [0.98407835]
0.98407835
model:bigscience-T0_3B; module:encoder; layer:2; n_components: 2; variance explained: [0.9840782  0.00883231]
0.99291056
model:bigscience-T0_3B; module:encoder; layer:2; n_components: 3; variance explained: [0.98407835 0.00883231 0.00141225]
0.99432296


layer= 3


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4360.72it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5231.38it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5754.87it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5847.35it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:3; n_components: 1; variance explained: [0.9844427]
0.9844427
model:bigscience-T0_3B; module:encoder; layer:3; n_components: 2; variance explained: [0.9844427  0.00712584]
0.99156857
model:bigscience-T0_3B; module:encoder; layer:3; n_components: 3; variance explained: [0.98444253 0.00712583 0.0031549 ]
0.99472326


layer= 4


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4462.78it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5593.05it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5810.10it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5516.59it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:4; n_components: 1; variance explained: [0.98475385]
0.98475385
model:bigscience-T0_3B; module:encoder; layer:4; n_components: 2; variance explained: [0.9847539  0.00614626]
0.99090016
model:bigscience-T0_3B; module:encoder; layer:4; n_components: 3; variance explained: [0.9847539  0.00614626 0.00322178]
0.9941219


layer= 5


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4738.80it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4725.22it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5487.88it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5907.35it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:5; n_components: 1; variance explained: [0.984645]
0.984645
model:bigscience-T0_3B; module:encoder; layer:5; n_components: 2; variance explained: [0.984645   0.00564387]
0.99028885
model:bigscience-T0_3B; module:encoder; layer:5; n_components: 3; variance explained: [0.984645   0.00564387 0.00303771]
0.99332654


layer= 6


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4479.28it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5558.51it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5962.59it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5387.46it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:6; n_components: 1; variance explained: [0.9991639]
0.9991639
model:bigscience-T0_3B; module:encoder; layer:6; n_components: 2; variance explained: [9.9916410e-01 3.9299193e-04]
0.9995571
model:bigscience-T0_3B; module:encoder; layer:6; n_components: 3; variance explained: [9.9916393e-01 3.9299173e-04 1.3002341e-04]
0.9996869


layer= 7


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4422.18it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4575.04it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5452.13it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5896.77it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:7; n_components: 1; variance explained: [0.9990618]
0.9990618
model:bigscience-T0_3B; module:encoder; layer:7; n_components: 2; variance explained: [9.9906200e-01 3.8983574e-04]
0.9994518
model:bigscience-T0_3B; module:encoder; layer:7; n_components: 3; variance explained: [9.9906200e-01 3.8983574e-04 1.4156556e-04]
0.9995934


layer= 8


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4086.92it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5492.29it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5927.45it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5832.35it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:8; n_components: 1; variance explained: [0.9989372]
0.9989372
model:bigscience-T0_3B; module:encoder; layer:8; n_components: 2; variance explained: [9.9893719e-01 3.8038552e-04]
0.9993176
model:bigscience-T0_3B; module:encoder; layer:8; n_components: 3; variance explained: [9.9893719e-01 3.8038546e-04 1.6636155e-04]
0.99948394


layer= 9


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4834.52it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5272.62it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5765.84it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5811.41it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:9; n_components: 1; variance explained: [0.99873495]
0.99873495
model:bigscience-T0_3B; module:encoder; layer:9; n_components: 2; variance explained: [9.987352e-01 3.638970e-04]
0.9990991
model:bigscience-T0_3B; module:encoder; layer:9; n_components: 3; variance explained: [9.9873495e-01 3.6389683e-04 1.8174316e-04]
0.9992806


layer= 10


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4680.56it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5463.85it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5031.65it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5665.91it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:10; n_components: 1; variance explained: [0.9984353]
0.9984353
model:bigscience-T0_3B; module:encoder; layer:10; n_components: 2; variance explained: [9.984353e-01 3.522909e-04]
0.9987876
model:bigscience-T0_3B; module:encoder; layer:10; n_components: 3; variance explained: [9.9843538e-01 3.5229087e-04 2.1666016e-04]
0.9990043


layer= 11


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4241.37it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5244.94it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5129.16it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5872.24it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:11; n_components: 1; variance explained: [0.99799776]
0.99799776
model:bigscience-T0_3B; module:encoder; layer:11; n_components: 2; variance explained: [9.9799776e-01 3.6162260e-04]
0.9983594
model:bigscience-T0_3B; module:encoder; layer:11; n_components: 3; variance explained: [9.9799776e-01 3.6162321e-04 2.5670766e-04]
0.9986161


layer= 12


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4770.95it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5332.08it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5481.77it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5861.31it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:12; n_components: 1; variance explained: [0.9974137]
0.9974137
model:bigscience-T0_3B; module:encoder; layer:12; n_components: 2; variance explained: [9.974137e-01 3.982121e-04]
0.9978119
model:bigscience-T0_3B; module:encoder; layer:12; n_components: 3; variance explained: [9.9741369e-01 3.9821208e-04 3.2914008e-04]
0.99814105


layer= 13


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4376.27it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4417.69it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4533.00it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5042.22it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:13; n_components: 1; variance explained: [0.99639964]
0.99639964
model:bigscience-T0_3B; module:encoder; layer:13; n_components: 2; variance explained: [9.9639964e-01 4.9196638e-04]
0.9968916
model:bigscience-T0_3B; module:encoder; layer:13; n_components: 3; variance explained: [9.9639964e-01 4.9196591e-04 4.0927320e-04]
0.99730086


layer= 14


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4212.51it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5083.96it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4852.07it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5113.90it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:14; n_components: 1; variance explained: [0.9953063]
0.9953063
model:bigscience-T0_3B; module:encoder; layer:14; n_components: 2; variance explained: [9.9530613e-01 5.6844129e-04]
0.9958746
model:bigscience-T0_3B; module:encoder; layer:14; n_components: 3; variance explained: [9.9530613e-01 5.6844100e-04 4.7438726e-04]
0.996349


layer= 15


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4540.44it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5599.57it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5688.35it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5967.79it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:15; n_components: 1; variance explained: [0.9937378]
0.9937378
model:bigscience-T0_3B; module:encoder; layer:15; n_components: 2; variance explained: [9.9373782e-01 7.2730036e-04]
0.9944651
model:bigscience-T0_3B; module:encoder; layer:15; n_components: 3; variance explained: [9.9373782e-01 7.2729954e-04 6.1524642e-04]
0.99508035


layer= 16


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4571.98it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5631.00it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5404.68it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5836.40it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:16; n_components: 1; variance explained: [0.99143434]
0.99143434
model:bigscience-T0_3B; module:encoder; layer:16; n_components: 2; variance explained: [9.9143434e-01 9.6437294e-04]
0.9923987
model:bigscience-T0_3B; module:encoder; layer:16; n_components: 3; variance explained: [9.9143434e-01 9.6437469e-04 7.7617809e-04]
0.9931749


layer= 17


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4399.38it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4574.05it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5695.93it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4665.43it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:17; n_components: 1; variance explained: [0.98748666]
0.98748666
model:bigscience-T0_3B; module:encoder; layer:17; n_components: 2; variance explained: [0.98748666 0.00158377]
0.9890704
model:bigscience-T0_3B; module:encoder; layer:17; n_components: 3; variance explained: [0.98748666 0.00158377 0.00117682]
0.99024725


layer= 18


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4782.11it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5292.58it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5874.46it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5747.33it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:18; n_components: 1; variance explained: [0.9823134]
0.9823134
model:bigscience-T0_3B; module:encoder; layer:18; n_components: 2; variance explained: [0.9823134  0.00232101]
0.9846344
model:bigscience-T0_3B; module:encoder; layer:18; n_components: 3; variance explained: [0.9823134  0.00232101 0.00171001]
0.9863444


layer= 19


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4707.51it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4983.22it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5733.15it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5845.38it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:19; n_components: 1; variance explained: [0.9761332]
0.9761332
model:bigscience-T0_3B; module:encoder; layer:19; n_components: 2; variance explained: [0.9761332  0.00284858]
0.9789818
model:bigscience-T0_3B; module:encoder; layer:19; n_components: 3; variance explained: [0.9761332  0.00284858 0.00250812]
0.9814899


layer= 20


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4032.14it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4330.08it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5726.59it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5558.11it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:20; n_components: 1; variance explained: [0.967927]
0.967927
model:bigscience-T0_3B; module:encoder; layer:20; n_components: 2; variance explained: [0.9679273  0.00359196]
0.97151923
model:bigscience-T0_3B; module:encoder; layer:20; n_components: 3; variance explained: [0.967927   0.00359196 0.00318143]
0.9747004


layer= 21


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4763.64it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5560.61it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5855.93it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5773.66it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:21; n_components: 1; variance explained: [0.95778]
0.95778
model:bigscience-T0_3B; module:encoder; layer:21; n_components: 2; variance explained: [0.95778    0.00456588]
0.9623459
model:bigscience-T0_3B; module:encoder; layer:21; n_components: 3; variance explained: [0.95778    0.00456589 0.00435708]
0.966703


layer= 22


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4790.96it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5326.33it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5707.32it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5924.01it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:22; n_components: 1; variance explained: [0.9403951]
0.9403951
model:bigscience-T0_3B; module:encoder; layer:22; n_components: 2; variance explained: [0.9403951  0.00650239]
0.9468975
model:bigscience-T0_3B; module:encoder; layer:22; n_components: 3; variance explained: [0.9403951  0.00650239 0.00546497]
0.9523625


layer= 23


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5000.66it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5199.08it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5653.58it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5815.57it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:23; n_components: 1; variance explained: [0.9159726]
0.9159726
model:bigscience-T0_3B; module:encoder; layer:23; n_components: 2; variance explained: [0.9159724  0.00843732]
0.92440975
model:bigscience-T0_3B; module:encoder; layer:23; n_components: 3; variance explained: [0.9159724  0.00843733 0.00724282]
0.93165255


layer= 24


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4437.55it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4514.30it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5476.91it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5802.44it/s]


(1108, 2048) (1108,)
PCA for prompts: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets', 'gpt_3_cat_dog_with_targets', 'gpt_3_yes_no_without_targets']
model:bigscience-T0_3B; module:encoder; layer:24; n_components: 1; variance explained: [0.05968468]
0.05968468
model:bigscience-T0_3B; module:encoder; layer:24; n_components: 2; variance explained: [0.05968472 0.05574055]
0.115425274
model:bigscience-T0_3B; module:encoder; layer:24; n_components: 3; variance explained: [0.05968466 0.0557405  0.05277915]
0.16820432


