In [1]:
import os

import numpy as np
from sklearn.decomposition import PCA

from utils import load_hidden_representations_from_hdf5, read_templates_from_file

----

In [2]:
# params
# Root directory of the stored hidden representations; the PCA loop joins it
# with "{task}/{model}/{module}/<prompt name>/..." to locate each hdf5 file.
log_dir = "/logfiles"
model = "bigscience-T0_3B" # alternatives noted by the author: bigscience-T0_B or bigscience-T0
module = "encoder" # only "encoder" is accepted for now (asserted in the next cell)
# Task to analyse: selects both the prompt CSV and the representation sub-directory.
# Uncomment exactly one of the lines below.
# task = "rte"
task = "cb"
# task = "wic"

In [10]:
# Fail early on an unsupported `module` value: only the encoder is handled so far.
assert module == "encoder" # TODO(mm): support decoder as well

## Prompts

In [3]:
# Load the prompt templates of the selected task from its CSV file and show them.
# The PCA loop further down reads the 'name' column of this frame to decide
# which prompts to include.
df = read_templates_from_file(f"/t0-analysis/prompts/{task}.csv")
display(df)

Unnamed: 0,name,template,category,includes_labels,shuffle
0,null_pattern,{premise} {hypothesis},neutral,False,False
1,null_pattern_reversed,{hypothesis} {premise},neutral,False,False
2,gpt_3_true_false_neither,"{premise} Question: {hypothesis} True, False, ...",instructive,True,False
3,gpt_3_yes_no_maybe,"{premise} Question: {hypothesis} Yes, No, or M...",instructive,True,False
4,mnli_crowdsource,{premise} Using only the above description and...,instructive,True,False
5,always_sometimes_never,"Suppose it's true that {premise} Then, is ""{hy...",instructive,True,False
6,based_on_previous_passage,"{premise} Based on the previous passage, is it...",instructive,True,False
7,infer,"Suppose {premise} Can we infer that ""{hypothes...",instructive,True,False
8,claim,"{premise} Based on that information, is the cl...",instructive,True,False
9,consider,"{premise} Keeping in mind the above text, cons...",instructive,True,False


## PCA

In [4]:
def unison_shuffled_copies(a, b, rng=None):
    """Return copies of ``a`` and ``b`` shuffled with one shared permutation.

    Rows that were aligned before the call (``a[i]`` belongs to ``b[i]``) stay
    aligned afterwards. The inputs themselves are not modified.

    Adapted from:
    https://stackoverflow.com/questions/4601373/better-way-to-shuffle-two-numpy-arrays-in-unison

    Args:
        a: array-like, shuffled along its first axis.
        b: array-like with the same length as ``a`` along the first axis.
        rng: optional source of randomness. ``None`` (default) keeps the
            original behaviour and draws from NumPy's global random state.
            An ``int`` seed or a ``numpy.random.Generator`` makes the shuffle
            reproducible.

    Returns:
        Tuple ``(a_shuffled, b_shuffled)`` of NumPy arrays.

    Raises:
        AssertionError: if ``a`` and ``b`` differ in length.
    """
    # Fancy indexing below needs ndarrays; this is a no-op for ndarray inputs.
    a = np.asarray(a)
    b = np.asarray(b)
    assert len(a) == len(b), f"length mismatch: {len(a)} != {len(b)}"

    if rng is None:
        # Legacy global RNG, so an earlier np.random.seed(...) is still honoured.
        p = np.random.permutation(len(a))
    else:
        # default_rng passes an existing Generator through unchanged and
        # builds a fresh one from an integer seed.
        p = np.random.default_rng(rng).permutation(len(a))

    return a[p], b[p]

In [5]:
# Prompt (template) names to include in the PCA, keyed by task.
# Looking the list up via `task` replaces commenting blocks in and out, which
# had to be kept in sync with the `task` parameter by hand.
patterns_by_task = {
    "rte": [
        "null_pattern",
        "null_pattern_reversed",
        "gpt_3_yes_no",
        "gpt_3_yes_no_shuffled",
        "gpt_3_true_false",
        "gpt_3_true_false_shuffled",
        "start_with_the",
        "mnli_crowdsource",
        "based_on_previous_passage",
        "infer",
        "follow",
        "imply",
        "guaranteed",
        "justified",
        "must_be_true",
        "should_assume",
    ],
    "cb": [
        "null_pattern",
        "null_pattern_reversed",
        "gpt_3_true_false_neither",
        "gpt_3_yes_no_maybe",
        "mnli_crowdsource",
        "always_sometimes_never",
        "based_on_previous_passage",
        "infer",
        "claim",
        "consider",
        "follow",
        "imply",
        "guaranteed",
        "guaranteed_possible",
        "justified",
        "must_be_true",
        "should_assume",
        "take_the_following",
    ],
    "wic": [
        "gpt_3",
        "gpt_3_yes_no",
        "affirmation",
        "grammar_homework",
        "polysemous",
        "question_context",
        "question_meaning",
        "question_meaning_yes_no",
        "same_sense",
        "similar_sense",
        "similar_sense_yes_no",
    ],
}

# Fail with a readable message instead of a bare KeyError for an unknown task.
if task not in patterns_by_task:
    raise ValueError(
        f"no prompt patterns defined for task {task!r}; "
        f"known tasks: {sorted(patterns_by_task)}"
    )

# Names not present in the task's template file are simply never matched.
use_pattern = patterns_by_task[task]

In [9]:
# For every layer: gather the averaged hidden representations of all selected
# prompts into one matrix and report how much variance the leading principal
# components explain.

# Number of leading principal components reported per layer.
max_components = 4

for layer in range(0, 10):
# for layer in range(0, 25):
# for layer in range(24, 25):
    print('layer=', layer)

    # Selected prompts, in the order in which they appear in the template frame.
    prompt_names = [name for name in df['name'] if name in use_pattern]
    file_names = [
        f"{task}/{model}/{module}/{name}/hidden_represenations_layer{layer}_avg.hdf5"
        for name in prompt_names
    ]
    if not file_names:
        raise ValueError(
            f"no prompts selected for task {task!r}: "
            "none of the names in use_pattern occur in df['name']"
        )

    # load hidden representations from hdf5 files
    per_prompt = []
    counts = []
    for idx, file_name in enumerate(file_names):
        hidden_representations = load_hidden_representations_from_hdf5(os.path.join(log_dir, file_name))
        n_sequences = hidden_representations.shape[0]
        per_prompt.append(hidden_representations)
        counts.append(n_sequences)

    # One concatenation at the end instead of re-copying the growing matrix
    # after every file.
    representations = np.concatenate(per_prompt, axis=0)
    # Class label of a row = index of the prompt it was produced with.
    classes = np.repeat(np.arange(len(per_prompt)), counts)

    # shuffle representations and classes
    X, y = unison_shuffled_copies(representations, classes)
    print(X.shape, y.shape)

    # perform PCA on hidden representations
    print('PCA for prompts:', prompt_names)

    # Fit once with the largest number of components. explained_variance_ratio_
    # is sorted in decreasing order and each entry is normalised by the total
    # variance of X, so its first k entries are the ratios a k-component fit
    # reports (up to solver approximation noise).
    pca = PCA(n_components=max_components)
    pca.fit(X)

    for n_components in range(1, max_components + 1):
        ratios = pca.explained_variance_ratio_[:n_components]

        # variance explained by each of the principal components
        print(f"model:{model}; module:{module}; layer:{layer}; n_components: {n_components}; variance explained: {ratios}")
        print(np.sum(ratios))
    print('\n')


layer= 0


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5182.95it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5369.81it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5051.31it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4731.59it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5082.57it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5038.31it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5190.17it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5227.36it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5227.94it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5302.29it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5155.31it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5323.20it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5077.19it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4847.11it/s]
Reading embeddings: 

(1008, 2048) (1008,)
PCA for prompts: ['null_pattern', 'null_pattern_reversed', 'gpt_3_true_false_neither', 'gpt_3_yes_no_maybe', 'mnli_crowdsource', 'always_sometimes_never', 'based_on_previous_passage', 'infer', 'claim', 'consider', 'follow', 'imply', 'guaranteed', 'guaranteed_possible', 'justified', 'must_be_true', 'should_assume', 'take_the_following']
model:bigscience-T0_3B; module:encoder; layer:0; n_components: 1; variance explained: [0.9334017]
0.9334017
model:bigscience-T0_3B; module:encoder; layer:0; n_components: 2; variance explained: [0.93340164 0.01033186]
0.9437335
model:bigscience-T0_3B; module:encoder; layer:0; n_components: 3; variance explained: [0.93340164 0.01033187 0.00871112]
0.9524446
model:bigscience-T0_3B; module:encoder; layer:0; n_components: 4; variance explained: [0.93340164 0.01033188 0.00871113 0.00424156]
0.9566862


layer= 1


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 1923.57it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 1811.58it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5107.55it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5016.90it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5189.71it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5022.37it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4946.74it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5207.08it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5203.51it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5211.93it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5193.15it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5269.34it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5214.13it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5260.61it/s]
Reading embeddings: 

(1008, 2048) (1008,)
PCA for prompts: ['null_pattern', 'null_pattern_reversed', 'gpt_3_true_false_neither', 'gpt_3_yes_no_maybe', 'mnli_crowdsource', 'always_sometimes_never', 'based_on_previous_passage', 'infer', 'claim', 'consider', 'follow', 'imply', 'guaranteed', 'guaranteed_possible', 'justified', 'must_be_true', 'should_assume', 'take_the_following']
model:bigscience-T0_3B; module:encoder; layer:1; n_components: 1; variance explained: [0.26860443]
0.26860443
model:bigscience-T0_3B; module:encoder; layer:1; n_components: 2; variance explained: [0.2686043  0.20742142]
0.47602573
model:bigscience-T0_3B; module:encoder; layer:1; n_components: 3; variance explained: [0.26860407 0.20742129 0.09264082]
0.56866616
model:bigscience-T0_3B; module:encoder; layer:1; n_components: 4; variance explained: [0.26860434 0.20742121 0.09264082 0.05665776]
0.62532413


layer= 2


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 2063.71it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 2094.07it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4622.46it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5290.35it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5135.02it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5319.10it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5425.26it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5306.97it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5309.01it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5265.80it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5365.03it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5369.08it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5271.95it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5314.29it/s]
Reading embeddings: 

(1008, 2048) (1008,)
PCA for prompts: ['null_pattern', 'null_pattern_reversed', 'gpt_3_true_false_neither', 'gpt_3_yes_no_maybe', 'mnli_crowdsource', 'always_sometimes_never', 'based_on_previous_passage', 'infer', 'claim', 'consider', 'follow', 'imply', 'guaranteed', 'guaranteed_possible', 'justified', 'must_be_true', 'should_assume', 'take_the_following']
model:bigscience-T0_3B; module:encoder; layer:2; n_components: 1; variance explained: [0.98610854]
0.98610854
model:bigscience-T0_3B; module:encoder; layer:2; n_components: 2; variance explained: [0.98610866 0.01023586]
0.9963445
model:bigscience-T0_3B; module:encoder; layer:2; n_components: 3; variance explained: [9.8610854e-01 1.0235861e-02 7.4431644e-04]
0.99708873
model:bigscience-T0_3B; module:encoder; layer:2; n_components: 4; variance explained: [9.8610854e-01 1.0235857e-02 7.4431667e-04 5.1048130e-04]
0.9975992


layer= 3


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 1834.65it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 1848.58it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4906.03it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5228.29it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5365.28it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5223.99it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5286.54it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5318.26it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5318.02it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5309.13it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5296.79it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5233.65it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5251.55it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5283.09it/s]
Reading embeddings: 

(1008, 2048) (1008,)
PCA for prompts: ['null_pattern', 'null_pattern_reversed', 'gpt_3_true_false_neither', 'gpt_3_yes_no_maybe', 'mnli_crowdsource', 'always_sometimes_never', 'based_on_previous_passage', 'infer', 'claim', 'consider', 'follow', 'imply', 'guaranteed', 'guaranteed_possible', 'justified', 'must_be_true', 'should_assume', 'take_the_following']
model:bigscience-T0_3B; module:encoder; layer:3; n_components: 1; variance explained: [0.98944396]
0.98944396
model:bigscience-T0_3B; module:encoder; layer:3; n_components: 2; variance explained: [0.9894441 0.0071629]
0.99660695
model:bigscience-T0_3B; module:encoder; layer:3; n_components: 3; variance explained: [9.894441e-01 7.162894e-03 7.858323e-04]
0.9973928
model:bigscience-T0_3B; module:encoder; layer:3; n_components: 4; variance explained: [9.8944396e-01 7.1628978e-03 7.8583311e-04 6.2971300e-04]
0.9980224


layer= 4


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 1348.97it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 2297.21it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5396.71it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5177.01it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4872.85it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5193.61it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5169.61it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5164.94it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4788.90it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5015.29it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4640.82it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5068.86it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4967.24it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5101.34it/s]
Reading embeddings: 

(1008, 2048) (1008,)
PCA for prompts: ['null_pattern', 'null_pattern_reversed', 'gpt_3_true_false_neither', 'gpt_3_yes_no_maybe', 'mnli_crowdsource', 'always_sometimes_never', 'based_on_previous_passage', 'infer', 'claim', 'consider', 'follow', 'imply', 'guaranteed', 'guaranteed_possible', 'justified', 'must_be_true', 'should_assume', 'take_the_following']
model:bigscience-T0_3B; module:encoder; layer:4; n_components: 1; variance explained: [0.9903445]
0.9903445
model:bigscience-T0_3B; module:encoder; layer:4; n_components: 2; variance explained: [0.9903445  0.00600426]
0.9963488
model:bigscience-T0_3B; module:encoder; layer:4; n_components: 3; variance explained: [9.9034452e-01 6.0042636e-03 7.6484430e-04]
0.99711365
model:bigscience-T0_3B; module:encoder; layer:4; n_components: 4; variance explained: [9.9034452e-01 6.0042590e-03 7.6484529e-04 7.4030156e-04]
0.99785393


layer= 5


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 1237.36it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3256.45it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5194.41it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4743.54it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5291.78it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5182.84it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5289.76it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5072.37it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5140.08it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5110.66it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5204.77it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5197.98it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5099.90it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5154.86it/s]
Reading embeddings: 

(1008, 2048) (1008,)
PCA for prompts: ['null_pattern', 'null_pattern_reversed', 'gpt_3_true_false_neither', 'gpt_3_yes_no_maybe', 'mnli_crowdsource', 'always_sometimes_never', 'based_on_previous_passage', 'infer', 'claim', 'consider', 'follow', 'imply', 'guaranteed', 'guaranteed_possible', 'justified', 'must_be_true', 'should_assume', 'take_the_following']
model:bigscience-T0_3B; module:encoder; layer:5; n_components: 1; variance explained: [0.9908122]
0.9908122
model:bigscience-T0_3B; module:encoder; layer:5; n_components: 2; variance explained: [0.9908122  0.00527297]
0.99608517
model:bigscience-T0_3B; module:encoder; layer:5; n_components: 3; variance explained: [9.9081200e-01 5.2729738e-03 8.8942464e-04]
0.9969744
model:bigscience-T0_3B; module:encoder; layer:5; n_components: 4; variance explained: [9.9081218e-01 5.2729757e-03 8.8942290e-04 6.8926078e-04]
0.99766386


layer= 6


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 1856.02it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 1783.25it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4838.12it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4742.48it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5084.56it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5137.16it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4796.72it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5169.04it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5203.85it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5215.29it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5204.77it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5226.66it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5236.92it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5173.02it/s]
Reading embeddings: 

(1008, 2048) (1008,)
PCA for prompts: ['null_pattern', 'null_pattern_reversed', 'gpt_3_true_false_neither', 'gpt_3_yes_no_maybe', 'mnli_crowdsource', 'always_sometimes_never', 'based_on_previous_passage', 'infer', 'claim', 'consider', 'follow', 'imply', 'guaranteed', 'guaranteed_possible', 'justified', 'must_be_true', 'should_assume', 'take_the_following']
model:bigscience-T0_3B; module:encoder; layer:6; n_components: 1; variance explained: [0.9995892]
0.9995892
model:bigscience-T0_3B; module:encoder; layer:6; n_components: 2; variance explained: [9.9958920e-01 2.7347245e-04]
0.9998627
model:bigscience-T0_3B; module:encoder; layer:6; n_components: 3; variance explained: [9.9958920e-01 2.7347263e-04 3.0252961e-05]
0.99989295
model:bigscience-T0_3B; module:encoder; layer:6; n_components: 4; variance explained: [9.9958920e-01 2.7347245e-04 3.0252924e-05 2.1571437e-05]
0.9999145


layer= 7


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4007.32it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 2349.14it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 2008.84it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5161.20it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5155.20it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5194.41it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5405.28it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5234.12it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5182.27it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5225.04it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5265.56it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5230.74it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5287.85it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5109.44it/s]
Reading embeddings: 

(1008, 2048) (1008,)
PCA for prompts: ['null_pattern', 'null_pattern_reversed', 'gpt_3_true_false_neither', 'gpt_3_yes_no_maybe', 'mnli_crowdsource', 'always_sometimes_never', 'based_on_previous_passage', 'infer', 'claim', 'consider', 'follow', 'imply', 'guaranteed', 'guaranteed_possible', 'justified', 'must_be_true', 'should_assume', 'take_the_following']
model:bigscience-T0_3B; module:encoder; layer:7; n_components: 1; variance explained: [0.99956095]
0.99956095
model:bigscience-T0_3B; module:encoder; layer:7; n_components: 2; variance explained: [9.9956095e-01 2.7372042e-04]
0.99983466
model:bigscience-T0_3B; module:encoder; layer:7; n_components: 3; variance explained: [9.9956095e-01 2.7372033e-04 3.6880487e-05]
0.99987155
model:bigscience-T0_3B; module:encoder; layer:7; n_components: 4; variance explained: [9.9956095e-01 2.7372033e-04 3.6880410e-05 2.4751727e-05]
0.9998963


layer= 8


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 2040.85it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 2070.60it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4604.43it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5313.93it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5113.67it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5070.72it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5342.45it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5200.86it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4968.92it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5025.27it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5291.07it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5303.85it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5213.90it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5259.20it/s]
Reading embeddings: 

(1008, 2048) (1008,)
PCA for prompts: ['null_pattern', 'null_pattern_reversed', 'gpt_3_true_false_neither', 'gpt_3_yes_no_maybe', 'mnli_crowdsource', 'always_sometimes_never', 'based_on_previous_passage', 'infer', 'claim', 'consider', 'follow', 'imply', 'guaranteed', 'guaranteed_possible', 'justified', 'must_be_true', 'should_assume', 'take_the_following']
model:bigscience-T0_3B; module:encoder; layer:8; n_components: 1; variance explained: [0.9995339]
0.9995339
model:bigscience-T0_3B; module:encoder; layer:8; n_components: 2; variance explained: [9.9953389e-01 2.5841262e-04]
0.9997923
model:bigscience-T0_3B; module:encoder; layer:8; n_components: 3; variance explained: [9.9953389e-01 2.5841253e-04 5.1561612e-05]
0.99984384
model:bigscience-T0_3B; module:encoder; layer:8; n_components: 4; variance explained: [9.9953389e-01 2.5841244e-04 5.1561583e-05 2.8426562e-05]
0.99987227


layer= 9


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 2091.94it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3676.91it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3109.57it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5225.38it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5313.69it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5373.50it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5370.06it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5301.34it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5210.78it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 5292.85it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4919.49it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4215.76it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4143.76it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4092.22it/s]
Reading embeddings: 

(1008, 2048) (1008,)
PCA for prompts: ['null_pattern', 'null_pattern_reversed', 'gpt_3_true_false_neither', 'gpt_3_yes_no_maybe', 'mnli_crowdsource', 'always_sometimes_never', 'based_on_previous_passage', 'infer', 'claim', 'consider', 'follow', 'imply', 'guaranteed', 'guaranteed_possible', 'justified', 'must_be_true', 'should_assume', 'take_the_following']
model:bigscience-T0_3B; module:encoder; layer:9; n_components: 1; variance explained: [0.99948126]
0.99948126
model:bigscience-T0_3B; module:encoder; layer:9; n_components: 2; variance explained: [9.9948126e-01 2.5689061e-04]
0.99973816
model:bigscience-T0_3B; module:encoder; layer:9; n_components: 3; variance explained: [9.9948114e-01 2.5689069e-04 6.2855164e-05]
0.9998009
model:bigscience-T0_3B; module:encoder; layer:9; n_components: 4; variance explained: [9.9948114e-01 2.5689078e-04 6.2855150e-05 3.3748031e-05]
0.99983466


