In [2]:
import os

import numpy as np
from sklearn.linear_model import LogisticRegression

from utils import load_hidden_representations_from_hdf5, read_templates_from_file

----

In [3]:
# Experiment parameters
# ---------------------
# The values below are combined into the paths of the hidden-representation
# files that are loaded further down in the notebook.

# Root directory under which the hidden-representation files are looked up.
log_dir = "/logfiles"

# Model identifier used as a path component.
# Alternatives noted by the original author: bigscience-T0_B or bigscience-T0
model = "bigscience-T0_3B"

# Model part used as a path component.
module = "decoder"

# Task to analyse. Alternatives used elsewhere in this notebook: "cb", "wic".
task = "rte"

## Prompts

In [4]:
# Read the prompt templates for the selected task and render them as a table.
# Alternative template file for the same task:
#   f"/t0-analysis/prompts/{task}/fixed_target_yes_no.csv"
templates_path = f"/t0-analysis/prompts/{task}/fixed_prompt.csv"
df = read_templates_from_file(templates_path)
display(df)

Unnamed: 0,name,template,category,includes_targets,targets,target_ids,shuffle
0,gpt_3_yes_no_with_targets,{premise} Question: {hypothesis} Yes or No?,instructive,True,"▁Yes, ▁No","0, 1",False
1,gpt_3_true_false_with_targets,{premise} Question: {hypothesis} True or False?,instructive,True,"▁True, ▁False","0, 1",False
2,gpt_3_cat_dog_with_targets,{premise} Question: {hypothesis} Cat or Dog?,instructive,True,"▁Cat, ▁Dog","0, 1",False
3,gpt_3_yes_no_without_targets,{premise} Question: {hypothesis}?,instructive,False,"▁Yes, ▁No","0, 1",False


## Train linear classifier

In [5]:
def unison_shuffled_copies(a, b, seed=None):
    """Return copies of ``a`` and ``b`` shuffled with one shared permutation.

    Row ``i`` of the first result and row ``i`` of the second result always
    originate from the same position in the inputs, so paired data (e.g.
    features and labels) stays aligned after shuffling.

    Args:
        a: Array-like indexed along its first axis.
        b: Array-like with the same length as ``a``.
        seed: Optional seed for a reproducible permutation. When ``None``
            (the default) the legacy global NumPy random state is used, so
            existing callers, including ones relying on ``np.random.seed``,
            behave exactly as before.

    Returns:
        Tuple ``(a_shuffled, b_shuffled)`` of NumPy arrays.

    Raises:
        AssertionError: If ``a`` and ``b`` differ in length.
    """
    # from: https://stackoverflow.com/questions/4601373/better-way-to-shuffle-two-numpy-arrays-in-unison
    # Coerce to arrays so that plain lists can be fancy-indexed below.
    a, b = np.asarray(a), np.asarray(b)
    assert len(a) == len(b), "a and b must have the same length"

    if seed is None:
        p = np.random.permutation(len(a))
    else:
        # A dedicated generator keeps the global random state untouched.
        p = np.random.default_rng(seed).permutation(len(a))
    return a[p], b[p]

In [6]:
# Names of the prompt templates to compare, keyed by task.
# Only templates whose name appears in the list for the active `task` are
# loaded by the classification loop below.
_patterns_by_task = {
    'rte': [
        "gpt_3_yes_no_with_targets",
        "gpt_3_true_false_with_targets",
        # "gpt_3_cat_dog_with_targets",
        # "gpt_3_yes_no_without_targets",
    ],
    'cb': [
        "null_pattern",
        "null_pattern_reversed",
        "gpt_3_true_false_neither",
        "gpt_3_yes_no_maybe",
        "mnli_crowdsource",
        "always_sometimes_never",
        "based_on_previous_passage",
        "infer",
        "claim",
        "consider",
        "follow",
        "imply",
        "guaranteed",
        "guaranteed_possible",
        "justified",
        "must_be_true",
        "should_assume",
        "take_the_following",
    ],
    'wic': [
        "gpt_3",
        "gpt_3_yes_no",
        "affirmation",
        "grammar_homework",
        "polysemous",
        "question_context",
        "question_meaning",
        "question_meaning_yes_no",
        "same_sense",
        "similar_sense",
        "similar_sense_yes_no",
    ],
}

# Fail loudly on an unsupported task. Without this check `use_pattern` would
# stay undefined on a fresh kernel, or silently keep the value from a previous
# run of this cell with a different task.
if task not in _patterns_by_task:
    raise ValueError(
        f"Unsupported task {task!r}; expected one of {sorted(_patterns_by_task)}"
    )

use_pattern = list(_patterns_by_task[task])

In [9]:
# Linear probe: for every (token, layer) pair, fit a logistic-regression
# classifier that predicts WHICH PROMPT a hidden representation came from.
#
# NOTE(review): the reported accuracy is measured on the same data the
# classifier was fitted on. In the recorded run X is 554 x 2048 (more features
# than samples), so perfect training accuracy is easy to reach; a held-out
# split would be needed to claim anything about generalisation.

# The prompt selection does not depend on token or layer, so compute it once.
# The order follows the rows of `df`; the position of a prompt in this list is
# the class index assigned to its representations.
prompt_names = [name for name in df['name'] if name in use_pattern]
if len(prompt_names) < 2:
    raise ValueError(
        f"Need at least two prompts to classify between, but only {prompt_names} "
        f"from use_pattern were found in the templates for task {task!r}."
    )

for t in range(2):
    # for layer in range(0, 1):
    for layer in range(0, 25):
    # for layer in range(24, 25):

        print(f"token: {t}\tlayer: {layer}")

        # One file per selected prompt. The spelling "represenations" is part
        # of the path that is opened, so it has to match the files on disk.
        file_names = [
            f"{task}/{model}/{module}/{name}/hidden_represenations_t{t}_layer{layer}_avg.hdf5"
            for name in prompt_names
        ]

        # load hidden representations from hdf5 file
        per_prompt = [
            load_hidden_representations_from_hdf5(os.path.join(log_dir, file_name))
            for file_name in file_names
        ]

        # Stack all prompts once instead of re-concatenating inside the loop.
        representations = np.concatenate(per_prompt, axis=0)

        # assign representations to classes: every row loaded from the idx-th
        # file gets label idx
        classes = np.repeat(
            np.arange(len(per_prompt)),
            [hidden.shape[0] for hidden in per_prompt],
        )

        # shuffle representations and classes
        X, y = unison_shuffled_copies(representations, classes)
        print(X.shape, y.shape)

        # train linear classifier
        # multi_class='multinomial' uses a CE loss
        # NOTE(review): `multi_class` is reported as deprecated in recent
        # scikit-learn releases; confirm against the installed version before
        # upgrading. Some layers also hit max_iter without converging (see the
        # lbfgs messages in the recorded output).
        print('classifying between:', prompt_names)
        clf = LogisticRegression(random_state=0, max_iter=2000, multi_class='multinomial').fit(X, y)

        print(f'token={t}; layer={layer}; accuracy on training data: ', clf.score(X, y))
        print('\n')



token: 0	layer: 0


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5675.73it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5810.88it/s]


(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']
token=0; layer=0; accuracy on training data:  0.5


token: 0	layer: 1


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4737.34it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5588.50it/s]


(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']
token=0; layer=1; accuracy on training data:  1.0


token: 0	layer: 2


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4327.20it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5444.39it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=0; layer=2; accuracy on training data:  1.0


token: 0	layer: 3


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4092.44it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5379.73it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=0; layer=3; accuracy on training data:  1.0


token: 0	layer: 4


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4396.43it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5565.96it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=0; layer=4; accuracy on training data:  1.0


token: 0	layer: 5


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4235.56it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4667.32it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


token=0; layer=5; accuracy on training data:  1.0


token: 0	layer: 6


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4516.05it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5008.18it/s]


(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


token=0; layer=6; accuracy on training data:  1.0


token: 0	layer: 7


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3949.40it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5315.66it/s]


(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


token=0; layer=7; accuracy on training data:  1.0


token: 0	layer: 8


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4377.18it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4438.79it/s]


(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']
token=0; layer=8; accuracy on training data:  1.0


token: 0	layer: 9


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4397.23it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5470.10it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=0; layer=9; accuracy on training data:  1.0


token: 0	layer: 10


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 2832.32it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4765.04it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=0; layer=10; accuracy on training data:  1.0


token: 0	layer: 11


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4170.04it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5566.55it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=0; layer=11; accuracy on training data:  1.0


token: 0	layer: 12


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3267.81it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5163.40it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=0; layer=12; accuracy on training data:  1.0


token: 0	layer: 13


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3084.46it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5915.47it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=0; layer=13; accuracy on training data:  1.0


token: 0	layer: 14


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4394.93it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5302.63it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=0; layer=14; accuracy on training data:  1.0


token: 0	layer: 15


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4356.14it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5588.26it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=0; layer=15; accuracy on training data:  1.0


token: 0	layer: 16


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4321.76it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5377.31it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=0; layer=16; accuracy on training data:  1.0


token: 0	layer: 17


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3540.44it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5564.84it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=0; layer=17; accuracy on training data:  1.0


token: 0	layer: 18


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4019.77it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5617.06it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=0; layer=18; accuracy on training data:  1.0


token: 0	layer: 19


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3365.03it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4507.33it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=0; layer=19; accuracy on training data:  1.0


token: 0	layer: 20


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4039.62it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5694.51it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=0; layer=20; accuracy on training data:  1.0


token: 0	layer: 21


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4110.58it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5005.63it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=0; layer=21; accuracy on training data:  1.0


token: 0	layer: 22


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3964.82it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5623.02it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=0; layer=22; accuracy on training data:  1.0


token: 0	layer: 23


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3473.50it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5368.49it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=0; layer=23; accuracy on training data:  1.0


token: 0	layer: 24


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3986.31it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5639.31it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=0; layer=24; accuracy on training data:  1.0


token: 1	layer: 0


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3640.80it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5920.48it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=0; accuracy on training data:  1.0


token: 1	layer: 1


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3434.74it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5765.67it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=1; accuracy on training data:  1.0


token: 1	layer: 2


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4342.77it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5491.80it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=2; accuracy on training data:  1.0


token: 1	layer: 3


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3617.66it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5508.93it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=3; accuracy on training data:  1.0


token: 1	layer: 4


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4139.89it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5540.93it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=4; accuracy on training data:  1.0


token: 1	layer: 5


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4076.86it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4589.46it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=5; accuracy on training data:  1.0


token: 1	layer: 6


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3866.66it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5557.31it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=6; accuracy on training data:  1.0


token: 1	layer: 7


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3879.79it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5743.41it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=7; accuracy on training data:  1.0


token: 1	layer: 8


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3561.64it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5896.98it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=8; accuracy on training data:  1.0


token: 1	layer: 9


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3162.16it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5933.87it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=9; accuracy on training data:  1.0


token: 1	layer: 10


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4356.60it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5471.83it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=10; accuracy on training data:  1.0


token: 1	layer: 11


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3429.94it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5031.80it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=11; accuracy on training data:  1.0


token: 1	layer: 12


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4340.97it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5602.55it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=12; accuracy on training data:  1.0


token: 1	layer: 13


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4320.89it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5380.48it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=13; accuracy on training data:  1.0


token: 1	layer: 14


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4052.91it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4731.68it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=14; accuracy on training data:  1.0


token: 1	layer: 15


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3749.59it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5797.14it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=15; accuracy on training data:  1.0


token: 1	layer: 16


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4367.62it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5616.11it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=16; accuracy on training data:  1.0


token: 1	layer: 17


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4247.03it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5567.11it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=17; accuracy on training data:  1.0


token: 1	layer: 18


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4134.95it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5550.25it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=18; accuracy on training data:  1.0


token: 1	layer: 19


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4101.63it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5665.68it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=19; accuracy on training data:  1.0


token: 1	layer: 20


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3403.42it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5759.84it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=20; accuracy on training data:  1.0


token: 1	layer: 21


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 3273.65it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5395.24it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=21; accuracy on training data:  1.0


token: 1	layer: 22


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4280.25it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5485.68it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=22; accuracy on training data:  1.0


token: 1	layer: 23


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4310.53it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5538.63it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=23; accuracy on training data:  1.0


token: 1	layer: 24


Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 4379.79it/s]
Reading embeddings: 100%|██████████| 277/277 [00:00<00:00, 5500.71it/s]

(554, 2048) (554,)
classifying between: ['gpt_3_yes_no_with_targets', 'gpt_3_true_false_with_targets']





token=1; layer=24; accuracy on training data:  1.0


